Merge tag 'leds_for_4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/j.anaszews...

author Linus Torvalds <torvalds@linux-foundation.org>

Mon, 1 May 2017 20:12:49 +0000 (13:12 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Mon, 1 May 2017 20:12:49 +0000 (13:12 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 1 May 2017 20:12:49 +0000 (13:12 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 1 May 2017 20:12:49 +0000 (13:12 -0700)
diff --git a/.mailmap b/.mailmap

index 67dc22ffc9a80cb4fd6abeeba3d2ec7f7ae2ba19..1d6f4e7280dc67f630b79456b3f1e5a4c0a41343 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -99,6 +99,8 @@ Linas Vepstas <linas@austin.ibm.com>
  Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@web.de>
  Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch>
  Mark Brown <broonie@sirena.org.uk>
+Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
+Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
  Matthieu CASTET <castet.matthieu@free.fr>
  Mauro Carvalho Chehab <mchehab@kernel.org> <mchehab@brturbo.com.br>
  Mauro Carvalho Chehab <mchehab@kernel.org> <maurochehab@gmail.com>
@@ -171,6 +173,7 @@ Vlad Dogaru <ddvlad@gmail.com> <vlad.dogaru@intel.com>
  Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@virtuozzo.com>
  Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@parallels.com>
  Takashi YOSHII <takashi.yoshii.zj@renesas.com>
+Yakir Yang <kuankuan.y@gmail.com> <ykk@rock-chips.com>
  Yusuke Goda <goda.yusuke@renesas.com>
  Gustavo Padovan <gustavo@las.ic.unicamp.br>
  Gustavo Padovan <padovan@profusion.mobi>
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block

index 2da04ce6aeef482645bfd9edd924ce195275ea26..dea212db9df3531f3dfc69204c8e4fcac8ec2bb5 100644 (file)
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -213,14 +213,8 @@ What:              /sys/block/<disk>/queue/discard_zeroes_data
  Date:          May 2011
  Contact:       Martin K. Petersen <martin.petersen@oracle.com>
  Description:
-               Devices that support discard functionality may return
-               stale or random data when a previously discarded block
-               is read back. This can cause problems if the filesystem
-               expects discarded blocks to be explicitly cleared. If a
-               device reports that it deterministically returns zeroes
-               when a discarded area is read the discard_zeroes_data
-               parameter will be set to one. Otherwise it will be 0 and
-               the result of reading a discarded area is undefined.
+               Will always return 0.  Don't rely on any specific behavior
+               for discards, and don't read this file.
  
  What:          /sys/block/<disk>/queue/write_same_max_bytes
  Date:          January 2012
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt

index 986e44387dad493e268ab93253120df73abc3045..facc20a3f96280472396ad3f7d2e8f2dba62fecc 100644 (file)
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -653,6 +653,9 @@
         cpuidle.off=1   [CPU_IDLE]
                         disable the cpuidle sub-system
  
+       cpufreq.off=1   [CPU_FREQ]
+                       disable the cpufreq sub-system
+
         cpu_init_udelay=N
                         [X86] Delay for N microsec between assert and de-assert
                         of APIC INIT to start processors.  This delay occurs
@@ -1183,6 +1186,12 @@
                         functions that can be changed at run time by the
                         set_graph_notrace file in the debugfs tracing directory.
  
+       ftrace_graph_max_depth=<uint>
+                       [FTRACE] Used with the function graph tracer. This is
+                       the max depth it will trace into a function. This value
+                       can be changed at run time by the max_graph_depth file
+                       in the tracefs tracing directory. default: 0 (no limit)
+
         gamecon.map[2|3]=
                         [HW,JOY] Multisystem joystick and NES/SNES/PSX pad
                         support via parallel port (up to 5 devices per port)
@@ -1716,6 +1725,12 @@
                         kernel and module base offset ASLR (Address Space
                         Layout Randomization).
  
+       kasan_multi_shot
+                       [KNL] Force KASAN (Kernel Address Sanitizer) to print
+                       a report on every invalid memory access. Without this
+                       parameter KASAN will print a report only for the first
+                       invalid access.
+
         keepinitrd      [HW,ARM]
  
         kernelcore=     [KNL,X86,IA-64,PPC]
diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt

index a71b8095dbd8df44603f18e7435b490d7b5c56c9..2f66683500b8e44e0ceb44bc877acccae39b35d7 100644 (file)
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -68,3 +68,4 @@ stable kernels.
  |                |                 |                 |                             |
  | Qualcomm Tech. | Falkor v1       | E1003           | QCOM_FALKOR_ERRATUM_1003    |
  | Qualcomm Tech. | Falkor v1       | E1009           | QCOM_FALKOR_ERRATUM_1009    |
+| Qualcomm Tech. | QDF2400 ITS     | E0065           | QCOM_QDF2400_ERRATUM_0065   |
diff --git a/Documentation/block/00-INDEX b/Documentation/block/00-INDEX

index e55103ace382a093050ab70f5e0a6b92e4faa89a..8d55b4bbb5e2ef03344f3f4f8e920ad01f7d0e04 100644 (file)
--- a/Documentation/block/00-INDEX
+++ b/Documentation/block/00-INDEX
@@ -1,5 +1,7 @@
  00-INDEX
         - This file
+bfq-iosched.txt
+       - BFQ IO scheduler and its tunables
  biodoc.txt
         - Notes on the Generic Block Layer Rewrite in Linux 2.5
  biovecs.txt
diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt

new file mode 100644 (file)

index 0000000..1b87df6
--- /dev/null
+++ b/Documentation/block/bfq-iosched.txt
@@ -0,0 +1,531 @@
+BFQ (Budget Fair Queueing)
+==========================
+
+BFQ is a proportional-share I/O scheduler, with some extra
+low-latency capabilities. In addition to cgroups support (blkio or io
+controllers), BFQ's main features are:
+- BFQ guarantees a high system and application responsiveness, and a
+  low latency for time-sensitive applications, such as audio or video
+  players;
+- BFQ distributes bandwidth, and not just time, among processes or
+  groups (switching back to time distribution when needed to keep
+  throughput high).
+
+On average CPUs, the current version of BFQ can handle devices
+performing at most ~30K IOPS; at most ~50 KIOPS on faster CPUs. As a
+reference, 30-50 KIOPS correspond to very high bandwidths with
+sequential I/O (e.g., 8-12 GB/s if I/O requests are 256 KB large), and
+to 120-200 MB/s with 4KB random I/O. BFQ has not yet been tested on
+multi-queue devices.
+
+The table of contents follows. Impatient readers can jump to Section 3.
+
+CONTENTS
+
+1. When may BFQ be useful?
+ 1-1 Personal systems
+ 1-2 Server systems
+2. How does BFQ work?
+3. What are BFQ's tunables?
+4. BFQ group scheduling
+ 4-1 Service guarantees provided
+ 4-2 Interface
+
+1. When may BFQ be useful?
+==========================
+
+BFQ provides the following benefits on personal and server systems.
+
+1-1 Personal systems
+--------------------
+
+Low latency for interactive applications
+
+Regardless of the actual background workload, BFQ guarantees that, for
+interactive tasks, the storage device is virtually as responsive as if
+it was idle. For example, even if one or more of the following
+background workloads are being executed:
+- one or more large files are being read, written or copied,
+- a tree of source files is being compiled,
+- one or more virtual machines are performing I/O,
+- a software update is in progress,
+- indexing daemons are scanning filesystems and updating their
+  databases,
+starting an application or loading a file from within an application
+takes about the same time as if the storage device was idle. As a
+comparison, with CFQ, NOOP or DEADLINE, and in the same conditions,
+applications experience high latencies, or even become unresponsive
+until the background workload terminates (also on SSDs).
+
+Low latency for soft real-time applications
+
+Also soft real-time applications, such as audio and video
+players/streamers, enjoy a low latency and a low drop rate, regardless
+of the background I/O workload. As a consequence, these applications
+suffer almost no glitches due to the background workload.
+
+Higher speed for code-development tasks
+
+If some additional workload happens to be executed in parallel, then
+BFQ executes the I/O-related components of typical code-development
+tasks (compilation, checkout, merge, ...) much more quickly than CFQ,
+NOOP or DEADLINE.
+
+High throughput
+
+On hard disks, BFQ achieves up to 30% higher throughput than CFQ, and
+up to 150% higher throughput than DEADLINE and NOOP, with all the
+sequential workloads considered in our tests. With random workloads,
+and with all the workloads on flash-based devices, BFQ achieves,
+instead, about the same throughput as the other schedulers.
+
+Strong fairness, bandwidth and delay guarantees
+
+BFQ distributes the device throughput, and not just the device time,
+among I/O-bound applications in proportion to their weights, with any
+workload and regardless of the device parameters. From these bandwidth
+guarantees, it is possible to compute tight per-I/O-request delay
+guarantees by a simple formula. If not configured for strict service
+guarantees, BFQ switches to time-based resource sharing (only) for
+applications that would otherwise cause a throughput loss.
+
+1-2 Server systems
+------------------
+
+Most benefits for server systems follow from the same service
+properties as above. In particular, regardless of whether additional,
+possibly heavy workloads are being served, BFQ guarantees:
+
+. audio and video-streaming with zero or very low jitter and drop
+  rate;
+
+. fast retrieval of WEB pages and embedded objects;
+
+. real-time recording of data in live-dumping applications (e.g.,
+  packet logging);
+
+. responsiveness in local and remote access to a server.
+
+
+2. How does BFQ work?
+=====================
+
+BFQ is a proportional-share I/O scheduler, whose general structure,
+plus a lot of code, are borrowed from CFQ.
+
+- Each process doing I/O on a device is associated with a weight and a
+  (bfq_)queue.
+
+- BFQ grants exclusive access to the device, for a while, to one queue
+  (process) at a time, and implements this service model by
+  associating every queue with a budget, measured in number of
+  sectors.
+
+  - After a queue is granted access to the device, the budget of the
+    queue is decremented, on each request dispatch, by the size of the
+    request.
+
+  - The in-service queue is expired, i.e., its service is suspended,
+    only if one of the following events occurs: 1) the queue finishes
+    its budget, 2) the queue empties, 3) a "budget timeout" fires.
+
+    - The budget timeout prevents processes doing random I/O from
+      holding the device for too long and dramatically reducing
+      throughput.
+
+    - Actually, as in CFQ, a queue associated with a process issuing
+      sync requests may not be expired immediately when it empties. In
+      contrast, BFQ may idle the device for a short time interval,
+      giving the process the chance to go on being served if it issues
+      a new request in time. Device idling typically boosts the
+      throughput on rotational devices, if processes do synchronous
+      and sequential I/O. In addition, under BFQ, device idling is
+      also instrumental in guaranteeing the desired throughput
+      fraction to processes issuing sync requests (see the description
+      of the slice_idle tunable in this document, or [1, 2], for more
+      details).
+
+      - With respect to idling for service guarantees, if several
+       processes are competing for the device at the same time, but
+       all processes (and groups, after the following commit) have
+       the same weight, then BFQ guarantees the expected throughput
+       distribution without ever idling the device. Throughput is
+       thus as high as possible in this common scenario.
+
+  - If low-latency mode is enabled (default configuration), BFQ
+    executes some special heuristics to detect interactive and soft
+    real-time applications (e.g., video or audio players/streamers),
+    and to reduce their latency. The most important action taken to
+    achieve this goal is to give to the queues associated with these
+    applications more than their fair share of the device
+    throughput. For brevity, we call just "weight-raising" the whole
+    sets of actions taken by BFQ to privilege these queues. In
+    particular, BFQ provides a milder form of weight-raising for
+    interactive applications, and a stronger form for soft real-time
+    applications.
+
+  - BFQ automatically deactivates idling for queues born in a burst of
+    queue creations. In fact, these queues are usually associated with
+    the processes of applications and services that benefit mostly
+    from a high throughput. Examples are systemd during boot, or git
+    grep.
+
+  - Like CFQ, BFQ merges queues performing interleaved I/O, i.e.,
+    performing random I/O that becomes mostly sequential if
+    merged. Differently from CFQ, BFQ achieves this goal with a more
+    reactive mechanism, called Early Queue Merge (EQM). EQM is so
+    responsive in detecting interleaved I/O (cooperating processes),
+    that it enables BFQ to achieve a high throughput, by queue
+    merging, even for queues for which CFQ needs a different
+    mechanism, preemption, to get a high throughput. As such EQM is a
+    unified mechanism to achieve a high throughput with interleaved
+    I/O.
+
+  - Queues are scheduled according to a variant of WF2Q+, named
+    B-WF2Q+, and implemented using an augmented rb-tree to preserve an
+    O(log N) overall complexity.  See [2] for more details. B-WF2Q+ is
+    also ready for hierarchical scheduling. However, for a cleaner
+    logical breakdown, the code that enables and completes
+    hierarchical support is provided in the next commit, which focuses
+    exactly on this feature.
+
+  - B-WF2Q+ guarantees a tight deviation with respect to an ideal,
+    perfectly fair, and smooth service. In particular, B-WF2Q+
+    guarantees that each queue receives a fraction of the device
+    throughput proportional to its weight, even if the throughput
+    fluctuates, and regardless of: the device parameters, the current
+    workload and the budgets assigned to the queue.
+
+  - The last, budget-independence, property (although probably
+    counterintuitive in the first place) is definitely beneficial, for
+    the following reasons:
+
+    - First, with any proportional-share scheduler, the maximum
+      deviation with respect to an ideal service is proportional to
+      the maximum budget (slice) assigned to queues. As a consequence,
+      BFQ can keep this deviation tight not only because of the
+      accurate service of B-WF2Q+, but also because BFQ *does not*
+      need to assign a larger budget to a queue to let the queue
+      receive a higher fraction of the device throughput.
+
+    - Second, BFQ is free to choose, for every process (queue), the
+      budget that best fits the needs of the process, or best
+      leverages the I/O pattern of the process. In particular, BFQ
+      updates queue budgets with a simple feedback-loop algorithm that
+      allows a high throughput to be achieved, while still providing
+      tight latency guarantees to time-sensitive applications. When
+      the in-service queue expires, this algorithm computes the next
+      budget of the queue so as to:
+
+      - Let large budgets be eventually assigned to the queues
+       associated with I/O-bound applications performing sequential
+       I/O: in fact, the longer these applications are served once
+       they get access to the device, the higher the throughput is.
+
+      - Let small budgets be eventually assigned to the queues
+       associated with time-sensitive applications (which typically
+       perform sporadic and short I/O), because, the smaller the
+       budget assigned to a queue waiting for service is, the sooner
+       B-WF2Q+ will serve that queue (Subsec 3.3 in [2]).
+
+- If several processes are competing for the device at the same time,
+  but all processes and groups have the same weight, then BFQ
+  guarantees the expected throughput distribution without ever idling
+  the device. It uses preemption instead. Throughput is then much
+  higher in this common scenario.
+
+- ioprio classes are served in strict priority order, i.e.,
+  lower-priority queues are not served as long as there are
+  higher-priority queues.  Among queues in the same class, the
+  bandwidth is distributed in proportion to the weight of each
+  queue. A very thin extra bandwidth is however guaranteed to
+  the Idle class, to prevent it from starving.
+
+
+3. What are BFQ's tunables?
+===========================
+
+The tunables back_seek_max, back_seek_penalty, fifo_expire_async and
+fifo_expire_sync below are the same as in CFQ. Their description is
+just copied from that for CFQ. Some considerations in the description
+of slice_idle are copied from CFQ too.
+
+per-process ioprio and weight
+-----------------------------
+
+Unless the cgroups interface is used (see "4. BFQ group scheduling"),
+weights can be assigned to processes only indirectly, through I/O
+priorities, and according to the relation:
+weight = (IOPRIO_BE_NR - ioprio) * 10.
+
+Beware that, if low-latency is set, then BFQ automatically raises the
+weight of the queues associated with interactive and soft real-time
+applications. Unset this tunable if you need/want to control weights.
+
+slice_idle
+----------
+
+This parameter specifies how long BFQ should idle for next I/O
+request, when certain sync BFQ queues become empty. By default
+slice_idle is a non-zero value. Idling has a double purpose: boosting
+throughput and making sure that the desired throughput distribution is
+respected (see the description of how BFQ works, and, if needed, the
+papers referenced there).
+
+As for throughput, idling can be very helpful on highly seeky media
+like single spindle SATA/SAS disks where we can cut down on overall
+number of seeks and see improved throughput.
+
+Setting slice_idle to 0 will remove all the idling on queues and one
+should see an overall improved throughput on faster storage devices
+like multiple SATA/SAS disks in hardware RAID configuration.
+
+So depending on storage and workload, it might be useful to set
+slice_idle=0.  In general for SATA/SAS disks and software RAID of
+SATA/SAS disks keeping slice_idle enabled should be useful. For any
+configurations where there are multiple spindles behind single LUN
+(Host based hardware RAID controller or for storage arrays), setting
+slice_idle=0 might end up in better throughput and acceptable
+latencies.
+
+Idling is however necessary to have service guarantees enforced in
+case of differentiated weights or differentiated I/O-request lengths.
+To see why, suppose that a given BFQ queue A must get several I/O
+requests served for each request served for another queue B. Idling
+ensures that, if A makes a new I/O request slightly after becoming
+empty, then no request of B is dispatched in the middle, and thus A
+does not lose the possibility to get more than one request dispatched
+before the next request of B is dispatched. Note that idling
+guarantees the desired differentiated treatment of queues only in
+terms of I/O-request dispatches. To guarantee that the actual service
+order then corresponds to the dispatch order, the strict_guarantees
+tunable must be set too.
+
+There is an important flipside for idling: apart from the above cases
+where it is beneficial also for throughput, idling can severely impact
+throughput. One important case is random workload. Because of this
+issue, BFQ tends to avoid idling as much as possible, when it is not
+beneficial also for throughput. As a consequence of this behavior, and
+of further issues described for the strict_guarantees tunable,
+short-term service guarantees may be occasionally violated. And, in
+some cases, these guarantees may be more important than guaranteeing
+maximum throughput. For example, in video playing/streaming, a very
+low drop rate may be more important than maximum throughput. In these
+cases, consider setting the strict_guarantees parameter.
+
+strict_guarantees
+-----------------
+
+If this parameter is set (default: unset), then BFQ
+
+- always performs idling when the in-service queue becomes empty;
+
+- forces the device to serve one I/O request at a time, by dispatching a
+  new request only if there is no outstanding request.
+
+In the presence of differentiated weights or I/O-request sizes, both
+the above conditions are needed to guarantee that every BFQ queue
+receives its allotted share of the bandwidth. The first condition is
+needed for the reasons explained in the description of the slice_idle
+tunable.  The second condition is needed because all modern storage
+devices reorder internally-queued requests, which may trivially break
+the service guarantees enforced by the I/O scheduler.
+
+Setting strict_guarantees may evidently affect throughput.
+
+back_seek_max
+-------------
+
+This specifies, given in Kbytes, the maximum "distance" for backward seeking.
+The distance is the amount of space from the current head location to the
+sectors that are backward in terms of distance.
+
+This parameter allows the scheduler to anticipate requests in the "backward"
+direction and consider them as being the "next" if they are within this
+distance from the current head location.
+
+back_seek_penalty
+-----------------
+
+This parameter is used to compute the cost of backward seeking. If the
+backward distance of request is just 1/back_seek_penalty from a "front"
+request, then the seeking cost of two requests is considered equivalent.
+
+So scheduler will not bias toward one or the other request (otherwise scheduler
+will bias toward front request). Default value of back_seek_penalty is 2.
+
+fifo_expire_async
+-----------------
+
+This parameter is used to set the timeout of asynchronous requests. Default
+value of this is 248ms.
+
+fifo_expire_sync
+----------------
+
+This parameter is used to set the timeout of synchronous requests. Default
+value of this is 124ms. To favor synchronous requests over asynchronous
+ones, this value should be decreased relative to fifo_expire_async.
+
+low_latency
+-----------
+
+This parameter is used to enable/disable BFQ's low latency mode. By
+default, low latency mode is enabled. If enabled, interactive and soft
+real-time applications are privileged and experience a lower latency,
+as explained in more detail in the description of how BFQ works.
+
+DO NOT enable this mode if you need full control over bandwidth
+distribution. In fact, if it is enabled, then BFQ automatically
+increases the bandwidth share of privileged applications, as the main
+means to guarantee a lower latency to them.
+
+timeout_sync
+------------
+
+Maximum amount of device time that can be given to a task (queue) once
+it has been selected for service. On devices with costly seeks,
+increasing this time usually increases maximum throughput. On the
+opposite end, increasing this time coarsens the granularity of the
+short-term bandwidth and latency guarantees, especially if the
+following parameter is set to zero.
+
+max_budget
+----------
+
+Maximum amount of service, measured in sectors, that can be provided
+to a BFQ queue once it is set in service (of course within the limits
+of the above timeout). According to what is said in the description of
+the algorithm, larger values increase the throughput in proportion to
+the percentage of sequential I/O requests issued. The price of larger
+values is that they coarsen the granularity of short-term bandwidth
+and latency guarantees.
+
+The default value is 0, which enables auto-tuning: BFQ sets max_budget
+to the maximum number of sectors that can be served during
+timeout_sync, according to the estimated peak rate.
+
+weights
+-------
+
+Read-only parameter, used to show the weights of the currently active
+BFQ queues.
+
+
+wr_ tunables
+------------
+
+BFQ exports a few parameters to control/tune the behavior of
+low-latency heuristics.
+
+wr_coeff
+
+Factor by which the weight of a weight-raised queue is multiplied. If
+the queue is deemed soft real-time, then the weight is further
+multiplied by an additional, constant factor.
+
+wr_max_time
+
+Maximum duration of a weight-raising period for an interactive task
+(ms). If set to zero (default value), then this value is computed
+automatically, as a function of the peak rate of the device. In any
+case, when the value of this parameter is read, it always reports the
+current duration, regardless of whether it has been set manually or
+computed automatically.
+
+wr_max_softrt_rate
+
+Maximum service rate below which a queue is deemed to be associated
+with a soft real-time application, and is then weight-raised
+accordingly (sectors/sec).
+
+wr_min_idle_time
+
+Minimum idle period after which interactive weight-raising may be
+reactivated for a queue (in ms).
+
+wr_rt_max_time
+
+Maximum weight-raising duration for soft real-time queues (in ms). The
+start time from which this duration is considered is automatically
+moved forward if the queue is detected to be still soft real-time
+before the current soft real-time weight-raising period finishes.
+
+wr_min_inter_arr_async
+
+Minimum period between I/O request arrivals after which weight-raising
+may be reactivated for an already busy async queue (in ms).
+
+
+4. BFQ group scheduling
+=======================
+
+BFQ supports both cgroups-v1 and cgroups-v2 io controllers, namely
+blkio and io. In particular, BFQ supports weight-based proportional
+share. To activate cgroups support, set BFQ_GROUP_IOSCHED.
+
+4-1 Service guarantees provided
+-------------------------------
+
+With BFQ, proportional share means true proportional share of the
+device bandwidth, according to group weights. For example, a group
+with weight 200 gets twice the bandwidth, and not just twice the time,
+of a group with weight 100.
+
+BFQ supports hierarchies (group trees) of any depth. Bandwidth is
+distributed among groups and processes in the expected way: for each
+group, the children of the group share the whole bandwidth of the
+group in proportion to their weights. In particular, this implies
+that, for each leaf group, every process of the group receives the
+same share of the whole group bandwidth, unless the ioprio of the
+process is modified.
+
+The resource-sharing guarantee for a group may partially or totally
+switch from bandwidth to time, if providing bandwidth guarantees to
+the group lowers the throughput too much. This switch occurs on a
+per-process basis: if a process of a leaf group causes throughput loss
+if served in such a way to receive its share of the bandwidth, then
+BFQ switches back to just time-based proportional share for that
+process.
+
+4-2 Interface
+-------------
+
+To get proportional sharing of bandwidth with BFQ for a given device,
+BFQ must of course be the active scheduler for that device.
+
+Within each group directory, the names of the files associated with
+BFQ-specific cgroup parameters and stats begin with the "bfq."
+prefix. So, with cgroups-v1 or cgroups-v2, the full prefix for
+BFQ-specific files is "blkio.bfq." or "io.bfq." For example, the group
+parameter to set the weight of a group with BFQ is blkio.bfq.weight
+or io.bfq.weight.
+
+Parameters to set
+-----------------
+
+For each group, there is only the following parameter to set.
+
+weight (namely blkio.bfq.weight or io.bfq.weight): the weight of the
+group inside its parent. Available values: 1..10000 (default 100). The
+linear mapping between ioprio and weights, described at the beginning
+of the tunable section, is still valid, but all weights higher than
+IOPRIO_BE_NR*10 are mapped to ioprio 0.
+
+Recall that, if low-latency is set, then BFQ automatically raises the
+weight of the queues associated with interactive and soft real-time
+applications. Unset this tunable if you need/want to control weights.
+
+
+[1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O
+    Scheduler", Proceedings of the First Workshop on Mobile System
+    Technologies (MST-2015), May 2015.
+    http://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf
+
+[2] P. Valente and M. Andreolini, "Improving Application
+    Responsiveness with the BFQ Disk I/O Scheduler", Proceedings of
+    the 5th Annual International Systems and Storage Conference
+    (SYSTOR '12), June 2012.
+    Slightly extended version:
+    http://algogroup.unimore.it/people/paolo/disk_sched/bfq-v1-suite-
+                                                       results.pdf
diff --git a/Documentation/block/kyber-iosched.txt b/Documentation/block/kyber-iosched.txt

new file mode 100644 (file)

index 0000000..e94feac
--- /dev/null
+++ b/Documentation/block/kyber-iosched.txt
@@ -0,0 +1,14 @@
+Kyber I/O scheduler tunables
+============================
+
+The only two tunables for the Kyber scheduler are the target latencies for
+reads and synchronous writes. Kyber will throttle requests in order to meet
+these target latencies.
+
+read_lat_nsec
+-------------
+Target latency for reads (in nanoseconds).
+
+write_lat_nsec
+--------------
+Target latency for synchronous writes (in nanoseconds).
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt

index c0a3bb5a6e4eb291d077f10633001c439563ccc2..2c1e67058fd3bdf02336a71dc34ec885b9b84c2d 100644 (file)
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -43,11 +43,6 @@ large discards are issued, setting this value lower will make Linux issue
  smaller discards and potentially help reduce latencies induced by large
  discard operations.
  
-discard_zeroes_data (RO)
-------------------------
-When read, this file will show if the discarded block are zeroed by the
-device or not. If its value is '1' the blocks are zeroed otherwise not.
-
  hw_sector_size (RO)
  -------------------
  This is the hardware sector size of the device, in bytes.
@@ -192,5 +187,11 @@ scaling back writes. Writing a value of '0' to this file disables the
  feature. Writing a value of '-1' to this file resets the value to the
  default setting.
  
+throttle_sample_time (RW)
+-------------------------
+This is the time window over which blk-throttle samples data, in
+milliseconds. blk-throttle makes decisions based on these samples. A lower
+value means cgroups have smoother throughput, but higher CPU overhead. This
+exists only when CONFIG_BLK_DEV_THROTTLING_LOW is enabled.
  
  Jens Axboe <jens.axboe@oracle.com>, February 2009
diff --git a/Documentation/blockdev/mflash.txt b/Documentation/blockdev/mflash.txt

deleted file mode 100644 (file)

index f7e0505..0000000
--- a/Documentation/blockdev/mflash.txt
+++ /dev/null
@@ -1,84 +0,0 @@
-This document describes m[g]flash support in linux.
-
-Contents
-  1. Overview
-  2. Reserved area configuration
-  3. Example of mflash platform driver registration
-
-1. Overview
-
-Mflash and gflash are embedded flash drive. The only difference is mflash is
-MCP(Multi Chip Package) device. These two device operate exactly same way.
-So the rest mflash repersents mflash and gflash altogether.
-
-Internally, mflash has nand flash and other hardware logics and supports
-2 different operation (ATA, IO) modes. ATA mode doesn't need any new
-driver and currently works well under standard IDE subsystem. Actually it's
-one chip SSD. IO mode is ATA-like custom mode for the host that doesn't have
-IDE interface.
-
-Following are brief descriptions about IO mode.
-A. IO mode based on ATA protocol and uses some custom command. (read confirm,
-write confirm)
-B. IO mode uses SRAM bus interface.
-C. IO mode supports 4kB boot area, so host can boot from mflash.
-
-2. Reserved area configuration
-If host boot from mflash, usually needs raw area for boot loader image. All of
-the mflash's block device operation will be taken this value as start offset.
-Note that boot loader's size of reserved area and kernel configuration value
-must be same.
-
-3. Example of mflash platform driver registration
-Working mflash is very straight forward. Adding platform device stuff to board
-configuration file is all. Here is some pseudo example.
-
-static struct mg_drv_data mflash_drv_data = {
-       /* If you want to polling driver set to 1 */
-       .use_polling = 0,
-       /* device attribution */
-       .dev_attr = MG_BOOT_DEV
-};
-
-static struct resource mg_mflash_rsc[] = {
-       /* Base address of mflash */
-       [0] = {
-               .start = 0x08000000,
-               .end = 0x08000000 + SZ_64K - 1,
-               .flags = IORESOURCE_MEM
-       },
-       /* mflash interrupt pin */
-       [1] = {
-               .start = IRQ_GPIO(84),
-               .end = IRQ_GPIO(84),
-               .flags = IORESOURCE_IRQ
-       },
-       /* mflash reset pin */
-       [2] = {
-               .start = 43,
-               .end = 43,
-               .name = MG_RST_PIN,
-               .flags = IORESOURCE_IO
-       },
-       /* mflash reset-out pin
-        * If you use mflash as storage device (i.e. other than MG_BOOT_DEV),
-        * should assign this */
-       [3] = {
-               .start = 51,
-               .end = 51,
-               .name = MG_RSTOUT_PIN,
-               .flags = IORESOURCE_IO
-       }
-};
-
-static struct platform_device mflash_dev = {
-       .name = MG_DEV_NAME,
-       .id = -1,
-       .dev = {
-               .platform_data = &mflash_drv_data,
-       },
-       .num_resources = ARRAY_SIZE(mg_mflash_rsc),
-       .resource = mg_mflash_rsc
-};
-
-platform_device_register(&mflash_dev);
diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt

index 3b8449f8ac7e80a0ebeaf6dfe8c64b15503f3954..49d7c997fa1ee7f759b5ba319bb57be464f0bd47 100644 (file)
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -1142,16 +1142,17 @@ used by the kernel.
  
    pids.max
  
- A read-write single value file which exists on non-root cgroups.  The
- default is "max".
+       A read-write single value file which exists on non-root
+       cgroups.  The default is "max".
  
- Hard limit of number of processes.
+       Hard limit of number of processes.
  
    pids.current
  
- A read-only single value file which exists on all cgroups.
+       A read-only single value file which exists on all cgroups.
  
- The number of processes currently in the cgroup and its descendants.
+       The number of processes currently in the cgroup and its
+       descendants.
  
  Organisational operations are not blocked by cgroup policies, so it is
  possible to have pids.current > pids.max.  This can be done by either
diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst

index 2c41b713841fd497a95b57054ed9d6998625b71e..44886c91e112d4d21a41e0c4d1a96f37a584aa68 100644 (file)
--- a/Documentation/dev-tools/kcov.rst
+++ b/Documentation/dev-tools/kcov.rst
@@ -10,7 +10,7 @@ Note that kcov does not aim to collect as much coverage as possible. It aims
  to collect more or less stable coverage that is function of syscall inputs.
  To achieve this goal it does not collect coverage in soft/hard interrupts
  and instrumentation of some inherently non-deterministic parts of kernel is
-disbled (e.g. scheduler, locking).
+disabled (e.g. scheduler, locking).
  
  Usage
  -----
diff --git a/Documentation/devicetree/bindings/arm/marvell/cp110-system-controller0.txt b/Documentation/devicetree/bindings/arm/marvell/cp110-system-controller0.txt

index 30c546900b6021d24d64e1033714d362f3152a0e..07dbb358182ccd255baf2ed1220d5df9044f9dd7 100644 (file)
--- a/Documentation/devicetree/bindings/arm/marvell/cp110-system-controller0.txt
+++ b/Documentation/devicetree/bindings/arm/marvell/cp110-system-controller0.txt
@@ -45,7 +45,7 @@ The following clocks are available:
     - 1 15      SATA
     - 1 16      SATA USB
     - 1 17      Main
-   - 1 18      SD/MMC
+   - 1 18      SD/MMC/GOP
     - 1 21      Slow IO (SPI, NOR, BootROM, I2C, UART)
     - 1 22      USB3H0
     - 1 23      USB3H1
@@ -65,7 +65,7 @@ Required properties:
         "cpm-audio", "cpm-communit", "cpm-nand", "cpm-ppv2", "cpm-sdio",
         "cpm-mg-domain", "cpm-mg-core", "cpm-xor1", "cpm-xor0", "cpm-gop-dp", "none",
         "cpm-pcie_x10", "cpm-pcie_x11", "cpm-pcie_x4", "cpm-pcie-xor", "cpm-sata",
-       "cpm-sata-usb", "cpm-main", "cpm-sd-mmc", "none", "none", "cpm-slow-io",
+       "cpm-sata-usb", "cpm-main", "cpm-sd-mmc-gop", "none", "none", "cpm-slow-io",
         "cpm-usb3h0", "cpm-usb3h1", "cpm-usb3dev", "cpm-eip150", "cpm-eip197";
  
  Example:
@@ -78,6 +78,6 @@ Example:
                 gate-clock-output-names = "cpm-audio", "cpm-communit", "cpm-nand", "cpm-ppv2", "cpm-sdio",
                         "cpm-mg-domain", "cpm-mg-core", "cpm-xor1", "cpm-xor0", "cpm-gop-dp", "none",
                         "cpm-pcie_x10", "cpm-pcie_x11", "cpm-pcie_x4", "cpm-pcie-xor", "cpm-sata",
-                       "cpm-sata-usb", "cpm-main", "cpm-sd-mmc", "none", "none", "cpm-slow-io",
+                       "cpm-sata-usb", "cpm-main", "cpm-sd-mmc-gop", "none", "none", "cpm-slow-io",
                         "cpm-usb3h0", "cpm-usb3h1", "cpm-usb3dev", "cpm-eip150", "cpm-eip197";
         };
diff --git a/Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt b/Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt

index a78265993665a65bae524bb19606a7c16f248770..ca5204b3bc218bfe8e04a4fd5ca70482b1ab8c6b 100644 (file)
--- a/Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt
+++ b/Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt
@@ -4,7 +4,6 @@ Required properties:
    - compatible: value should be one of the following
                 "samsung,exynos3250-mipi-dsi" /* for Exynos3250/3472 SoCs */
                 "samsung,exynos4210-mipi-dsi" /* for Exynos4 SoCs */
-               "samsung,exynos4415-mipi-dsi" /* for Exynos4415 SoC */
                 "samsung,exynos5410-mipi-dsi" /* for Exynos5410/5420/5440 SoCs */
                 "samsung,exynos5422-mipi-dsi" /* for Exynos5422/5800 SoCs */
                 "samsung,exynos5433-mipi-dsi" /* for Exynos5433 SoCs */
diff --git a/Documentation/devicetree/bindings/display/exynos/samsung-fimd.txt b/Documentation/devicetree/bindings/display/exynos/samsung-fimd.txt

index 18645e0228b054e1ec0a77658c78d30f3c7eed8e..5837402c3adeae526a4e6efa914fd43c64c10443 100644 (file)
--- a/Documentation/devicetree/bindings/display/exynos/samsung-fimd.txt
+++ b/Documentation/devicetree/bindings/display/exynos/samsung-fimd.txt
@@ -11,7 +11,6 @@ Required properties:
                 "samsung,s5pv210-fimd"; /* for S5PV210 SoC */
                 "samsung,exynos3250-fimd"; /* for Exynos3250/3472 SoCs */
                 "samsung,exynos4210-fimd"; /* for Exynos4 SoCs */
-               "samsung,exynos4415-fimd"; /* for Exynos4415 SoC */
                 "samsung,exynos5250-fimd"; /* for Exynos5250 SoCs */
                 "samsung,exynos5420-fimd"; /* for Exynos5420/5422/5800 SoCs */
  
diff --git a/Documentation/devicetree/bindings/hwmon/ads7828.txt b/Documentation/devicetree/bindings/hwmon/ads7828.txt

new file mode 100644 (file)

index 0000000..fe0cc4a
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/ads7828.txt
@@ -0,0 +1,25 @@
+ads7828 properties
+
+Required properties:
+- compatible: Should be one of
+              ti,ads7828
+              ti,ads7830
+- reg: I2C address
+
+Optional properties:
+
+- ti,differential-input
+  Set to use the device in differential mode.
+- vref-supply
+  The external reference on the device is set to this regulator's output. If it
+  does not exist, the internal reference will be used and output by the ads78xx
+  on the "external vref" pin.
+
+  Example ADS7828 node:
+
+  ads7828: ads@48 {
+          compatible = "ti,ads7828";
+          reg = <0x48>;
+          vref-supply = <&vref>;
+          ti,differential-input;
+  };
diff --git a/Documentation/devicetree/bindings/hwmon/aspeed-pwm-tacho.txt b/Documentation/devicetree/bindings/hwmon/aspeed-pwm-tacho.txt

new file mode 100644 (file)

index 0000000..cf44605
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/aspeed-pwm-tacho.txt
@@ -0,0 +1,68 @@
+ASPEED AST2400/AST2500 PWM and Fan Tacho controller device driver
+
+The ASPEED PWM controller can support up to 8 PWM outputs. The ASPEED Fan Tacho
+controller can support up to 16 Fan tachometer inputs.
+
+There can be up to 8 fans supported. Each fan can have one PWM output and
+one/two Fan tach inputs.
+
+Required properties for pwm-tacho node:
+- #address-cells : should be 1.
+
+- #size-cells : should be 1.
+
+- reg : address and length of the register set for the device.
+
+- pinctrl-names : a pinctrl state named "default" must be defined.
+
+- pinctrl-0 : phandle referencing pin configuration of the PWM ports.
+
+- compatible : should be "aspeed,ast2400-pwm-tacho" for AST2400 and
+              "aspeed,ast2500-pwm-tacho" for AST2500.
+
+- clocks : a fixed clock providing input clock frequency(PWM
+          and Fan Tach clock)
+
+fan subnode format:
+===================
+Under fan subnode there can be up to 8 child nodes, with each child node
+representing a fan. If there are 8 fans each fan can have one PWM port and
+one/two Fan tach inputs.
+
+Required properties for each child node:
+- reg : should specify PWM source port.
+       integer value in the range 0 to 7 with 0 indicating PWM port A and
+       7 indicating PWM port H.
+
+- aspeed,fan-tach-ch : should specify the Fan tach input channel.
+                integer value in the range 0 through 15, with 0 indicating
+               Fan tach channel 0 and 15 indicating Fan tach channel 15.
+               At least one Fan tach input channel is required.
+
+Examples:
+
+pwm_tacho_fixed_clk: fixedclk {
+       compatible = "fixed-clock";
+       #clock-cells = <0>;
+       clock-frequency = <24000000>;
+};
+
+pwm_tacho: pwmtachocontroller@1e786000 {
+       #address-cells = <1>;
+       #size-cells = <1>;
+       reg = <0x1E786000 0x1000>;
+       compatible = "aspeed,ast2500-pwm-tacho";
+       clocks = <&pwm_tacho_fixed_clk>;
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_pwm0_default &pinctrl_pwm1_default>;
+
+       fan@0 {
+               reg = <0x00>;
+               aspeed,fan-tach-ch = /bits/ 8 <0x00>;
+       };
+
+       fan@1 {
+               reg = <0x01>;
+               aspeed,fan-tach-ch = /bits/ 8 <0x01 0x02>;
+       };
+};
diff --git a/Documentation/devicetree/bindings/hwmon/lm87.txt b/Documentation/devicetree/bindings/hwmon/lm87.txt

new file mode 100644 (file)

index 0000000..e1b7990
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/lm87.txt
@@ -0,0 +1,30 @@
+*LM87 hwmon sensor.
+
+Required properties:
+- compatible: Should be
+       "ti,lm87"
+
+- reg: I2C address
+
+optional properties:
+- has-temp3: This configures pins 18 and 19 to be used as a second
+             remote temperature sensing channel. By default the pins
+             are configured as voltage input pins in0 and in5.
+
+- has-in6: When set, pin 5 is configured to be used as voltage input
+           in6. Otherwise the pin is set as FAN1 input.
+
+- has-in7: When set, pin 6 is configured to be used as voltage input
+           in7. Otherwise the pin is set as FAN2 input.
+
+- vcc-supply: a Phandle for the regulator supplying power, can be
+              configured to measure 5.0V power supply. Default is 3.3V.
+
+Example:
+
+lm87@2e {
+       compatible = "ti,lm87";
+       reg = <0x2e>;
+       has-temp3;
+       vcc-supply = <&reg_5v0>;
+};
diff --git a/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt b/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt

index 6f28969af9dcd03048ea75f82858664570531489..028268fd99eeba926424ae90ac73eab6b818f4be 100644 (file)
--- a/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt
+++ b/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt
@@ -6,7 +6,9 @@ perform in-band IPMI communication with their host.
  
  Required properties:
  
-- compatible : should be "aspeed,ast2400-ibt-bmc"
+- compatible : should be one of
+       "aspeed,ast2400-ibt-bmc"
+       "aspeed,ast2500-ibt-bmc"
  - reg: physical address and size of the registers
  
  Optional properties:
diff --git a/Documentation/devicetree/bindings/mailbox/brcm,iproc-flexrm-mbox.txt b/Documentation/devicetree/bindings/mailbox/brcm,iproc-flexrm-mbox.txt

new file mode 100644 (file)

index 0000000..752ae6b
--- /dev/null
+++ b/Documentation/devicetree/bindings/mailbox/brcm,iproc-flexrm-mbox.txt
@@ -0,0 +1,59 @@
+Broadcom FlexRM Ring Manager
+============================
+The Broadcom FlexRM ring manager provides a set of rings which can be
+used to submit work to offload engines. An SoC may have multiple FlexRM
+hardware blocks. There is one device tree entry per FlexRM block. The
+FlexRM driver will create a mailbox-controller instance for given FlexRM
+hardware block where each mailbox channel is a separate FlexRM ring.
+
+Required properties:
+--------------------
+- compatible:  Should be "brcm,iproc-flexrm-mbox"
+- reg:         Specifies base physical address and size of the FlexRM
+               ring registers
+- msi-parent:  Phandles (and potential Device IDs) to MSI controllers
+               The FlexRM engine will send MSIs (instead of wired
+               interrupts) to CPU. There is one MSI for each FlexRM ring.
+               Refer to devicetree/bindings/interrupt-controller/msi.txt
+- #mbox-cells: Specifies the number of cells needed to encode a mailbox
+               channel. This should be 3.
+
+               The 1st cell is the mailbox channel number.
+
+               The 2nd cell contains MSI completion threshold. This is the
+               number of completion messages for which FlexRM will inject
+               one MSI interrupt to CPU.
+
+               The 3rd cell contains MSI timer value representing time for
+               which FlexRM will wait to accumulate N completion messages
+               where N is the value specified by 2nd cell above. If FlexRM
+               does not get required number of completion messages in time
+               specified by this cell then it will inject one MSI interrupt
+               to CPU provided at least one completion message is available.
+
+Optional properties:
+--------------------
+- dma-coherent:        Present if DMA operations made by the FlexRM engine (such
+               as DMA descriptor access, access to buffers pointed by DMA
+               descriptors and read/write pointer updates to DDR) are
+               cache coherent with the CPU.
+
+Example:
+--------
+crypto_mbox: mbox@67000000 {
+       compatible = "brcm,iproc-flexrm-mbox";
+       reg = <0x67000000 0x200000>;
+       msi-parent = <&gic_its 0x7f00>;
+       #mbox-cells = <3>;
+};
+
+crypto@672c0000 {
+       compatible = "brcm,spu2-v2-crypto";
+       reg = <0x672c0000 0x1000>;
+       mboxes = <&crypto_mbox 0 0x1 0xffff>,
+                <&crypto_mbox 1 0x1 0xffff>,
+                <&crypto_mbox 16 0x1 0xffff>,
+                <&crypto_mbox 17 0x1 0xffff>,
+                <&crypto_mbox 30 0x1 0xffff>,
+                <&crypto_mbox 31 0x1 0xffff>;
+};
diff --git a/Documentation/devicetree/bindings/mailbox/brcm,iproc-pdc-mbox.txt b/Documentation/devicetree/bindings/mailbox/brcm,iproc-pdc-mbox.txt

index 411ccf421584514b5d2128373115cd9443b6d3e8..0f3ee81d92c297022adcdbb1f9689f4b924ea143 100644 (file)
--- a/Documentation/devicetree/bindings/mailbox/brcm,iproc-pdc-mbox.txt
+++ b/Documentation/devicetree/bindings/mailbox/brcm,iproc-pdc-mbox.txt
@@ -1,9 +1,11 @@
  The PDC driver manages data transfer to and from various offload engines
  on some Broadcom SoCs. An SoC may have multiple PDC hardware blocks. There is
-one device tree entry per block.
+one device tree entry per block.  On some chips, the PDC functionality is
+handled by the FA2 (Northstar Plus).
  
  Required properties:
-- compatible : Should be "brcm,iproc-pdc-mbox".
+- compatible : Should be "brcm,iproc-pdc-mbox" or "brcm,iproc-fa2-mbox" for
+  FA2/Northstar Plus.
  - reg: Should contain PDC registers location and length.
  - interrupts: Should contain the IRQ line for the PDC.
  - #mbox-cells: 1
diff --git a/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt

index ea9c1c9607f61239d2d0211d0cf5a607da406fb0..520d61dad6dd7ff4f65e5e8cddbf33a63b81b009 100644 (file)
--- a/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt
+++ b/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt
@@ -13,7 +13,7 @@ Required Properties:
         - "rockchip,rk2928-dw-mshc": for Rockchip RK2928 and following,
                                                         before RK3288
         - "rockchip,rk3288-dw-mshc": for Rockchip RK3288
-       - "rockchip,rk1108-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RK1108
+       - "rockchip,rv1108-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RV1108
         - "rockchip,rk3036-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RK3036
         - "rockchip,rk3368-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RK3368
         - "rockchip,rk3399-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RK3399
diff --git a/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt b/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt

index b7fa3b97986d5fff391bf4ff1a01bdd25ce096e9..a339dbb154933282ee06aaeec0571e0d95a72b42 100644 (file)
--- a/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt
@@ -44,13 +44,19 @@ Hip05 Example (note that Hip06 is the same except compatible):
         };
  
  HiSilicon Hip06/Hip07 PCIe host bridge DT (almost-ECAM) description.
+
+Some BIOSes place the host controller in a mode where it is ECAM
+compliant for all devices other than the root complex. In such cases,
+the host controller should be described as below.
+
  The properties and their meanings are identical to those described in
  host-generic-pci.txt except as listed below.
  
  Properties of the host controller node that differ from
  host-generic-pci.txt:
  
-- compatible     : Must be "hisilicon,pcie-almost-ecam"
+- compatible     : Must be "hisilicon,hip06-pcie-ecam", or
+                  "hisilicon,hip07-pcie-ecam"
  
  - reg            : Two entries: First the ECAM configuration space for any
                    other bus underneath the root bus. Second, the base
@@ -59,7 +65,7 @@ host-generic-pci.txt:
  
  Example:
         pcie0: pcie@a0090000 {
-               compatible = "hisilicon,pcie-almost-ecam";
+               compatible = "hisilicon,hip06-pcie-ecam";
                 reg = <0 0xb0000000 0 0x2000000>,  /*  ECAM configuration space */
                       <0 0xa0090000 0 0x10000>; /* host bridge registers */
                 bus-range = <0  31>;
diff --git a/Documentation/devicetree/bindings/phy/brcm,nsp-usb3-phy.txt b/Documentation/devicetree/bindings/phy/brcm,nsp-usb3-phy.txt

deleted file mode 100644 (file)

index e68ae5d..0000000
--- a/Documentation/devicetree/bindings/phy/brcm,nsp-usb3-phy.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-Broadcom USB3 phy binding for northstar plus SoC
-The USB3 phy is internal to the SoC and is accessed using mdio interface.
-
-Required mdio bus properties:
-- reg: Should be 0x0 for SoC internal USB3 phy
-- #address-cells: must be 1
-- #size-cells: must be 0
-
-Required USB3 PHY properties:
-- compatible: should be "brcm,nsp-usb3-phy"
-- reg: USB3 Phy address on SoC internal MDIO bus and it should be 0x10.
-- usb3-ctrl-syscon: handler of syscon node defining physical address
-  of usb3 control register.
-- #phy-cells: must be 0
-
-Required usb3 control properties:
-- compatible: should be "brcm,nsp-usb3-ctrl"
-- reg: offset and length of the control registers
-
-Example:
-
-       mdio@0 {
-               reg = <0x0>;
-               #address-cells = <1>;
-               #size-cells = <0>;
-
-               usb3_phy: usb-phy@10 {
-                       compatible = "brcm,nsp-usb3-phy";
-                       reg = <0x10>;
-                       usb3-ctrl-syscon = <&usb3_ctrl>;
-                       #phy-cells = <0>;
-                       status = "disabled";
-               };
-       };
-
-       usb3_ctrl: syscon@104408 {
-               compatible = "brcm,nsp-usb3-ctrl", "syscon";
-               reg = <0x104408 0x3fc>;
-       };
diff --git a/Documentation/devicetree/bindings/power/reset/gemini-poweroff.txt b/Documentation/devicetree/bindings/power/reset/gemini-poweroff.txt

new file mode 100644 (file)

index 0000000..7fec3e1
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/reset/gemini-poweroff.txt
@@ -0,0 +1,17 @@
+* Device-Tree bindings for Cortina Systems Gemini Poweroff
+
+This is a special IP block in the Cortina Gemini SoC that only
+deals with different ways to power the system down.
+
+Required properties:
+- compatible: should be "cortina,gemini-power-controller"
+- reg: should contain the physical memory base and size
+- interrupts: should contain the power management interrupt
+
+Example:
+
+power-controller@4b000000 {
+       compatible = "cortina,gemini-power-controller";
+       reg = <0x4b000000 0x100>;
+       interrupts = <26 IRQ_TYPE_EDGE_FALLING>;
+};
diff --git a/Documentation/devicetree/bindings/power/reset/syscon-poweroff.txt b/Documentation/devicetree/bindings/power/reset/syscon-poweroff.txt

index 1e2546f8b08a4aab989bbe8c04e76d6fd1df63e4..022ed1f3bc808351752ae1130dd226e7c7e7cb38 100644 (file)
--- a/Documentation/devicetree/bindings/power/reset/syscon-poweroff.txt
+++ b/Documentation/devicetree/bindings/power/reset/syscon-poweroff.txt
@@ -3,13 +3,20 @@ Generic SYSCON mapped register poweroff driver
  This is a generic poweroff driver using syscon to map the poweroff register.
  The poweroff is generally performed with a write to the poweroff register
  defined by the register map pointed by syscon reference plus the offset
-with the mask defined in the poweroff node.
+with the value and mask defined in the poweroff node.
  
  Required properties:
  - compatible: should contain "syscon-poweroff"
  - regmap: this is phandle to the register map node
  - offset: offset in the register map for the poweroff register (in bytes)
-- mask: the poweroff value written to the poweroff register (32 bit access)
+- value: the poweroff value written to the poweroff register (32 bit access)
+
+Optional properties:
+- mask: update only the register bits defined by the mask (32 bit)
+
+Legacy usage:
+If a node doesn't contain a value property but contains a mask property, the
+mask property is used as the value.
  
  Default will be little endian mode, 32 bit access only.
  
diff --git a/Documentation/devicetree/bindings/power/supply/cpcap-charger.txt b/Documentation/devicetree/bindings/power/supply/cpcap-charger.txt

new file mode 100644 (file)

index 0000000..80bd873
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/supply/cpcap-charger.txt
@@ -0,0 +1,37 @@
+Motorola CPCAP PMIC battery charger binding
+
+Required properties:
+- compatible: Shall be "motorola,mapphone-cpcap-charger"
+- interrupts: Interrupt specifier for each name in interrupt-names
+- interrupt-names: Should contain the following entries:
+                  "chrg_det", "rvrs_chrg", "chrg_se1b", "se0conn",
+                  "rvrs_mode", "chrgcurr1", "vbusvld", "battdetb"
+- io-channels: IIO ADC channel specifier for each name in io-channel-names
+- io-channel-names: Should contain the following entries:
+                   "battdetb", "battp", "vbus", "chg_isense", "batti"
+
+Optional properties:
+- mode-gpios: Optionally CPCAP charger can have a companion wireless
+             charge controller that is controlled with two GPIOs
+             that are active low.
+
+Example:
+
+cpcap_charger: charger {
+       compatible = "motorola,mapphone-cpcap-charger";
+       interrupts-extended = <
+               &cpcap 13 0 &cpcap 12 0 &cpcap 29 0 &cpcap 28 0
+               &cpcap 22 0 &cpcap 20 0 &cpcap 19 0 &cpcap 54 0
+       >;
+       interrupt-names =
+               "chrg_det", "rvrs_chrg", "chrg_se1b", "se0conn",
+               "rvrs_mode", "chrgcurr1", "vbusvld", "battdetb";
+       mode-gpios = <&gpio3 29 GPIO_ACTIVE_LOW
+                     &gpio3 23 GPIO_ACTIVE_LOW>;
+       io-channels = <&cpcap_adc 0 &cpcap_adc 1
+                      &cpcap_adc 2 &cpcap_adc 5
+                      &cpcap_adc 6>;
+       io-channel-names = "battdetb", "battp",
+                          "vbus", "chg_isense",
+                          "batti";
+};
diff --git a/Documentation/devicetree/bindings/power/supply/lego_ev3_battery.txt b/Documentation/devicetree/bindings/power/supply/lego_ev3_battery.txt

new file mode 100644 (file)

index 0000000..5485633
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/supply/lego_ev3_battery.txt
@@ -0,0 +1,21 @@
+LEGO MINDSTORMS EV3 Battery
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+LEGO MINDSTORMS EV3 has some built-in capability for monitoring the battery.
+It uses 6 AA batteries or a special Li-ion rechargeable battery pack that is
+detected by a key switch in the battery compartment.
+
+Required properties:
+ - compatible: Must be "lego,ev3-battery"
+ - io-channels: phandles to analog inputs for reading voltage and current
+ - io-channel-names: Must be "voltage", "current"
+ - rechargeable-gpios: phandle to the rechargeable battery indication gpio
+
+Example:
+
+       battery {
+               compatible = "lego,ev3-battery";
+               io-channels = <&adc 4>, <&adc 3>;
+               io-channel-names = "voltage", "current";
+               rechargeable-gpios = <&gpio 136 GPIO_ACTIVE_LOW>;
+       };
diff --git a/Documentation/devicetree/bindings/power/supply/ltc2941.txt b/Documentation/devicetree/bindings/power/supply/ltc2941.txt

index ea42ae12d92451579e0af57ee2bdfb886e60ad0a..a9d7aa60558b739ca312f86826b72cb247b111c0 100644 (file)
--- a/Documentation/devicetree/bindings/power/supply/ltc2941.txt
+++ b/Documentation/devicetree/bindings/power/supply/ltc2941.txt
@@ -6,8 +6,8 @@ temperature monitoring, and uses a slightly different conversion
  formula for the charge counter.
  
  Required properties:
-- compatible: Should contain "ltc2941" or "ltc2943" which also indicates the
-    type of I2C chip attached.
+- compatible: Should contain "lltc,ltc2941" or "lltc,ltc2943" which also
+    indicates the type of I2C chip attached.
  - reg: The 7-bit I2C address.
  - lltc,resistor-sense: The sense resistor value in milli-ohms. Can be a 32-bit
      negative value when the battery has been connected to the wrong end of the
@@ -20,7 +20,7 @@ Required properties:
  Example from the Topic Miami Florida board:
  
         fuelgauge: ltc2943@64 {
-               compatible = "ltc2943";
+               compatible = "lltc,ltc2943";
                 reg = <0x64>;
                 lltc,resistor-sense = <15>;
                 lltc,prescaler-exponent = <5>; /* 2^(2*5) = 1024 */
diff --git a/Documentation/devicetree/bindings/power/supply/max8925_batter.txt b/Documentation/devicetree/bindings/power/supply/max8925_batter.txt

deleted file mode 100644 (file)

index d7e3e0c..0000000
--- a/Documentation/devicetree/bindings/power/supply/max8925_batter.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-max8925-battery bindings
-~~~~~~~~~~~~~~~~
-
-Optional properties :
- - batt-detect: whether support battery detect
- - topoff-threshold: set charging current in topoff mode
- - fast-charge: set charging current in fast mode
- - no-temp-support: whether support temperature protection detect
- - no-insert-detect: whether support insert detect
-
-Example:
-       charger {
-               batt-detect = <0>;
-               topoff-threshold = <1>;
-               fast-charge = <7>;
-               no-temp-support = <0>;
-               no-insert-detect = <0>;
-       };
diff --git a/Documentation/devicetree/bindings/power/supply/max8925_battery.txt b/Documentation/devicetree/bindings/power/supply/max8925_battery.txt

new file mode 100644 (file)

index 0000000..d7e3e0c
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/supply/max8925_battery.txt
@@ -0,0 +1,18 @@
+max8925-battery bindings
+~~~~~~~~~~~~~~~~
+
+Optional properties :
+ - batt-detect: whether support battery detect
+ - topoff-threshold: set charging current in topoff mode
+ - fast-charge: set charging current in fast mode
+ - no-temp-support: whether support temperature protection detect
+ - no-insert-detect: whether support insert detect
+
+Example:
+       charger {
+               batt-detect = <0>;
+               topoff-threshold = <1>;
+               fast-charge = <7>;
+               no-temp-support = <0>;
+               no-insert-detect = <0>;
+       };
diff --git a/Documentation/devicetree/bindings/powerpc/4xx/emac.txt b/Documentation/devicetree/bindings/powerpc/4xx/emac.txt

index 712baf6c3e246fa9ea93c6bab4cb8f294b894d86..44b842b6ca154d9c13c3d0da6a784fe54195fcf9 100644 (file)
--- a/Documentation/devicetree/bindings/powerpc/4xx/emac.txt
+++ b/Documentation/devicetree/bindings/powerpc/4xx/emac.txt
@@ -71,6 +71,9 @@
                           For Axon it can be absent, though my current driver
                           doesn't handle phy-address yet so for now, keep
                           0x00ffffff in it.
+    - phy-handle       : Used to describe configurations where an external PHY
+                         is used. Please refer to:
+                         Documentation/devicetree/bindings/net/ethernet.txt
      - rx-fifo-size-gige : 1 cell, Rx fifo size in bytes for 1000 Mb/sec
                           operations (if absent the value is the same as
                           rx-fifo-size).  For Axon, either absent or 2048.
@@ -81,8 +84,22 @@
                           offload, phandle of the TAH device node.
      - tah-channel       : 1 cell, optional. If appropriate, channel used on the
                           TAH engine.
+    - fixed-link       : Fixed-link subnode describing a link to a non-MDIO
+                         managed entity. See
+                         Documentation/devicetree/bindings/net/fixed-link.txt
+                         for details.
+    - mdio subnode     : When the EMAC has a phy connected to its local
+                         mdio, which is supported by the kernel's network
+                         PHY library in drivers/net/phy, there must be a device
+                         tree subnode with the following required properties:
+                               - #address-cells: Must be <1>.
+                               - #size-cells: Must be <0>.
  
-    Example:
+                         For PHY definitions: Please refer to
+                         Documentation/devicetree/bindings/net/phy.txt and
+                         Documentation/devicetree/bindings/net/ethernet.txt
+
+    Examples:
  
         EMAC0: ethernet@40000800 {
                 device_type = "network";
@@ -104,6 +121,48 @@
                 zmii-channel = <0>;
         };
  
+       EMAC1: ethernet@ef600c00 {
+               device_type = "network";
+               compatible = "ibm,emac-apm821xx", "ibm,emac4sync";
+               interrupt-parent = <&EMAC1>;
+               interrupts = <0 1>;
+               #interrupt-cells = <1>;
+               #address-cells = <0>;
+               #size-cells = <0>;
+               interrupt-map = <0 &UIC2 0x10 IRQ_TYPE_LEVEL_HIGH /* Status */
+                                1 &UIC2 0x14 IRQ_TYPE_LEVEL_HIGH /* Wake */>;
+               reg = <0xef600c00 0x000000c4>;
+               local-mac-address = [000000000000]; /* Filled in by U-Boot */
+               mal-device = <&MAL0>;
+               mal-tx-channel = <0>;
+               mal-rx-channel = <0>;
+               cell-index = <0>;
+               max-frame-size = <9000>;
+               rx-fifo-size = <16384>;
+               tx-fifo-size = <2048>;
+               fifo-entry-size = <10>;
+               phy-mode = "rgmii";
+               phy-handle = <&phy0>;
+               phy-map = <0x00000000>;
+               rgmii-device = <&RGMII0>;
+               rgmii-channel = <0>;
+               tah-device = <&TAH0>;
+               tah-channel = <0>;
+               has-inverted-stacr-oc;
+               has-new-stacr-staopc;
+
+               mdio {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       phy0: ethernet-phy@0 {
+                               compatible = "ethernet-phy-ieee802.3-c22";
+                               reg = <0>;
+                       };
+               };
+       };
+
+
        ii) McMAL node
  
      Required properties:
@@ -145,4 +204,3 @@
      - revision           : as provided by the RGMII new version register if
                            available.
                            For Axon: 0x0000012a
-
diff --git a/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt b/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt

index c3f6546ebac777421b467b0008f7f78f06e8e5c4..6a23ad9ac53a4cabc85a6bc592a873f38c7c144b 100644 (file)
--- a/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt
@@ -45,7 +45,7 @@ Required Properties:
  Optional Properties:
  - reg-names: In addition to the required properties, the following are optional
    - "efuse-address"    - Contains efuse base address used to pick up ABB info.
-  - "ldo-address"      - Contains address of ABB LDO overide register address.
+  - "ldo-address"      - Contains address of ABB LDO override register.
         "efuse-address" is required for this.
  - ti,ldovbb-vset-mask  - Required if ldo-address is set, mask for LDO override
         register to provide override vset value.
diff --git a/Documentation/devicetree/bindings/rng/omap_rng.txt b/Documentation/devicetree/bindings/rng/omap_rng.txt

index 471477299ece16c931322e96fbb3c1535b1e1a43..9cf7876ab43444f604a501075ccbf6088e50f880 100644 (file)
--- a/Documentation/devicetree/bindings/rng/omap_rng.txt
+++ b/Documentation/devicetree/bindings/rng/omap_rng.txt
@@ -12,7 +12,8 @@ Required properties:
  - reg : Offset and length of the register set for the module
  - interrupts : the interrupt number for the RNG module.
                 Used for "ti,omap4-rng" and "inside-secure,safexcel-eip76"
-- clocks: the trng clock source
+- clocks: the trng clock source. Only mandatory for the
+  "inside-secure,safexcel-eip76" compatible.
  
  Example:
  /* AM335x */
diff --git a/Documentation/devicetree/bindings/usb/usb251xb.txt b/Documentation/devicetree/bindings/usb/usb251xb.txt

index 0c065f77658f138e8bc535e85c7d2a99f5268b59..3957d4edaa745fc068c4ab8ba796f9c8d8190a1f 100644 (file)
--- a/Documentation/devicetree/bindings/usb/usb251xb.txt
+++ b/Documentation/devicetree/bindings/usb/usb251xb.txt
@@ -7,18 +7,18 @@ Required properties :
   - compatible : Should be "microchip,usb251xb" or one of the specific types:
         "microchip,usb2512b", "microchip,usb2512bi", "microchip,usb2513b",
         "microchip,usb2513bi", "microchip,usb2514b", "microchip,usb2514bi"
- - hub-reset-gpios : Should specify the gpio for hub reset
+ - reset-gpios : Should specify the gpio for hub reset
+ - reg : I2C address on the selected bus (default is <0x2C>)
  
  Optional properties :
- - reg : I2C address on the selected bus (default is <0x2C>)
   - skip-config : Skip Hub configuration, but only send the USB-Attach command
- - vendor-id : USB Vendor ID of the hub (16 bit, default is 0x0424)
- - product-id : USB Product ID of the hub (16 bit, default depends on type)
- - device-id : USB Device ID of the hub (16 bit, default is 0x0bb3)
- - language-id : USB Language ID (16 bit, default is 0x0000)
- - manufacturer : USB Manufacturer string (max 31 characters long)
- - product : USB Product string (max 31 characters long)
- - serial : USB Serial string (max 31 characters long)
+ - vendor-id : Set USB Vendor ID of the hub (16 bit, default is 0x0424)
+ - product-id : Set USB Product ID of the hub (16 bit, default depends on type)
+ - device-id : Set USB Device ID of the hub (16 bit, default is 0x0bb3)
+ - language-id : Set USB Language ID (16 bit, default is 0x0000)
+ - manufacturer : Set USB Manufacturer string (max 31 characters long)
+ - product : Set USB Product string (max 31 characters long)
+ - serial : Set USB Serial string (max 31 characters long)
   - {bus,self}-powered : selects between self- and bus-powered operation (default
         is self-powered)
   - disable-hi-speed : disable USB Hi-Speed support
@@ -31,8 +31,10 @@ Optional properties :
         (default is individual)
   - dynamic-power-switching : enable auto-switching from self- to bus-powered
         operation if the local power source is removed or unavailable
- - oc-delay-{100us,4ms,8ms,16ms} : set over current timer delay (default is 8ms)
- - compound-device : indicated the hub is part of a compound device
+ - oc-delay-us : Delay time (in microseconds) for filtering the over-current
+       sense inputs. Valid values are 100, 4000, 8000 (default) and 16000. If
+       an invalid value is given, the default is used instead.
+ - compound-device : indicate the hub is part of a compound device
   - port-mapping-mode : enable port mapping mode
   - string-support : enable string descriptor support (required for manufacturer,
         product and serial string configuration)
@@ -40,34 +42,15 @@ Optional properties :
         device connected.
   - sp-disabled-ports : Specifies the ports which will be self-power disabled
   - bp-disabled-ports : Specifies the ports which will be bus-power disabled
- - max-sp-power : Specifies the maximum current the hub consumes from an
-       upstream port when operating as self-powered hub including the power
-       consumption of a permanently attached peripheral if the hub is
-       configured as a compound device. The value is given in mA in a 0 - 500
-       range (default is 2).
- - max-bp-power : Specifies the maximum current the hub consumes from an
-       upstream port when operating as bus-powered hub including the power
-       consumption of a permanently attached peripheral if the hub is
-       configured as a compound device. The value is given in mA in a 0 - 500
-       range (default is 100).
- - max-sp-current : Specifies the maximum current the hub consumes from an
-       upstream port when operating as self-powered hub EXCLUDING the power
-       consumption of a permanently attached peripheral if the hub is
-       configured as a compound device. The value is given in mA in a 0 - 500
-       range (default is 2).
- - max-bp-current : Specifies the maximum current the hub consumes from an
-       upstream port when operating as bus-powered hub EXCLUDING the power
-       consumption of a permanently attached peripheral if the hub is
-       configured as a compound device. The value is given in mA in a 0 - 500
-       range (default is 100).
- - power-on-time : Specifies the time it takes from the time the host initiates
-       the power-on sequence to a port until the port has adequate power. The
-       value is given in ms in a 0 - 510 range (default is 100ms).
+ - power-on-time-ms : Specifies the time it takes from the time the host
+       initiates the power-on sequence to a port until the port has adequate
+       power. The value is given in ms in a 0 - 510 range (default is 100ms).
  
  Examples:
         usb2512b@2c {
                 compatible = "microchip,usb2512b";
-               hub-reset-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>;
+               reg = <0x2c>;
+               reset-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>;
         };
  
         usb2514b@2c {
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt

index ec0bfb9bbebd42c828a3b4978db070924275f609..830c9987fa02bc5fd639b530d619f5474fbfdb93 100644 (file)
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -265,6 +265,7 @@ sbs Smart Battery System
  schindler      Schindler
  seagate        Seagate Technology PLC
  semtech        Semtech Corporation
+sensirion      Sensirion AG
  sgx    SGX Sensortech
  sharp  Sharp Corporation
  si-en  Si-En Technology Ltd.
diff --git a/Documentation/extcon/intel-int3496.txt b/Documentation/extcon/intel-int3496.txt

index af0b366c25b73332a77e58b9b7247c4c387fc0d6..8155dbc7fad36a253137c230a7f074fecd0a7969 100644 (file)
--- a/Documentation/extcon/intel-int3496.txt
+++ b/Documentation/extcon/intel-int3496.txt
@@ -20,3 +20,8 @@ Index 1: The output gpio for enabling Vbus output from the device to the otg
  Index 2: The output gpio for muxing of the data pins between the USB host and
           the USB peripheral controller, write 1 to mux to the peripheral
           controller
+
+There is a mapping between indices and GPIO connection IDs as follows
+       id      index 0
+       vbus    index 1
+       mux     index 2
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking

index fdcfdd79682a00102b1c18c8b56e19e00a418d26..fe25787ff6d49748c3e1ecc494f17e401a60916a 100644 (file)
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -58,8 +58,7 @@ prototypes:
         int (*permission) (struct inode *, int, unsigned int);
         int (*get_acl)(struct inode *, int);
         int (*setattr) (struct dentry *, struct iattr *);
-       int (*getattr) (const struct path *, struct dentry *, struct kstat *,
-                       u32, unsigned int);
+       int (*getattr) (const struct path *, struct kstat *, u32, unsigned int);
         ssize_t (*listxattr) (struct dentry *, char *, size_t);
         int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
         void (*update_time)(struct inode *, struct timespec *, int);
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting

index 95280079c0b3af0c217270e6838b723eb818cbcd..5fb17f49f7a21907a2c446267cc952c749e42043 100644 (file)
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -600,3 +600,9 @@ in your dentry operations instead.
  [recommended]
         ->readlink is optional for symlinks.  Don't set, unless filesystem needs
         to fake something for readlink(2).
+--
+[mandatory]
+       ->getattr() is now passed a struct path rather than a vfsmount and
+       dentry separately, and it now has request_mask and query_flags arguments
+       to specify the fields and sync type requested by statx.  Filesystems not
+       supporting any statx-specific features may ignore the new arguments.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt

index 569211703721fe81e0cbbbd1cb979a10c276cf9e..94dd27ef4a766ca5414146fab1b76ea5aacb67de 100644 (file)
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -382,8 +382,7 @@ struct inode_operations {
         int (*permission) (struct inode *, int);
         int (*get_acl)(struct inode *, int);
         int (*setattr) (struct dentry *, struct iattr *);
-       int (*getattr) (const struct path *, struct dentry *, struct kstat *,
-                       u32, unsigned int);
+       int (*getattr) (const struct path *, struct kstat *, u32, unsigned int);
         ssize_t (*listxattr) (struct dentry *, char *, size_t);
         void (*update_time)(struct inode *, struct timespec *, int);
         int (*atomic_open)(struct inode *, struct dentry *, struct file *,
diff --git a/Documentation/gcc-plugins.txt b/Documentation/gcc-plugins.txt

index 891c694644348241925dc3e585d632222d8b0948..433eaefb4aa171ac62f3162866763a0526d7eb83 100644 (file)
--- a/Documentation/gcc-plugins.txt
+++ b/Documentation/gcc-plugins.txt
@@ -18,8 +18,8 @@ because gcc versions 4.5 and 4.6 are compiled by a C compiler,
  gcc-4.7 can be compiled by a C or a C++ compiler,
  and versions 4.8+ can only be compiled by a C++ compiler.
  
-Currently the GCC plugin infrastructure supports only the x86, arm and arm64
-architectures.
+Currently the GCC plugin infrastructure supports only the x86, arm, arm64 and
+powerpc architectures.
  
  This infrastructure was ported from grsecurity [6] and PaX [7].
  
diff --git a/Documentation/hwmon/aspeed-pwm-tacho b/Documentation/hwmon/aspeed-pwm-tacho

new file mode 100644 (file)

index 0000000..7cfb349
--- /dev/null
+++ b/Documentation/hwmon/aspeed-pwm-tacho
@@ -0,0 +1,22 @@
+Kernel driver aspeed-pwm-tacho
+==============================
+
+Supported chips:
+       ASPEED AST2400/2500
+
+Authors:
+       <jaghu@google.com>
+
+Description:
+------------
+This driver implements support for ASPEED AST2400/2500 PWM and Fan Tacho
+controller. The PWM controller supports up to 8 PWM outputs. The Fan tacho
+controller supports up to 16 tachometer inputs.
+
+The driver provides the following sensor accesses in sysfs:
+
+fanX_input     ro      provide current fan rotation value in RPM as reported
+                       by the fan to the device.
+
+pwmX           rw      get or set PWM fan control value. This is an integer
+                       value between 0(off) and 255(full speed).
diff --git a/Documentation/hwmon/tc654 b/Documentation/hwmon/tc654

index 91a2843f5f98811347e434519e4771502dd95c16..47636a8077b4cbf5bb80dda17a3bbfc6bfae722b 100644 (file)
--- a/Documentation/hwmon/tc654
+++ b/Documentation/hwmon/tc654
@@ -2,7 +2,7 @@ Kernel driver tc654
  ===================
  
  Supported chips:
-  * Microship TC654 and TC655
+  * Microchip TC654 and TC655
      Prefix: 'tc654'
      Datasheet: http://ww1.microchip.com/downloads/en/DeviceDoc/20001734C.pdf
  
diff --git a/Documentation/lightnvm/pblk.txt b/Documentation/lightnvm/pblk.txt

new file mode 100644 (file)

index 0000000..1040ed1
--- /dev/null
+++ b/Documentation/lightnvm/pblk.txt
@@ -0,0 +1,21 @@
+pblk: Physical Block Device Target
+==================================
+
+pblk implements a fully associative, host-based FTL that exposes a traditional
+block I/O interface. Its primary responsibilities are:
+
+  - Map logical addresses onto physical addresses (4KB granularity) in a
+    logical-to-physical (L2P) table.
+  - Maintain the integrity and consistency of the L2P table as well as its
+    recovery from normal tear down and power outage.
+  - Deal with controller- and media-specific constraints.
+  - Handle I/O errors.
+  - Implement garbage collection.
+  - Maintain consistency across the I/O stack during synchronization points.
+
+For more information please refer to:
+
+  http://lightnvm.io
+
+which maintains updated FAQs, manual pages, technical documentation, tools,
+contacts, etc.
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt

index fc73eeb7b3b8b119083a03a42e0046a564ea2f0e..ab02304613771b6f6e120da96fb677c293d032d2 100644 (file)
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1006,7 +1006,8 @@ accept_redirects - BOOLEAN
                 FALSE (router)
  
  forwarding - BOOLEAN
-       Enable IP forwarding on this interface.
+       Enable IP forwarding on this interface.  This controls whether packets
+       received _on_ this interface can be forwarded.
  
  mc_forwarding - BOOLEAN
         Do multicast routing. The kernel needs to be compiled with CONFIG_MROUTE
diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt

index 54bd5faa8782258f72b134f1aaca684c15a00eb6..f2af35f6d6b2bc8bbfa2ff5727f24b3fd1a9b2f6 100644 (file)
--- a/Documentation/pinctrl.txt
+++ b/Documentation/pinctrl.txt
@@ -77,9 +77,15 @@ static struct pinctrl_desc foo_desc = {
  
  int __init foo_probe(void)
  {
+       int error;
+
         struct pinctrl_dev *pctl;
  
-       return pinctrl_register_and_init(&foo_desc, <PARENT>, NULL, &pctl);
+       error = pinctrl_register_and_init(&foo_desc, <PARENT>, NULL, &pctl);
+       if (error)
+               return error;
+
+       return pinctrl_enable(pctl);
  }
  
  To enable the pinctrl subsystem and the subgroups for PINMUX and PINCONF and
diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst

index 11ec2d93a5e08dc02db1f39291fe8bbec13f8748..61e9c78bd6d1d999033d11170ea6062337984a7e 100644 (file)
--- a/Documentation/process/stable-kernel-rules.rst
+++ b/Documentation/process/stable-kernel-rules.rst
@@ -124,7 +124,7 @@ specified in the following format in the sign-off area:
  
  .. code-block:: none
  
-     Cc: <stable@vger.kernel.org> # 3.3.x-
+     Cc: <stable@vger.kernel.org> # 3.3.x
  
  The tag has the meaning of:
  
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt

index e4991fb1eedcd4efcd258a7b1290fd087e5f9923..41ef9d8efe9517f602e59b21bfe1448196cb3450 100644 (file)
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -12,7 +12,7 @@ kprobes can probe (this means, all functions body except for __kprobes
  functions). Unlike the Tracepoint based event, this can be added and removed
  dynamically, on the fly.
  
-To enable this feature, build your kernel with CONFIG_KPROBE_EVENT=y.
+To enable this feature, build your kernel with CONFIG_KPROBE_EVENTS=y.
  
  Similar to the events tracer, this doesn't need to be activated via
  current_tracer. Instead of that, add probe points via
diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt

index fa7b680ee8a005acf245ae60aa3404b8e9aaafb7..bf526a7c5559a87829fc05ea5f592d9609ed5f3b 100644 (file)
--- a/Documentation/trace/uprobetracer.txt
+++ b/Documentation/trace/uprobetracer.txt
@@ -7,7 +7,7 @@
  Overview
  --------
  Uprobe based trace events are similar to kprobe based trace events.
-To enable this feature, build your kernel with CONFIG_UPROBE_EVENT=y.
+To enable this feature, build your kernel with CONFIG_UPROBE_EVENTS=y.
  
  Similar to the kprobe-event tracer, this doesn't need to be activated via
  current_tracer. Instead of that, add probe points via
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt

index 069450938b795df4e6f5e16f39b864e8011fb844..fd106899afd1b2cf53d88d0fd2aacc85625ebe57 100644 (file)
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -951,6 +951,10 @@ This ioctl allows the user to create or modify a guest physical memory
  slot.  When changing an existing slot, it may be moved in the guest
  physical memory space, or its flags may be modified.  It may not be
  resized.  Slots may not overlap in guest physical address space.
+Bits 0-15 of "slot" specify the slot id and this value should be
+less than the maximum number of user memory slots supported per VM.
+The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS,
+if this capability is supported by the architecture.
  
  If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot"
  specifies the address space which is being modified.  They must be
@@ -3373,6 +3377,69 @@ struct kvm_ppc_resize_hpt {
         __u32 pad;
  };
  
+4.104 KVM_X86_GET_MCE_CAP_SUPPORTED
+
+Capability: KVM_CAP_MCE
+Architectures: x86
+Type: system ioctl
+Parameters: u64 mce_cap (out)
+Returns: 0 on success, -1 on error
+
+Returns supported MCE capabilities. The u64 mce_cap parameter
+has the same format as the MSR_IA32_MCG_CAP register. Supported
+capabilities will have the corresponding bits set.
+
+4.105 KVM_X86_SETUP_MCE
+
+Capability: KVM_CAP_MCE
+Architectures: x86
+Type: vcpu ioctl
+Parameters: u64 mcg_cap (in)
+Returns: 0 on success,
+         -EFAULT if u64 mcg_cap cannot be read,
+         -EINVAL if the requested number of banks is invalid,
+         -EINVAL if requested MCE capability is not supported.
+
+Initializes MCE support for use. The u64 mcg_cap parameter
+has the same format as the MSR_IA32_MCG_CAP register and
+specifies which capabilities should be enabled. The maximum
+supported number of error-reporting banks can be retrieved when
+checking for KVM_CAP_MCE. The supported capabilities can be
+retrieved with KVM_X86_GET_MCE_CAP_SUPPORTED.
+
+4.106 KVM_X86_SET_MCE
+
+Capability: KVM_CAP_MCE
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_x86_mce (in)
+Returns: 0 on success,
+         -EFAULT if struct kvm_x86_mce cannot be read,
+         -EINVAL if the bank number is invalid,
+         -EINVAL if VAL bit is not set in status field.
+
+Inject a machine check error (MCE) into the guest. The input
+parameter is:
+
+struct kvm_x86_mce {
+       __u64 status;
+       __u64 addr;
+       __u64 misc;
+       __u64 mcg_status;
+       __u8 bank;
+       __u8 pad1[7];
+       __u64 pad2[3];
+};
+
+If the MCE being reported is an uncorrected error, KVM will
+inject it as an MCE exception into the guest. If the guest
+MCG_STATUS register reports that an MCE is in progress, KVM
+causes a KVM_EXIT_SHUTDOWN vmexit.
+
+Otherwise, if the MCE is a corrected error, KVM will just
+store it in the corresponding bank (provided this bank is
+not holding a previously reported uncorrected error).
+
  5. The kvm_run structure
  ------------------------
  
diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt

index 76e61c883347415f22242060b46ec13ac68cc09c..b2f60ca8b60cad64d9ffba81f61919bc4b8edacb 100644 (file)
--- a/Documentation/virtual/kvm/devices/arm-vgic.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -83,6 +83,12 @@ Groups:
  
      Bits for undefined preemption levels are RAZ/WI.
  
+    For historical reasons and to provide ABI compatibility with userspace we
+    export the GICC_PMR register in the format of the GICH_VMCR.VMPriMask
+    field in the lower 5 bits of a word, meaning that userspace must always
+    use the lower 5 bits to communicate with the KVM device and must shift the
+    value left by 3 places to obtain the actual priority mask level.
+
    Limitations:
      - Priorities are not implemented, and registers are RAZ/WI
      - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
diff --git a/Documentation/vm/userfaultfd.txt b/Documentation/vm/userfaultfd.txt

index 0e5543a920e5b2595f4d194462ba722ce25bd80d..bb2f945f87ab6a2e59f2e7503b7500c08c5427d9 100644 (file)
--- a/Documentation/vm/userfaultfd.txt
+++ b/Documentation/vm/userfaultfd.txt
@@ -172,10 +172,6 @@ the same read(2) protocol as for the page fault notifications. The
  manager has to explicitly enable these events by setting appropriate
  bits in uffdio_api.features passed to UFFDIO_API ioctl:
  
-UFFD_FEATURE_EVENT_EXIT - enable notification about exit() of the
-non-cooperative process. When the monitored process exits, the uffd
-manager will get UFFD_EVENT_EXIT.
-
  UFFD_FEATURE_EVENT_FORK - enable userfaultfd hooks for fork(). When
  this feature is enabled, the userfaultfd context of the parent process
  is duplicated into the newly created process. The manager receives
diff --git a/MAINTAINERS b/MAINTAINERS

index c265a5fe48481f548629079cb529137e0a377f31..6e88975942913fec8ab9064d962d64aee6bbd952 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2544,6 +2544,14 @@ F:       block/
  F:     kernel/trace/blktrace.c
  F:     lib/sbitmap.c
  
+BFQ I/O SCHEDULER
+M:     Paolo Valente <paolo.valente@linaro.org>
+M:     Jens Axboe <axboe@kernel.dk>
+L:     linux-block@vger.kernel.org
+S:     Maintained
+F:     block/bfq-*
+F:     Documentation/block/bfq-iosched.txt
+
  BLOCK2MTD DRIVER
  M:     Joern Engel <joern@lazybastard.org>
  L:     linux-mtd@lists.infradead.org
@@ -2585,12 +2593,26 @@ F:      include/uapi/linux/if_bonding.h
  
  BPF (Safe dynamic programs and tools)
  M:     Alexei Starovoitov <ast@kernel.org>
+M:     Daniel Borkmann <daniel@iogearbox.net>
  L:     netdev@vger.kernel.org
  L:     linux-kernel@vger.kernel.org
  S:     Supported
+F:     arch/x86/net/bpf_jit*
+F:     Documentation/networking/filter.txt
+F:     include/linux/bpf*
+F:     include/linux/filter.h
+F:     include/uapi/linux/bpf*
+F:     include/uapi/linux/filter.h
  F:     kernel/bpf/
-F:     tools/testing/selftests/bpf/
+F:     kernel/trace/bpf_trace.c
  F:     lib/test_bpf.c
+F:     net/bpf/
+F:     net/core/filter.c
+F:     net/sched/act_bpf.c
+F:     net/sched/cls_bpf.c
+F:     samples/bpf/
+F:     tools/net/bpf*
+F:     tools/testing/selftests/bpf/
  
  BROADCOM B44 10/100 ETHERNET DRIVER
  M:     Michael Chan <michael.chan@broadcom.com>
@@ -3216,7 +3238,6 @@ F:        drivers/platform/chrome/
  
  CISCO VIC ETHERNET NIC DRIVER
  M:     Christian Benvenuti <benve@cisco.com>
-M:     Sujith Sankar <ssujith@cisco.com>
  M:     Govindarajulu Varadarajan <_govind@gmx.com>
  M:     Neel Patel <neepatel@cisco.com>
  S:     Supported
@@ -4118,14 +4139,13 @@ F:      drivers/block/drbd/
  F:     lib/lru_cache.c
  F:     Documentation/blockdev/drbd/
  
-DRIVER CORE, KOBJECTS, DEBUGFS, KERNFS AND SYSFS
+DRIVER CORE, KOBJECTS, DEBUGFS AND SYSFS
  M:     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
  T:     git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git
  S:     Supported
  F:     Documentation/kobject.txt
  F:     drivers/base/
  F:     fs/debugfs/
-F:     fs/kernfs/
  F:     fs/sysfs/
  F:     include/linux/debugfs.h
  F:     include/linux/kobj*
@@ -4695,6 +4715,7 @@ L:        linux-edac@vger.kernel.org
  L:     linux-mips@linux-mips.org
  S:     Supported
  F:     drivers/edac/octeon_edac*
+F:     drivers/edac/thunderx_edac*
  
  EDAC-E752X
  M:     Mark Gross <mark.gross@intel.com>
@@ -4776,6 +4797,12 @@ L:       linux-edac@vger.kernel.org
  S:     Maintained
  F:     drivers/edac/mpc85xx_edac.[ch]
  
+EDAC-PND2
+M:     Tony Luck <tony.luck@intel.com>
+L:     linux-edac@vger.kernel.org
+S:     Maintained
+F:     drivers/edac/pnd2_edac.[ch]
+
  EDAC-PASEMI
  M:     Egor Martovetsky <egor@pasemi.com>
  L:     linux-edac@vger.kernel.org
@@ -4923,6 +4950,7 @@ F:        include/linux/netfilter_bridge/
  F:     net/bridge/
  
  ETHERNET PHY LIBRARY
+M:     Andrew Lunn <andrew@lunn.ch>
  M:     Florian Fainelli <f.fainelli@gmail.com>
  L:     netdev@vger.kernel.org
  S:     Maintained
@@ -7084,9 +7112,9 @@ S:        Maintained
  F:     fs/autofs4/
  
  KERNEL BUILD + files below scripts/ (unless maintained elsewhere)
+M:     Masahiro Yamada <yamada.masahiro@socionext.com>
  M:     Michal Marek <mmarek@suse.com>
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild.git for-next
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild.git rc-fixes
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git
  L:     linux-kbuild@vger.kernel.org
  S:     Maintained
  F:     Documentation/kbuild/
@@ -7203,6 +7231,14 @@ F:       arch/mips/include/uapi/asm/kvm*
  F:     arch/mips/include/asm/kvm*
  F:     arch/mips/kvm/
  
+KERNFS
+M:     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+M:     Tejun Heo <tj@kernel.org>
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git
+S:     Supported
+F:     include/linux/kernfs.h
+F:     fs/kernfs/
+
  KEXEC
  M:     Eric Biederman <ebiederm@xmission.com>
  W:     http://kernel.org/pub/linux/utils/kernel/kexec/
@@ -7774,13 +7810,6 @@ F:       include/net/mac80211.h
  F:     net/mac80211/
  F:     drivers/net/wireless/mac80211_hwsim.[ch]
  
-MACVLAN DRIVER
-M:     Patrick McHardy <kaber@trash.net>
-L:     netdev@vger.kernel.org
-S:     Maintained
-F:     drivers/net/macvlan.c
-F:     include/linux/if_macvlan.h
-
  MAILBOX API
  M:     Jassi Brar <jassisinghbrar@gmail.com>
  L:     linux-kernel@vger.kernel.org
@@ -7853,6 +7882,8 @@ F:        drivers/net/ethernet/marvell/mvneta.*
  MARVELL MWIFIEX WIRELESS DRIVER
  M:     Amitkumar Karwar <akarwar@marvell.com>
  M:     Nishant Sarmukadam <nishants@marvell.com>
+M:     Ganapathi Bhat <gbhat@marvell.com>
+M:     Xinming Hu <huxm@marvell.com>
  L:     linux-wireless@vger.kernel.org
  S:     Maintained
  F:     drivers/net/wireless/marvell/mwifiex/
@@ -8307,7 +8338,6 @@ M:        Richard Leitner <richard.leitner@skidata.com>
  L:     linux-usb@vger.kernel.org
  S:     Maintained
  F:     drivers/usb/misc/usb251xb.c
-F:     include/linux/platform_data/usb251xb.h
  F:     Documentation/devicetree/bindings/usb/usb251xb.txt
  
  MICROSOFT SURFACE PRO 3 BUTTON DRIVER
@@ -8754,6 +8784,7 @@ W:        http://www.linuxfoundation.org/en/Net
  Q:     http://patchwork.ozlabs.org/project/netdev/list/
  T:     git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git
  T:     git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git
+B:     mailto:netdev@vger.kernel.org
  S:     Maintained
  F:     net/
  F:     include/net/
@@ -10815,6 +10846,7 @@ F:      drivers/s390/block/dasd*
  F:     block/partitions/ibm.c
  
  S390 NETWORK DRIVERS
+M:     Julian Wiedmann <jwi@linux.vnet.ibm.com>
  M:     Ursula Braun <ubraun@linux.vnet.ibm.com>
  L:     linux-s390@vger.kernel.org
  W:     http://www.ibm.com/developerworks/linux/linux390/
@@ -10845,6 +10877,7 @@ S:      Supported
  F:     drivers/s390/scsi/zfcp_*
  
  S390 IUCV NETWORK LAYER
+M:     Julian Wiedmann <jwi@linux.vnet.ibm.com>
  M:     Ursula Braun <ubraun@linux.vnet.ibm.com>
  L:     linux-s390@vger.kernel.org
  W:     http://www.ibm.com/developerworks/linux/linux390/
@@ -12455,7 +12488,6 @@ F:      drivers/clk/ti/
  F:     include/linux/clk/ti.h
  
  TI ETHERNET SWITCH DRIVER (CPSW)
-M:     Mugunthan V N <mugunthanvnm@ti.com>
  R:     Grygorii Strashko <grygorii.strashko@ti.com>
  L:     linux-omap@vger.kernel.org
  L:     netdev@vger.kernel.org
@@ -13296,7 +13328,7 @@ F:      drivers/virtio/
  F:     tools/virtio/
  F:     drivers/net/virtio_net.c
  F:     drivers/block/virtio_blk.c
-F:     include/linux/virtio_*.h
+F:     include/linux/virtio*.h
  F:     include/uapi/linux/virtio_*.h
  F:     drivers/crypto/virtio/
  
@@ -13384,14 +13416,6 @@ W:     https://linuxtv.org
  S:     Maintained
  F:     drivers/media/platform/vivid/*
  
-VLAN (802.1Q)
-M:     Patrick McHardy <kaber@trash.net>
-L:     netdev@vger.kernel.org
-S:     Maintained
-F:     drivers/net/macvlan.c
-F:     include/linux/if_*vlan.h
-F:     net/8021q/
-
  VLYNQ BUS
  M:     Florian Fainelli <f.fainelli@gmail.com>
  L:     openwrt-devel@lists.openwrt.org (subscribers-only)
diff --git a/Makefile b/Makefile

index 165cf9783a5dbb28ff82ac206d667bcba3b572b6..4b074a904106fa91e75d52c6ea459a2b084acc34 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
  VERSION = 4
  PATCHLEVEL = 11
  SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION =
  NAME = Fearless Coyote
  
  # *DOCUMENTATION*
@@ -372,7 +372,7 @@ LDFLAGS_MODULE  =
  CFLAGS_KERNEL  =
  AFLAGS_KERNEL  =
  LDFLAGS_vmlinux =
-CFLAGS_GCOV    = -fprofile-arcs -ftest-coverage -fno-tree-loop-im -Wno-maybe-uninitialized
+CFLAGS_GCOV    := -fprofile-arcs -ftest-coverage -fno-tree-loop-im $(call cc-disable-warning,maybe-uninitialized,)
  CFLAGS_KCOV    := $(call cc-option,-fsanitize-coverage=trace-pc,)
  
  
@@ -653,6 +653,12 @@ KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0409, \
  # Tell gcc to never replace conditional load with a non-conditional one
  KBUILD_CFLAGS  += $(call cc-option,--param=allow-store-data-races=0)
  
+# check for 'asm goto'
+ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLAGS)), y)
+       KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
+       KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO
+endif
+
  include scripts/Makefile.gcc-plugins
  
  ifdef CONFIG_READABLE_ASM
@@ -798,12 +804,6 @@ KBUILD_CFLAGS   += $(call cc-option,-Werror=incompatible-pointer-types)
  # use the deterministic mode of AR if available
  KBUILD_ARFLAGS := $(call ar-option,D)
  
-# check for 'asm goto'
-ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLAGS)), y)
-       KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
-       KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO
-endif
-
  include scripts/Makefile.kasan
  include scripts/Makefile.extrawarn
  include scripts/Makefile.ubsan
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c

index 0b961093ca5cac69fe88598ed7ebc9c023a11a78..6d76e528ab8f2606ddde5e3b739598b86a7cb5b4 100644 (file)
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1290,7 +1290,7 @@ SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p)
         /* copy relevant bits of struct timex. */
         if (copy_from_user(&txc, txc_p, offsetof(struct timex32, time)) ||
             copy_from_user(&txc.tick, &txc_p->tick, sizeof(struct timex32) - 
-                          offsetof(struct timex32, time)))
+                          offsetof(struct timex32, tick)))
           return -EFAULT;
  
         ret = do_adjtimex(&txc);        
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig

index c9f30f4763abce5ca1ffda1817e87bb5de410861..5d7fb3e7cb97159012d49e6cee745ada293949a6 100644 (file)
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -406,6 +406,14 @@ config ARC_HAS_DIV_REM
         bool "Insn: div, divu, rem, remu"
         default y
  
+config ARC_HAS_ACCL_REGS
+       bool "Reg Pair ACCL:ACCH (FPU and/or MPY > 6)"
+       default n
+       help
+         Depending on the configuration, CPU can contain accumulator reg-pair
+         (also referred to as r58:r59). These can also be used by gcc as GPR so
+         kernel needs to save/restore per process
+
  endif  # ISA_ARCV2
  
  endmenu   # "ARC CPU Configuration"
diff --git a/arch/arc/boot/dts/skeleton.dtsi b/arch/arc/boot/dts/skeleton.dtsi

index 65808fe0a290be15ded0903ad347540d0f35a52e..2891cb266cf0b5f855e4d89bcfa8aef194bd483f 100644 (file)
--- a/arch/arc/boot/dts/skeleton.dtsi
+++ b/arch/arc/boot/dts/skeleton.dtsi
@@ -26,6 +26,7 @@
                         device_type = "cpu";
                         compatible = "snps,arc770d";
                         reg = <0>;
+                       clocks = <&core_clk>;
                 };
         };
  
diff --git a/arch/arc/boot/dts/skeleton_hs.dtsi b/arch/arc/boot/dts/skeleton_hs.dtsi

index 2dfe8037dfbb34ac680597619ecef27319bed1a2..5e944d3e5b74f61387b1f285e6837f96f5f66127 100644 (file)
--- a/arch/arc/boot/dts/skeleton_hs.dtsi
+++ b/arch/arc/boot/dts/skeleton_hs.dtsi
@@ -21,6 +21,7 @@
                         device_type = "cpu";
                         compatible = "snps,archs38";
                         reg = <0>;
+                       clocks = <&core_clk>;
                 };
         };
  
diff --git a/arch/arc/boot/dts/skeleton_hs_idu.dtsi b/arch/arc/boot/dts/skeleton_hs_idu.dtsi

index 4c11079f3565a3decc7f6401a92950df16565411..54b277d7dea0e4594245aa50df0a3276a85a9916 100644 (file)
--- a/arch/arc/boot/dts/skeleton_hs_idu.dtsi
+++ b/arch/arc/boot/dts/skeleton_hs_idu.dtsi
@@ -19,8 +19,27 @@
  
                 cpu@0 {
                         device_type = "cpu";
-                       compatible = "snps,archs38xN";
+                       compatible = "snps,archs38";
                         reg = <0>;
+                       clocks = <&core_clk>;
+               };
+               cpu@1 {
+                       device_type = "cpu";
+                       compatible = "snps,archs38";
+                       reg = <1>;
+                       clocks = <&core_clk>;
+               };
+               cpu@2 {
+                       device_type = "cpu";
+                       compatible = "snps,archs38";
+                       reg = <2>;
+                       clocks = <&core_clk>;
+               };
+               cpu@3 {
+                       device_type = "cpu";
+                       compatible = "snps,archs38";
+                       reg = <3>;
+                       clocks = <&core_clk>;
                 };
         };
  
diff --git a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi

index f0df59b23e21e473c6f2987593c1f7ccd9cc5958..459fc656b759aee977c560dcbbc996c6deed0cd3 100644 (file)
--- a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi
+++ b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi
@@ -112,13 +112,19 @@
                         interrupts = <7>;
                         bus-width = <4>;
                 };
+       };
  
-               /* Embedded Vision subsystem UIO mappings; only relevant for EV VDK */
-               uio_ev: uio@0xD0000000 {
-                       compatible = "generic-uio";
-                       reg = <0xD0000000 0x2000 0xD1000000 0x2000 0x90000000 0x10000000 0xC0000000 0x10000000>;
-                       reg-names = "ev_gsa", "ev_ctrl", "ev_shared_mem", "ev_code_mem";
-                       interrupts = <23>;
-               };
+       /*
+        * Embedded Vision subsystem UIO mappings; only relevant for EV VDK
+        *
+        * This node is intentionally put outside of MB above because
+        * it maps areas outside of MB's 0xEz-0xFz.
+        */
+       uio_ev: uio@0xD0000000 {
+               compatible = "generic-uio";
+               reg = <0xD0000000 0x2000 0xD1000000 0x2000 0x90000000 0x10000000 0xC0000000 0x10000000>;
+               reg-names = "ev_gsa", "ev_ctrl", "ev_shared_mem", "ev_code_mem";
+               interrupt-parent = <&mb_intc>;
+               interrupts = <23>;
         };
  };
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h

index b65930a4958959fb65891ac375a7637a0e5cb146..54b54da6384c197a93848e05ef292c224f5c0f91 100644 (file)
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -17,10 +17,11 @@
  #include <asm/barrier.h>
  #include <asm/smp.h>
  
+#define ATOMIC_INIT(i) { (i) }
+
  #ifndef CONFIG_ARC_PLAT_EZNPS
  
  #define atomic_read(v)  READ_ONCE((v)->counter)
-#define ATOMIC_INIT(i) { (i) }
  
  #ifdef CONFIG_ARC_HAS_LLSC
  
diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h

index aee1a77934cf694e37ae579347a37bc167e43762..ac85380d14a4bb364bb9077e8c8b329f862845b8 100644 (file)
--- a/arch/arc/include/asm/entry-arcv2.h
+++ b/arch/arc/include/asm/entry-arcv2.h
@@ -16,6 +16,11 @@
         ;
         ; Now manually save: r12, sp, fp, gp, r25
  
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+       PUSH    r59
+       PUSH    r58
+#endif
+
         PUSH    r30
         PUSH    r12
  
@@ -75,6 +80,11 @@
         POP     r12
         POP     r30
  
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+       POP     r58
+       POP     r59
+#endif
+
  .endm
  
  /*------------------------------------------------------------------------*/
diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h

index 317ff773e1ca5f4de6e7ab03dc1c9f184426a2d2..b18fcb6069082220b00790fbe6f7008d9d7aa570 100644 (file)
--- a/arch/arc/include/asm/hugepage.h
+++ b/arch/arc/include/asm/hugepage.h
@@ -11,6 +11,7 @@
  #define _ASM_ARC_HUGEPAGE_H
  
  #include <linux/types.h>
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopmd.h>
  
  static inline pte_t pmd_pte(pmd_t pmd)
diff --git a/arch/arc/include/asm/kprobes.h b/arch/arc/include/asm/kprobes.h

index 00bdbe167615ec2d97c7bccec66595217741cd83..2e52d18e6bc7ee3661d055c2ae6d98806478bb50 100644 (file)
--- a/arch/arc/include/asm/kprobes.h
+++ b/arch/arc/include/asm/kprobes.h
@@ -54,9 +54,7 @@ int kprobe_fault_handler(struct pt_regs *regs, unsigned long cause);
  void kretprobe_trampoline(void);
  void trap_is_kprobe(unsigned long address, struct pt_regs *regs);
  #else
-static void trap_is_kprobe(unsigned long address, struct pt_regs *regs)
-{
-}
+#define trap_is_kprobe(address, regs)
  #endif /* CONFIG_KPROBES */
  
  #endif /* _ARC_KPROBES_H */
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h

index e94ca72b974e7c7b31c2d631cb773ab3ad707b8d..ee22d40afef43b37dec7d93f0f1ee87060607f93 100644 (file)
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -37,6 +37,7 @@
  
  #include <asm/page.h>
  #include <asm/mmu.h>
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopmd.h>
  #include <linux/const.h>
  
diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h

index 47111d565a959d117ab9e2c7c9eea3b852137971..5297faa8a37803fd702da4270a7e23f5d6ba4f2b 100644 (file)
--- a/arch/arc/include/asm/ptrace.h
+++ b/arch/arc/include/asm/ptrace.h
@@ -86,6 +86,10 @@ struct pt_regs {
  
         unsigned long r12, r30;
  
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+       unsigned long r58, r59; /* ACCL/ACCH used by FPU / DSP MPY */
+#endif
+
         /*------- Below list auto saved by h/w -----------*/
         unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11;
  
diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S

index 2585632eaa6891d511252e1aee7cce4114725533..cc558a25b8fa690d1c72afed97f80161e4167db6 100644 (file)
--- a/arch/arc/kernel/entry-arcv2.S
+++ b/arch/arc/kernel/entry-arcv2.S
@@ -100,15 +100,21 @@ END(handle_interrupt)
  ;################### Non TLB Exception Handling #############################
  
  ENTRY(EV_SWI)
-       flag 1
+       ; TODO: implement this
+       EXCEPTION_PROLOGUE
+       b   ret_from_exception
  END(EV_SWI)
  
  ENTRY(EV_DivZero)
-       flag 1
+       ; TODO: implement this
+       EXCEPTION_PROLOGUE
+       b   ret_from_exception
  END(EV_DivZero)
  
  ENTRY(EV_DCError)
-       flag 1
+       ; TODO: implement this
+       EXCEPTION_PROLOGUE
+       b   ret_from_exception
  END(EV_DCError)
  
  ; ---------------------------------------------
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c

index 3093fa898a236a1b6ae18757bbaf3f954c49dc26..fc8211f338ad33cab4036a09693cb77e1ea62a98 100644 (file)
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -10,6 +10,7 @@
  #include <linux/fs.h>
  #include <linux/delay.h>
  #include <linux/root_dev.h>
+#include <linux/clk.h>
  #include <linux/clk-provider.h>
  #include <linux/clocksource.h>
  #include <linux/console.h>
@@ -318,7 +319,8 @@ static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
  static void arc_chk_core_config(void)
  {
         struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
-       int fpu_enabled;
+       int saved = 0, present = 0;
+       char *opt_nm = NULL;;
  
         if (!cpu->extn.timer0)
                 panic("Timer0 is not present!\n");
@@ -345,17 +347,28 @@ static void arc_chk_core_config(void)
  
         /*
          * FP hardware/software config sanity
-        * -If hardware contains DPFP, kernel needs to save/restore FPU state
+        * -If hardware present, kernel needs to save/restore FPU state
          * -If not, it will crash trying to save/restore the non-existant regs
-        *
-        * (only DPDP checked since SP has no arch visible regs)
          */
-       fpu_enabled = IS_ENABLED(CONFIG_ARC_FPU_SAVE_RESTORE);
  
-       if (cpu->extn.fpu_dp && !fpu_enabled)
-               pr_warn("CONFIG_ARC_FPU_SAVE_RESTORE needed for working apps\n");
-       else if (!cpu->extn.fpu_dp && fpu_enabled)
-               panic("FPU non-existent, disable CONFIG_ARC_FPU_SAVE_RESTORE\n");
+       if (is_isa_arcompact()) {
+               opt_nm = "CONFIG_ARC_FPU_SAVE_RESTORE";
+               saved = IS_ENABLED(CONFIG_ARC_FPU_SAVE_RESTORE);
+
+               /* only DPFP checked since SP has no arch visible regs */
+               present = cpu->extn.fpu_dp;
+       } else {
+               opt_nm = "CONFIG_ARC_HAS_ACCL_REGS";
+               saved = IS_ENABLED(CONFIG_ARC_HAS_ACCL_REGS);
+
+               /* Accumulator Low:High pair (r58:59) present if DSP MPY or FPU */
+               present = cpu->extn_mpy.dsp | cpu->extn.fpu_sp | cpu->extn.fpu_dp;
+       }
+
+       if (present && !saved)
+               pr_warn("Enable %s for working apps\n", opt_nm);
+       else if (!present && saved)
+               panic("Disable %s, hardware NOT present\n", opt_nm);
  }
  
  /*
@@ -488,8 +501,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
  {
         char *str;
         int cpu_id = ptr_to_cpu(v);
-       struct device_node *core_clk = of_find_node_by_name(NULL, "core_clk");
-       u32 freq = 0;
+       struct device *cpu_dev = get_cpu_device(cpu_id);
+       struct clk *cpu_clk;
+       unsigned long freq = 0;
  
         if (!cpu_online(cpu_id)) {
                 seq_printf(m, "processor [%d]\t: Offline\n", cpu_id);
@@ -502,9 +516,15 @@ static int show_cpuinfo(struct seq_file *m, void *v)
  
         seq_printf(m, arc_cpu_mumbojumbo(cpu_id, str, PAGE_SIZE));
  
-       of_property_read_u32(core_clk, "clock-frequency", &freq);
+       cpu_clk = clk_get(cpu_dev, NULL);
+       if (IS_ERR(cpu_clk)) {
+               seq_printf(m, "CPU speed \t: Cannot get clock for processor [%d]\n",
+                          cpu_id);
+       } else {
+               freq = clk_get_rate(cpu_clk);
+       }
         if (freq)
-               seq_printf(m, "CPU speed\t: %u.%02u Mhz\n",
+               seq_printf(m, "CPU speed\t: %lu.%02lu Mhz\n",
                            freq / 1000000, (freq / 10000) % 100);
  
         seq_printf(m, "Bogo MIPS\t: %lu.%02lu\n",
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c

index d408fa21a07c9937a0e2956a6e12a7895ffef684..928562967f3cd02fee4a5de34e687d442d64a4e0 100644 (file)
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -633,6 +633,9 @@ noinline static void slc_entire_op(const int op)
  
         write_aux_reg(ARC_REG_SLC_INVALIDATE, 1);
  
+       /* Make sure "busy" bit reports correct status, see STAR 9001165532 */
+       read_aux_reg(r);
+
         /* Important to wait for flush to complete */
         while (read_aux_reg(r) & SLC_CTRL_BUSY);
  }
diff --git a/arch/arm/boot/dts/am335x-baltos.dtsi b/arch/arm/boot/dts/am335x-baltos.dtsi

index efb5eae290a8b7437b95206fa9f69ae4ccecab44..d42b98f15e8b97aaa5956047b51bd1d4a8d610ed 100644 (file)
--- a/arch/arm/boot/dts/am335x-baltos.dtsi
+++ b/arch/arm/boot/dts/am335x-baltos.dtsi
@@ -371,6 +371,8 @@
  
         phy1: ethernet-phy@1 {
                 reg = <7>;
+               eee-broken-100tx;
+               eee-broken-1000t;
         };
  };
  
diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts

index 9e43c443738a8acedd2c0e160a5b6ddc9e0f6c39..9ba4b18c0cb21711dcd8ef60648a0916914819e7 100644 (file)
--- a/arch/arm/boot/dts/am335x-evmsk.dts
+++ b/arch/arm/boot/dts/am335x-evmsk.dts
@@ -672,6 +672,7 @@
         ti,non-removable;
         bus-width = <4>;
         cap-power-off-card;
+       keep-power-in-suspend;
         pinctrl-names = "default";
         pinctrl-0 = <&mmc2_pins>;
  
diff --git a/arch/arm/boot/dts/am335x-pcm-953.dtsi b/arch/arm/boot/dts/am335x-pcm-953.dtsi

index 02981eae96b99413f28ddb76cdcc90c810c5ae37..1ec8e0d801912fbb080b7f3ff880d017f60bf320 100644 (file)
--- a/arch/arm/boot/dts/am335x-pcm-953.dtsi
+++ b/arch/arm/boot/dts/am335x-pcm-953.dtsi
@@ -63,14 +63,14 @@
                         label = "home";
                         linux,code = <KEY_HOME>;
                         gpios = <&gpio3 7 GPIO_ACTIVE_HIGH>;
-                       gpio-key,wakeup;
+                       wakeup-source;
                 };
  
                 button@1 {
                         label = "menu";
                         linux,code = <KEY_MENU>;
                         gpios = <&gpio3 8 GPIO_ACTIVE_HIGH>;
-                       gpio-key,wakeup;
+                       wakeup-source;
                 };
  
         };
diff --git a/arch/arm/boot/dts/am57xx-idk-common.dtsi b/arch/arm/boot/dts/am57xx-idk-common.dtsi

index 0d341c545b010fb810b890166eae3aa7967313ea..e5ac1d81d15c9e482ab06b41830e5230a0187e69 100644 (file)
--- a/arch/arm/boot/dts/am57xx-idk-common.dtsi
+++ b/arch/arm/boot/dts/am57xx-idk-common.dtsi
@@ -315,6 +315,13 @@
                         /* ID & VBUS GPIOs provided in board dts */
                 };
         };
+
+       tpic2810: tpic2810@60 {
+               compatible = "ti,tpic2810";
+               reg = <0x60>;
+               gpio-controller;
+               #gpio-cells = <2>;
+       };
  };
  
  &mcspi3 {
@@ -330,13 +337,6 @@
                 spi-max-frequency = <1000000>;
                 spi-cpol;
         };
-
-       tpic2810: tpic2810@60 {
-               compatible = "ti,tpic2810";
-               reg = <0x60>;
-               gpio-controller;
-               #gpio-cells = <2>;
-       };
  };
  
  &uart3 {
diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi

index 4fbb089cf5ad3c1f96a15f504ee433ff06b724df..00de62dc0042f1445851d1cb84c2cbea3e28d25f 100644 (file)
--- a/arch/arm/boot/dts/bcm5301x.dtsi
+++ b/arch/arm/boot/dts/bcm5301x.dtsi
@@ -66,14 +66,14 @@
                 timer@20200 {
                         compatible = "arm,cortex-a9-global-timer";
                         reg = <0x20200 0x100>;
-                       interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>;
+                       interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>;
                         clocks = <&periph_clk>;
                 };
  
                 local-timer@20600 {
                         compatible = "arm,cortex-a9-twd-timer";
                         reg = <0x20600 0x100>;
-                       interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_HIGH>;
+                       interrupts = <GIC_PPI 13 IRQ_TYPE_EDGE_RISING>;
                         clocks = <&periph_clk>;
                 };
  
diff --git a/arch/arm/boot/dts/bcm953012k.dts b/arch/arm/boot/dts/bcm953012k.dts

index bfd923096a8c1f5487b4bed2302ee8c784c395c7..ae31a5826e918ec58fc1948df0f9998664b7064f 100644 (file)
--- a/arch/arm/boot/dts/bcm953012k.dts
+++ b/arch/arm/boot/dts/bcm953012k.dts
@@ -48,15 +48,14 @@
         };
  
         memory {
-               reg = <0x00000000 0x10000000>;
+               reg = <0x80000000 0x10000000>;
         };
  };
  
  &uart0 {
-       clock-frequency = <62499840>;
+       status = "okay";
  };
  
  &uart1 {
-       clock-frequency = <62499840>;
         status = "okay";
  };
diff --git a/arch/arm/boot/dts/bcm958522er.dts b/arch/arm/boot/dts/bcm958522er.dts

index 3f04a40eb90cc904afb27fb71603270a8d8e1bea..df05e7f568af3e36bb2aa703d39fdbec37a6cece 100644 (file)
--- a/arch/arm/boot/dts/bcm958522er.dts
+++ b/arch/arm/boot/dts/bcm958522er.dts
@@ -55,6 +55,7 @@
         gpio-restart {
                 compatible = "gpio-restart";
                 gpios = <&gpioa 15 GPIO_ACTIVE_LOW>;
+               open-source;
                 priority = <200>;
         };
  };
diff --git a/arch/arm/boot/dts/bcm958525er.dts b/arch/arm/boot/dts/bcm958525er.dts

index 9fd542200d3d52229e1a0651330a5ef48b2ee8a9..4a3ab19c62819fb8c57d9ccf406100b64b15c598 100644 (file)
--- a/arch/arm/boot/dts/bcm958525er.dts
+++ b/arch/arm/boot/dts/bcm958525er.dts
@@ -55,6 +55,7 @@
         gpio-restart {
                 compatible = "gpio-restart";
                 gpios = <&gpioa 15 GPIO_ACTIVE_LOW>;
+               open-source;
                 priority = <200>;
         };
  };
diff --git a/arch/arm/boot/dts/bcm958525xmc.dts b/arch/arm/boot/dts/bcm958525xmc.dts

index 41e7fd350fcd1bbf6c36008c5f1e1c179679ffe0..81f78435d8c76cca38cb777d86e139c7d3751151 100644 (file)
--- a/arch/arm/boot/dts/bcm958525xmc.dts
+++ b/arch/arm/boot/dts/bcm958525xmc.dts
@@ -55,6 +55,7 @@
         gpio-restart {
                 compatible = "gpio-restart";
                 gpios = <&gpioa 31 GPIO_ACTIVE_LOW>;
+               open-source;
                 priority = <200>;
         };
  };
diff --git a/arch/arm/boot/dts/bcm958622hr.dts b/arch/arm/boot/dts/bcm958622hr.dts

index 477c4860db52236fa12a70aef2235c302f4a2a45..c88b8fefcb2f13e3c9bd98321948c8988d8e428d 100644 (file)
--- a/arch/arm/boot/dts/bcm958622hr.dts
+++ b/arch/arm/boot/dts/bcm958622hr.dts
@@ -55,6 +55,7 @@
         gpio-restart {
                 compatible = "gpio-restart";
                 gpios = <&gpioa 15 GPIO_ACTIVE_LOW>;
+               open-source;
                 priority = <200>;
         };
  };
diff --git a/arch/arm/boot/dts/bcm958623hr.dts b/arch/arm/boot/dts/bcm958623hr.dts

index c0a499d5ba447d5503d6655541461dddb9f35317..d503fa0dde310ff7597aeb7d3bfc757bcca32291 100644 (file)
--- a/arch/arm/boot/dts/bcm958623hr.dts
+++ b/arch/arm/boot/dts/bcm958623hr.dts
@@ -55,6 +55,7 @@
         gpio-restart {
                 compatible = "gpio-restart";
                 gpios = <&gpioa 15 GPIO_ACTIVE_LOW>;
+               open-source;
                 priority = <200>;
         };
  };
diff --git a/arch/arm/boot/dts/bcm958625hr.dts b/arch/arm/boot/dts/bcm958625hr.dts

index f7eb5854a224486adbe98a33da6a4ae59b17a90a..cc0363b843c1a0ae777efa40a6f2e25b34cf1f4c 100644 (file)
--- a/arch/arm/boot/dts/bcm958625hr.dts
+++ b/arch/arm/boot/dts/bcm958625hr.dts
@@ -55,6 +55,7 @@
         gpio-restart {
                 compatible = "gpio-restart";
                 gpios = <&gpioa 15 GPIO_ACTIVE_LOW>;
+               open-source;
                 priority = <200>;
         };
  };
diff --git a/arch/arm/boot/dts/bcm988312hr.dts b/arch/arm/boot/dts/bcm988312hr.dts

index 16666324fda8b5b901af538423ad302d13b9f98d..74e15a3cd9f8efb6a65238054824ba7e22df0bc2 100644 (file)
--- a/arch/arm/boot/dts/bcm988312hr.dts
+++ b/arch/arm/boot/dts/bcm988312hr.dts
@@ -55,6 +55,7 @@
         gpio-restart {
                 compatible = "gpio-restart";
                 gpios = <&gpioa 15 GPIO_ACTIVE_LOW>;
+               open-source;
                 priority = <200>;
         };
  };
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi

index 2c9e56f4aac53aa74c206b2cbb3622965d7ab792..bbfb9d5a70a98116d303844a91c6a65dd42cfb39 100644 (file)
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -283,6 +283,7 @@
                                 device_type = "pci";
                                 ranges = <0x81000000 0 0          0x03000 0 0x00010000
                                           0x82000000 0 0x20013000 0x13000 0 0xffed000>;
+                               bus-range = <0x00 0xff>;
                                 #interrupt-cells = <1>;
                                 num-lanes = <1>;
                                 linux,pci-domain = <0>;
@@ -319,6 +320,7 @@
                                 device_type = "pci";
                                 ranges = <0x81000000 0 0          0x03000 0 0x00010000
                                           0x82000000 0 0x30013000 0x13000 0 0xffed000>;
+                               bus-range = <0x00 0xff>;
                                 #interrupt-cells = <1>;
                                 num-lanes = <1>;
                                 linux,pci-domain = <1>;
diff --git a/arch/arm/boot/dts/imx6sx-udoo-neo.dtsi b/arch/arm/boot/dts/imx6sx-udoo-neo.dtsi

index 49f466fe0b1dc2eecdaca3e6638dd80a12029bcd..dcfc9759143375decd12bea87ad5e7547837f3bd 100644 (file)
--- a/arch/arm/boot/dts/imx6sx-udoo-neo.dtsi
+++ b/arch/arm/boot/dts/imx6sx-udoo-neo.dtsi
@@ -121,11 +121,6 @@
         };
  };
  
-&cpu0 {
-       arm-supply = <&sw1a_reg>;
-       soc-supply = <&sw1c_reg>;
-};
-
  &fec1 {
         pinctrl-names = "default";
         pinctrl-0 = <&pinctrl_enet1>;
diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi

index 8f9a69ca818cecb759e71c1b6b97e4073c3e22e4..efe53998c961244fc0cd1ff32a5e53c885b6322f 100644 (file)
--- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
+++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
@@ -121,7 +121,7 @@
  &i2c3 {
         clock-frequency = <400000>;
         at24@50 {
-               compatible = "at24,24c02";
+               compatible = "atmel,24c64";
                 readonly;
                 reg = <0x50>;
         };
diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi

index 22332be7214032fd3ba710def036168f3f9c2b3c..528b4e9c6d3d30928d363ffc6cf1d8d2275a8ce7 100644 (file)
--- a/arch/arm/boot/dts/sama5d2.dtsi
+++ b/arch/arm/boot/dts/sama5d2.dtsi
@@ -266,7 +266,7 @@
                 };
  
                 usb1: ohci@00400000 {
-                       compatible = "atmel,sama5d2-ohci", "usb-ohci";
+                       compatible = "atmel,at91rm9200-ohci", "usb-ohci";
                         reg = <0x00400000 0x100000>;
                         interrupts = <41 IRQ_TYPE_LEVEL_HIGH 2>;
                         clocks = <&uhphs_clk>, <&uhphs_clk>, <&uhpck>;
diff --git a/arch/arm/boot/dts/ste-dbx5x0.dtsi b/arch/arm/boot/dts/ste-dbx5x0.dtsi

index 82d8c477129359952b0ae499a2fdefca9393bdf4..162e1eb5373d3475fa785639223dd80750d5ca42 100644 (file)
--- a/arch/arm/boot/dts/ste-dbx5x0.dtsi
+++ b/arch/arm/boot/dts/ste-dbx5x0.dtsi
@@ -14,6 +14,7 @@
  #include <dt-bindings/mfd/dbx500-prcmu.h>
  #include <dt-bindings/arm/ux500_pm_domains.h>
  #include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/clock/ste-ab8500.h>
  #include "skeleton.dtsi"
  
  / {
@@ -603,6 +604,11 @@
                                 interrupt-controller;
                                 #interrupt-cells = <2>;
  
+                               ab8500_clock: clock-controller {
+                                       compatible = "stericsson,ab8500-clk";
+                                       #clock-cells = <1>;
+                               };
+
                                 ab8500_gpio: ab8500-gpio {
                                         compatible = "stericsson,ab8500-gpio";
                                         gpio-controller;
@@ -686,6 +692,8 @@
  
                                 ab8500-pwm {
                                         compatible = "stericsson,ab8500-pwm";
+                                       clocks = <&ab8500_clock AB8500_SYSCLK_INT>;
+                                       clock-names = "intclk";
                                 };
  
                                 ab8500-debugfs {
@@ -700,6 +708,9 @@
                                         V-AMIC2-supply = <&ab8500_ldo_anamic2_reg>;
                                         V-DMIC-supply = <&ab8500_ldo_dmic_reg>;
  
+                                       clocks = <&ab8500_clock AB8500_SYSCLK_AUDIO>;
+                                       clock-names = "audioclk";
+
                                         stericsson,earpeice-cmv = <950>; /* Units in mV. */
                                 };
  
@@ -1095,6 +1106,14 @@
                         status = "disabled";
                 };
  
+               sound {
+                       compatible = "stericsson,snd-soc-mop500";
+                       stericsson,cpu-dai = <&msp1 &msp3>;
+                       stericsson,audio-codec = <&codec>;
+                       clocks = <&prcmu_clk PRCMU_SYSCLK>, <&ab8500_clock AB8500_SYSCLK_ULP>, <&ab8500_clock AB8500_SYSCLK_INT>;
+                       clock-names = "sysclk", "ulpclk", "intclk";
+               };
+
                 msp0: msp@80123000 {
                         compatible = "stericsson,ux500-msp-i2s";
                         reg = <0x80123000 0x1000>;
diff --git a/arch/arm/boot/dts/ste-href.dtsi b/arch/arm/boot/dts/ste-href.dtsi

index f37f9e10713cc878ce9ee0393aa4b0c6ad239adf..9e359e4f342e76ebd1fbfab57d1f8f427ece2cd7 100644 (file)
--- a/arch/arm/boot/dts/ste-href.dtsi
+++ b/arch/arm/boot/dts/ste-href.dtsi
@@ -186,15 +186,6 @@
                         status = "okay";
                 };
  
-               sound {
-                       compatible = "stericsson,snd-soc-mop500";
-
-                       stericsson,cpu-dai = <&msp1 &msp3>;
-                       stericsson,audio-codec = <&codec>;
-                       clocks = <&prcmu_clk PRCMU_SYSCLK>;
-                       clock-names = "sysclk";
-               };
-
                 msp0: msp@80123000 {
                         pinctrl-names = "default";
                         pinctrl-0 = <&msp0_default_mode>;
diff --git a/arch/arm/boot/dts/ste-snowball.dts b/arch/arm/boot/dts/ste-snowball.dts

index dd5514def6042470aabea1ee29853d6c713c2691..ade1d0d4e5f45c595f079c251c8b93dbe95742df 100644 (file)
--- a/arch/arm/boot/dts/ste-snowball.dts
+++ b/arch/arm/boot/dts/ste-snowball.dts
@@ -159,15 +159,6 @@
                                      "", "", "", "", "", "", "", "";
                 };
  
-               sound {
-                       compatible = "stericsson,snd-soc-mop500";
-
-                       stericsson,cpu-dai = <&msp1 &msp3>;
-                       stericsson,audio-codec = <&codec>;
-                       clocks = <&prcmu_clk PRCMU_SYSCLK>;
-                       clock-names = "sysclk";
-               };
-
                 msp0: msp@80123000 {
                         pinctrl-names = "default";
                         pinctrl-0 = <&msp0_default_mode>;
diff --git a/arch/arm/boot/dts/sun7i-a20-lamobo-r1.dts b/arch/arm/boot/dts/sun7i-a20-lamobo-r1.dts

index 72ec0d5ae052cda33bc05a0086cd7e34a684184c..bbf1c8cbaac6aa19a6acd946a0230220e4e11417 100644 (file)
--- a/arch/arm/boot/dts/sun7i-a20-lamobo-r1.dts
+++ b/arch/arm/boot/dts/sun7i-a20-lamobo-r1.dts
@@ -167,7 +167,7 @@
                                         reg = <8>;
                                         label = "cpu";
                                         ethernet = <&gmac>;
-                                       phy-mode = "rgmii";
+                                       phy-mode = "rgmii-txid";
                                         fixed-link {
                                                 speed = <1000>;
                                                 full-duplex;
diff --git a/arch/arm/boot/dts/sun8i-a23-a33.dtsi b/arch/arm/boot/dts/sun8i-a23-a33.dtsi

index a952cc0703cc172b3a50ab334b1eec6b44a93df8..8a3ed21cb7bcfcf4785784bcb66d10aafd2081e7 100644 (file)
--- a/arch/arm/boot/dts/sun8i-a23-a33.dtsi
+++ b/arch/arm/boot/dts/sun8i-a23-a33.dtsi
@@ -495,7 +495,7 @@
                         resets = <&ccu RST_BUS_GPU>;
  
                         assigned-clocks = <&ccu CLK_GPU>;
-                       assigned-clock-rates = <408000000>;
+                       assigned-clock-rates = <384000000>;
                 };
  
                 gic: interrupt-controller@01c81000 {
diff --git a/arch/arm/boot/dts/sun8i-a33.dtsi b/arch/arm/boot/dts/sun8i-a33.dtsi

index 18c174fef84f512c18145e795276af099d8de07b..306af6cadf26033c6102b61c6a6d7693faba30fe 100644 (file)
--- a/arch/arm/boot/dts/sun8i-a33.dtsi
+++ b/arch/arm/boot/dts/sun8i-a33.dtsi
@@ -66,12 +66,6 @@
                         opp-microvolt = <1200000>;
                         clock-latency-ns = <244144>; /* 8 32k periods */
                 };
-
-               opp@1200000000 {
-                       opp-hz = /bits/ 64 <1200000000>;
-                       opp-microvolt = <1320000>;
-                       clock-latency-ns = <244144>; /* 8 32k periods */
-               };
         };
  
         cpus {
@@ -81,16 +75,22 @@
                         operating-points-v2 = <&cpu0_opp_table>;
                 };
  
+               cpu@1 {
+                       operating-points-v2 = <&cpu0_opp_table>;
+               };
+
                 cpu@2 {
                         compatible = "arm,cortex-a7";
                         device_type = "cpu";
                         reg = <2>;
+                       operating-points-v2 = <&cpu0_opp_table>;
                 };
  
                 cpu@3 {
                         compatible = "arm,cortex-a7";
                         device_type = "cpu";
                         reg = <3>;
+                       operating-points-v2 = <&cpu0_opp_table>;
                 };
         };
  
@@ -113,8 +113,8 @@
                 simple-audio-card,mclk-fs = <512>;
                 simple-audio-card,aux-devs = <&codec_analog>;
                 simple-audio-card,routing =
-                       "Left DAC", "Digital Left DAC",
-                       "Right DAC", "Digital Right DAC";
+                       "Left DAC", "AIF1 Slot 0 Left",
+                       "Right DAC", "AIF1 Slot 0 Right";
                 status = "disabled";
  
                 simple-audio-card,cpu {
diff --git a/arch/arm/boot/dts/sun8i-reference-design-tablet.dtsi b/arch/arm/boot/dts/sun8i-reference-design-tablet.dtsi

index 7097c18ff487d4851ca5d76d73e018b75aed7cb2..d6bd15898db6d6cc880fe0abb4999712450dc50e 100644 (file)
--- a/arch/arm/boot/dts/sun8i-reference-design-tablet.dtsi
+++ b/arch/arm/boot/dts/sun8i-reference-design-tablet.dtsi
@@ -50,8 +50,6 @@
  
         backlight: backlight {
                 compatible = "pwm-backlight";
-               pinctrl-names = "default";
-               pinctrl-0 = <&bl_en_pin>;
                 pwms = <&pwm 0 50000 PWM_POLARITY_INVERTED>;
                 brightness-levels = <0 10 20 30 40 50 60 70 80 90 100>;
                 default-brightness-level = <8>;
@@ -93,11 +91,6 @@
  };
  
  &pio {
-       bl_en_pin: bl_en_pin@0 {
-               pins = "PH6";
-               function = "gpio_in";
-       };
-
         mmc0_cd_pin: mmc0_cd_pin@0 {
                 pins = "PB4";
                 function = "gpio_in";
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig

index a94126fb02c2af5b44dac0948febe57e3997c59a..6aa7be191f1aadee3371254703d2a0dc5c4abc21 100644 (file)
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -748,7 +748,6 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
  CONFIG_LEDS_TRIGGER_TRANSIENT=y
  CONFIG_LEDS_TRIGGER_CAMERA=y
  CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=y
  CONFIG_EDAC_HIGHBANK_MC=y
  CONFIG_EDAC_HIGHBANK_L2=y
  CONFIG_RTC_CLASS=y
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig

index f2462a6bdba6e7fcfc8c04815dd763bdcd5b3069..decd388d613d7e1d65c5d194f77fb23061ed37e0 100644 (file)
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -188,6 +188,7 @@ CONFIG_WL12XX=m
  CONFIG_WL18XX=m
  CONFIG_WLCORE_SPI=m
  CONFIG_WLCORE_SDIO=m
+CONFIG_INPUT_MOUSEDEV=m
  CONFIG_INPUT_JOYDEV=m
  CONFIG_INPUT_EVDEV=m
  CONFIG_KEYBOARD_ATKBD=m
diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig

index 2aac99fd1c41d264ed45c4a4a7fd68c5236ab9b8..1318f61589dc462370d0c9a6916fb1b707a94e2d 100644 (file)
--- a/arch/arm/configs/pxa_defconfig
+++ b/arch/arm/configs/pxa_defconfig
@@ -635,8 +635,7 @@ CONFIG_LEDS_TRIGGER_GPIO=m
  CONFIG_LEDS_TRIGGER_DEFAULT_ON=m
  CONFIG_LEDS_TRIGGER_TRANSIENT=m
  CONFIG_LEDS_TRIGGER_CAMERA=m
-CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=m
+CONFIG_EDAC=m
  CONFIG_RTC_CLASS=y
  CONFIG_RTC_DEBUG=y
  CONFIG_RTC_DRV_DS1307=m
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h

index e22089fb44dc86b7ed2fdb175bc6ec7b47ee4001..a3f0b3d500895b349004921b5f1b9435a45a0f5b 100644 (file)
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -209,6 +209,7 @@
  #define HSR_EC_IABT_HYP        (0x21)
  #define HSR_EC_DABT    (0x24)
  #define HSR_EC_DABT_HYP        (0x25)
+#define HSR_EC_MAX     (0x3f)
  
  #define HSR_WFI_IS_WFE         (_AC(1, UL) << 0)
  
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h

index cc495d799c67643c58e136249197a06736299339..31ee468ce667dee8a219f775f1106714879088c2 100644 (file)
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -30,7 +30,6 @@
  #define __KVM_HAVE_ARCH_INTC_INITIALIZED
  
  #define KVM_USER_MEM_SLOTS 32
-#define KVM_PRIVATE_MEM_SLOTS 4
  #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
  #define KVM_HAVE_ONE_REG
  #define KVM_HALT_POLL_NS_DEFAULT 500000
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h

index a8d656d9aec715f5ddcea1295c54923b82c49be9..1c462381c225eea31346ec4f19145e3fd449caab 100644 (file)
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -20,6 +20,7 @@
  
  #else
  
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopud.h>
  #include <asm/memory.h>
  #include <asm/pgtable-hwdef.h>
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c

index c9a2103faeb9acf82f0c26164085506f14015822..314eb6abe1ff9879272fdae542c2e0043f3759eb 100644 (file)
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -221,6 +221,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
         case KVM_CAP_MAX_VCPUS:
                 r = KVM_MAX_VCPUS;
                 break;
+       case KVM_CAP_NR_MEMSLOTS:
+               r = KVM_USER_MEM_SLOTS;
+               break;
         case KVM_CAP_MSI_DEVID:
                 if (!kvm)
                         r = -EINVAL;
@@ -1121,6 +1124,9 @@ static void cpu_hyp_reinit(void)
                 if (__hyp_get_vectors() == hyp_default_vectors)
                         cpu_init_hyp_mode(NULL);
         }
+
+       if (vgic_present)
+               kvm_vgic_init_cpu_hardware();
  }
  
  static void cpu_hyp_reset(void)
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c

index 4e40d1955e35341b7756efe72f2da6bf2360b224..96af65a30d78b1e09182d8e41f8b8e3ff4aae81e 100644 (file)
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -79,7 +79,19 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
         return 1;
  }
  
+static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+       u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+       kvm_pr_unimpl("Unknown exception class: hsr: %#08x\n",
+                     hsr);
+
+       kvm_inject_undefined(vcpu);
+       return 1;
+}
+
  static exit_handle_fn arm_exit_handlers[] = {
+       [0 ... HSR_EC_MAX]      = kvm_handle_unknown_ec,
         [HSR_EC_WFI]            = kvm_handle_wfx,
         [HSR_EC_CP15_32]        = kvm_handle_cp15_32,
         [HSR_EC_CP15_64]        = kvm_handle_cp15_64,
@@ -98,13 +110,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
  {
         u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
  
-       if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
-           !arm_exit_handlers[hsr_ec]) {
-               kvm_err("Unknown exception class: hsr: %#08x\n",
-                       (unsigned int)kvm_vcpu_get_hsr(vcpu));
-               BUG();
-       }
-
         return arm_exit_handlers[hsr_ec];
  }
  
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c

index 962616fd4ddd633289e98b99d99dea9fb758b820..582a972371cf886b8581de1abcfe609a53540245 100644 (file)
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -292,11 +292,18 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
         phys_addr_t addr = start, end = start + size;
         phys_addr_t next;
  
+       assert_spin_locked(&kvm->mmu_lock);
         pgd = kvm->arch.pgd + stage2_pgd_index(addr);
         do {
                 next = stage2_pgd_addr_end(addr, end);
                 if (!stage2_pgd_none(*pgd))
                         unmap_stage2_puds(kvm, pgd, addr, next);
+               /*
+                * If the range is too large, release the kvm->mmu_lock
+                * to prevent starvation and lockup detector warnings.
+                */
+               if (next != end)
+                       cond_resched_lock(&kvm->mmu_lock);
         } while (pgd++, addr = next, addr != end);
  }
  
@@ -803,6 +810,7 @@ void stage2_unmap_vm(struct kvm *kvm)
         int idx;
  
         idx = srcu_read_lock(&kvm->srcu);
+       down_read(&current->mm->mmap_sem);
         spin_lock(&kvm->mmu_lock);
  
         slots = kvm_memslots(kvm);
@@ -810,6 +818,7 @@ void stage2_unmap_vm(struct kvm *kvm)
                 stage2_unmap_memslot(kvm, memslot);
  
         spin_unlock(&kvm->mmu_lock);
+       up_read(&current->mm->mmap_sem);
         srcu_read_unlock(&kvm->srcu, idx);
  }
  
@@ -829,7 +838,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
         if (kvm->arch.pgd == NULL)
                 return;
  
+       spin_lock(&kvm->mmu_lock);
         unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
+       spin_unlock(&kvm->mmu_lock);
+
         /* Free the HW pgd, one page at a time */
         free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE);
         kvm->arch.pgd = NULL;
@@ -1801,6 +1813,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
             (KVM_PHYS_SIZE >> PAGE_SHIFT))
                 return -EFAULT;
  
+       down_read(&current->mm->mmap_sem);
         /*
          * A memory region could potentially cover multiple VMAs, and any holes
          * between them, so iterate over all of them to find out if we can map
@@ -1844,8 +1857,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
                         pa += vm_start - vma->vm_start;
  
                         /* IO region dirty page logging not allowed */
-                       if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES)
-                               return -EINVAL;
+                       if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+                               ret = -EINVAL;
+                               goto out;
+                       }
  
                         ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
                                                     vm_end - vm_start,
@@ -1857,7 +1872,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
         } while (hva < reg_end);
  
         if (change == KVM_MR_FLAGS_ONLY)
-               return ret;
+               goto out;
  
         spin_lock(&kvm->mmu_lock);
         if (ret)
@@ -1865,6 +1880,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
         else
                 stage2_flush_memslot(kvm, memslot);
         spin_unlock(&kvm->mmu_lock);
+out:
+       up_read(&current->mm->mmap_sem);
         return ret;
  }
  
diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c

index 3d89b7905bd903687b481fccab7249382c2428c6..a277981f414d8dd9433569c529068f206daab81f 100644 (file)
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -289,6 +289,22 @@ static void at91_ddr_standby(void)
                 at91_ramc_write(1, AT91_DDRSDRC_LPR, saved_lpr1);
  }
  
+static void sama5d3_ddr_standby(void)
+{
+       u32 lpr0;
+       u32 saved_lpr0;
+
+       saved_lpr0 = at91_ramc_read(0, AT91_DDRSDRC_LPR);
+       lpr0 = saved_lpr0 & ~AT91_DDRSDRC_LPCB;
+       lpr0 |= AT91_DDRSDRC_LPCB_POWER_DOWN;
+
+       at91_ramc_write(0, AT91_DDRSDRC_LPR, lpr0);
+
+       cpu_do_idle();
+
+       at91_ramc_write(0, AT91_DDRSDRC_LPR, saved_lpr0);
+}
+
  /* We manage both DDRAM/SDRAM controllers, we need more than one value to
   * remember.
   */
@@ -323,7 +339,7 @@ static const struct of_device_id const ramc_ids[] __initconst = {
         { .compatible = "atmel,at91rm9200-sdramc", .data = at91rm9200_standby },
         { .compatible = "atmel,at91sam9260-sdramc", .data = at91sam9_sdram_standby },
         { .compatible = "atmel,at91sam9g45-ddramc", .data = at91_ddr_standby },
-       { .compatible = "atmel,sama5d3-ddramc", .data = at91_ddr_standby },
+       { .compatible = "atmel,sama5d3-ddramc", .data = sama5d3_ddr_standby },
         { /*sentinel*/ }
  };
  
diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile

index 093458b62c8dadbcc3c7cc1c3b66d84e59af3d8d..c89757abb0ae4bc82adf923a5947f2c0fbefb42c 100644 (file)
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -241,6 +241,3 @@ obj-$(CONFIG_MACH_OMAP2_TUSB6010)   += usb-tusb6010.o
  
  onenand-$(CONFIG_MTD_ONENAND_OMAP2)    := gpmc-onenand.o
  obj-y                                  += $(onenand-m) $(onenand-y)
-
-nand-$(CONFIG_MTD_NAND_OMAP2)          := gpmc-nand.o
-obj-y                                  += $(nand-m) $(nand-y)
diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h

index c4f2ace91ea22d1a6a344388498ec8da30906e63..3089d3bfa19b4a5c595e6872824c81f103d4c5e2 100644 (file)
--- a/arch/arm/mach-omap2/common.h
+++ b/arch/arm/mach-omap2/common.h
@@ -270,6 +270,7 @@ extern const struct smp_operations omap4_smp_ops;
  extern int omap4_mpuss_init(void);
  extern int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state);
  extern int omap4_hotplug_cpu(unsigned int cpu, unsigned int power_state);
+extern u32 omap4_get_cpu1_ns_pa_addr(void);
  #else
  static inline int omap4_enter_lowpower(unsigned int cpu,
                                         unsigned int power_state)
diff --git a/arch/arm/mach-omap2/gpmc-nand.c b/arch/arm/mach-omap2/gpmc-nand.c

deleted file mode 100644 (file)

index f6ac027..0000000
--- a/arch/arm/mach-omap2/gpmc-nand.c
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * gpmc-nand.c
- *
- * Copyright (C) 2009 Texas Instruments
- * Vimal Singh <vimalsingh@ti.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-#include <linux/omap-gpmc.h>
-#include <linux/mtd/nand.h>
-#include <linux/platform_data/mtd-nand-omap2.h>
-
-#include <asm/mach/flash.h>
-
-#include "soc.h"
-
-/* minimum size for IO mapping */
-#define        NAND_IO_SIZE    4
-
-static bool gpmc_hwecc_bch_capable(enum omap_ecc ecc_opt)
-{
-       /* platforms which support all ECC schemes */
-       if (soc_is_am33xx() || soc_is_am43xx() || cpu_is_omap44xx() ||
-                soc_is_omap54xx() || soc_is_dra7xx())
-               return 1;
-
-       if (ecc_opt == OMAP_ECC_BCH4_CODE_HW_DETECTION_SW ||
-                ecc_opt == OMAP_ECC_BCH8_CODE_HW_DETECTION_SW) {
-               if (cpu_is_omap24xx())
-                       return 0;
-               else if (cpu_is_omap3630() && (GET_OMAP_REVISION() == 0))
-                       return 0;
-               else
-                       return 1;
-       }
-
-       /* OMAP3xxx do not have ELM engine, so cannot support ECC schemes
-        * which require H/W based ECC error detection */
-       if ((cpu_is_omap34xx() || cpu_is_omap3630()) &&
-           ((ecc_opt == OMAP_ECC_BCH4_CODE_HW) ||
-                (ecc_opt == OMAP_ECC_BCH8_CODE_HW)))
-               return 0;
-
-       /* legacy platforms support only HAM1 (1-bit Hamming) ECC scheme */
-       if (ecc_opt == OMAP_ECC_HAM1_CODE_HW ||
-           ecc_opt == OMAP_ECC_HAM1_CODE_SW)
-               return 1;
-       else
-               return 0;
-}
-
-/* This function will go away once the device-tree convertion is complete */
-static void gpmc_set_legacy(struct omap_nand_platform_data *gpmc_nand_data,
-                           struct gpmc_settings *s)
-{
-       /* Enable RD PIN Monitoring Reg */
-       if (gpmc_nand_data->dev_ready) {
-               s->wait_on_read = true;
-               s->wait_on_write = true;
-       }
-
-       if (gpmc_nand_data->devsize == NAND_BUSWIDTH_16)
-               s->device_width = GPMC_DEVWIDTH_16BIT;
-       else
-               s->device_width = GPMC_DEVWIDTH_8BIT;
-}
-
-int gpmc_nand_init(struct omap_nand_platform_data *gpmc_nand_data,
-                  struct gpmc_timings *gpmc_t)
-{
-       int err = 0;
-       struct gpmc_settings s;
-       struct platform_device *pdev;
-       struct resource gpmc_nand_res[] = {
-               { .flags = IORESOURCE_MEM, },
-               { .flags = IORESOURCE_IRQ, },
-               { .flags = IORESOURCE_IRQ, },
-       };
-
-       BUG_ON(gpmc_nand_data->cs >= GPMC_CS_NUM);
-
-       err = gpmc_cs_request(gpmc_nand_data->cs, NAND_IO_SIZE,
-                             (unsigned long *)&gpmc_nand_res[0].start);
-       if (err < 0) {
-               pr_err("omap2-gpmc: Cannot request GPMC CS %d, error %d\n",
-                      gpmc_nand_data->cs, err);
-               return err;
-       }
-       gpmc_nand_res[0].end = gpmc_nand_res[0].start + NAND_IO_SIZE - 1;
-       gpmc_nand_res[1].start = gpmc_get_client_irq(GPMC_IRQ_FIFOEVENTENABLE);
-       gpmc_nand_res[2].start = gpmc_get_client_irq(GPMC_IRQ_COUNT_EVENT);
-
-       memset(&s, 0, sizeof(struct gpmc_settings));
-       gpmc_set_legacy(gpmc_nand_data, &s);
-
-       s.device_nand = true;
-
-       if (gpmc_t) {
-               err = gpmc_cs_set_timings(gpmc_nand_data->cs, gpmc_t, &s);
-               if (err < 0) {
-                       pr_err("omap2-gpmc: Unable to set gpmc timings: %d\n",
-                              err);
-                       return err;
-               }
-       }
-
-       err = gpmc_cs_program_settings(gpmc_nand_data->cs, &s);
-       if (err < 0)
-               goto out_free_cs;
-
-       err = gpmc_configure(GPMC_CONFIG_WP, 0);
-       if (err < 0)
-               goto out_free_cs;
-
-       if (!gpmc_hwecc_bch_capable(gpmc_nand_data->ecc_opt)) {
-               pr_err("omap2-nand: Unsupported NAND ECC scheme selected\n");
-               err = -EINVAL;
-               goto out_free_cs;
-       }
-
-
-       pdev = platform_device_alloc("omap2-nand", gpmc_nand_data->cs);
-       if (pdev) {
-               err = platform_device_add_resources(pdev, gpmc_nand_res,
-                                                   ARRAY_SIZE(gpmc_nand_res));
-               if (!err)
-                       pdev->dev.platform_data = gpmc_nand_data;
-       } else {
-               err = -ENOMEM;
-       }
-       if (err)
-               goto out_free_pdev;
-
-       err = platform_device_add(pdev);
-       if (err) {
-               dev_err(&pdev->dev, "Unable to register NAND device\n");
-               goto out_free_pdev;
-       }
-
-       return 0;
-
-out_free_pdev:
-       platform_device_put(pdev);
-out_free_cs:
-       gpmc_cs_free(gpmc_nand_data->cs);
-
-       return err;
-}
diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c

index 8633c703546a65c2e5b0071ffca0d5a12b664884..2944af82055847935462da4035a73b513c5795a6 100644 (file)
--- a/arch/arm/mach-omap2/gpmc-onenand.c
+++ b/arch/arm/mach-omap2/gpmc-onenand.c
@@ -367,7 +367,7 @@ static int gpmc_onenand_setup(void __iomem *onenand_base, int *freq_ptr)
         return ret;
  }
  
-void gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data)
+int gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data)
  {
         int err;
         struct device *dev = &gpmc_onenand_device.dev;
@@ -393,15 +393,17 @@ void gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data)
         if (err < 0) {
                 dev_err(dev, "Cannot request GPMC CS %d, error %d\n",
                         gpmc_onenand_data->cs, err);
-               return;
+               return err;
         }
  
         gpmc_onenand_resource.end = gpmc_onenand_resource.start +
                                                         ONENAND_IO_SIZE - 1;
  
-       if (platform_device_register(&gpmc_onenand_device) < 0) {
+       err = platform_device_register(&gpmc_onenand_device);
+       if (err) {
                 dev_err(dev, "Unable to register OneNAND device\n");
                 gpmc_cs_free(gpmc_onenand_data->cs);
-               return;
         }
+
+       return err;
  }
diff --git a/arch/arm/mach-omap2/omap-headsmp.S b/arch/arm/mach-omap2/omap-headsmp.S

index fe36ce2734d47a81f8dd5cbfdf72e9f6717df903..4c6f14cf92a82e3dbf8d7ceeb985686feb79e924 100644 (file)
--- a/arch/arm/mach-omap2/omap-headsmp.S
+++ b/arch/arm/mach-omap2/omap-headsmp.S
@@ -17,6 +17,7 @@
  
  #include <linux/linkage.h>
  #include <linux/init.h>
+#include <asm/assembler.h>
  
  #include "omap44xx.h"
  
@@ -66,7 +67,7 @@ wait_2:       ldr     r2, =AUX_CORE_BOOT0_PA  @ read from AuxCoreBoot0
         cmp     r0, r4
         bne     wait_2
         ldr     r12, =API_HYP_ENTRY
-       adr     r0, hyp_boot
+       badr    r0, hyp_boot
         smc     #0
  hyp_boot:
         b       omap_secondary_startup
diff --git a/arch/arm/mach-omap2/omap-hotplug.c b/arch/arm/mach-omap2/omap-hotplug.c

index d3fb5661bb5d4bc098b05e36f12278301b4c5597..433db6d0b07396288f3b1e38c57d0a64b2ad1e66 100644 (file)
--- a/arch/arm/mach-omap2/omap-hotplug.c
+++ b/arch/arm/mach-omap2/omap-hotplug.c
@@ -50,7 +50,7 @@ void omap4_cpu_die(unsigned int cpu)
                 omap4_hotplug_cpu(cpu, PWRDM_POWER_OFF);
  
                 if (omap_secure_apis_support())
-                       boot_cpu = omap_read_auxcoreboot0();
+                       boot_cpu = omap_read_auxcoreboot0() >> 9;
                 else
                         boot_cpu =
                                 readl_relaxed(base + OMAP_AUX_CORE_BOOT_0) >> 5;
diff --git a/arch/arm/mach-omap2/omap-mpuss-lowpower.c b/arch/arm/mach-omap2/omap-mpuss-lowpower.c

index 113ab2dd2ee91ccf9c238813bd6c4d7561ff7d97..03ec6d307c8235fc907a599322991b1efd6c27bf 100644 (file)
--- a/arch/arm/mach-omap2/omap-mpuss-lowpower.c
+++ b/arch/arm/mach-omap2/omap-mpuss-lowpower.c
@@ -64,6 +64,7 @@
  #include "prm-regbits-44xx.h"
  
  static void __iomem *sar_base;
+static u32 old_cpu1_ns_pa_addr;
  
  #if defined(CONFIG_PM) && defined(CONFIG_SMP)
  
@@ -212,6 +213,11 @@ static void __init save_l2x0_context(void)
  {}
  #endif
  
+u32 omap4_get_cpu1_ns_pa_addr(void)
+{
+       return old_cpu1_ns_pa_addr;
+}
+
  /**
   * omap4_enter_lowpower: OMAP4 MPUSS Low Power Entry Function
   * The purpose of this function is to manage low power programming
@@ -460,22 +466,30 @@ int __init omap4_mpuss_init(void)
  void __init omap4_mpuss_early_init(void)
  {
         unsigned long startup_pa;
+       void __iomem *ns_pa_addr;
  
-       if (!(cpu_is_omap44xx() || soc_is_omap54xx()))
+       if (!(soc_is_omap44xx() || soc_is_omap54xx()))
                 return;
  
         sar_base = omap4_get_sar_ram_base();
  
-       if (cpu_is_omap443x())
+       /* Save old NS_PA_ADDR for validity checks later on */
+       if (soc_is_omap44xx())
+               ns_pa_addr = sar_base + CPU1_WAKEUP_NS_PA_ADDR_OFFSET;
+       else
+               ns_pa_addr = sar_base + OMAP5_CPU1_WAKEUP_NS_PA_ADDR_OFFSET;
+       old_cpu1_ns_pa_addr = readl_relaxed(ns_pa_addr);
+
+       if (soc_is_omap443x())
                 startup_pa = __pa_symbol(omap4_secondary_startup);
-       else if (cpu_is_omap446x())
+       else if (soc_is_omap446x())
                 startup_pa = __pa_symbol(omap4460_secondary_startup);
         else if ((__boot_cpu_mode & MODE_MASK) == HYP_MODE)
                 startup_pa = __pa_symbol(omap5_secondary_hyp_startup);
         else
                 startup_pa = __pa_symbol(omap5_secondary_startup);
  
-       if (cpu_is_omap44xx())
+       if (soc_is_omap44xx())
                 writel_relaxed(startup_pa, sar_base +
                                CPU1_WAKEUP_NS_PA_ADDR_OFFSET);
         else
diff --git a/arch/arm/mach-omap2/omap-smc.S b/arch/arm/mach-omap2/omap-smc.S

index fd90125bffc70ad6719bfc2b9f3e22d21b595367..72506e6cf9e7423f946d83e61b5aca66d2fee0c0 100644 (file)
--- a/arch/arm/mach-omap2/omap-smc.S
+++ b/arch/arm/mach-omap2/omap-smc.S
@@ -94,6 +94,5 @@ ENTRY(omap_read_auxcoreboot0)
         ldr     r12, =0x103
         dsb
         smc     #0
-       mov     r0, r0, lsr #9
         ldmfd   sp!, {r2-r12, pc}
  ENDPROC(omap_read_auxcoreboot0)
diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c

index 003353b0b7944d9363fb6446e683314823cd82f9..3faf454ba4871c8f60d5e12e912a1ff9dd9af271 100644 (file)
--- a/arch/arm/mach-omap2/omap-smp.c
+++ b/arch/arm/mach-omap2/omap-smp.c
@@ -21,6 +21,7 @@
  #include <linux/io.h>
  #include <linux/irqchip/arm-gic.h>
  
+#include <asm/sections.h>
  #include <asm/smp_scu.h>
  #include <asm/virt.h>
  
@@ -40,10 +41,14 @@
  
  #define OMAP5_CORE_COUNT       0x2
  
+#define AUX_CORE_BOOT0_GP_RELEASE      0x020
+#define AUX_CORE_BOOT0_HS_RELEASE      0x200
+
  struct omap_smp_config {
         unsigned long cpu1_rstctrl_pa;
         void __iomem *cpu1_rstctrl_va;
         void __iomem *scu_base;
+       void __iomem *wakeupgen_base;
         void *startup_addr;
  };
  
@@ -140,7 +145,6 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
         static struct clockdomain *cpu1_clkdm;
         static bool booted;
         static struct powerdomain *cpu1_pwrdm;
-       void __iomem *base = omap_get_wakeupgen_base();
  
         /*
          * Set synchronisation state between this boot processor
@@ -155,9 +159,11 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
          * A barrier is added to ensure that write buffer is drained
          */
         if (omap_secure_apis_support())
-               omap_modify_auxcoreboot0(0x200, 0xfffffdff);
+               omap_modify_auxcoreboot0(AUX_CORE_BOOT0_HS_RELEASE,
+                                        0xfffffdff);
         else
-               writel_relaxed(0x20, base + OMAP_AUX_CORE_BOOT_0);
+               writel_relaxed(AUX_CORE_BOOT0_GP_RELEASE,
+                              cfg.wakeupgen_base + OMAP_AUX_CORE_BOOT_0);
  
         if (!cpu1_clkdm && !cpu1_pwrdm) {
                 cpu1_clkdm = clkdm_lookup("mpu1_clkdm");
@@ -261,9 +267,72 @@ static void __init omap4_smp_init_cpus(void)
                 set_cpu_possible(i, true);
  }
  
+/*
+ * For now, just make sure the start-up address is not within the booting
+ * kernel space as that means we just overwrote whatever secondary_startup()
+ * code there was.
+ */
+static bool __init omap4_smp_cpu1_startup_valid(unsigned long addr)
+{
+       if ((addr >= __pa(PAGE_OFFSET)) && (addr <= __pa(__bss_start)))
+               return false;
+
+       return true;
+}
+
+/*
+ * We may need to reset CPU1 before configuring, otherwise kexec boot can end
+ * up trying to use old kernel startup address or suspend-resume will
+ * occasionally fail to bring up CPU1 on 4430 if CPU1 fails to enter deeper
+ * idle states.
+ */
+static void __init omap4_smp_maybe_reset_cpu1(struct omap_smp_config *c)
+{
+       unsigned long cpu1_startup_pa, cpu1_ns_pa_addr;
+       bool needs_reset = false;
+       u32 released;
+
+       if (omap_secure_apis_support())
+               released = omap_read_auxcoreboot0() & AUX_CORE_BOOT0_HS_RELEASE;
+       else
+               released = readl_relaxed(cfg.wakeupgen_base +
+                                        OMAP_AUX_CORE_BOOT_0) &
+                                               AUX_CORE_BOOT0_GP_RELEASE;
+       if (released) {
+               pr_warn("smp: CPU1 not parked?\n");
+
+               return;
+       }
+
+       cpu1_startup_pa = readl_relaxed(cfg.wakeupgen_base +
+                                       OMAP_AUX_CORE_BOOT_1);
+       cpu1_ns_pa_addr = omap4_get_cpu1_ns_pa_addr();
+
+       /* Did the configured secondary_startup() get overwritten? */
+       if (!omap4_smp_cpu1_startup_valid(cpu1_startup_pa))
+               needs_reset = true;
+
+       /*
+        * If omap4 or 5 has NS_PA_ADDR configured, CPU1 may be in a
+        * deeper idle state in WFI and will wake to an invalid address.
+        */
+       if ((soc_is_omap44xx() || soc_is_omap54xx()) &&
+           !omap4_smp_cpu1_startup_valid(cpu1_ns_pa_addr))
+               needs_reset = true;
+
+       if (!needs_reset || !c->cpu1_rstctrl_va)
+               return;
+
+       pr_info("smp: CPU1 parked within kernel, needs reset (0x%lx 0x%lx)\n",
+               cpu1_startup_pa, cpu1_ns_pa_addr);
+
+       writel_relaxed(1, c->cpu1_rstctrl_va);
+       readl_relaxed(c->cpu1_rstctrl_va);
+       writel_relaxed(0, c->cpu1_rstctrl_va);
+}
+
  static void __init omap4_smp_prepare_cpus(unsigned int max_cpus)
  {
-       void __iomem *base = omap_get_wakeupgen_base();
         const struct omap_smp_config *c = NULL;
  
         if (soc_is_omap443x())
@@ -281,6 +350,7 @@ static void __init omap4_smp_prepare_cpus(unsigned int max_cpus)
         /* Must preserve cfg.scu_base set earlier */
         cfg.cpu1_rstctrl_pa = c->cpu1_rstctrl_pa;
         cfg.startup_addr = c->startup_addr;
+       cfg.wakeupgen_base = omap_get_wakeupgen_base();
  
         if (soc_is_dra74x() || soc_is_omap54xx()) {
                 if ((__boot_cpu_mode & MODE_MASK) == HYP_MODE)
@@ -299,15 +369,7 @@ static void __init omap4_smp_prepare_cpus(unsigned int max_cpus)
         if (cfg.scu_base)
                 scu_enable(cfg.scu_base);
  
-       /*
-        * Reset CPU1 before configuring, otherwise kexec will
-        * end up trying to use old kernel startup address.
-        */
-       if (cfg.cpu1_rstctrl_va) {
-               writel_relaxed(1, cfg.cpu1_rstctrl_va);
-               readl_relaxed(cfg.cpu1_rstctrl_va);
-               writel_relaxed(0, cfg.cpu1_rstctrl_va);
-       }
+       omap4_smp_maybe_reset_cpu1(&cfg);
  
         /*
          * Write the address of secondary startup routine into the
@@ -319,7 +381,7 @@ static void __init omap4_smp_prepare_cpus(unsigned int max_cpus)
                 omap_auxcoreboot_addr(__pa_symbol(cfg.startup_addr));
         else
                 writel_relaxed(__pa_symbol(cfg.startup_addr),
-                              base + OMAP_AUX_CORE_BOOT_1);
+                              cfg.wakeupgen_base + OMAP_AUX_CORE_BOOT_1);
  }
  
  const struct smp_operations omap4_smp_ops __initconst = {
diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c

index e920dd83e443753ccced325ce19c48c6bca398c6..f989145480c8fcd0c947beaadeefe6955896a434 100644 (file)
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c
@@ -222,6 +222,14 @@ static int _omap_device_notifier_call(struct notifier_block *nb,
                                 dev_err(dev, "failed to idle\n");
                 }
                 break;
+       case BUS_NOTIFY_BIND_DRIVER:
+               od = to_omap_device(pdev);
+               if (od && (od->_state == OMAP_DEVICE_STATE_ENABLED) &&
+                   pm_runtime_status_suspended(dev)) {
+                       od->_driver_status = BUS_NOTIFY_BIND_DRIVER;
+                       pm_runtime_set_active(dev);
+               }
+               break;
         case BUS_NOTIFY_ADD_DEVICE:
                 if (pdev->dev.of_node)
                         omap_device_build_from_dt(pdev);
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c

index 56f917ec8621e8d5d4a9410975ec454aa802414a..1435fee39a89ba18239291859c97f0f333ca877f 100644 (file)
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -2112,11 +2112,20 @@ static struct omap_hwmod_ocp_if omap3_l4_core__i2c3 = {
  };
  
  /* L4 CORE -> SR1 interface */
+static struct omap_hwmod_addr_space omap3_sr1_addr_space[] = {
+       {
+               .pa_start       = OMAP34XX_SR1_BASE,
+               .pa_end         = OMAP34XX_SR1_BASE + SZ_1K - 1,
+               .flags          = ADDR_TYPE_RT,
+       },
+       { },
+};
  
  static struct omap_hwmod_ocp_if omap34xx_l4_core__sr1 = {
         .master         = &omap3xxx_l4_core_hwmod,
         .slave          = &omap34xx_sr1_hwmod,
         .clk            = "sr_l4_ick",
+       .addr           = omap3_sr1_addr_space,
         .user           = OCP_USER_MPU,
  };
  
@@ -2124,15 +2133,25 @@ static struct omap_hwmod_ocp_if omap36xx_l4_core__sr1 = {
         .master         = &omap3xxx_l4_core_hwmod,
         .slave          = &omap36xx_sr1_hwmod,
         .clk            = "sr_l4_ick",
+       .addr           = omap3_sr1_addr_space,
         .user           = OCP_USER_MPU,
  };
  
  /* L4 CORE -> SR1 interface */
+static struct omap_hwmod_addr_space omap3_sr2_addr_space[] = {
+       {
+               .pa_start       = OMAP34XX_SR2_BASE,
+               .pa_end         = OMAP34XX_SR2_BASE + SZ_1K - 1,
+               .flags          = ADDR_TYPE_RT,
+       },
+       { },
+};
  
  static struct omap_hwmod_ocp_if omap34xx_l4_core__sr2 = {
         .master         = &omap3xxx_l4_core_hwmod,
         .slave          = &omap34xx_sr2_hwmod,
         .clk            = "sr_l4_ick",
+       .addr           = omap3_sr2_addr_space,
         .user           = OCP_USER_MPU,
  };
  
@@ -2140,6 +2159,7 @@ static struct omap_hwmod_ocp_if omap36xx_l4_core__sr2 = {
         .master         = &omap3xxx_l4_core_hwmod,
         .slave          = &omap36xx_sr2_hwmod,
         .clk            = "sr_l4_ick",
+       .addr           = omap3_sr2_addr_space,
         .user           = OCP_USER_MPU,
  };
  
@@ -3111,16 +3131,20 @@ static struct omap_hwmod_ocp_if *omap3xxx_dss_hwmod_ocp_ifs[] __initdata = {
   * Return: 0 if device named @dev_name is not likely to be accessible,
   * or 1 if it is likely to be accessible.
   */
-static int __init omap3xxx_hwmod_is_hs_ip_block_usable(struct device_node *bus,
-                                                      const char *dev_name)
+static bool __init omap3xxx_hwmod_is_hs_ip_block_usable(struct device_node *bus,
+                                                       const char *dev_name)
  {
+       struct device_node *node;
+       bool available;
+
         if (!bus)
-               return (omap_type() == OMAP2_DEVICE_TYPE_GP) ? 1 : 0;
+               return omap_type() == OMAP2_DEVICE_TYPE_GP;
  
-       if (of_device_is_available(of_find_node_by_name(bus, dev_name)))
-               return 1;
+       node = of_get_child_by_name(bus, dev_name);
+       available = of_device_is_available(node);
+       of_node_put(node);
  
-       return 0;
+       return available;
  }
  
  int __init omap3xxx_hwmod_init(void)
@@ -3189,15 +3213,20 @@ int __init omap3xxx_hwmod_init(void)
  
         if (h_sham && omap3xxx_hwmod_is_hs_ip_block_usable(bus, "sham")) {
                 r = omap_hwmod_register_links(h_sham);
-               if (r < 0)
+               if (r < 0) {
+                       of_node_put(bus);
                         return r;
+               }
         }
  
         if (h_aes && omap3xxx_hwmod_is_hs_ip_block_usable(bus, "aes")) {
                 r = omap_hwmod_register_links(h_aes);
-               if (r < 0)
+               if (r < 0) {
+                       of_node_put(bus);
                         return r;
+               }
         }
+       of_node_put(bus);
  
         /*
          * Register hwmod links specific to certain ES levels of a
diff --git a/arch/arm/mach-orion5x/Kconfig b/arch/arm/mach-orion5x/Kconfig

index 633442ad4e4c16d4c80564410167b53977e7575f..2a7bb6ccdcb7eb219f515c6e0f1ba2bfe573a349 100644 (file)
--- a/arch/arm/mach-orion5x/Kconfig
+++ b/arch/arm/mach-orion5x/Kconfig
@@ -6,6 +6,7 @@ menuconfig ARCH_ORION5X
         select GPIOLIB
         select MVEBU_MBUS
         select PCI
+       select PHYLIB if NETDEVICES
         select PLAT_ORION_LEGACY
         help
           Support for the following Marvell Orion 5x series SoCs:
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c

index 63eabb06f9f17551695e89efc0ed59e0ce6ba186..475811f5383afc40d5e00ab15e03fca6d88d8866 100644 (file)
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -935,13 +935,31 @@ static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_add
         __arm_dma_free(dev, size, cpu_addr, handle, attrs, true);
  }
  
+/*
+ * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems
+ * that the intention is to allow exporting memory allocated via the
+ * coherent DMA APIs through the dma_buf API, which only accepts a
+ * scattertable.  This presents a couple of problems:
+ * 1. Not all memory allocated via the coherent DMA APIs is backed by
+ *    a struct page
+ * 2. Passing coherent DMA memory into the streaming APIs is not allowed
+ *    as we will try to flush the memory through a different alias to that
+ *    actually being used (and the flushes are redundant.)
+ */
  int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
                  void *cpu_addr, dma_addr_t handle, size_t size,
                  unsigned long attrs)
  {
-       struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
+       unsigned long pfn = dma_to_pfn(dev, handle);
+       struct page *page;
         int ret;
  
+       /* If the PFN is not valid, we do not have a struct page */
+       if (!pfn_valid(pfn))
+               return -ENXIO;
+
+       page = pfn_to_page(pfn);
+
         ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
         if (unlikely(ret))
                 return ret;
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c

index 3b5c7aaf9c76c522f8c6cbd7890c5105b3a3e1d4..33a45bd9686012c2547b218133f59a0169fea33d 100644 (file)
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -303,7 +303,10 @@ static inline void set_vbar(unsigned long val)
   */
  static inline bool security_extensions_enabled(void)
  {
-       return !!cpuid_feature_extract(CPUID_EXT_PFR1, 4);
+       /* Check CPUID Identification Scheme before ID_PFR1 read */
+       if ((read_cpuid_id() & 0x000f0000) == 0x000f0000)
+               return !!cpuid_feature_extract(CPUID_EXT_PFR1, 4);
+       return 0;
  }
  
  static unsigned long __init setup_vectors_base(void)
diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c

index 9255b6d67ba5e3a3b3586639cadc86342e8b4b04..aff6994950ba6db7eb6579a90cc94e5b2bfc7329 100644 (file)
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -468,6 +468,7 @@ void __init orion_ge11_init(struct mv643xx_eth_platform_data *eth_data,
                     eth_data, &orion_ge11);
  }
  
+#ifdef CONFIG_ARCH_ORION5X
  /*****************************************************************************
   * Ethernet switch
   ****************************************************************************/
@@ -480,6 +481,9 @@ void __init orion_ge00_switch_init(struct dsa_chip_data *d)
         struct mdio_board_info *bd;
         unsigned int i;
  
+       if (!IS_BUILTIN(CONFIG_PHYLIB))
+               return;
+
         for (i = 0; i < ARRAY_SIZE(d->port_names); i++)
                 if (!strcmp(d->port_names[i], "cpu"))
                         break;
@@ -493,6 +497,7 @@ void __init orion_ge00_switch_init(struct dsa_chip_data *d)
  
         mdiobus_register_board_info(&orion_ge00_switch_board_info, 1);
  }
+#endif
  
  /*****************************************************************************
   * I2C
diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c

index b6dc9d838a9a39b9ff2e22e9ed371a522ead0fa7..ad1f4e6a9e339374219d8a74ccd6bfbc01b41f17 100644 (file)
--- a/arch/arm/probes/kprobes/core.c
+++ b/arch/arm/probes/kprobes/core.c
@@ -266,11 +266,20 @@ void __kprobes kprobe_handler(struct pt_regs *regs)
  #endif
  
         if (p) {
-               if (cur) {
+               if (!p->ainsn.insn_check_cc(regs->ARM_cpsr)) {
+                       /*
+                        * Probe hit but conditional execution check failed,
+                        * so just skip the instruction and continue as if
+                        * nothing had happened.
+                        * In this case, we can skip recursing check too.
+                        */
+                       singlestep_skip(p, regs);
+               } else if (cur) {
                         /* Kprobe is pending, so we're recursing. */
                         switch (kcb->kprobe_status) {
                         case KPROBE_HIT_ACTIVE:
                         case KPROBE_HIT_SSDONE:
+                       case KPROBE_HIT_SS:
                                 /* A pre- or post-handler probe got us here. */
                                 kprobes_inc_nmissed_count(p);
                                 save_previous_kprobe(kcb);
@@ -279,11 +288,16 @@ void __kprobes kprobe_handler(struct pt_regs *regs)
                                 singlestep(p, regs, kcb);
                                 restore_previous_kprobe(kcb);
                                 break;
+                       case KPROBE_REENTER:
+                               /* A nested probe was hit in FIQ, it is a BUG */
+                               pr_warn("Unrecoverable kprobe detected at %p.\n",
+                                       p->addr);
+                               /* fall through */
                         default:
                                 /* impossible cases */
                                 BUG();
                         }
-               } else if (p->ainsn.insn_check_cc(regs->ARM_cpsr)) {
+               } else {
                         /* Probe hit and conditional execution check ok. */
                         set_current_kprobe(p);
                         kcb->kprobe_status = KPROBE_HIT_ACTIVE;
@@ -304,13 +318,6 @@ void __kprobes kprobe_handler(struct pt_regs *regs)
                                 }
                                 reset_current_kprobe();
                         }
-               } else {
-                       /*
-                        * Probe hit but conditional execution check failed,
-                        * so just skip the instruction and continue as if
-                        * nothing had happened.
-                        */
-                       singlestep_skip(p, regs);
                 }
         } else if (cur) {
                 /* We probably hit a jprobe.  Call its break handler. */
@@ -434,6 +441,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
         struct hlist_node *tmp;
         unsigned long flags, orig_ret_address = 0;
         unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
+       kprobe_opcode_t *correct_ret_addr = NULL;
  
         INIT_HLIST_HEAD(&empty_rp);
         kretprobe_hash_lock(current, &head, &flags);
@@ -456,14 +464,34 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
                         /* another task is sharing our hash bucket */
                         continue;
  
+               orig_ret_address = (unsigned long)ri->ret_addr;
+
+               if (orig_ret_address != trampoline_address)
+                       /*
+                        * This is the real return address. Any other
+                        * instances associated with this task are for
+                        * other calls deeper on the call stack
+                        */
+                       break;
+       }
+
+       kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
+       correct_ret_addr = ri->ret_addr;
+       hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+               if (ri->task != current)
+                       /* another task is sharing our hash bucket */
+                       continue;
+
+               orig_ret_address = (unsigned long)ri->ret_addr;
                 if (ri->rp && ri->rp->handler) {
                         __this_cpu_write(current_kprobe, &ri->rp->kp);
                         get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
+                       ri->ret_addr = correct_ret_addr;
                         ri->rp->handler(ri, regs);
                         __this_cpu_write(current_kprobe, NULL);
                 }
  
-               orig_ret_address = (unsigned long)ri->ret_addr;
                 recycle_rp_inst(ri, &empty_rp);
  
                 if (orig_ret_address != trampoline_address)
@@ -475,7 +503,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
                         break;
         }
  
-       kretprobe_assert(ri, orig_ret_address, trampoline_address);
         kretprobe_hash_unlock(current, &flags);
  
         hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c

index c893726aa52d8da16e56d9df56ad1e6ca8685b29..1c98a87786ca768adb622f1720df442a272bfb75 100644 (file)
--- a/arch/arm/probes/kprobes/test-core.c
+++ b/arch/arm/probes/kprobes/test-core.c
@@ -977,7 +977,10 @@ static void coverage_end(void)
  void __naked __kprobes_test_case_start(void)
  {
         __asm__ __volatile__ (
-               "stmdb  sp!, {r4-r11}                           \n\t"
+               "mov    r2, sp                                  \n\t"
+               "bic    r3, r2, #7                              \n\t"
+               "mov    sp, r3                                  \n\t"
+               "stmdb  sp!, {r2-r11}                           \n\t"
                 "sub    sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
                 "bic    r0, lr, #1  @ r0 = inline data          \n\t"
                 "mov    r1, sp                                  \n\t"
@@ -997,7 +1000,8 @@ void __naked __kprobes_test_case_end_32(void)
                 "movne  pc, r0                                  \n\t"
                 "mov    r0, r4                                  \n\t"
                 "add    sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
-               "ldmia  sp!, {r4-r11}                           \n\t"
+               "ldmia  sp!, {r2-r11}                           \n\t"
+               "mov    sp, r2                                  \n\t"
                 "mov    pc, r0                                  \n\t"
         );
  }
@@ -1013,7 +1017,8 @@ void __naked __kprobes_test_case_end_16(void)
                 "bxne   r0                                      \n\t"
                 "mov    r0, r4                                  \n\t"
                 "add    sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
-               "ldmia  sp!, {r4-r11}                           \n\t"
+               "ldmia  sp!, {r2-r11}                           \n\t"
+               "mov    sp, r2                                  \n\t"
                 "bx     r0                                      \n\t"
         );
  }
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl

index 3c2cb5d5adfa4f17bab53005b7722ffe8add022e..0bb0e9c6376c4aab7bb1ad43c2bd4fce87cef943 100644 (file)
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -411,3 +411,4 @@
  394    common  pkey_mprotect           sys_pkey_mprotect
  395    common  pkey_alloc              sys_pkey_alloc
  396    common  pkey_free               sys_pkey_free
+397    common  statx                   sys_statx
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c

index ce18c91b50a1cbac3fb6d38af60c63af9b031185..f0325d96b97aed734f86deb3a9f5b3266b5a16ed 100644 (file)
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -198,6 +198,8 @@ static const struct dma_map_ops xen_swiotlb_dma_ops = {
         .unmap_page = xen_swiotlb_unmap_page,
         .dma_supported = xen_swiotlb_dma_supported,
         .set_dma_mask = xen_swiotlb_set_dma_mask,
+       .mmap = xen_swiotlb_dma_mmap,
+       .get_sgtable = xen_swiotlb_get_sgtable,
  };
  
  int __init xen_mm_init(void)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig

index a39029b5414eb25f23f3409f74a4d84713a02c4f..3741859765cfe050d2c4a174d613ff90e1074be0 100644 (file)
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -508,6 +508,16 @@ config QCOM_FALKOR_ERRATUM_1009
  
           If unsure, say Y.
  
+config QCOM_QDF2400_ERRATUM_0065
+       bool "QDF2400 E0065: Incorrect GITS_TYPER.ITT_Entry_size"
+       default y
+       help
+         On Qualcomm Datacenter Technologies QDF2400 SoC, ITS hardware reports
+         ITE size incorrectly. The GITS_TYPER.ITT_Entry_size field should have
+         been indicated as 16Bytes (0xf), not 8Bytes (0x7).
+
+         If unsure, say Y.
+
  endmenu
  
  
@@ -1063,6 +1073,10 @@ config SYSVIPC_COMPAT
         def_bool y
         depends on COMPAT && SYSVIPC
  
+config KEYS_COMPAT
+       def_bool y
+       depends on COMPAT && KEYS
+
  endmenu
  
  menu "Power management options"
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi

index 1c64ea2d23f96a4a990f9f6e8cd4b22fd3fd3dd3..0565779e66fafd9755048c2a1c7f8ebc15533eff 100644 (file)
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
@@ -179,8 +179,10 @@
                 usbphy: phy@01c19400 {
                         compatible = "allwinner,sun50i-a64-usb-phy";
                         reg = <0x01c19400 0x14>,
+                             <0x01c1a800 0x4>,
                               <0x01c1b800 0x4>;
                         reg-names = "phy_ctrl",
+                                   "pmu0",
                                     "pmu1";
                         clocks = <&ccu CLK_USB_PHY0>,
                                  <&ccu CLK_USB_PHY1>;
diff --git a/arch/arm64/boot/dts/broadcom/ns2.dtsi b/arch/arm64/boot/dts/broadcom/ns2.dtsi

index 9f9e203c09c5ad362ae00d5038d2e52c91043f39..bcb03fc3266552e22ce855ac81677584d0937e63 100644 (file)
--- a/arch/arm64/boot/dts/broadcom/ns2.dtsi
+++ b/arch/arm64/boot/dts/broadcom/ns2.dtsi
@@ -114,6 +114,7 @@
         pcie0: pcie@20020000 {
                 compatible = "brcm,iproc-pcie";
                 reg = <0 0x20020000 0 0x1000>;
+               dma-coherent;
  
                 #interrupt-cells = <1>;
                 interrupt-map-mask = <0 0 0 0>;
@@ -144,6 +145,7 @@
         pcie4: pcie@50020000 {
                 compatible = "brcm,iproc-pcie";
                 reg = <0 0x50020000 0 0x1000>;
+               dma-coherent;
  
                 #interrupt-cells = <1>;
                 interrupt-map-mask = <0 0 0 0>;
@@ -174,6 +176,7 @@
         pcie8: pcie@60c00000 {
                 compatible = "brcm,iproc-pcie-paxc";
                 reg = <0 0x60c00000 0 0x1000>;
+               dma-coherent;
                 linux,pci-domain = <8>;
  
                 bus-range = <0x0 0x1>;
@@ -203,6 +206,7 @@
                               <0x61030000 0x100>;
                         reg-names = "amac_base", "idm_base", "nicpm_base";
                         interrupts = <GIC_SPI 341 IRQ_TYPE_LEVEL_HIGH>;
+                       dma-coherent;
                         phy-handle = <&gphy0>;
                         phy-mode = "rgmii";
                         status = "disabled";
@@ -213,6 +217,7 @@
                         reg = <0x612c0000 0x445>;  /* PDC FS0 regs */
                         interrupts = <GIC_SPI 187 IRQ_TYPE_LEVEL_HIGH>;
                         #mbox-cells = <1>;
+                       dma-coherent;
                         brcm,rx-status-len = <32>;
                         brcm,use-bcm-hdr;
                 };
@@ -222,6 +227,7 @@
                         reg = <0x612e0000 0x445>;  /* PDC FS1 regs */
                         interrupts = <GIC_SPI 189 IRQ_TYPE_LEVEL_HIGH>;
                         #mbox-cells = <1>;
+                       dma-coherent;
                         brcm,rx-status-len = <32>;
                         brcm,use-bcm-hdr;
                 };
@@ -231,6 +237,7 @@
                         reg = <0x61300000 0x445>;  /* PDC FS2 regs */
                         interrupts = <GIC_SPI 191 IRQ_TYPE_LEVEL_HIGH>;
                         #mbox-cells = <1>;
+                       dma-coherent;
                         brcm,rx-status-len = <32>;
                         brcm,use-bcm-hdr;
                 };
@@ -240,6 +247,7 @@
                         reg = <0x61320000 0x445>;  /* PDC FS3 regs */
                         interrupts = <GIC_SPI 193 IRQ_TYPE_LEVEL_HIGH>;
                         #mbox-cells = <1>;
+                       dma-coherent;
                         brcm,rx-status-len = <32>;
                         brcm,use-bcm-hdr;
                 };
@@ -644,6 +652,7 @@
                 sata: ahci@663f2000 {
                         compatible = "brcm,iproc-ahci", "generic-ahci";
                         reg = <0x663f2000 0x1000>;
+                       dma-coherent;
                         reg-names = "ahci";
                         interrupts = <GIC_SPI 438 IRQ_TYPE_LEVEL_HIGH>;
                         #address-cells = <1>;
@@ -667,6 +676,7 @@
                         compatible = "brcm,sdhci-iproc-cygnus";
                         reg = <0x66420000 0x100>;
                         interrupts = <GIC_SPI 421 IRQ_TYPE_LEVEL_HIGH>;
+                       dma-coherent;
                         bus-width = <8>;
                         clocks = <&genpll_sw BCM_NS2_GENPLL_SW_SDIO_CLK>;
                         status = "disabled";
@@ -676,6 +686,7 @@
                         compatible = "brcm,sdhci-iproc-cygnus";
                         reg = <0x66430000 0x100>;
                         interrupts = <GIC_SPI 422 IRQ_TYPE_LEVEL_HIGH>;
+                       dma-coherent;
                         bus-width = <8>;
                         clocks = <&genpll_sw BCM_NS2_GENPLL_SW_SDIO_CLK>;
                         status = "disabled";
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h

index 05310ad8c5abec54a445cb2dfcd3df5fefcefe3a..f31c48d0cd6873f399a6d8f5f861e98fa3f66e10 100644 (file)
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -251,7 +251,7 @@ static inline bool system_supports_fpsimd(void)
  static inline bool system_uses_ttbr0_pan(void)
  {
         return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) &&
-               !cpus_have_cap(ARM64_HAS_PAN);
+               !cpus_have_const_cap(ARM64_HAS_PAN);
  }
  
  #endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/current.h b/arch/arm64/include/asm/current.h

index 86c404171305abd290a6a85d7a5edd69c55ecd02..f6580d4afb0e0c4e242e5171ecda679d927286d2 100644 (file)
--- a/arch/arm64/include/asm/current.h
+++ b/arch/arm64/include/asm/current.h
@@ -3,8 +3,6 @@
  
  #include <linux/compiler.h>
  
-#include <asm/sysreg.h>
-
  #ifndef __ASSEMBLY__
  
  struct task_struct;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h

index f21fd38943708f1f6b69f1431fd07538ff26a993..e7705e7bb07b133de4da9b2809a152f94ceb0b4b 100644 (file)
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -30,8 +30,7 @@
  
  #define __KVM_HAVE_ARCH_INTC_INITIALIZED
  
-#define KVM_USER_MEM_SLOTS 32
-#define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_USER_MEM_SLOTS 512
  #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
  #define KVM_HALT_POLL_NS_DEFAULT 500000
  
diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h

index 69b2fd41503ca3764fed84f9d404b2e32cbfc939..345a072b5856d41477fab1f450eda1e213d201d3 100644 (file)
--- a/arch/arm64/include/asm/pgtable-types.h
+++ b/arch/arm64/include/asm/pgtable-types.h
@@ -55,9 +55,13 @@ typedef struct { pteval_t pgprot; } pgprot_t;
  #define __pgprot(x)    ((pgprot_t) { (x) } )
  
  #if CONFIG_PGTABLE_LEVELS == 2
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopmd.h>
  #elif CONFIG_PGTABLE_LEVELS == 3
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopud.h>
+#elif CONFIG_PGTABLE_LEVELS == 4
+#include <asm-generic/5level-fixup.h>
  #endif
  
  #endif /* __ASM_PGTABLE_TYPES_H */
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h

index e78ac26324bd809dcd5fa2f7f45465daba8c158d..bdbeb06dc11ede112de28b09c7608b3d5b0ce23b 100644 (file)
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -44,7 +44,7 @@
  #define __ARM_NR_compat_cacheflush     (__ARM_NR_COMPAT_BASE+2)
  #define __ARM_NR_compat_set_tls                (__ARM_NR_COMPAT_BASE+5)
  
-#define __NR_compat_syscalls           394
+#define __NR_compat_syscalls           398
  #endif
  
  #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h

index b7e8ef16ff0dc62b94a042e4a0b70c9884e4cb16..c66b51aab1958816e6d137e9ae9dc0dc0378cf7f 100644 (file)
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -809,6 +809,14 @@ __SYSCALL(__NR_copy_file_range, sys_copy_file_range)
  __SYSCALL(__NR_preadv2, compat_sys_preadv2)
  #define __NR_pwritev2 393
  __SYSCALL(__NR_pwritev2, compat_sys_pwritev2)
+#define __NR_pkey_mprotect 394
+__SYSCALL(__NR_pkey_mprotect, sys_pkey_mprotect)
+#define __NR_pkey_alloc 395
+__SYSCALL(__NR_pkey_alloc, sys_pkey_alloc)
+#define __NR_pkey_free 396
+__SYSCALL(__NR_pkey_free, sys_pkey_free)
+#define __NR_statx 397
+__SYSCALL(__NR_statx, sys_statx)
  
  /*
   * Please add new compat syscalls above this comment and update
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c

index 75a0f8acef669ce5560f627f516dae54168a898d..fd691087dc9ad58ff0ff007f5ea7191a3f879380 100644 (file)
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -30,7 +30,7 @@ int arm_cpuidle_init(unsigned int cpu)
  }
  
  /**
- * cpu_suspend() - function to enter a low-power idle state
+ * arm_cpuidle_suspend() - function to enter a low-power idle state
   * @arg: argument to pass to CPU suspend operations
   *
   * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c

index 769f24ef628c1e9ffd167e0b0e634151cf36de3d..d7e90d97f5c405f2c348eaaff331df818b27906a 100644 (file)
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -131,11 +131,15 @@ u64 __init kaslr_early_init(u64 dt_phys, u64 modulo_offset)
         /*
          * The kernel Image should not extend across a 1GB/32MB/512MB alignment
          * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this
-        * happens, increase the KASLR offset by the size of the kernel image.
+        * happens, increase the KASLR offset by the size of the kernel image
+        * rounded up by SWAPPER_BLOCK_SIZE.
          */
         if ((((u64)_text + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT) !=
-           (((u64)_end + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT))
-               offset = (offset + (u64)(_end - _text)) & mask;
+           (((u64)_end + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT)) {
+               u64 kimg_sz = _end - _text;
+               offset = (offset + round_up(kimg_sz, SWAPPER_BLOCK_SIZE))
+                               & mask;
+       }
  
         if (IS_ENABLED(CONFIG_KASAN))
                 /*
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c

index 2a07aae5b8a26431edcdfd2534a856474fc00b44..c5c45942fb6e6693c5f8c195bb6596e2fa9f6ff2 100644 (file)
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -372,12 +372,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
         return 0;
  }
  
-int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
-                                      unsigned long val, void *data)
-{
-       return NOTIFY_DONE;
-}
-
  static void __kprobes kprobe_handler(struct pt_regs *regs)
  {
         struct kprobe *p, *cur_kprobe;
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c

index ef1caae02110eef59c4abb5dd5cbb8051d9cc269..9b1036570586f95379f035b8606220144cdc7837 100644 (file)
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -944,7 +944,7 @@ static bool have_cpu_die(void)
  #ifdef CONFIG_HOTPLUG_CPU
         int any_cpu = raw_smp_processor_id();
  
-       if (cpu_ops[any_cpu]->cpu_die)
+       if (cpu_ops[any_cpu] && cpu_ops[any_cpu]->cpu_die)
                 return true;
  #endif
         return false;
diff --git a/arch/arm64/kernel/vdso/.gitignore b/arch/arm64/kernel/vdso/.gitignore

index b8cc94e9698b69f17f7d127d2663a21f64e5e2eb..f8b69d84238eb4f7743c23b425e1a4900cb9cd7f 100644 (file)
--- a/arch/arm64/kernel/vdso/.gitignore
+++ b/arch/arm64/kernel/vdso/.gitignore
@@ -1,2 +1 @@
  vdso.lds
-vdso-offsets.h
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c

index 1bfe30dfbfe77ffa2395528e008c058bd93b648d..fa1b18e364fc9d73cec1c0fdb6626285c1d7adc2 100644 (file)
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -135,7 +135,19 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
         return ret;
  }
  
+static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+       u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+       kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n",
+                     hsr, esr_get_class_string(hsr));
+
+       kvm_inject_undefined(vcpu);
+       return 1;
+}
+
  static exit_handle_fn arm_exit_handlers[] = {
+       [0 ... ESR_ELx_EC_MAX]  = kvm_handle_unknown_ec,
         [ESR_ELx_EC_WFx]        = kvm_handle_wfx,
         [ESR_ELx_EC_CP15_32]    = kvm_handle_cp15_32,
         [ESR_ELx_EC_CP15_64]    = kvm_handle_cp15_64,
@@ -162,13 +174,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
         u32 hsr = kvm_vcpu_get_hsr(vcpu);
         u8 hsr_ec = ESR_ELx_EC(hsr);
  
-       if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
-           !arm_exit_handlers[hsr_ec]) {
-               kvm_err("Unknown exception class: hsr: %#08x -- %s\n",
-                       hsr, esr_get_class_string(hsr));
-               BUG();
-       }
-
         return arm_exit_handlers[hsr_ec];
  }
  
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c

index e8e7ba2bc11f93abde92c6b91782ae776bdbcb73..9e1d2b75eecd606df6a6ccf632247ebc02149c67 100644 (file)
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -18,14 +18,62 @@
  #include <asm/kvm_hyp.h>
  #include <asm/tlbflush.h>
  
+static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
+{
+       u64 val;
+
+       /*
+        * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and
+        * most TLB operations target EL2/EL0. In order to affect the
+        * guest TLBs (EL1/EL0), we need to change one of these two
+        * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
+        * let's flip TGE before executing the TLB operation.
+        */
+       write_sysreg(kvm->arch.vttbr, vttbr_el2);
+       val = read_sysreg(hcr_el2);
+       val &= ~HCR_TGE;
+       write_sysreg(val, hcr_el2);
+       isb();
+}
+
+static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm)
+{
+       write_sysreg(kvm->arch.vttbr, vttbr_el2);
+       isb();
+}
+
+static hyp_alternate_select(__tlb_switch_to_guest,
+                           __tlb_switch_to_guest_nvhe,
+                           __tlb_switch_to_guest_vhe,
+                           ARM64_HAS_VIRT_HOST_EXTN);
+
+static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm)
+{
+       /*
+        * We're done with the TLB operation, let's restore the host's
+        * view of HCR_EL2.
+        */
+       write_sysreg(0, vttbr_el2);
+       write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+}
+
+static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm)
+{
+       write_sysreg(0, vttbr_el2);
+}
+
+static hyp_alternate_select(__tlb_switch_to_host,
+                           __tlb_switch_to_host_nvhe,
+                           __tlb_switch_to_host_vhe,
+                           ARM64_HAS_VIRT_HOST_EXTN);
+
  void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
  {
         dsb(ishst);
  
         /* Switch to requested VMID */
         kvm = kern_hyp_va(kvm);
-       write_sysreg(kvm->arch.vttbr, vttbr_el2);
-       isb();
+       __tlb_switch_to_guest()(kvm);
  
         /*
          * We could do so much better if we had the VA as well.
@@ -46,7 +94,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
         dsb(ish);
         isb();
  
-       write_sysreg(0, vttbr_el2);
+       __tlb_switch_to_host()(kvm);
  }
  
  void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
@@ -55,14 +103,13 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
  
         /* Switch to requested VMID */
         kvm = kern_hyp_va(kvm);
-       write_sysreg(kvm->arch.vttbr, vttbr_el2);
-       isb();
+       __tlb_switch_to_guest()(kvm);
  
         __tlbi(vmalls12e1is);
         dsb(ish);
         isb();
  
-       write_sysreg(0, vttbr_el2);
+       __tlb_switch_to_host()(kvm);
  }
  
  void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
@@ -70,14 +117,13 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
         struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
  
         /* Switch to requested VMID */
-       write_sysreg(kvm->arch.vttbr, vttbr_el2);
-       isb();
+       __tlb_switch_to_guest()(kvm);
  
         __tlbi(vmalle1);
         dsb(nsh);
         isb();
  
-       write_sysreg(0, vttbr_el2);
+       __tlb_switch_to_host()(kvm);
  }
  
  void __hyp_text __kvm_flush_vm_context(void)
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c

index 4bf899fb451baf652cafe6e3393be914af9674ba..1b35b8bddbfb07e778ab5c2bc511a86e04e2a255 100644 (file)
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -42,7 +42,20 @@
  #include <asm/pgtable.h>
  #include <asm/tlbflush.h>
  
-static const char *fault_name(unsigned int esr);
+struct fault_info {
+       int     (*fn)(unsigned long addr, unsigned int esr,
+                     struct pt_regs *regs);
+       int     sig;
+       int     code;
+       const char *name;
+};
+
+static const struct fault_info fault_info[];
+
+static inline const struct fault_info *esr_to_fault_info(unsigned int esr)
+{
+       return fault_info + (esr & 63);
+}
  
  #ifdef CONFIG_KPROBES
  static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
@@ -197,10 +210,12 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
                             struct pt_regs *regs)
  {
         struct siginfo si;
+       const struct fault_info *inf;
  
         if (unhandled_signal(tsk, sig) && show_unhandled_signals_ratelimited()) {
+               inf = esr_to_fault_info(esr);
                 pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n",
-                       tsk->comm, task_pid_nr(tsk), fault_name(esr), sig,
+                       tsk->comm, task_pid_nr(tsk), inf->name, sig,
                         addr, esr);
                 show_pte(tsk->mm, addr);
                 show_regs(regs);
@@ -219,14 +234,16 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
  {
         struct task_struct *tsk = current;
         struct mm_struct *mm = tsk->active_mm;
+       const struct fault_info *inf;
  
         /*
          * If we are in kernel mode at this point, we have no context to
          * handle this fault with.
          */
-       if (user_mode(regs))
-               __do_user_fault(tsk, addr, esr, SIGSEGV, SEGV_MAPERR, regs);
-       else
+       if (user_mode(regs)) {
+               inf = esr_to_fault_info(esr);
+               __do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs);
+       } else
                 __do_kernel_fault(mm, addr, esr, regs);
  }
  
@@ -488,12 +505,7 @@ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
         return 1;
  }
  
-static const struct fault_info {
-       int     (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs);
-       int     sig;
-       int     code;
-       const char *name;
-} fault_info[] = {
+static const struct fault_info fault_info[] = {
         { do_bad,               SIGBUS,  0,             "ttbr address size fault"       },
         { do_bad,               SIGBUS,  0,             "level 1 address size fault"    },
         { do_bad,               SIGBUS,  0,             "level 2 address size fault"    },
@@ -560,19 +572,13 @@ static const struct fault_info {
         { do_bad,               SIGBUS,  0,             "unknown 63"                    },
  };
  
-static const char *fault_name(unsigned int esr)
-{
-       const struct fault_info *inf = fault_info + (esr & 63);
-       return inf->name;
-}
-
  /*
   * Dispatch a data abort to the relevant handler.
   */
  asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
                                          struct pt_regs *regs)
  {
-       const struct fault_info *inf = fault_info + (esr & 63);
+       const struct fault_info *inf = esr_to_fault_info(esr);
         struct siginfo info;
  
         if (!inf->fn(addr, esr, regs))
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c

index e25584d723960e73fb8eec8d1a5f48fa57197582..7514a000e361f45e21d51601b1d351cb40c9306e 100644 (file)
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -294,10 +294,6 @@ static __init int setup_hugepagesz(char *opt)
                 hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
         } else if (ps == PUD_SIZE) {
                 hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
-       } else if (ps == (PAGE_SIZE * CONT_PTES)) {
-               hugetlb_add_hstate(CONT_PTE_SHIFT);
-       } else if (ps == (PMD_SIZE * CONT_PMDS)) {
-               hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT);
         } else {
                 hugetlb_bad_size();
                 pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
@@ -306,13 +302,3 @@ static __init int setup_hugepagesz(char *opt)
         return 1;
  }
  __setup("hugepagesz=", setup_hugepagesz);
-
-#ifdef CONFIG_ARM64_64K_PAGES
-static __init int add_default_hugepagesz(void)
-{
-       if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL)
-               hugetlb_add_hstate(CONT_PTE_SHIFT);
-       return 0;
-}
-arch_initcall(add_default_hugepagesz);
-#endif
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c

index 55d1e9205543689a6883d983dc82cb8b9eb2be6a..687a358a37337af9cf7a0d50c27b0176cfbd2012 100644 (file)
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -162,7 +162,7 @@ void __init kasan_init(void)
         clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
  
         vmemmap_populate(kimg_shadow_start, kimg_shadow_end,
-                        pfn_to_nid(virt_to_pfn(_text)));
+                        pfn_to_nid(virt_to_pfn(lm_alias(_text))));
  
         /*
          * vmemmap_populate() has populated the shadow region that covers the
diff --git a/arch/avr32/include/asm/pgtable-2level.h b/arch/avr32/include/asm/pgtable-2level.h

index 425dd567b5b955424ef4f995ddf49decdf4637fd..d5b1c63993ec29620b9306e734cbd3e3ee66bd01 100644 (file)
--- a/arch/avr32/include/asm/pgtable-2level.h
+++ b/arch/avr32/include/asm/pgtable-2level.h
@@ -8,6 +8,7 @@
  #ifndef __ASM_AVR32_PGTABLE_2LEVEL_H
  #define __ASM_AVR32_PGTABLE_2LEVEL_H
  
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopmd.h>
  
  /*
diff --git a/arch/avr32/oprofile/backtrace.c b/arch/avr32/oprofile/backtrace.c

index 75d9ad6f99cf56e8071eff21157ed3da0dfef4b7..29cf2f191bfd289902c7f29b55ac896c79697944 100644 (file)
--- a/arch/avr32/oprofile/backtrace.c
+++ b/arch/avr32/oprofile/backtrace.c
@@ -14,7 +14,7 @@
   */
  
  #include <linux/oprofile.h>
-#include <linux/sched.h>
+#include <linux/ptrace.h>
  #include <linux/uaccess.h>
  
  /* The first two words of each frame on the stack look like this if we have
diff --git a/arch/c6x/kernel/ptrace.c b/arch/c6x/kernel/ptrace.c

index a27e1f02ce182d0e6805057bc9390e67df5e751c..8801dc98fd442a85cd09113ce6794040ac2e22dd 100644 (file)
--- a/arch/c6x/kernel/ptrace.c
+++ b/arch/c6x/kernel/ptrace.c
@@ -70,46 +70,6 @@ static int gpr_get(struct task_struct *target,
                                    0, sizeof(*regs));
  }
  
-static int gpr_set(struct task_struct *target,
-                  const struct user_regset *regset,
-                  unsigned int pos, unsigned int count,
-                  const void *kbuf, const void __user *ubuf)
-{
-       int ret;
-       struct pt_regs *regs = task_pt_regs(target);
-
-       /* Don't copyin TSR or CSR */
-       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-                                &regs,
-                                0, PT_TSR * sizeof(long));
-       if (ret)
-               return ret;
-
-       ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
-                                       PT_TSR * sizeof(long),
-                                       (PT_TSR + 1) * sizeof(long));
-       if (ret)
-               return ret;
-
-       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-                                &regs,
-                                (PT_TSR + 1) * sizeof(long),
-                                PT_CSR * sizeof(long));
-       if (ret)
-               return ret;
-
-       ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
-                                       PT_CSR * sizeof(long),
-                                       (PT_CSR + 1) * sizeof(long));
-       if (ret)
-               return ret;
-
-       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-                                &regs,
-                                (PT_CSR + 1) * sizeof(long), -1);
-       return ret;
-}
-
  enum c6x_regset {
         REGSET_GPR,
  };
@@ -121,7 +81,6 @@ static const struct user_regset c6x_regsets[] = {
                 .size = sizeof(u32),
                 .align = sizeof(u32),
                 .get = gpr_get,
-               .set = gpr_set
         },
  };
  
diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c

index ae6903d7fdbe08c25a7fa23439d228b345d1a874..14970f11bbf2b60cc1d9e7ce26adffa1bf15f63e 100644 (file)
--- a/arch/cris/arch-v32/drivers/cryptocop.c
+++ b/arch/cris/arch-v32/drivers/cryptocop.c
@@ -2086,7 +2086,7 @@ static void cryptocop_job_queue_close(void)
                 dma_in_cfg.en = regk_dma_no;
                 REG_WR(dma, IN_DMA_INST, rw_cfg, dma_in_cfg);
  
-               /* Disble the cryptocop. */
+               /* Disable the cryptocop. */
                 rw_cfg = REG_RD(strcop, regi_strcop, rw_cfg);
                 rw_cfg.en = 0;
                 REG_WR(strcop, regi_strcop, rw_cfg, rw_cfg);
diff --git a/arch/cris/include/asm/pgtable.h b/arch/cris/include/asm/pgtable.h

index 2a3210ba4c720485c4ac29de9c9fa3b69b136726..fa3a73004cc570b564e1c36a66c644f1d86f0c94 100644 (file)
--- a/arch/cris/include/asm/pgtable.h
+++ b/arch/cris/include/asm/pgtable.h
@@ -6,6 +6,7 @@
  #define _CRIS_PGTABLE_H
  
  #include <asm/page.h>
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopmd.h>
  
  #ifndef __ASSEMBLY__
diff --git a/arch/frv/include/asm/pgtable.h b/arch/frv/include/asm/pgtable.h

index a0513d463a1fa86d39e4af642f68e6f279b69f3a..ab6e7e961b545c30805d5dccae26ffa3782db4f2 100644 (file)
--- a/arch/frv/include/asm/pgtable.h
+++ b/arch/frv/include/asm/pgtable.h
@@ -16,6 +16,7 @@
  #ifndef _ASM_PGTABLE_H
  #define _ASM_PGTABLE_H
  
+#include <asm-generic/5level-fixup.h>
  #include <asm/mem-layout.h>
  #include <asm/setup.h>
  #include <asm/processor.h>
diff --git a/arch/h8300/include/asm/pgtable.h b/arch/h8300/include/asm/pgtable.h

index 8341db67821dd16ebefd70d2eb0e7b361b76a6ec..7d265d28ba5eecd2a6770c2dd161253b1a9a89b6 100644 (file)
--- a/arch/h8300/include/asm/pgtable.h
+++ b/arch/h8300/include/asm/pgtable.h
@@ -1,5 +1,6 @@
  #ifndef _H8300_PGTABLE_H
  #define _H8300_PGTABLE_H
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopud.h>
  #include <asm-generic/pgtable.h>
  #define pgtable_cache_init()   do { } while (0)
diff --git a/arch/h8300/kernel/ptrace.c b/arch/h8300/kernel/ptrace.c

index 92075544a19ac03fae02b3efc28a986b0748ec1c..0dc1c8f622bc3fda818d5703896e99be485762ea 100644 (file)
--- a/arch/h8300/kernel/ptrace.c
+++ b/arch/h8300/kernel/ptrace.c
@@ -95,7 +95,8 @@ static int regs_get(struct task_struct *target,
         long *reg = (long *)&regs;
  
         /* build user regs in buffer */
-       for (r = 0; r < ARRAY_SIZE(register_offset); r++)
+       BUILD_BUG_ON(sizeof(regs) % sizeof(long) != 0);
+       for (r = 0; r < sizeof(regs) / sizeof(long); r++)
                 *reg++ = h8300_get_reg(target, r);
  
         return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
@@ -113,7 +114,8 @@ static int regs_set(struct task_struct *target,
         long *reg;
  
         /* build user regs in buffer */
-       for (reg = (long *)&regs, r = 0; r < ARRAY_SIZE(register_offset); r++)
+       BUILD_BUG_ON(sizeof(regs) % sizeof(long) != 0);
+       for (reg = (long *)&regs, r = 0; r < sizeof(regs) / sizeof(long); r++)
                 *reg++ = h8300_get_reg(target, r);
  
         ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
@@ -122,7 +124,7 @@ static int regs_set(struct task_struct *target,
                 return ret;
  
         /* write back to pt_regs */
-       for (reg = (long *)&regs, r = 0; r < ARRAY_SIZE(register_offset); r++)
+       for (reg = (long *)&regs, r = 0; r < sizeof(regs) / sizeof(long); r++)
                 h8300_put_reg(target, r, *reg++);
         return 0;
  }
diff --git a/arch/h8300/kernel/ptrace_h.c b/arch/h8300/kernel/ptrace_h.c

index fe3b5673babaa49428a90c1c56d64abec3d0c871..f5ff3b794c8512ce430812fd07b5c85752294d79 100644 (file)
--- a/arch/h8300/kernel/ptrace_h.c
+++ b/arch/h8300/kernel/ptrace_h.c
@@ -9,7 +9,7 @@
   */
  
  #include <linux/linkage.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
  #include <asm/ptrace.h>
  
  #define BREAKINST 0x5730 /* trapa #3 */
diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h

index 49eab8136ec307d3dbcb40fd98676ccc9f2b44a8..24a9177fb897b6f72fab8ff4277e1af76814de2f 100644 (file)
--- a/arch/hexagon/include/asm/pgtable.h
+++ b/arch/hexagon/include/asm/pgtable.h
@@ -26,6 +26,7 @@
   */
  #include <linux/swap.h>
  #include <asm/page.h>
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopmd.h>
  
  /* A handy thing to have if one has the RAM. Declared in head.S */
diff --git a/arch/ia64/include/asm/asm-prototypes.h b/arch/ia64/include/asm/asm-prototypes.h

new file mode 100644 (file)

index 0000000..a2c1398
--- /dev/null
+++ b/arch/ia64/include/asm/asm-prototypes.h
@@ -0,0 +1,29 @@
+#ifndef _ASM_IA64_ASM_PROTOTYPES_H
+#define _ASM_IA64_ASM_PROTOTYPES_H
+
+#include <asm/cacheflush.h>
+#include <asm/checksum.h>
+#include <asm/esi.h>
+#include <asm/ftrace.h>
+#include <asm/page.h>
+#include <asm/pal.h>
+#include <asm/string.h>
+#include <asm/uaccess.h>
+#include <asm/unwind.h>
+#include <asm/xor.h>
+
+extern const char ia64_ivt[];
+
+signed int __divsi3(signed int, unsigned int);
+signed int __modsi3(signed int, unsigned int);
+
+signed long long __divdi3(signed long long, unsigned long long);
+signed long long __moddi3(signed long long, unsigned long long);
+
+unsigned int __udivsi3(unsigned int, unsigned int);
+unsigned int __umodsi3(unsigned int, unsigned int);
+
+unsigned long long __udivdi3(unsigned long long, unsigned long long);
+unsigned long long __umoddi3(unsigned long long, unsigned long long);
+
+#endif /* _ASM_IA64_ASM_PROTOTYPES_H */
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h

index 384794e665fc4a733b420d7ff73c38d6ab5bff8f..6cc22c8d8923e9c294f8736190b3e55e38e7698a 100644 (file)
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -587,8 +587,10 @@ extern struct page *zero_page_memmap_ptr;
  
  
  #if CONFIG_PGTABLE_LEVELS == 3
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopud.h>
  #endif
+#include <asm-generic/5level-fixup.h>
  #include <asm-generic/pgtable.h>
  
  #endif /* _ASM_IA64_PGTABLE_H */
diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile

index 1f3d3877618fdc934ab20f07695476206fe35e00..0a40b14407b1692c7a684bb2ea689d130df36805 100644 (file)
--- a/arch/ia64/lib/Makefile
+++ b/arch/ia64/lib/Makefile
@@ -24,25 +24,25 @@ AFLAGS___modsi3.o   =            -DMODULO
  AFLAGS___umodsi3.o     = -DUNSIGNED -DMODULO
  
  $(obj)/__divdi3.o: $(src)/idiv64.S FORCE
-       $(call if_changed_dep,as_o_S)
+       $(call if_changed_rule,as_o_S)
  
  $(obj)/__udivdi3.o: $(src)/idiv64.S FORCE
-       $(call if_changed_dep,as_o_S)
+       $(call if_changed_rule,as_o_S)
  
  $(obj)/__moddi3.o: $(src)/idiv64.S FORCE
-       $(call if_changed_dep,as_o_S)
+       $(call if_changed_rule,as_o_S)
  
  $(obj)/__umoddi3.o: $(src)/idiv64.S FORCE
-       $(call if_changed_dep,as_o_S)
+       $(call if_changed_rule,as_o_S)
  
  $(obj)/__divsi3.o: $(src)/idiv32.S FORCE
-       $(call if_changed_dep,as_o_S)
+       $(call if_changed_rule,as_o_S)
  
  $(obj)/__udivsi3.o: $(src)/idiv32.S FORCE
-       $(call if_changed_dep,as_o_S)
+       $(call if_changed_rule,as_o_S)
  
  $(obj)/__modsi3.o: $(src)/idiv32.S FORCE
-       $(call if_changed_dep,as_o_S)
+       $(call if_changed_rule,as_o_S)
  
  $(obj)/__umodsi3.o: $(src)/idiv32.S FORCE
-       $(call if_changed_dep,as_o_S)
+       $(call if_changed_rule,as_o_S)
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig

index 048bf076f7df66a35fd4d11addd015e9ec285fc9..531cb9eb3319f4251bb6f4cf603b2a4bf90b7e80 100644 (file)
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -25,6 +25,7 @@ CONFIG_SUN_PARTITION=y
  # CONFIG_EFI_PARTITION is not set
  CONFIG_SYSV68_PARTITION=y
  CONFIG_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_DEADLINE=m
  CONFIG_KEXEC=y
  CONFIG_BOOTINFO_PROC=y
  CONFIG_M68020=y
@@ -60,6 +61,7 @@ CONFIG_NET_IPVTI=m
  CONFIG_NET_FOU_IP_TUNNELS=y
  CONFIG_INET_AH=m
  CONFIG_INET_ESP=m
+CONFIG_INET_ESP_OFFLOAD=m
  CONFIG_INET_IPCOMP=m
  CONFIG_INET_XFRM_MODE_TRANSPORT=m
  CONFIG_INET_XFRM_MODE_TUNNEL=m
@@ -71,6 +73,7 @@ CONFIG_IPV6=m
  CONFIG_IPV6_ROUTER_PREF=y
  CONFIG_INET6_AH=m
  CONFIG_INET6_ESP=m
+CONFIG_INET6_ESP_OFFLOAD=m
  CONFIG_INET6_IPCOMP=m
  CONFIG_IPV6_ILA=m
  CONFIG_IPV6_VTI=m
@@ -101,6 +104,7 @@ CONFIG_NFT_NUMGEN=m
  CONFIG_NFT_CT=m
  CONFIG_NFT_SET_RBTREE=m
  CONFIG_NFT_SET_HASH=m
+CONFIG_NFT_SET_BITMAP=m
  CONFIG_NFT_COUNTER=m
  CONFIG_NFT_LOG=m
  CONFIG_NFT_LIMIT=m
@@ -298,6 +302,8 @@ CONFIG_MPLS_IPTUNNEL=m
  CONFIG_NET_L3_MASTER_DEV=y
  CONFIG_AF_KCM=m
  # CONFIG_WIRELESS is not set
+CONFIG_PSAMPLE=m
+CONFIG_NET_IFE=m
  CONFIG_NET_DEVLINK=m
  # CONFIG_UEVENT_HELPER is not set
  CONFIG_DEVTMPFS=y
@@ -371,6 +377,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m
  CONFIG_MACVLAN=m
  CONFIG_MACVTAP=m
  CONFIG_IPVLAN=m
+CONFIG_IPVTAP=m
  CONFIG_VXLAN=m
  CONFIG_GENEVE=m
  CONFIG_GTP=m
@@ -383,6 +390,7 @@ CONFIG_VETH=m
  # CONFIG_NET_VENDOR_AMAZON is not set
  CONFIG_A2065=y
  CONFIG_ARIADNE=y
+# CONFIG_NET_VENDOR_AQUANTIA is not set
  # CONFIG_NET_VENDOR_ARC is not set
  # CONFIG_NET_CADENCE is not set
  # CONFIG_NET_VENDOR_BROADCOM is not set
@@ -404,7 +412,6 @@ CONFIG_ZORRO8390=y
  # CONFIG_NET_VENDOR_SOLARFLARE is not set
  # CONFIG_NET_VENDOR_SMSC is not set
  # CONFIG_NET_VENDOR_STMICRO is not set
-# CONFIG_NET_VENDOR_SYNOPSYS is not set
  # CONFIG_NET_VENDOR_VIA is not set
  # CONFIG_NET_VENDOR_WIZNET is not set
  CONFIG_PPP=m
@@ -564,6 +571,8 @@ CONFIG_NLS_MAC_TURKISH=m
  CONFIG_DLM=m
  # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
  CONFIG_MAGIC_SYSRQ=y
+CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_ATOMIC64_SELFTEST=m
  CONFIG_ASYNC_RAID6_TEST=m
  CONFIG_TEST_HEXDUMP=m
  CONFIG_TEST_STRING_HELPERS=m
@@ -594,6 +603,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
  CONFIG_CRYPTO_LRW=m
  CONFIG_CRYPTO_PCBC=m
  CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_CMAC=m
  CONFIG_CRYPTO_XCBC=m
  CONFIG_CRYPTO_VMAC=m
  CONFIG_CRYPTO_MICHAEL_MIC=m
@@ -605,6 +615,7 @@ CONFIG_CRYPTO_SHA512=m
  CONFIG_CRYPTO_SHA3=m
  CONFIG_CRYPTO_TGR192=m
  CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
  CONFIG_CRYPTO_ANUBIS=m
  CONFIG_CRYPTO_BLOWFISH=m
  CONFIG_CRYPTO_CAMELLIA=m
@@ -629,4 +640,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
  CONFIG_CRYPTO_USER_API_RNG=m
  CONFIG_CRYPTO_USER_API_AEAD=m
  # CONFIG_CRYPTO_HW is not set
+CONFIG_CRC32_SELFTEST=m
  CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig

index d4de24963f5f7434e5fab612629c23149fc0389c..ca91d39555da2dad5eb6413dd1892d1a070fe172 100644 (file)
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -26,6 +26,7 @@ CONFIG_SUN_PARTITION=y
  # CONFIG_EFI_PARTITION is not set
  CONFIG_SYSV68_PARTITION=y
  CONFIG_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_DEADLINE=m
  CONFIG_KEXEC=y
  CONFIG_BOOTINFO_PROC=y
  CONFIG_M68020=y
@@ -58,6 +59,7 @@ CONFIG_NET_IPVTI=m
  CONFIG_NET_FOU_IP_TUNNELS=y
  CONFIG_INET_AH=m
  CONFIG_INET_ESP=m
+CONFIG_INET_ESP_OFFLOAD=m
  CONFIG_INET_IPCOMP=m
  CONFIG_INET_XFRM_MODE_TRANSPORT=m
  CONFIG_INET_XFRM_MODE_TUNNEL=m
@@ -69,6 +71,7 @@ CONFIG_IPV6=m
  CONFIG_IPV6_ROUTER_PREF=y
  CONFIG_INET6_AH=m
  CONFIG_INET6_ESP=m
+CONFIG_INET6_ESP_OFFLOAD=m
  CONFIG_INET6_IPCOMP=m
  CONFIG_IPV6_ILA=m
  CONFIG_IPV6_VTI=m
@@ -99,6 +102,7 @@ CONFIG_NFT_NUMGEN=m
  CONFIG_NFT_CT=m
  CONFIG_NFT_SET_RBTREE=m
  CONFIG_NFT_SET_HASH=m
+CONFIG_NFT_SET_BITMAP=m
  CONFIG_NFT_COUNTER=m
  CONFIG_NFT_LOG=m
  CONFIG_NFT_LIMIT=m
@@ -296,6 +300,8 @@ CONFIG_MPLS_IPTUNNEL=m
  CONFIG_NET_L3_MASTER_DEV=y
  CONFIG_AF_KCM=m
  # CONFIG_WIRELESS is not set
+CONFIG_PSAMPLE=m
+CONFIG_NET_IFE=m
  CONFIG_NET_DEVLINK=m
  # CONFIG_UEVENT_HELPER is not set
  CONFIG_DEVTMPFS=y
@@ -353,6 +359,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m
  CONFIG_MACVLAN=m
  CONFIG_MACVTAP=m
  CONFIG_IPVLAN=m
+CONFIG_IPVTAP=m
  CONFIG_VXLAN=m
  CONFIG_GENEVE=m
  CONFIG_GTP=m
@@ -362,6 +369,7 @@ CONFIG_NETCONSOLE_DYNAMIC=y
  CONFIG_VETH=m
  # CONFIG_NET_VENDOR_ALACRITECH is not set
  # CONFIG_NET_VENDOR_AMAZON is not set
+# CONFIG_NET_VENDOR_AQUANTIA is not set
  # CONFIG_NET_VENDOR_ARC is not set
  # CONFIG_NET_CADENCE is not set
  # CONFIG_NET_VENDOR_BROADCOM is not set
@@ -378,7 +386,6 @@ CONFIG_VETH=m
  # CONFIG_NET_VENDOR_SEEQ is not set
  # CONFIG_NET_VENDOR_SOLARFLARE is not set
  # CONFIG_NET_VENDOR_STMICRO is not set
-# CONFIG_NET_VENDOR_SYNOPSYS is not set
  # CONFIG_NET_VENDOR_VIA is not set
  # CONFIG_NET_VENDOR_WIZNET is not set
  CONFIG_PPP=m
@@ -523,6 +530,8 @@ CONFIG_NLS_MAC_TURKISH=m
  CONFIG_DLM=m
  # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
  CONFIG_MAGIC_SYSRQ=y
+CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_ATOMIC64_SELFTEST=m
  CONFIG_ASYNC_RAID6_TEST=m
  CONFIG_TEST_HEXDUMP=m
  CONFIG_TEST_STRING_HELPERS=m
@@ -553,6 +562,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
  CONFIG_CRYPTO_LRW=m
  CONFIG_CRYPTO_PCBC=m
  CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_CMAC=m
  CONFIG_CRYPTO_XCBC=m
  CONFIG_CRYPTO_VMAC=m
  CONFIG_CRYPTO_MICHAEL_MIC=m
@@ -564,6 +574,7 @@ CONFIG_CRYPTO_SHA512=m
  CONFIG_CRYPTO_SHA3=m
  CONFIG_CRYPTO_TGR192=m
  CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
  CONFIG_CRYPTO_ANUBIS=m
  CONFIG_CRYPTO_BLOWFISH=m
  CONFIG_CRYPTO_CAMELLIA=m
@@ -588,4 +599,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
  CONFIG_CRYPTO_USER_API_RNG=m
  CONFIG_CRYPTO_USER_API_AEAD=m
  # CONFIG_CRYPTO_HW is not set
+CONFIG_CRC32_SELFTEST=m
  CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig

index fc0fd3f871f3348233c720465d2707fd97c8d94f..23a3d8a691e2239478299856316fc9bac1261ccd 100644 (file)
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -25,6 +25,7 @@ CONFIG_SUN_PARTITION=y
  # CONFIG_EFI_PARTITION is not set
  CONFIG_SYSV68_PARTITION=y
  CONFIG_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_DEADLINE=m
  CONFIG_KEXEC=y
  CONFIG_BOOTINFO_PROC=y
  CONFIG_M68020=y
@@ -58,6 +59,7 @@ CONFIG_NET_IPVTI=m
  CONFIG_NET_FOU_IP_TUNNELS=y
  CONFIG_INET_AH=m
  CONFIG_INET_ESP=m
+CONFIG_INET_ESP_OFFLOAD=m
  CONFIG_INET_IPCOMP=m
  CONFIG_INET_XFRM_MODE_TRANSPORT=m
  CONFIG_INET_XFRM_MODE_TUNNEL=m
@@ -69,6 +71,7 @@ CONFIG_IPV6=m
  CONFIG_IPV6_ROUTER_PREF=y
  CONFIG_INET6_AH=m
  CONFIG_INET6_ESP=m
+CONFIG_INET6_ESP_OFFLOAD=m
  CONFIG_INET6_IPCOMP=m
  CONFIG_IPV6_ILA=m
  CONFIG_IPV6_VTI=m
@@ -99,6 +102,7 @@ CONFIG_NFT_NUMGEN=m
  CONFIG_NFT_CT=m
  CONFIG_NFT_SET_RBTREE=m
  CONFIG_NFT_SET_HASH=m
+CONFIG_NFT_SET_BITMAP=m
  CONFIG_NFT_COUNTER=m
  CONFIG_NFT_LOG=m
  CONFIG_NFT_LIMIT=m
@@ -296,6 +300,8 @@ CONFIG_MPLS_IPTUNNEL=m
  CONFIG_NET_L3_MASTER_DEV=y
  CONFIG_AF_KCM=m
  # CONFIG_WIRELESS is not set
+CONFIG_PSAMPLE=m
+CONFIG_NET_IFE=m
  CONFIG_NET_DEVLINK=m
  # CONFIG_UEVENT_HELPER is not set
  CONFIG_DEVTMPFS=y
@@ -362,6 +368,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m
  CONFIG_MACVLAN=m
  CONFIG_MACVTAP=m
  CONFIG_IPVLAN=m
+CONFIG_IPVTAP=m
  CONFIG_VXLAN=m
  CONFIG_GENEVE=m
  CONFIG_GTP=m
@@ -372,6 +379,7 @@ CONFIG_VETH=m
  # CONFIG_NET_VENDOR_ALACRITECH is not set
  # CONFIG_NET_VENDOR_AMAZON is not set
  CONFIG_ATARILANCE=y
+# CONFIG_NET_VENDOR_AQUANTIA is not set
  # CONFIG_NET_VENDOR_ARC is not set
  # CONFIG_NET_CADENCE is not set
  # CONFIG_NET_VENDOR_BROADCOM is not set
@@ -389,7 +397,6 @@ CONFIG_NE2000=y
  # CONFIG_NET_VENDOR_SOLARFLARE is not set
  CONFIG_SMC91X=y
  # CONFIG_NET_VENDOR_STMICRO is not set
-# CONFIG_NET_VENDOR_SYNOPSYS is not set
  # CONFIG_NET_VENDOR_VIA is not set
  # CONFIG_NET_VENDOR_WIZNET is not set
  CONFIG_PPP=m
@@ -544,6 +551,8 @@ CONFIG_NLS_MAC_TURKISH=m
  CONFIG_DLM=m
  # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
  CONFIG_MAGIC_SYSRQ=y
+CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_ATOMIC64_SELFTEST=m
  CONFIG_ASYNC_RAID6_TEST=m
  CONFIG_TEST_HEXDUMP=m
  CONFIG_TEST_STRING_HELPERS=m
@@ -574,6 +583,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
  CONFIG_CRYPTO_LRW=m
  CONFIG_CRYPTO_PCBC=m
  CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_CMAC=m
  CONFIG_CRYPTO_XCBC=m
  CONFIG_CRYPTO_VMAC=m
  CONFIG_CRYPTO_MICHAEL_MIC=m
@@ -585,6 +595,7 @@ CONFIG_CRYPTO_SHA512=m
  CONFIG_CRYPTO_SHA3=m
  CONFIG_CRYPTO_TGR192=m
  CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
  CONFIG_CRYPTO_ANUBIS=m
  CONFIG_CRYPTO_BLOWFISH=m
  CONFIG_CRYPTO_CAMELLIA=m
@@ -609,4 +620,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
  CONFIG_CRYPTO_USER_API_RNG=m
  CONFIG_CRYPTO_USER_API_AEAD=m
  # CONFIG_CRYPTO_HW is not set
+CONFIG_CRC32_SELFTEST=m
  CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig

index 52e984a0aa696a503f458f2dd853913a1f32a52b..95deb95140fe9273ef2f70f670182103e5f03399 100644 (file)
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -25,6 +25,7 @@ CONFIG_UNIXWARE_DISKLABEL=y
  CONFIG_SUN_PARTITION=y
  # CONFIG_EFI_PARTITION is not set
  CONFIG_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_DEADLINE=m
  CONFIG_KEXEC=y
  CONFIG_BOOTINFO_PROC=y
  CONFIG_M68040=y
@@ -56,6 +57,7 @@ CONFIG_NET_IPVTI=m
  CONFIG_NET_FOU_IP_TUNNELS=y
  CONFIG_INET_AH=m
  CONFIG_INET_ESP=m
+CONFIG_INET_ESP_OFFLOAD=m
  CONFIG_INET_IPCOMP=m
  CONFIG_INET_XFRM_MODE_TRANSPORT=m
  CONFIG_INET_XFRM_MODE_TUNNEL=m
@@ -67,6 +69,7 @@ CONFIG_IPV6=m
  CONFIG_IPV6_ROUTER_PREF=y
  CONFIG_INET6_AH=m
  CONFIG_INET6_ESP=m
+CONFIG_INET6_ESP_OFFLOAD=m
  CONFIG_INET6_IPCOMP=m
  CONFIG_IPV6_ILA=m
  CONFIG_IPV6_VTI=m
@@ -97,6 +100,7 @@ CONFIG_NFT_NUMGEN=m
  CONFIG_NFT_CT=m
  CONFIG_NFT_SET_RBTREE=m
  CONFIG_NFT_SET_HASH=m
+CONFIG_NFT_SET_BITMAP=m
  CONFIG_NFT_COUNTER=m
  CONFIG_NFT_LOG=m
  CONFIG_NFT_LIMIT=m
@@ -294,6 +298,8 @@ CONFIG_MPLS_IPTUNNEL=m
  CONFIG_NET_L3_MASTER_DEV=y
  CONFIG_AF_KCM=m
  # CONFIG_WIRELESS is not set
+CONFIG_PSAMPLE=m
+CONFIG_NET_IFE=m
  CONFIG_NET_DEVLINK=m
  # CONFIG_UEVENT_HELPER is not set
  CONFIG_DEVTMPFS=y
@@ -352,6 +358,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m
  CONFIG_MACVLAN=m
  CONFIG_MACVTAP=m
  CONFIG_IPVLAN=m
+CONFIG_IPVTAP=m
  CONFIG_VXLAN=m
  CONFIG_GENEVE=m
  CONFIG_GTP=m
@@ -361,6 +368,7 @@ CONFIG_NETCONSOLE_DYNAMIC=y
  CONFIG_VETH=m
  # CONFIG_NET_VENDOR_ALACRITECH is not set
  # CONFIG_NET_VENDOR_AMAZON is not set
+# CONFIG_NET_VENDOR_AQUANTIA is not set
  # CONFIG_NET_VENDOR_ARC is not set
  # CONFIG_NET_CADENCE is not set
  # CONFIG_NET_VENDOR_BROADCOM is not set
@@ -377,7 +385,6 @@ CONFIG_BVME6000_NET=y
  # CONFIG_NET_VENDOR_SEEQ is not set
  # CONFIG_NET_VENDOR_SOLARFLARE is not set
  # CONFIG_NET_VENDOR_STMICRO is not set
-# CONFIG_NET_VENDOR_SYNOPSYS is not set
  # CONFIG_NET_VENDOR_VIA is not set
  # CONFIG_NET_VENDOR_WIZNET is not set
  CONFIG_PPP=m
@@ -515,6 +522,8 @@ CONFIG_NLS_MAC_TURKISH=m
  CONFIG_DLM=m
  # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
  CONFIG_MAGIC_SYSRQ=y
+CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_ATOMIC64_SELFTEST=m
  CONFIG_ASYNC_RAID6_TEST=m
  CONFIG_TEST_HEXDUMP=m
  CONFIG_TEST_STRING_HELPERS=m
@@ -545,6 +554,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
  CONFIG_CRYPTO_LRW=m
  CONFIG_CRYPTO_PCBC=m
  CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_CMAC=m
  CONFIG_CRYPTO_XCBC=m
  CONFIG_CRYPTO_VMAC=m
  CONFIG_CRYPTO_MICHAEL_MIC=m
@@ -556,6 +566,7 @@ CONFIG_CRYPTO_SHA512=m
  CONFIG_CRYPTO_SHA3=m
  CONFIG_CRYPTO_TGR192=m
  CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
  CONFIG_CRYPTO_ANUBIS=m
  CONFIG_CRYPTO_BLOWFISH=m
  CONFIG_CRYPTO_CAMELLIA=m
@@ -580,4 +591,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
  CONFIG_CRYPTO_USER_API_RNG=m
  CONFIG_CRYPTO_USER_API_AEAD=m
  # CONFIG_CRYPTO_HW is not set
+CONFIG_CRC32_SELFTEST=m
  CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig

index aaeed4422cc97525600537135669e9da3f865b3e..afae6958db2d777591527d1d59dc3bd686f64772 100644 (file)
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -26,6 +26,7 @@ CONFIG_SUN_PARTITION=y
  # CONFIG_EFI_PARTITION is not set
  CONFIG_SYSV68_PARTITION=y
  CONFIG_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_DEADLINE=m
  CONFIG_KEXEC=y
  CONFIG_BOOTINFO_PROC=y
  CONFIG_M68020=y
@@ -58,6 +59,7 @@ CONFIG_NET_IPVTI=m
  CONFIG_NET_FOU_IP_TUNNELS=y
  CONFIG_INET_AH=m
  CONFIG_INET_ESP=m
+CONFIG_INET_ESP_OFFLOAD=m
  CONFIG_INET_IPCOMP=m
  CONFIG_INET_XFRM_MODE_TRANSPORT=m
  CONFIG_INET_XFRM_MODE_TUNNEL=m
@@ -69,6 +71,7 @@ CONFIG_IPV6=m
  CONFIG_IPV6_ROUTER_PREF=y
  CONFIG_INET6_AH=m
  CONFIG_INET6_ESP=m
+CONFIG_INET6_ESP_OFFLOAD=m
  CONFIG_INET6_IPCOMP=m
  CONFIG_IPV6_ILA=m
  CONFIG_IPV6_VTI=m
@@ -99,6 +102,7 @@ CONFIG_NFT_NUMGEN=m
  CONFIG_NFT_CT=m
  CONFIG_NFT_SET_RBTREE=m
  CONFIG_NFT_SET_HASH=m
+CONFIG_NFT_SET_BITMAP=m
  CONFIG_NFT_COUNTER=m
  CONFIG_NFT_LOG=m
  CONFIG_NFT_LIMIT=m
@@ -296,6 +300,8 @@ CONFIG_MPLS_IPTUNNEL=m
  CONFIG_NET_L3_MASTER_DEV=y
  CONFIG_AF_KCM=m
  # CONFIG_WIRELESS is not set
+CONFIG_PSAMPLE=m
+CONFIG_NET_IFE=m
  CONFIG_NET_DEVLINK=m
  # CONFIG_UEVENT_HELPER is not set
  CONFIG_DEVTMPFS=y
@@ -353,6 +359,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m
  CONFIG_MACVLAN=m
  CONFIG_MACVTAP=m
  CONFIG_IPVLAN=m
+CONFIG_IPVTAP=m
  CONFIG_VXLAN=m
  CONFIG_GENEVE=m
  CONFIG_GTP=m
@@ -363,6 +370,7 @@ CONFIG_VETH=m
  # CONFIG_NET_VENDOR_ALACRITECH is not set
  # CONFIG_NET_VENDOR_AMAZON is not set
  CONFIG_HPLANCE=y
+# CONFIG_NET_VENDOR_AQUANTIA is not set
  # CONFIG_NET_VENDOR_ARC is not set
  # CONFIG_NET_CADENCE is not set
  # CONFIG_NET_VENDOR_BROADCOM is not set
@@ -379,7 +387,6 @@ CONFIG_HPLANCE=y
  # CONFIG_NET_VENDOR_SEEQ is not set
  # CONFIG_NET_VENDOR_SOLARFLARE is not set
  # CONFIG_NET_VENDOR_STMICRO is not set
-# CONFIG_NET_VENDOR_SYNOPSYS is not set
  # CONFIG_NET_VENDOR_VIA is not set
  # CONFIG_NET_VENDOR_WIZNET is not set
  CONFIG_PPP=m
@@ -525,6 +532,8 @@ CONFIG_NLS_MAC_TURKISH=m
  CONFIG_DLM=m
  # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
  CONFIG_MAGIC_SYSRQ=y
+CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_ATOMIC64_SELFTEST=m
  CONFIG_ASYNC_RAID6_TEST=m
  CONFIG_TEST_HEXDUMP=m
  CONFIG_TEST_STRING_HELPERS=m
@@ -555,6 +564,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
  CONFIG_CRYPTO_LRW=m
  CONFIG_CRYPTO_PCBC=m
  CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_CMAC=m
  CONFIG_CRYPTO_XCBC=m
  CONFIG_CRYPTO_VMAC=m
  CONFIG_CRYPTO_MICHAEL_MIC=m
@@ -566,6 +576,7 @@ CONFIG_CRYPTO_SHA512=m
  CONFIG_CRYPTO_SHA3=m
  CONFIG_CRYPTO_TGR192=m
  CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
  CONFIG_CRYPTO_ANUBIS=m
  CONFIG_CRYPTO_BLOWFISH=m
  CONFIG_CRYPTO_CAMELLIA=m
@@ -590,4 +601,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
  CONFIG_CRYPTO_USER_API_RNG=m
  CONFIG_CRYPTO_USER_API_AEAD=m
  # CONFIG_CRYPTO_HW is not set
+CONFIG_CRC32_SELFTEST=m
  CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig

index 3bbc9b2f0dac0fb890183d11d369655fdfd68201..b010734729a79e42b599c26919542faf4fbce31d 100644 (file)
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -25,6 +25,7 @@ CONFIG_SUN_PARTITION=y
  # CONFIG_EFI_PARTITION is not set
  CONFIG_SYSV68_PARTITION=y
  CONFIG_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_DEADLINE=m
  CONFIG_KEXEC=y
  CONFIG_BOOTINFO_PROC=y
  CONFIG_M68020=y
@@ -57,6 +58,7 @@ CONFIG_NET_IPVTI=m
  CONFIG_NET_FOU_IP_TUNNELS=y
  CONFIG_INET_AH=m
  CONFIG_INET_ESP=m
+CONFIG_INET_ESP_OFFLOAD=m
  CONFIG_INET_IPCOMP=m
  CONFIG_INET_XFRM_MODE_TRANSPORT=m
  CONFIG_INET_XFRM_MODE_TUNNEL=m
@@ -68,6 +70,7 @@ CONFIG_IPV6=m
  CONFIG_IPV6_ROUTER_PREF=y
  CONFIG_INET6_AH=m
  CONFIG_INET6_ESP=m
+CONFIG_INET6_ESP_OFFLOAD=m
  CONFIG_INET6_IPCOMP=m
  CONFIG_IPV6_ILA=m
  CONFIG_IPV6_VTI=m
@@ -98,6 +101,7 @@ CONFIG_NFT_NUMGEN=m
  CONFIG_NFT_CT=m
  CONFIG_NFT_SET_RBTREE=m
  CONFIG_NFT_SET_HASH=m
+CONFIG_NFT_SET_BITMAP=m
  CONFIG_NFT_COUNTER=m
  CONFIG_NFT_LOG=m
  CONFIG_NFT_LIMIT=m
@@ -298,6 +302,8 @@ CONFIG_MPLS_IPTUNNEL=m
  CONFIG_NET_L3_MASTER_DEV=y
  CONFIG_AF_KCM=m
  # CONFIG_WIRELESS is not set
+CONFIG_PSAMPLE=m
+CONFIG_NET_IFE=m
  CONFIG_NET_DEVLINK=m
  # CONFIG_UEVENT_HELPER is not set
  CONFIG_DEVTMPFS=y
@@ -369,6 +375,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m
  CONFIG_MACVLAN=m
  CONFIG_MACVTAP=m
  CONFIG_IPVLAN=m
+CONFIG_IPVTAP=m
  CONFIG_VXLAN=m
  CONFIG_GENEVE=m
  CONFIG_GTP=m
@@ -379,6 +386,7 @@ CONFIG_VETH=m
  # CONFIG_NET_VENDOR_ALACRITECH is not set
  # CONFIG_NET_VENDOR_AMAZON is not set
  CONFIG_MACMACE=y
+# CONFIG_NET_VENDOR_AQUANTIA is not set
  # CONFIG_NET_VENDOR_ARC is not set
  # CONFIG_NET_CADENCE is not set
  # CONFIG_NET_VENDOR_BROADCOM is not set
@@ -398,7 +406,6 @@ CONFIG_MAC8390=y
  # CONFIG_NET_VENDOR_SOLARFLARE is not set
  # CONFIG_NET_VENDOR_SMSC is not set
  # CONFIG_NET_VENDOR_STMICRO is not set
-# CONFIG_NET_VENDOR_SYNOPSYS is not set
  # CONFIG_NET_VENDOR_VIA is not set
  # CONFIG_NET_VENDOR_WIZNET is not set
  CONFIG_PPP=m
@@ -547,6 +554,8 @@ CONFIG_NLS_MAC_TURKISH=m
  CONFIG_DLM=m
  # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
  CONFIG_MAGIC_SYSRQ=y
+CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_ATOMIC64_SELFTEST=m
  CONFIG_ASYNC_RAID6_TEST=m
  CONFIG_TEST_HEXDUMP=m
  CONFIG_TEST_STRING_HELPERS=m
@@ -577,6 +586,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
  CONFIG_CRYPTO_LRW=m
  CONFIG_CRYPTO_PCBC=m
  CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_CMAC=m
  CONFIG_CRYPTO_XCBC=m
  CONFIG_CRYPTO_VMAC=m
  CONFIG_CRYPTO_MICHAEL_MIC=m
@@ -588,6 +598,7 @@ CONFIG_CRYPTO_SHA512=m
  CONFIG_CRYPTO_SHA3=m
  CONFIG_CRYPTO_TGR192=m
  CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
  CONFIG_CRYPTO_ANUBIS=m
  CONFIG_CRYPTO_BLOWFISH=m
  CONFIG_CRYPTO_CAMELLIA=m
@@ -612,4 +623,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
  CONFIG_CRYPTO_USER_API_RNG=m
  CONFIG_CRYPTO_USER_API_AEAD=m
  # CONFIG_CRYPTO_HW is not set
+CONFIG_CRC32_SELFTEST=m
  CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig

index 8f2c0decb2f8edd8030ffc1df2eddad2bd70a939..0e414549b235b0a04e9ed3feaa3786542c862bb3 100644 (file)
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -21,6 +21,7 @@ CONFIG_SOLARIS_X86_PARTITION=y
  CONFIG_UNIXWARE_DISKLABEL=y
  # CONFIG_EFI_PARTITION is not set
  CONFIG_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_DEADLINE=m
  CONFIG_KEXEC=y
  CONFIG_BOOTINFO_PROC=y
  CONFIG_M68020=y
@@ -67,6 +68,7 @@ CONFIG_NET_IPVTI=m
  CONFIG_NET_FOU_IP_TUNNELS=y
  CONFIG_INET_AH=m
  CONFIG_INET_ESP=m
+CONFIG_INET_ESP_OFFLOAD=m
  CONFIG_INET_IPCOMP=m
  CONFIG_INET_XFRM_MODE_TRANSPORT=m
  CONFIG_INET_XFRM_MODE_TUNNEL=m
@@ -78,6 +80,7 @@ CONFIG_IPV6=m
  CONFIG_IPV6_ROUTER_PREF=y
  CONFIG_INET6_AH=m
  CONFIG_INET6_ESP=m
+CONFIG_INET6_ESP_OFFLOAD=m
  CONFIG_INET6_IPCOMP=m
  CONFIG_IPV6_ILA=m
  CONFIG_IPV6_VTI=m
@@ -108,6 +111,7 @@ CONFIG_NFT_NUMGEN=m
  CONFIG_NFT_CT=m
  CONFIG_NFT_SET_RBTREE=m
  CONFIG_NFT_SET_HASH=m
+CONFIG_NFT_SET_BITMAP=m
  CONFIG_NFT_COUNTER=m
  CONFIG_NFT_LOG=m
  CONFIG_NFT_LIMIT=m
@@ -308,6 +312,8 @@ CONFIG_MPLS_IPTUNNEL=m
  CONFIG_NET_L3_MASTER_DEV=y
  CONFIG_AF_KCM=m
  # CONFIG_WIRELESS is not set
+CONFIG_PSAMPLE=m
+CONFIG_NET_IFE=m
  CONFIG_NET_DEVLINK=m
  # CONFIG_UEVENT_HELPER is not set
  CONFIG_DEVTMPFS=y
@@ -402,6 +408,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m
  CONFIG_MACVLAN=m
  CONFIG_MACVTAP=m
  CONFIG_IPVLAN=m
+CONFIG_IPVTAP=m
  CONFIG_VXLAN=m
  CONFIG_GENEVE=m
  CONFIG_GTP=m
@@ -419,6 +426,7 @@ CONFIG_HPLANCE=y
  CONFIG_MVME147_NET=y
  CONFIG_SUN3LANCE=y
  CONFIG_MACMACE=y
+# CONFIG_NET_VENDOR_AQUANTIA is not set
  # CONFIG_NET_VENDOR_ARC is not set
  # CONFIG_NET_CADENCE is not set
  # CONFIG_NET_VENDOR_BROADCOM is not set
@@ -444,7 +452,6 @@ CONFIG_ZORRO8390=y
  # CONFIG_NET_VENDOR_SOLARFLARE is not set
  CONFIG_SMC91X=y
  # CONFIG_NET_VENDOR_STMICRO is not set
-# CONFIG_NET_VENDOR_SYNOPSYS is not set
  # CONFIG_NET_VENDOR_VIA is not set
  # CONFIG_NET_VENDOR_WIZNET is not set
  CONFIG_PLIP=m
@@ -627,6 +634,8 @@ CONFIG_NLS_MAC_TURKISH=m
  CONFIG_DLM=m
  # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
  CONFIG_MAGIC_SYSRQ=y
+CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_ATOMIC64_SELFTEST=m
  CONFIG_ASYNC_RAID6_TEST=m
  CONFIG_TEST_HEXDUMP=m
  CONFIG_TEST_STRING_HELPERS=m
@@ -657,6 +666,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
  CONFIG_CRYPTO_LRW=m
  CONFIG_CRYPTO_PCBC=m
  CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_CMAC=m
  CONFIG_CRYPTO_XCBC=m
  CONFIG_CRYPTO_VMAC=m
  CONFIG_CRYPTO_MICHAEL_MIC=m
@@ -668,6 +678,7 @@ CONFIG_CRYPTO_SHA512=m
  CONFIG_CRYPTO_SHA3=m
  CONFIG_CRYPTO_TGR192=m
  CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
  CONFIG_CRYPTO_ANUBIS=m
  CONFIG_CRYPTO_BLOWFISH=m
  CONFIG_CRYPTO_CAMELLIA=m
@@ -692,4 +703,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
  CONFIG_CRYPTO_USER_API_RNG=m
  CONFIG_CRYPTO_USER_API_AEAD=m
  # CONFIG_CRYPTO_HW is not set
+CONFIG_CRC32_SELFTEST=m
  CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig

index c743dd22e96f935f553a12648991546616a47291..b2e687a0ec3d477d2f8fbb50a7387e51a60fb5bc 100644 (file)
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -25,6 +25,7 @@ CONFIG_UNIXWARE_DISKLABEL=y
  CONFIG_SUN_PARTITION=y
  # CONFIG_EFI_PARTITION is not set
  CONFIG_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_DEADLINE=m
  CONFIG_KEXEC=y
  CONFIG_BOOTINFO_PROC=y
  CONFIG_M68030=y
@@ -55,6 +56,7 @@ CONFIG_NET_IPVTI=m
  CONFIG_NET_FOU_IP_TUNNELS=y
  CONFIG_INET_AH=m
  CONFIG_INET_ESP=m
+CONFIG_INET_ESP_OFFLOAD=m
  CONFIG_INET_IPCOMP=m
  CONFIG_INET_XFRM_MODE_TRANSPORT=m
  CONFIG_INET_XFRM_MODE_TUNNEL=m
@@ -66,6 +68,7 @@ CONFIG_IPV6=m
  CONFIG_IPV6_ROUTER_PREF=y
  CONFIG_INET6_AH=m
  CONFIG_INET6_ESP=m
+CONFIG_INET6_ESP_OFFLOAD=m
  CONFIG_INET6_IPCOMP=m
  CONFIG_IPV6_ILA=m
  CONFIG_IPV6_VTI=m
@@ -96,6 +99,7 @@ CONFIG_NFT_NUMGEN=m
  CONFIG_NFT_CT=m
  CONFIG_NFT_SET_RBTREE=m
  CONFIG_NFT_SET_HASH=m
+CONFIG_NFT_SET_BITMAP=m
  CONFIG_NFT_COUNTER=m
  CONFIG_NFT_LOG=m
  CONFIG_NFT_LIMIT=m
@@ -293,6 +297,8 @@ CONFIG_MPLS_IPTUNNEL=m
  CONFIG_NET_L3_MASTER_DEV=y
  CONFIG_AF_KCM=m
  # CONFIG_WIRELESS is not set
+CONFIG_PSAMPLE=m
+CONFIG_NET_IFE=m
  CONFIG_NET_DEVLINK=m
  # CONFIG_UEVENT_HELPER is not set
  CONFIG_DEVTMPFS=y
@@ -351,6 +357,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m
  CONFIG_MACVLAN=m
  CONFIG_MACVTAP=m
  CONFIG_IPVLAN=m
+CONFIG_IPVTAP=m
  CONFIG_VXLAN=m
  CONFIG_GENEVE=m
  CONFIG_GTP=m
@@ -361,6 +368,7 @@ CONFIG_VETH=m
  # CONFIG_NET_VENDOR_ALACRITECH is not set
  # CONFIG_NET_VENDOR_AMAZON is not set
  CONFIG_MVME147_NET=y
+# CONFIG_NET_VENDOR_AQUANTIA is not set
  # CONFIG_NET_VENDOR_ARC is not set
  # CONFIG_NET_CADENCE is not set
  # CONFIG_NET_VENDOR_BROADCOM is not set
@@ -377,7 +385,6 @@ CONFIG_MVME147_NET=y
  # CONFIG_NET_VENDOR_SEEQ is not set
  # CONFIG_NET_VENDOR_SOLARFLARE is not set
  # CONFIG_NET_VENDOR_STMICRO is not set
-# CONFIG_NET_VENDOR_SYNOPSYS is not set
  # CONFIG_NET_VENDOR_VIA is not set
  # CONFIG_NET_VENDOR_WIZNET is not set
  CONFIG_PPP=m
@@ -515,6 +522,8 @@ CONFIG_NLS_MAC_TURKISH=m
  CONFIG_DLM=m
  # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
  CONFIG_MAGIC_SYSRQ=y
+CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_ATOMIC64_SELFTEST=m
  CONFIG_ASYNC_RAID6_TEST=m
  CONFIG_TEST_HEXDUMP=m
  CONFIG_TEST_STRING_HELPERS=m
@@ -545,6 +554,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
  CONFIG_CRYPTO_LRW=m
  CONFIG_CRYPTO_PCBC=m
  CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_CMAC=m
  CONFIG_CRYPTO_XCBC=m
  CONFIG_CRYPTO_VMAC=m
  CONFIG_CRYPTO_MICHAEL_MIC=m
@@ -556,6 +566,7 @@ CONFIG_CRYPTO_SHA512=m
  CONFIG_CRYPTO_SHA3=m
  CONFIG_CRYPTO_TGR192=m
  CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
  CONFIG_CRYPTO_ANUBIS=m
  CONFIG_CRYPTO_BLOWFISH=m
  CONFIG_CRYPTO_CAMELLIA=m
@@ -580,4 +591,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
  CONFIG_CRYPTO_USER_API_RNG=m
  CONFIG_CRYPTO_USER_API_AEAD=m
  # CONFIG_CRYPTO_HW is not set
+CONFIG_CRC32_SELFTEST=m
  CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig

index 2ccaca858f0533d79d7f4c6a52bf070f333eb8ce..cbd8ee24d1bc4e2f7c4f611df7cdd95c8b147e3b 100644 (file)
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -25,6 +25,7 @@ CONFIG_UNIXWARE_DISKLABEL=y
  CONFIG_SUN_PARTITION=y
  # CONFIG_EFI_PARTITION is not set
  CONFIG_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_DEADLINE=m
  CONFIG_KEXEC=y
  CONFIG_BOOTINFO_PROC=y
  CONFIG_M68040=y
@@ -56,6 +57,7 @@ CONFIG_NET_IPVTI=m
  CONFIG_NET_FOU_IP_TUNNELS=y
  CONFIG_INET_AH=m
  CONFIG_INET_ESP=m
+CONFIG_INET_ESP_OFFLOAD=m
  CONFIG_INET_IPCOMP=m
  CONFIG_INET_XFRM_MODE_TRANSPORT=m
  CONFIG_INET_XFRM_MODE_TUNNEL=m
@@ -67,6 +69,7 @@ CONFIG_IPV6=m
  CONFIG_IPV6_ROUTER_PREF=y
  CONFIG_INET6_AH=m
  CONFIG_INET6_ESP=m
+CONFIG_INET6_ESP_OFFLOAD=m
  CONFIG_INET6_IPCOMP=m
  CONFIG_IPV6_ILA=m
  CONFIG_IPV6_VTI=m
@@ -97,6 +100,7 @@ CONFIG_NFT_NUMGEN=m
  CONFIG_NFT_CT=m
  CONFIG_NFT_SET_RBTREE=m
  CONFIG_NFT_SET_HASH=m
+CONFIG_NFT_SET_BITMAP=m
  CONFIG_NFT_COUNTER=m
  CONFIG_NFT_LOG=m
  CONFIG_NFT_LIMIT=m
@@ -294,6 +298,8 @@ CONFIG_MPLS_IPTUNNEL=m
  CONFIG_NET_L3_MASTER_DEV=y
  CONFIG_AF_KCM=m
  # CONFIG_WIRELESS is not set
+CONFIG_PSAMPLE=m
+CONFIG_NET_IFE=m
  CONFIG_NET_DEVLINK=m
  # CONFIG_UEVENT_HELPER is not set
  CONFIG_DEVTMPFS=y
@@ -352,6 +358,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m
  CONFIG_MACVLAN=m
  CONFIG_MACVTAP=m
  CONFIG_IPVLAN=m
+CONFIG_IPVTAP=m
  CONFIG_VXLAN=m
  CONFIG_GENEVE=m
  CONFIG_GTP=m
@@ -361,6 +368,7 @@ CONFIG_NETCONSOLE_DYNAMIC=y
  CONFIG_VETH=m
  # CONFIG_NET_VENDOR_ALACRITECH is not set
  # CONFIG_NET_VENDOR_AMAZON is not set
+# CONFIG_NET_VENDOR_AQUANTIA is not set
  # CONFIG_NET_VENDOR_ARC is not set
  # CONFIG_NET_CADENCE is not set
  # CONFIG_NET_VENDOR_BROADCOM is not set
@@ -377,7 +385,6 @@ CONFIG_MVME16x_NET=y
  # CONFIG_NET_VENDOR_SEEQ is not set
  # CONFIG_NET_VENDOR_SOLARFLARE is not set
  # CONFIG_NET_VENDOR_STMICRO is not set
-# CONFIG_NET_VENDOR_SYNOPSYS is not set
  # CONFIG_NET_VENDOR_VIA is not set
  # CONFIG_NET_VENDOR_WIZNET is not set
  CONFIG_PPP=m
@@ -515,6 +522,8 @@ CONFIG_NLS_MAC_TURKISH=m
  CONFIG_DLM=m
  # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
  CONFIG_MAGIC_SYSRQ=y
+CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_ATOMIC64_SELFTEST=m
  CONFIG_ASYNC_RAID6_TEST=m
  CONFIG_TEST_HEXDUMP=m
  CONFIG_TEST_STRING_HELPERS=m
@@ -545,6 +554,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
  CONFIG_CRYPTO_LRW=m
  CONFIG_CRYPTO_PCBC=m
  CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_CMAC=m
  CONFIG_CRYPTO_XCBC=m
  CONFIG_CRYPTO_VMAC=m
  CONFIG_CRYPTO_MICHAEL_MIC=m
@@ -556,6 +566,7 @@ CONFIG_CRYPTO_SHA512=m
  CONFIG_CRYPTO_SHA3=m
  CONFIG_CRYPTO_TGR192=m
  CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
  CONFIG_CRYPTO_ANUBIS=m
  CONFIG_CRYPTO_BLOWFISH=m
  CONFIG_CRYPTO_CAMELLIA=m
@@ -580,4 +591,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
  CONFIG_CRYPTO_USER_API_RNG=m
  CONFIG_CRYPTO_USER_API_AEAD=m
  # CONFIG_CRYPTO_HW is not set
+CONFIG_CRC32_SELFTEST=m
  CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig

index 5599f3fd5fcd44eab2e52ac460b73376be5cc052..1e82cc9443399a2cd67febf8b7f3fda0682e1990 100644 (file)
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -26,6 +26,7 @@ CONFIG_SUN_PARTITION=y
  # CONFIG_EFI_PARTITION is not set
  CONFIG_SYSV68_PARTITION=y
  CONFIG_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_DEADLINE=m
  CONFIG_KEXEC=y
  CONFIG_BOOTINFO_PROC=y
  CONFIG_M68040=y
@@ -56,6 +57,7 @@ CONFIG_NET_IPVTI=m
  CONFIG_NET_FOU_IP_TUNNELS=y
  CONFIG_INET_AH=m
  CONFIG_INET_ESP=m
+CONFIG_INET_ESP_OFFLOAD=m
  CONFIG_INET_IPCOMP=m
  CONFIG_INET_XFRM_MODE_TRANSPORT=m
  CONFIG_INET_XFRM_MODE_TUNNEL=m
@@ -67,6 +69,7 @@ CONFIG_IPV6=m
  CONFIG_IPV6_ROUTER_PREF=y
  CONFIG_INET6_AH=m
  CONFIG_INET6_ESP=m
+CONFIG_INET6_ESP_OFFLOAD=m
  CONFIG_INET6_IPCOMP=m
  CONFIG_IPV6_ILA=m
  CONFIG_IPV6_VTI=m
@@ -97,6 +100,7 @@ CONFIG_NFT_NUMGEN=m
  CONFIG_NFT_CT=m
  CONFIG_NFT_SET_RBTREE=m
  CONFIG_NFT_SET_HASH=m
+CONFIG_NFT_SET_BITMAP=m
  CONFIG_NFT_COUNTER=m
  CONFIG_NFT_LOG=m
  CONFIG_NFT_LIMIT=m
@@ -294,6 +298,8 @@ CONFIG_MPLS_IPTUNNEL=m
  CONFIG_NET_L3_MASTER_DEV=y
  CONFIG_AF_KCM=m
  # CONFIG_WIRELESS is not set
+CONFIG_PSAMPLE=m
+CONFIG_NET_IFE=m
  CONFIG_NET_DEVLINK=m
  # CONFIG_UEVENT_HELPER is not set
  CONFIG_DEVTMPFS=y
@@ -358,6 +364,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m
  CONFIG_MACVLAN=m
  CONFIG_MACVTAP=m
  CONFIG_IPVLAN=m
+CONFIG_IPVTAP=m
  CONFIG_VXLAN=m
  CONFIG_GENEVE=m
  CONFIG_GTP=m
@@ -369,6 +376,7 @@ CONFIG_VETH=m
  # CONFIG_NET_VENDOR_ALACRITECH is not set
  # CONFIG_NET_VENDOR_AMAZON is not set
  # CONFIG_NET_VENDOR_AMD is not set
+# CONFIG_NET_VENDOR_AQUANTIA is not set
  # CONFIG_NET_VENDOR_ARC is not set
  # CONFIG_NET_CADENCE is not set
  # CONFIG_NET_VENDOR_BROADCOM is not set
@@ -388,7 +396,6 @@ CONFIG_NE2000=y
  # CONFIG_NET_VENDOR_SOLARFLARE is not set
  # CONFIG_NET_VENDOR_SMSC is not set
  # CONFIG_NET_VENDOR_STMICRO is not set
-# CONFIG_NET_VENDOR_SYNOPSYS is not set
  # CONFIG_NET_VENDOR_VIA is not set
  # CONFIG_NET_VENDOR_WIZNET is not set
  CONFIG_PLIP=m
@@ -538,6 +545,8 @@ CONFIG_NLS_MAC_TURKISH=m
  CONFIG_DLM=m
  # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
  CONFIG_MAGIC_SYSRQ=y
+CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_ATOMIC64_SELFTEST=m
  CONFIG_ASYNC_RAID6_TEST=m
  CONFIG_TEST_HEXDUMP=m
  CONFIG_TEST_STRING_HELPERS=m
@@ -568,6 +577,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
  CONFIG_CRYPTO_LRW=m
  CONFIG_CRYPTO_PCBC=m
  CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_CMAC=m
  CONFIG_CRYPTO_XCBC=m
  CONFIG_CRYPTO_VMAC=m
  CONFIG_CRYPTO_MICHAEL_MIC=m
@@ -579,6 +589,7 @@ CONFIG_CRYPTO_SHA512=m
  CONFIG_CRYPTO_SHA3=m
  CONFIG_CRYPTO_TGR192=m
  CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
  CONFIG_CRYPTO_ANUBIS=m
  CONFIG_CRYPTO_BLOWFISH=m
  CONFIG_CRYPTO_CAMELLIA=m
@@ -603,4 +614,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
  CONFIG_CRYPTO_USER_API_RNG=m
  CONFIG_CRYPTO_USER_API_AEAD=m
  # CONFIG_CRYPTO_HW is not set
+CONFIG_CRC32_SELFTEST=m
  CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig

index 313bf0a562ad33496735210197879fcf86739ea2..f9e77f57a9725035d9f75a30ae4c0941c229d5d7 100644 (file)
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -25,6 +25,7 @@ CONFIG_UNIXWARE_DISKLABEL=y
  # CONFIG_EFI_PARTITION is not set
  CONFIG_SYSV68_PARTITION=y
  CONFIG_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_DEADLINE=m
  CONFIG_KEXEC=y
  CONFIG_BOOTINFO_PROC=y
  CONFIG_SUN3=y
@@ -53,6 +54,7 @@ CONFIG_NET_IPVTI=m
  CONFIG_NET_FOU_IP_TUNNELS=y
  CONFIG_INET_AH=m
  CONFIG_INET_ESP=m
+CONFIG_INET_ESP_OFFLOAD=m
  CONFIG_INET_IPCOMP=m
  CONFIG_INET_XFRM_MODE_TRANSPORT=m
  CONFIG_INET_XFRM_MODE_TUNNEL=m
@@ -64,6 +66,7 @@ CONFIG_IPV6=m
  CONFIG_IPV6_ROUTER_PREF=y
  CONFIG_INET6_AH=m
  CONFIG_INET6_ESP=m
+CONFIG_INET6_ESP_OFFLOAD=m
  CONFIG_INET6_IPCOMP=m
  CONFIG_IPV6_ILA=m
  CONFIG_IPV6_VTI=m
@@ -94,6 +97,7 @@ CONFIG_NFT_NUMGEN=m
  CONFIG_NFT_CT=m
  CONFIG_NFT_SET_RBTREE=m
  CONFIG_NFT_SET_HASH=m
+CONFIG_NFT_SET_BITMAP=m
  CONFIG_NFT_COUNTER=m
  CONFIG_NFT_LOG=m
  CONFIG_NFT_LIMIT=m
@@ -291,6 +295,8 @@ CONFIG_MPLS_IPTUNNEL=m
  CONFIG_NET_L3_MASTER_DEV=y
  CONFIG_AF_KCM=m
  # CONFIG_WIRELESS is not set
+CONFIG_PSAMPLE=m
+CONFIG_NET_IFE=m
  CONFIG_NET_DEVLINK=m
  # CONFIG_UEVENT_HELPER is not set
  CONFIG_DEVTMPFS=y
@@ -349,6 +355,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m
  CONFIG_MACVLAN=m
  CONFIG_MACVTAP=m
  CONFIG_IPVLAN=m
+CONFIG_IPVTAP=m
  CONFIG_VXLAN=m
  CONFIG_GENEVE=m
  CONFIG_GTP=m
@@ -359,6 +366,7 @@ CONFIG_VETH=m
  # CONFIG_NET_VENDOR_ALACRITECH is not set
  # CONFIG_NET_VENDOR_AMAZON is not set
  CONFIG_SUN3LANCE=y
+# CONFIG_NET_VENDOR_AQUANTIA is not set
  # CONFIG_NET_VENDOR_ARC is not set
  # CONFIG_NET_CADENCE is not set
  # CONFIG_NET_VENDOR_EZCHIP is not set
@@ -375,7 +383,6 @@ CONFIG_SUN3_82586=y
  # CONFIG_NET_VENDOR_SOLARFLARE is not set
  # CONFIG_NET_VENDOR_STMICRO is not set
  # CONFIG_NET_VENDOR_SUN is not set
-# CONFIG_NET_VENDOR_SYNOPSYS is not set
  # CONFIG_NET_VENDOR_VIA is not set
  # CONFIG_NET_VENDOR_WIZNET is not set
  CONFIG_PPP=m
@@ -517,6 +524,8 @@ CONFIG_NLS_MAC_TURKISH=m
  CONFIG_DLM=m
  # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
  CONFIG_MAGIC_SYSRQ=y
+CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_ATOMIC64_SELFTEST=m
  CONFIG_ASYNC_RAID6_TEST=m
  CONFIG_TEST_HEXDUMP=m
  CONFIG_TEST_STRING_HELPERS=m
@@ -546,6 +555,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
  CONFIG_CRYPTO_LRW=m
  CONFIG_CRYPTO_PCBC=m
  CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_CMAC=m
  CONFIG_CRYPTO_XCBC=m
  CONFIG_CRYPTO_VMAC=m
  CONFIG_CRYPTO_MICHAEL_MIC=m
@@ -557,6 +567,7 @@ CONFIG_CRYPTO_SHA512=m
  CONFIG_CRYPTO_SHA3=m
  CONFIG_CRYPTO_TGR192=m
  CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
  CONFIG_CRYPTO_ANUBIS=m
  CONFIG_CRYPTO_BLOWFISH=m
  CONFIG_CRYPTO_CAMELLIA=m
@@ -581,4 +592,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
  CONFIG_CRYPTO_USER_API_RNG=m
  CONFIG_CRYPTO_USER_API_AEAD=m
  # CONFIG_CRYPTO_HW is not set
+CONFIG_CRC32_SELFTEST=m
  CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig

index 38b61365f769273f829980fa9cec2d9cc585e1f5..3c394fcfb36836beba293194b23b91c4ca31f2aa 100644 (file)
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -25,6 +25,7 @@ CONFIG_UNIXWARE_DISKLABEL=y
  # CONFIG_EFI_PARTITION is not set
  CONFIG_SYSV68_PARTITION=y
  CONFIG_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_DEADLINE=m
  CONFIG_KEXEC=y
  CONFIG_BOOTINFO_PROC=y
  CONFIG_SUN3X=y
@@ -53,6 +54,7 @@ CONFIG_NET_IPVTI=m
  CONFIG_NET_FOU_IP_TUNNELS=y
  CONFIG_INET_AH=m
  CONFIG_INET_ESP=m
+CONFIG_INET_ESP_OFFLOAD=m
  CONFIG_INET_IPCOMP=m
  CONFIG_INET_XFRM_MODE_TRANSPORT=m
  CONFIG_INET_XFRM_MODE_TUNNEL=m
@@ -64,6 +66,7 @@ CONFIG_IPV6=m
  CONFIG_IPV6_ROUTER_PREF=y
  CONFIG_INET6_AH=m
  CONFIG_INET6_ESP=m
+CONFIG_INET6_ESP_OFFLOAD=m
  CONFIG_INET6_IPCOMP=m
  CONFIG_IPV6_ILA=m
  CONFIG_IPV6_VTI=m
@@ -94,6 +97,7 @@ CONFIG_NFT_NUMGEN=m
  CONFIG_NFT_CT=m
  CONFIG_NFT_SET_RBTREE=m
  CONFIG_NFT_SET_HASH=m
+CONFIG_NFT_SET_BITMAP=m
  CONFIG_NFT_COUNTER=m
  CONFIG_NFT_LOG=m
  CONFIG_NFT_LIMIT=m
@@ -291,6 +295,8 @@ CONFIG_MPLS_IPTUNNEL=m
  CONFIG_NET_L3_MASTER_DEV=y
  CONFIG_AF_KCM=m
  # CONFIG_WIRELESS is not set
+CONFIG_PSAMPLE=m
+CONFIG_NET_IFE=m
  CONFIG_NET_DEVLINK=m
  # CONFIG_UEVENT_HELPER is not set
  CONFIG_DEVTMPFS=y
@@ -349,6 +355,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m
  CONFIG_MACVLAN=m
  CONFIG_MACVTAP=m
  CONFIG_IPVLAN=m
+CONFIG_IPVTAP=m
  CONFIG_VXLAN=m
  CONFIG_GENEVE=m
  CONFIG_GTP=m
@@ -359,6 +366,7 @@ CONFIG_VETH=m
  # CONFIG_NET_VENDOR_ALACRITECH is not set
  # CONFIG_NET_VENDOR_AMAZON is not set
  CONFIG_SUN3LANCE=y
+# CONFIG_NET_VENDOR_AQUANTIA is not set
  # CONFIG_NET_VENDOR_ARC is not set
  # CONFIG_NET_CADENCE is not set
  # CONFIG_NET_VENDOR_BROADCOM is not set
@@ -375,7 +383,6 @@ CONFIG_SUN3LANCE=y
  # CONFIG_NET_VENDOR_SEEQ is not set
  # CONFIG_NET_VENDOR_SOLARFLARE is not set
  # CONFIG_NET_VENDOR_STMICRO is not set
-# CONFIG_NET_VENDOR_SYNOPSYS is not set
  # CONFIG_NET_VENDOR_VIA is not set
  # CONFIG_NET_VENDOR_WIZNET is not set
  CONFIG_PPP=m
@@ -517,6 +524,8 @@ CONFIG_NLS_MAC_TURKISH=m
  CONFIG_DLM=m
  # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
  CONFIG_MAGIC_SYSRQ=y
+CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_ATOMIC64_SELFTEST=m
  CONFIG_ASYNC_RAID6_TEST=m
  CONFIG_TEST_HEXDUMP=m
  CONFIG_TEST_STRING_HELPERS=m
@@ -547,6 +556,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
  CONFIG_CRYPTO_LRW=m
  CONFIG_CRYPTO_PCBC=m
  CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_CMAC=m
  CONFIG_CRYPTO_XCBC=m
  CONFIG_CRYPTO_VMAC=m
  CONFIG_CRYPTO_MICHAEL_MIC=m
@@ -558,6 +568,7 @@ CONFIG_CRYPTO_SHA512=m
  CONFIG_CRYPTO_SHA3=m
  CONFIG_CRYPTO_TGR192=m
  CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
  CONFIG_CRYPTO_ANUBIS=m
  CONFIG_CRYPTO_BLOWFISH=m
  CONFIG_CRYPTO_CAMELLIA=m
@@ -582,4 +593,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
  CONFIG_CRYPTO_USER_API_RNG=m
  CONFIG_CRYPTO_USER_API_AEAD=m
  # CONFIG_CRYPTO_HW is not set
+CONFIG_CRC32_SELFTEST=m
  CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h

index b4a9b0d5928dfb33c0c6dd5ef507305f35214fe7..dda58cfe8c22a3ec65ba074c4c0baab3c957340b 100644 (file)
--- a/arch/m68k/include/asm/bitops.h
+++ b/arch/m68k/include/asm/bitops.h
@@ -148,7 +148,7 @@ static inline void bfchg_mem_change_bit(int nr, volatile unsigned long *vaddr)
  #define __change_bit(nr, vaddr)        change_bit(nr, vaddr)
  
  
-static inline int test_bit(int nr, const unsigned long *vaddr)
+static inline int test_bit(int nr, const volatile unsigned long *vaddr)
  {
         return (vaddr[nr >> 5] & (1UL << (nr & 31))) != 0;
  }
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h

index a857d82ec5094abc30e353f25370365194a01194..aab1edd0d4bade511f2ea92230d3bd533a58ad34 100644 (file)
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -4,7 +4,7 @@
  #include <uapi/asm/unistd.h>
  
  
-#define NR_syscalls            379
+#define NR_syscalls            380
  
  #define __ARCH_WANT_OLD_READDIR
  #define __ARCH_WANT_OLD_STAT
diff --git a/arch/m68k/include/uapi/asm/unistd.h b/arch/m68k/include/uapi/asm/unistd.h

index 9fe674bf911fd2a4e61d7119f9b91ffbdddf44f5..25589f5b8669631d5cf2441d7fb405a97c5561f5 100644 (file)
--- a/arch/m68k/include/uapi/asm/unistd.h
+++ b/arch/m68k/include/uapi/asm/unistd.h
@@ -384,5 +384,6 @@
  #define __NR_copy_file_range   376
  #define __NR_preadv2           377
  #define __NR_pwritev2          378
+#define __NR_statx             379
  
  #endif /* _UAPI_ASM_M68K_UNISTD_H_ */
diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S

index d6fd6d9ced2474ab477b0becefbd5f695d256e96..8c9fcfafe0dd90ba5f67f860e7eafb351cb156ef 100644 (file)
--- a/arch/m68k/kernel/syscalltable.S
+++ b/arch/m68k/kernel/syscalltable.S
@@ -399,3 +399,4 @@ ENTRY(sys_call_table)
         .long sys_copy_file_range
         .long sys_preadv2
         .long sys_pwritev2
+       .long sys_statx
diff --git a/arch/metag/include/asm/pgtable.h b/arch/metag/include/asm/pgtable.h

index ffa3a3a2ecadda8bed7cf5e7b1508cd98c43abf8..0c151e5af079288aeebb8deb55994ec0106f5507 100644 (file)
--- a/arch/metag/include/asm/pgtable.h
+++ b/arch/metag/include/asm/pgtable.h
@@ -6,6 +6,7 @@
  #define _METAG_PGTABLE_H
  
  #include <asm/pgtable-bits.h>
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopmd.h>
  
  /* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */
diff --git a/arch/metag/include/asm/uaccess.h b/arch/metag/include/asm/uaccess.h

index 273e61225c277ae67ba28dfae4ef9123e3ef34a9..07238b39638cd2c933219bb957bff1d80212d8bc 100644 (file)
--- a/arch/metag/include/asm/uaccess.h
+++ b/arch/metag/include/asm/uaccess.h
@@ -197,20 +197,21 @@ extern long __must_check strnlen_user(const char __user *src, long count);
  
  #define strlen_user(str) strnlen_user(str, 32767)
  
-extern unsigned long __must_check __copy_user_zeroing(void *to,
-                                                     const void __user *from,
-                                                     unsigned long n);
+extern unsigned long raw_copy_from_user(void *to, const void __user *from,
+                                       unsigned long n);
  
  static inline unsigned long
  copy_from_user(void *to, const void __user *from, unsigned long n)
  {
+       unsigned long res = n;
         if (likely(access_ok(VERIFY_READ, from, n)))
-               return __copy_user_zeroing(to, from, n);
-       memset(to, 0, n);
-       return n;
+               res = raw_copy_from_user(to, from, n);
+       if (unlikely(res))
+               memset(to + (n - res), 0, res);
+       return res;
  }
  
-#define __copy_from_user(to, from, n) __copy_user_zeroing(to, from, n)
+#define __copy_from_user(to, from, n) raw_copy_from_user(to, from, n)
  #define __copy_from_user_inatomic __copy_from_user
  
  extern unsigned long __must_check __copy_user(void __user *to,
diff --git a/arch/metag/kernel/ptrace.c b/arch/metag/kernel/ptrace.c

index 5fd16ee5280c1aee8584d6fbc90d5bd74b4c1665..e615603a4b0ae95cb0731ce5e653a518763b7c92 100644 (file)
--- a/arch/metag/kernel/ptrace.c
+++ b/arch/metag/kernel/ptrace.c
@@ -26,6 +26,16 @@
   * user_regset definitions.
   */
  
+static unsigned long user_txstatus(const struct pt_regs *regs)
+{
+       unsigned long data = (unsigned long)regs->ctx.Flags;
+
+       if (regs->ctx.SaveMask & TBICTX_CBUF_BIT)
+               data |= USER_GP_REGS_STATUS_CATCH_BIT;
+
+       return data;
+}
+
  int metag_gp_regs_copyout(const struct pt_regs *regs,
                           unsigned int pos, unsigned int count,
                           void *kbuf, void __user *ubuf)
@@ -64,9 +74,7 @@ int metag_gp_regs_copyout(const struct pt_regs *regs,
         if (ret)
                 goto out;
         /* TXSTATUS */
-       data = (unsigned long)regs->ctx.Flags;
-       if (regs->ctx.SaveMask & TBICTX_CBUF_BIT)
-               data |= USER_GP_REGS_STATUS_CATCH_BIT;
+       data = user_txstatus(regs);
         ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
                                   &data, 4*25, 4*26);
         if (ret)
@@ -121,6 +129,7 @@ int metag_gp_regs_copyin(struct pt_regs *regs,
         if (ret)
                 goto out;
         /* TXSTATUS */
+       data = user_txstatus(regs);
         ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
                                  &data, 4*25, 4*26);
         if (ret)
@@ -246,6 +255,8 @@ int metag_rp_state_copyin(struct pt_regs *regs,
         unsigned long long *ptr;
         int ret, i;
  
+       if (count < 4*13)
+               return -EINVAL;
         /* Read the entire pipeline before making any changes */
         ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
                                  &rp, 0, 4*13);
@@ -305,7 +316,7 @@ static int metag_tls_set(struct task_struct *target,
                         const void *kbuf, const void __user *ubuf)
  {
         int ret;
-       void __user *tls;
+       void __user *tls = target->thread.tls_ptr;
  
         ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1);
         if (ret)
diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c

index b3ebfe9c8e886a5c0f8f309b1a390ac286208a86..2792fc621088bcd1c3d7bfe58b8560ec32e6f880 100644 (file)
--- a/arch/metag/lib/usercopy.c
+++ b/arch/metag/lib/usercopy.c
@@ -29,7 +29,6 @@
                 COPY                                             \
                 "1:\n"                                           \
                 "       .section .fixup,\"ax\"\n"                \
-               "       MOV D1Ar1,#0\n"                          \
                 FIXUP                                            \
                 "       MOVT    D1Ar1,#HI(1b)\n"                 \
                 "       JUMP    D1Ar1,#LO(1b)\n"                 \
@@ -260,27 +259,31 @@
                 "MGETL  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
                 "22:\n"                                                 \
                 "MSETL  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
-               "SUB    %3, %3, #32\n"                                  \
                 "23:\n"                                                 \
-               "MGETL  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
+               "SUB    %3, %3, #32\n"                                  \
                 "24:\n"                                                 \
+               "MGETL  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
+               "25:\n"                                                 \
                 "MSETL  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
+               "26:\n"                                                 \
                 "SUB    %3, %3, #32\n"                                  \
                 "DCACHE [%1+#-64], D0Ar6\n"                             \
                 "BR     $Lloop"id"\n"                                   \
                                                                         \
                 "MOV    RAPF, %1\n"                                     \
-               "25:\n"                                                 \
+               "27:\n"                                                 \
                 "MGETL  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
-               "26:\n"                                                 \
+               "28:\n"                                                 \
                 "MSETL  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
+               "29:\n"                                                 \
                 "SUB    %3, %3, #32\n"                                  \
-               "27:\n"                                                 \
+               "30:\n"                                                 \
                 "MGETL  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
-               "28:\n"                                                 \
+               "31:\n"                                                 \
                 "MSETL  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
+               "32:\n"                                                 \
                 "SUB    %0, %0, #8\n"                                   \
-               "29:\n"                                                 \
+               "33:\n"                                                 \
                 "SETL   [%0++], D0.7, D1.7\n"                           \
                 "SUB    %3, %3, #32\n"                                  \
                 "1:"                                                    \
@@ -312,11 +315,15 @@
                 "       .long 26b,3b\n"                                 \
                 "       .long 27b,3b\n"                                 \
                 "       .long 28b,3b\n"                                 \
-               "       .long 29b,4b\n"                                 \
+               "       .long 29b,3b\n"                                 \
+               "       .long 30b,3b\n"                                 \
+               "       .long 31b,3b\n"                                 \
+               "       .long 32b,3b\n"                                 \
+               "       .long 33b,4b\n"                                 \
                 "       .previous\n"                                    \
                 : "=r" (to), "=r" (from), "=r" (ret), "=d" (n)          \
                 : "0" (to), "1" (from), "2" (ret), "3" (n)              \
-               : "D1Ar1", "D0Ar2", "memory")
+               : "D1Ar1", "D0Ar2", "cc", "memory")
  
  /*     rewind 'to' and 'from'  pointers when a fault occurs
   *
@@ -342,7 +349,7 @@
  #define __asm_copy_to_user_64bit_rapf_loop(to, from, ret, n, id)\
         __asm_copy_user_64bit_rapf_loop(to, from, ret, n, id,           \
                 "LSR    D0Ar2, D0Ar2, #8\n"                             \
-               "AND    D0Ar2, D0Ar2, #0x7\n"                           \
+               "ANDS   D0Ar2, D0Ar2, #0x7\n"                           \
                 "ADDZ   D0Ar2, D0Ar2, #4\n"                             \
                 "SUB    D0Ar2, D0Ar2, #1\n"                             \
                 "MOV    D1Ar1, #4\n"                                    \
@@ -403,47 +410,55 @@
                 "MGETD  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
                 "22:\n"                                                 \
                 "MSETD  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
-               "SUB    %3, %3, #16\n"                                  \
                 "23:\n"                                                 \
-               "MGETD  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
-               "24:\n"                                                 \
-               "MSETD  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
                 "SUB    %3, %3, #16\n"                                  \
-               "25:\n"                                                 \
+               "24:\n"                                                 \
                 "MGETD  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
-               "26:\n"                                                 \
+               "25:\n"                                                 \
                 "MSETD  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
+               "26:\n"                                                 \
                 "SUB    %3, %3, #16\n"                                  \
                 "27:\n"                                                 \
                 "MGETD  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
                 "28:\n"                                                 \
                 "MSETD  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
+               "29:\n"                                                 \
+               "SUB    %3, %3, #16\n"                                  \
+               "30:\n"                                                 \
+               "MGETD  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
+               "31:\n"                                                 \
+               "MSETD  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
+               "32:\n"                                                 \
                 "SUB    %3, %3, #16\n"                                  \
                 "DCACHE [%1+#-64], D0Ar6\n"                             \
                 "BR     $Lloop"id"\n"                                   \
                                                                         \
                 "MOV    RAPF, %1\n"                                     \
-               "29:\n"                                                 \
+               "33:\n"                                                 \
                 "MGETD  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
-               "30:\n"                                                 \
+               "34:\n"                                                 \
                 "MSETD  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
+               "35:\n"                                                 \
                 "SUB    %3, %3, #16\n"                                  \
-               "31:\n"                                                 \
+               "36:\n"                                                 \
                 "MGETD  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
-               "32:\n"                                                 \
+               "37:\n"                                                 \
                 "MSETD  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
+               "38:\n"                                                 \
                 "SUB    %3, %3, #16\n"                                  \
-               "33:\n"                                                 \
+               "39:\n"                                                 \
                 "MGETD  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
-               "34:\n"                                                 \
+               "40:\n"                                                 \
                 "MSETD  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
+               "41:\n"                                                 \
                 "SUB    %3, %3, #16\n"                                  \
-               "35:\n"                                                 \
+               "42:\n"                                                 \
                 "MGETD  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
-               "36:\n"                                                 \
+               "43:\n"                                                 \
                 "MSETD  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
+               "44:\n"                                                 \
                 "SUB    %0, %0, #4\n"                                   \
-               "37:\n"                                                 \
+               "45:\n"                                                 \
                 "SETD   [%0++], D0.7\n"                                 \
                 "SUB    %3, %3, #16\n"                                  \
                 "1:"                                                    \
@@ -483,11 +498,19 @@
                 "       .long 34b,3b\n"                                 \
                 "       .long 35b,3b\n"                                 \
                 "       .long 36b,3b\n"                                 \
-               "       .long 37b,4b\n"                                 \
+               "       .long 37b,3b\n"                                 \
+               "       .long 38b,3b\n"                                 \
+               "       .long 39b,3b\n"                                 \
+               "       .long 40b,3b\n"                                 \
+               "       .long 41b,3b\n"                                 \
+               "       .long 42b,3b\n"                                 \
+               "       .long 43b,3b\n"                                 \
+               "       .long 44b,3b\n"                                 \
+               "       .long 45b,4b\n"                                 \
                 "       .previous\n"                                    \
                 : "=r" (to), "=r" (from), "=r" (ret), "=d" (n)          \
                 : "0" (to), "1" (from), "2" (ret), "3" (n)              \
-               : "D1Ar1", "D0Ar2", "memory")
+               : "D1Ar1", "D0Ar2", "cc", "memory")
  
  /*     rewind 'to' and 'from'  pointers when a fault occurs
   *
@@ -513,7 +536,7 @@
  #define __asm_copy_to_user_32bit_rapf_loop(to, from, ret, n, id)\
         __asm_copy_user_32bit_rapf_loop(to, from, ret, n, id,           \
                 "LSR    D0Ar2, D0Ar2, #8\n"                             \
-               "AND    D0Ar2, D0Ar2, #0x7\n"                           \
+               "ANDS   D0Ar2, D0Ar2, #0x7\n"                           \
                 "ADDZ   D0Ar2, D0Ar2, #4\n"                             \
                 "SUB    D0Ar2, D0Ar2, #1\n"                             \
                 "MOV    D1Ar1, #4\n"                                    \
@@ -538,23 +561,31 @@ unsigned long __copy_user(void __user *pdst, const void *psrc,
         if ((unsigned long) src & 1) {
                 __asm_copy_to_user_1(dst, src, retn);
                 n--;
+               if (retn)
+                       return retn + n;
         }
         if ((unsigned long) dst & 1) {
                 /* Worst case - byte copy */
                 while (n > 0) {
                         __asm_copy_to_user_1(dst, src, retn);
                         n--;
+                       if (retn)
+                               return retn + n;
                 }
         }
         if (((unsigned long) src & 2) && n >= 2) {
                 __asm_copy_to_user_2(dst, src, retn);
                 n -= 2;
+               if (retn)
+                       return retn + n;
         }
         if ((unsigned long) dst & 2) {
                 /* Second worst case - word copy */
                 while (n >= 2) {
                         __asm_copy_to_user_2(dst, src, retn);
                         n -= 2;
+                       if (retn)
+                               return retn + n;
                 }
         }
  
@@ -569,6 +600,8 @@ unsigned long __copy_user(void __user *pdst, const void *psrc,
                 while (n >= 8) {
                         __asm_copy_to_user_8x64(dst, src, retn);
                         n -= 8;
+                       if (retn)
+                               return retn + n;
                 }
         }
         if (n >= RAPF_MIN_BUF_SIZE) {
@@ -581,6 +614,8 @@ unsigned long __copy_user(void __user *pdst, const void *psrc,
                 while (n >= 8) {
                         __asm_copy_to_user_8x64(dst, src, retn);
                         n -= 8;
+                       if (retn)
+                               return retn + n;
                 }
         }
  #endif
@@ -588,11 +623,15 @@ unsigned long __copy_user(void __user *pdst, const void *psrc,
         while (n >= 16) {
                 __asm_copy_to_user_16(dst, src, retn);
                 n -= 16;
+               if (retn)
+                       return retn + n;
         }
  
         while (n >= 4) {
                 __asm_copy_to_user_4(dst, src, retn);
                 n -= 4;
+               if (retn)
+                       return retn + n;
         }
  
         switch (n) {
@@ -609,6 +648,10 @@ unsigned long __copy_user(void __user *pdst, const void *psrc,
                 break;
         }
  
+       /*
+        * If we get here, retn correctly reflects the number of failing
+        * bytes.
+        */
         return retn;
  }
  EXPORT_SYMBOL(__copy_user);
@@ -617,16 +660,14 @@ EXPORT_SYMBOL(__copy_user);
         __asm_copy_user_cont(to, from, ret,     \
                 "       GETB D1Ar1,[%1++]\n"    \
                 "2:     SETB [%0++],D1Ar1\n",   \
-               "3:     ADD  %2,%2,#1\n"        \
-               "       SETB [%0++],D1Ar1\n",   \
+               "3:     ADD  %2,%2,#1\n",       \
                 "       .long 2b,3b\n")
  
  #define __asm_copy_from_user_2x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
         __asm_copy_user_cont(to, from, ret,             \
                 "       GETW D1Ar1,[%1++]\n"            \
                 "2:     SETW [%0++],D1Ar1\n" COPY,      \
-               "3:     ADD  %2,%2,#2\n"                \
-               "       SETW [%0++],D1Ar1\n" FIXUP,     \
+               "3:     ADD  %2,%2,#2\n" FIXUP,         \
                 "       .long 2b,3b\n" TENTRY)
  
  #define __asm_copy_from_user_2(to, from, ret) \
@@ -636,145 +677,26 @@ EXPORT_SYMBOL(__copy_user);
         __asm_copy_from_user_2x_cont(to, from, ret,     \
                 "       GETB D1Ar1,[%1++]\n"            \
                 "4:     SETB [%0++],D1Ar1\n",           \
-               "5:     ADD  %2,%2,#1\n"                \
-               "       SETB [%0++],D1Ar1\n",           \
+               "5:     ADD  %2,%2,#1\n",               \
                 "       .long 4b,5b\n")
  
  #define __asm_copy_from_user_4x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
         __asm_copy_user_cont(to, from, ret,             \
                 "       GETD D1Ar1,[%1++]\n"            \
                 "2:     SETD [%0++],D1Ar1\n" COPY,      \
-               "3:     ADD  %2,%2,#4\n"                \
-               "       SETD [%0++],D1Ar1\n" FIXUP,     \
+               "3:     ADD  %2,%2,#4\n" FIXUP,         \
                 "       .long 2b,3b\n" TENTRY)
  
  #define __asm_copy_from_user_4(to, from, ret) \
         __asm_copy_from_user_4x_cont(to, from, ret, "", "", "")
  
-#define __asm_copy_from_user_5(to, from, ret) \
-       __asm_copy_from_user_4x_cont(to, from, ret,     \
-               "       GETB D1Ar1,[%1++]\n"            \
-               "4:     SETB [%0++],D1Ar1\n",           \
-               "5:     ADD  %2,%2,#1\n"                \
-               "       SETB [%0++],D1Ar1\n",           \
-               "       .long 4b,5b\n")
-
-#define __asm_copy_from_user_6x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
-       __asm_copy_from_user_4x_cont(to, from, ret,     \
-               "       GETW D1Ar1,[%1++]\n"            \
-               "4:     SETW [%0++],D1Ar1\n" COPY,      \
-               "5:     ADD  %2,%2,#2\n"                \
-               "       SETW [%0++],D1Ar1\n" FIXUP,     \
-               "       .long 4b,5b\n" TENTRY)
-
-#define __asm_copy_from_user_6(to, from, ret) \
-       __asm_copy_from_user_6x_cont(to, from, ret, "", "", "")
-
-#define __asm_copy_from_user_7(to, from, ret) \
-       __asm_copy_from_user_6x_cont(to, from, ret,     \
-               "       GETB D1Ar1,[%1++]\n"            \
-               "6:     SETB [%0++],D1Ar1\n",           \
-               "7:     ADD  %2,%2,#1\n"                \
-               "       SETB [%0++],D1Ar1\n",           \
-               "       .long 6b,7b\n")
-
-#define __asm_copy_from_user_8x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
-       __asm_copy_from_user_4x_cont(to, from, ret,     \
-               "       GETD D1Ar1,[%1++]\n"            \
-               "4:     SETD [%0++],D1Ar1\n" COPY,      \
-               "5:     ADD  %2,%2,#4\n"                        \
-               "       SETD [%0++],D1Ar1\n" FIXUP,             \
-               "       .long 4b,5b\n" TENTRY)
-
-#define __asm_copy_from_user_8(to, from, ret) \
-       __asm_copy_from_user_8x_cont(to, from, ret, "", "", "")
-
-#define __asm_copy_from_user_9(to, from, ret) \
-       __asm_copy_from_user_8x_cont(to, from, ret,     \
-               "       GETB D1Ar1,[%1++]\n"            \
-               "6:     SETB [%0++],D1Ar1\n",           \
-               "7:     ADD  %2,%2,#1\n"                \
-               "       SETB [%0++],D1Ar1\n",           \
-               "       .long 6b,7b\n")
-
-#define __asm_copy_from_user_10x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
-       __asm_copy_from_user_8x_cont(to, from, ret,     \
-               "       GETW D1Ar1,[%1++]\n"            \
-               "6:     SETW [%0++],D1Ar1\n" COPY,      \
-               "7:     ADD  %2,%2,#2\n"                \
-               "       SETW [%0++],D1Ar1\n" FIXUP,     \
-               "       .long 6b,7b\n" TENTRY)
-
-#define __asm_copy_from_user_10(to, from, ret) \
-       __asm_copy_from_user_10x_cont(to, from, ret, "", "", "")
-
-#define __asm_copy_from_user_11(to, from, ret)         \
-       __asm_copy_from_user_10x_cont(to, from, ret,    \
-               "       GETB D1Ar1,[%1++]\n"            \
-               "8:     SETB [%0++],D1Ar1\n",           \
-               "9:     ADD  %2,%2,#1\n"                \
-               "       SETB [%0++],D1Ar1\n",           \
-               "       .long 8b,9b\n")
-
-#define __asm_copy_from_user_12x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
-       __asm_copy_from_user_8x_cont(to, from, ret,     \
-               "       GETD D1Ar1,[%1++]\n"            \
-               "6:     SETD [%0++],D1Ar1\n" COPY,      \
-               "7:     ADD  %2,%2,#4\n"                \
-               "       SETD [%0++],D1Ar1\n" FIXUP,     \
-               "       .long 6b,7b\n" TENTRY)
-
-#define __asm_copy_from_user_12(to, from, ret) \
-       __asm_copy_from_user_12x_cont(to, from, ret, "", "", "")
-
-#define __asm_copy_from_user_13(to, from, ret) \
-       __asm_copy_from_user_12x_cont(to, from, ret,    \
-               "       GETB D1Ar1,[%1++]\n"            \
-               "8:     SETB [%0++],D1Ar1\n",           \
-               "9:     ADD  %2,%2,#1\n"                \
-               "       SETB [%0++],D1Ar1\n",           \
-               "       .long 8b,9b\n")
-
-#define __asm_copy_from_user_14x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
-       __asm_copy_from_user_12x_cont(to, from, ret,    \
-               "       GETW D1Ar1,[%1++]\n"            \
-               "8:     SETW [%0++],D1Ar1\n" COPY,      \
-               "9:     ADD  %2,%2,#2\n"                \
-               "       SETW [%0++],D1Ar1\n" FIXUP,     \
-               "       .long 8b,9b\n" TENTRY)
-
-#define __asm_copy_from_user_14(to, from, ret) \
-       __asm_copy_from_user_14x_cont(to, from, ret, "", "", "")
-
-#define __asm_copy_from_user_15(to, from, ret) \
-       __asm_copy_from_user_14x_cont(to, from, ret,    \
-               "       GETB D1Ar1,[%1++]\n"            \
-               "10:    SETB [%0++],D1Ar1\n",           \
-               "11:    ADD  %2,%2,#1\n"                \
-               "       SETB [%0++],D1Ar1\n",           \
-               "       .long 10b,11b\n")
-
-#define __asm_copy_from_user_16x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
-       __asm_copy_from_user_12x_cont(to, from, ret,    \
-               "       GETD D1Ar1,[%1++]\n"            \
-               "8:     SETD [%0++],D1Ar1\n" COPY,      \
-               "9:     ADD  %2,%2,#4\n"                \
-               "       SETD [%0++],D1Ar1\n" FIXUP,     \
-               "       .long 8b,9b\n" TENTRY)
-
-#define __asm_copy_from_user_16(to, from, ret) \
-       __asm_copy_from_user_16x_cont(to, from, ret, "", "", "")
-
  #define __asm_copy_from_user_8x64(to, from, ret) \
         asm volatile (                          \
                 "       GETL D0Ar2,D1Ar1,[%1++]\n"      \
                 "2:     SETL [%0++],D0Ar2,D1Ar1\n"      \
                 "1:\n"                                  \
                 "       .section .fixup,\"ax\"\n"       \
-               "       MOV D1Ar1,#0\n"                 \
-               "       MOV D0Ar2,#0\n"                 \
                 "3:     ADD  %2,%2,#8\n"                \
-               "       SETL [%0++],D0Ar2,D1Ar1\n"      \
                 "       MOVT    D0Ar2,#HI(1b)\n"        \
                 "       JUMP    D0Ar2,#LO(1b)\n"        \
                 "       .previous\n"                    \
@@ -789,36 +711,57 @@ EXPORT_SYMBOL(__copy_user);
   *
   *     Rationale:
   *             A fault occurs while reading from user buffer, which is the
- *             source. Since the fault is at a single address, we only
- *             need to rewind by 8 bytes.
+ *             source.
   *             Since we don't write to kernel buffer until we read first,
   *             the kernel buffer is at the right state and needn't be
- *             corrected.
+ *             corrected, but the source must be rewound to the beginning of
+ *             the block, which is LSM_STEP*8 bytes.
+ *             LSM_STEP is bits 10:8 in TXSTATUS which is already read
+ *             and stored in D0Ar2
+ *
+ *             NOTE: If a fault occurs at the last operation in M{G,S}ETL
+ *                     LSM_STEP will be 0, i.e. we do 4 reads in our case; if
+ *                     a fault happens at the 4th read, LSM_STEP will be 0
+ *                     instead of 4. The code copes with that.
   */
  #define __asm_copy_from_user_64bit_rapf_loop(to, from, ret, n, id)     \
         __asm_copy_user_64bit_rapf_loop(to, from, ret, n, id,           \
-               "SUB    %1, %1, #8\n")
+               "LSR    D0Ar2, D0Ar2, #5\n"                             \
+               "ANDS   D0Ar2, D0Ar2, #0x38\n"                          \
+               "ADDZ   D0Ar2, D0Ar2, #32\n"                            \
+               "SUB    %1, %1, D0Ar2\n")
  
  /*     rewind 'from' pointer when a fault occurs
   *
   *     Rationale:
   *             A fault occurs while reading from user buffer, which is the
- *             source. Since the fault is at a single address, we only
- *             need to rewind by 4 bytes.
+ *             source.
   *             Since we don't write to kernel buffer until we read first,
   *             the kernel buffer is at the right state and needn't be
- *             corrected.
+ *             corrected, but the source must be rewound to the beginning of
+ *             the block, which is LSM_STEP*4 bytes.
+ *             LSM_STEP is bits 10:8 in TXSTATUS which is already read
+ *             and stored in D0Ar2
+ *
+ *             NOTE: If a fault occurs at the last operation in M{G,S}ETD
+ *                     LSM_STEP will be 0, i.e. we do 4 reads in our case; if
+ *                     a fault happens at the 4th read, LSM_STEP will be 0
+ *                     instead of 4. The code copes with that.
   */
  #define __asm_copy_from_user_32bit_rapf_loop(to, from, ret, n, id)     \
         __asm_copy_user_32bit_rapf_loop(to, from, ret, n, id,           \
-               "SUB    %1, %1, #4\n")
+               "LSR    D0Ar2, D0Ar2, #6\n"                             \
+               "ANDS   D0Ar2, D0Ar2, #0x1c\n"                          \
+               "ADDZ   D0Ar2, D0Ar2, #16\n"                            \
+               "SUB    %1, %1, D0Ar2\n")
  
  
-/* Copy from user to kernel, zeroing the bytes that were inaccessible in
-   userland.  The return-value is the number of bytes that were
-   inaccessible.  */
-unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
-                                 unsigned long n)
+/*
+ * Copy from user to kernel. The return-value is the number of bytes that were
+ * inaccessible.
+ */
+unsigned long raw_copy_from_user(void *pdst, const void __user *psrc,
+                                unsigned long n)
  {
         register char *dst asm ("A0.2") = pdst;
         register const char __user *src asm ("A1.2") = psrc;
@@ -830,6 +773,8 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
         if ((unsigned long) src & 1) {
                 __asm_copy_from_user_1(dst, src, retn);
                 n--;
+               if (retn)
+                       return retn + n;
         }
         if ((unsigned long) dst & 1) {
                 /* Worst case - byte copy */
@@ -837,12 +782,14 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
                         __asm_copy_from_user_1(dst, src, retn);
                         n--;
                         if (retn)
-                               goto copy_exception_bytes;
+                               return retn + n;
                 }
         }
         if (((unsigned long) src & 2) && n >= 2) {
                 __asm_copy_from_user_2(dst, src, retn);
                 n -= 2;
+               if (retn)
+                       return retn + n;
         }
         if ((unsigned long) dst & 2) {
                 /* Second worst case - word copy */
@@ -850,16 +797,10 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
                         __asm_copy_from_user_2(dst, src, retn);
                         n -= 2;
                         if (retn)
-                               goto copy_exception_bytes;
+                               return retn + n;
                 }
         }
  
-       /* We only need one check after the unalignment-adjustments,
-          because if both adjustments were done, either both or
-          neither reference had an exception.  */
-       if (retn != 0)
-               goto copy_exception_bytes;
-
  #ifdef USE_RAPF
         /* 64 bit copy loop */
         if (!(((unsigned long) src | (unsigned long) dst) & 7)) {
@@ -872,7 +813,7 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
                         __asm_copy_from_user_8x64(dst, src, retn);
                         n -= 8;
                         if (retn)
-                               goto copy_exception_bytes;
+                               return retn + n;
                 }
         }
  
@@ -888,7 +829,7 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
                         __asm_copy_from_user_8x64(dst, src, retn);
                         n -= 8;
                         if (retn)
-                               goto copy_exception_bytes;
+                               return retn + n;
                 }
         }
  #endif
@@ -898,7 +839,7 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
                 n -= 4;
  
                 if (retn)
-                       goto copy_exception_bytes;
+                       return retn + n;
         }
  
         /* If we get here, there were no memory read faults.  */
@@ -924,21 +865,8 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
         /* If we get here, retn correctly reflects the number of failing
            bytes.  */
         return retn;
-
- copy_exception_bytes:
-       /* We already have "retn" bytes cleared, and need to clear the
-          remaining "n" bytes.  A non-optimized simple byte-for-byte in-line
-          memset is preferred here, since this isn't speed-critical code and
-          we'd rather have this a leaf-function than calling memset.  */
-       {
-               char *endp;
-               for (endp = dst + n; dst < endp; dst++)
-                       *dst = 0;
-       }
-
-       return retn + n;
  }
-EXPORT_SYMBOL(__copy_user_zeroing);
+EXPORT_SYMBOL(raw_copy_from_user);
  
  #define __asm_clear_8x64(to, ret) \
         asm volatile (                                  \
diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h

index fd850879854dff3f79a73effd5fc782e9bd203d8..d506bb0893f94e67288fdb8a4b700749873e921b 100644 (file)
--- a/arch/microblaze/include/asm/page.h
+++ b/arch/microblaze/include/asm/page.h
@@ -95,7 +95,8 @@ typedef struct { unsigned long pgd; } pgd_t;
  #   else /* CONFIG_MMU */
  typedef struct { unsigned long ste[64]; }      pmd_t;
  typedef struct { pmd_t         pue[1]; }       pud_t;
-typedef struct { pud_t         pge[1]; }       pgd_t;
+typedef struct { pud_t         p4e[1]; }       p4d_t;
+typedef struct { p4d_t         pge[1]; }       pgd_t;
  #   endif /* CONFIG_MMU */
  
  # define pte_val(x)    ((x).pte)
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig

index a008a9f03072deb900409ad2ef93a6bd65cdb48e..e0bb576410bbdf3bd85cea7a2bb6aae219656c56 100644 (file)
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1531,7 +1531,7 @@ config CPU_MIPS64_R6
         select CPU_SUPPORTS_HIGHMEM
         select CPU_SUPPORTS_MSA
         select GENERIC_CSUM
-       select MIPS_O32_FP64_SUPPORT if MIPS32_O32
+       select MIPS_O32_FP64_SUPPORT if 32BIT || MIPS32_O32
         select HAVE_KVM
         help
           Choose this option to build a kernel for release 6 or later of the
diff --git a/arch/mips/Makefile b/arch/mips/Makefile

index 8ef9c02747fa95a753ea79b77ed2a177a60ac6ad..02a1787c888c09b0d36f0b9ea6bea0988c3134c2 100644 (file)
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -489,7 +489,7 @@ $(generic_defconfigs):
         $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \
                 -m -O $(objtree) $(srctree)/arch/$(ARCH)/configs/generic_defconfig $^ \
                 $(foreach board,$(BOARDS),$(generic_config_dir)/board-$(board).config)
-       $(Q)$(MAKE) olddefconfig
+       $(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig
  
  #
  # Prevent generic merge_config rules attempting to merge single fragments
@@ -503,8 +503,8 @@ $(generic_config_dir)/%.config: ;
  #
  .PHONY: sead3_defconfig
  sead3_defconfig:
-       $(Q)$(MAKE) 32r2el_defconfig BOARDS=sead-3
+       $(Q)$(MAKE) -f $(srctree)/Makefile 32r2el_defconfig BOARDS=sead-3
  
  .PHONY: sead3micro_defconfig
  sead3micro_defconfig:
-       $(Q)$(MAKE) micro32r2el_defconfig BOARDS=sead-3
+       $(Q)$(MAKE) -f $(srctree)/Makefile micro32r2el_defconfig BOARDS=sead-3
diff --git a/arch/mips/cavium-octeon/cpu.c b/arch/mips/cavium-octeon/cpu.c

index a5b427909b5cac04d28c4da1b099342ee72df4ce..036d56cc459168a0087ce8eba3e69aae9f9197b6 100644 (file)
--- a/arch/mips/cavium-octeon/cpu.c
+++ b/arch/mips/cavium-octeon/cpu.c
@@ -10,7 +10,9 @@
  #include <linux/irqflags.h>
  #include <linux/notifier.h>
  #include <linux/prefetch.h>
+#include <linux/ptrace.h>
  #include <linux/sched.h>
+#include <linux/sched/task_stack.h>
  
  #include <asm/cop2.h>
  #include <asm/current.h>
diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.c b/arch/mips/cavium-octeon/crypto/octeon-crypto.c

index 4d22365844af30b9bdd24cfd521b6a8e0a7f7cff..cfb4a146cf1786a73a28a781f26b363757164968 100644 (file)
--- a/arch/mips/cavium-octeon/crypto/octeon-crypto.c
+++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.c
@@ -9,6 +9,7 @@
  #include <asm/cop2.h>
  #include <linux/export.h>
  #include <linux/interrupt.h>
+#include <linux/sched/task_stack.h>
  
  #include "octeon-crypto.h"
  
diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c

index 4b94b7fbafa3602374477c99fd76acde79945565..3de786545ded10ac64f5fa77d2025fa3a9955fd9 100644 (file)
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -12,6 +12,7 @@
  #include <linux/kernel_stat.h>
  #include <linux/sched.h>
  #include <linux/sched/hotplug.h>
+#include <linux/sched/task_stack.h>
  #include <linux/init.h>
  #include <linux/export.h>
  
diff --git a/arch/mips/include/asm/asm-prototypes.h b/arch/mips/include/asm/asm-prototypes.h

index a160cf69bb92d3ec35ec20236015b8405f29e0a3..6e28971fe73ad7e8ec0ab07c5e6e87b68d76f857 100644 (file)
--- a/arch/mips/include/asm/asm-prototypes.h
+++ b/arch/mips/include/asm/asm-prototypes.h
@@ -3,3 +3,4 @@
  #include <asm/fpu.h>
  #include <asm-generic/asm-prototypes.h>
  #include <asm/uaccess.h>
+#include <asm/ftrace.h>
diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h

index 321752bcbab6ec5fc6fa1d6a658b82bb1a58e7ed..a2813fe381cf5442ca5a4fb2a3f22d619d7ef557 100644 (file)
--- a/arch/mips/include/asm/fpu.h
+++ b/arch/mips/include/asm/fpu.h
@@ -12,6 +12,7 @@
  
  #include <linux/sched.h>
  #include <linux/sched/task_stack.h>
+#include <linux/ptrace.h>
  #include <linux/thread_info.h>
  #include <linux/bitops.h>
  
@@ -20,6 +21,7 @@
  #include <asm/cpu-features.h>
  #include <asm/fpu_emulator.h>
  #include <asm/hazards.h>
+#include <asm/ptrace.h>
  #include <asm/processor.h>
  #include <asm/current.h>
  #include <asm/msa.h>
diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h

index 956db6e201d1877ca6f6b3f8d48fc9ecd6799396..ddd1c918103bcc347c6a350d28d213ba314a90d9 100644 (file)
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -18,9 +18,24 @@
  #include <irq.h>
  
  #define IRQ_STACK_SIZE                 THREAD_SIZE
+#define IRQ_STACK_START                        (IRQ_STACK_SIZE - sizeof(unsigned long))
  
  extern void *irq_stack[NR_CPUS];
  
+/*
+ * The highest address on the IRQ stack contains a dummy frame put down in
+ * genex.S (handle_int & except_vec_vi_handler) which is structured as follows:
+ *
+ *   top ------------
+ *       | task sp  | <- irq_stack[cpu] + IRQ_STACK_START
+ *       ------------
+ *       |          | <- First frame of IRQ context
+ *       ------------
+ *
+ * task sp holds a copy of the task stack pointer where the struct pt_regs
+ * from exception entry can be found.
+ */
+
  static inline bool on_irq_stack(int cpu, unsigned long sp)
  {
         unsigned long low = (unsigned long)irq_stack[cpu];
diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h

index d21f3da7bdb619402a438b923fda454b7525d204..6f94bed571c4416b917a52fe364172243a3a9fe6 100644 (file)
--- a/arch/mips/include/asm/pgtable-32.h
+++ b/arch/mips/include/asm/pgtable-32.h
@@ -16,6 +16,7 @@
  #include <asm/cachectl.h>
  #include <asm/fixmap.h>
  
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopmd.h>
  
  extern int temp_tlb_entry;
diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h

index 514cbc0a6a6760fd84d77c3760aa9ce2ac682fd5..130a2a6c153156bd311e6646bbfd3b92fc6f7228 100644 (file)
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -17,6 +17,7 @@
  #include <asm/cachectl.h>
  #include <asm/fixmap.h>
  
+#define __ARCH_USE_5LEVEL_HACK
  #if defined(CONFIG_PAGE_SIZE_64KB) && !defined(CONFIG_MIPS_VA_BITS_48)
  #include <asm-generic/pgtable-nopmd.h>
  #else
diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h

index f485afe51514765710eaf990b674e92378effe61..a8df44d60607baf6df350fdaf7ba6eaeefd2ff56 100644 (file)
--- a/arch/mips/include/asm/spinlock.h
+++ b/arch/mips/include/asm/spinlock.h
@@ -127,7 +127,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
                 "       andi    %[ticket], %[ticket], 0xffff            \n"
                 "       bne     %[ticket], %[my_ticket], 4f             \n"
                 "        subu   %[ticket], %[my_ticket], %[ticket]      \n"
-               "2:                                                     \n"
+               "2:     .insn                                           \n"
                 "       .subsection 2                                   \n"
                 "4:     andi    %[ticket], %[ticket], 0xffff            \n"
                 "       sll     %[ticket], 5                            \n"
@@ -202,7 +202,7 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
                 "       sc      %[ticket], %[ticket_ptr]                \n"
                 "       beqz    %[ticket], 1b                           \n"
                 "        li     %[ticket], 1                            \n"
-               "2:                                                     \n"
+               "2:     .insn                                           \n"
                 "       .subsection 2                                   \n"
                 "3:     b       2b                                      \n"
                 "        li     %[ticket], 0                            \n"
@@ -382,7 +382,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
                 "       .set    reorder                                 \n"
                 __WEAK_LLSC_MB
                 "       li      %2, 1                                   \n"
-               "2:                                                     \n"
+               "2:     .insn                                           \n"
                 : "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp), "=&r" (ret)
                 : GCC_OFF_SMALL_ASM() (rw->lock)
                 : "memory");
@@ -422,7 +422,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
                         "       lui     %1, 0x8000                      \n"
                         "       sc      %1, %0                          \n"
                         "       li      %2, 1                           \n"
-                       "2:                                             \n"
+                       "2:     .insn                                   \n"
                         : "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp),
                           "=&r" (ret)
                         : GCC_OFF_SMALL_ASM() (rw->lock)
diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h

index 3e940dbe02629ad6916f1870d2bf5a57086e89b8..78faf4292e907c175e0ff70e961a1c768f6df7c0 100644 (file)
--- a/arch/mips/include/uapi/asm/unistd.h
+++ b/arch/mips/include/uapi/asm/unistd.h
@@ -386,17 +386,18 @@
  #define __NR_pkey_mprotect             (__NR_Linux + 363)
  #define __NR_pkey_alloc                        (__NR_Linux + 364)
  #define __NR_pkey_free                 (__NR_Linux + 365)
+#define __NR_statx                     (__NR_Linux + 366)
  
  
  /*
   * Offset of the last Linux o32 flavoured syscall
   */
-#define __NR_Linux_syscalls            365
+#define __NR_Linux_syscalls            366
  
  #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
  
  #define __NR_O32_Linux                 4000
-#define __NR_O32_Linux_syscalls                365
+#define __NR_O32_Linux_syscalls                366
  
  #if _MIPS_SIM == _MIPS_SIM_ABI64
  
@@ -730,16 +731,17 @@
  #define __NR_pkey_mprotect             (__NR_Linux + 323)
  #define __NR_pkey_alloc                        (__NR_Linux + 324)
  #define __NR_pkey_free                 (__NR_Linux + 325)
+#define __NR_statx                     (__NR_Linux + 326)
  
  /*
   * Offset of the last Linux 64-bit flavoured syscall
   */
-#define __NR_Linux_syscalls            325
+#define __NR_Linux_syscalls            326
  
  #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
  
  #define __NR_64_Linux                  5000
-#define __NR_64_Linux_syscalls         325
+#define __NR_64_Linux_syscalls         326
  
  #if _MIPS_SIM == _MIPS_SIM_NABI32
  
@@ -1077,15 +1079,16 @@
  #define __NR_pkey_mprotect             (__NR_Linux + 327)
  #define __NR_pkey_alloc                        (__NR_Linux + 328)
  #define __NR_pkey_free                 (__NR_Linux + 329)
+#define __NR_statx                     (__NR_Linux + 330)
  
  /*
   * Offset of the last N32 flavoured syscall
   */
-#define __NR_Linux_syscalls            329
+#define __NR_Linux_syscalls            330
  
  #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
  
  #define __NR_N32_Linux                 6000
-#define __NR_N32_Linux_syscalls                329
+#define __NR_N32_Linux_syscalls                330
  
  #endif /* _UAPI_ASM_UNISTD_H */
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c

index bb5c5d34ba8152459eb4f9bfeace3e04315b7528..a670c0c11875d28e6b18d163066934aa27259139 100644 (file)
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -102,6 +102,7 @@ void output_thread_info_defines(void)
         DEFINE(_THREAD_SIZE, THREAD_SIZE);
         DEFINE(_THREAD_MASK, THREAD_MASK);
         DEFINE(_IRQ_STACK_SIZE, IRQ_STACK_SIZE);
+       DEFINE(_IRQ_STACK_START, IRQ_STACK_START);
         BLANK();
  }
  
diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c

index 804d2a2a19fe03175aa6ad6fa56f91238591f493..dd6a18bc10abd0c34a6717fe12bb7169a4589231 100644 (file)
--- a/arch/mips/kernel/cevt-r4k.c
+++ b/arch/mips/kernel/cevt-r4k.c
@@ -80,7 +80,7 @@ static unsigned int calculate_min_delta(void)
                 }
  
                 /* Sorted insert of 75th percentile into buf2 */
-               for (k = 0; k < i; ++k) {
+               for (k = 0; k < i && k < ARRAY_SIZE(buf2); ++k) {
                         if (buf1[ARRAY_SIZE(buf1) - 1] < buf2[k]) {
                                 l = min_t(unsigned int,
                                           i, ARRAY_SIZE(buf2) - 1);
diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S

index 59476a607adda07c5be321b8f21653668c1555b8..a00e87b0256d3d2a031ac1300239804edfcc8604 100644 (file)
--- a/arch/mips/kernel/cps-vec.S
+++ b/arch/mips/kernel/cps-vec.S
@@ -361,7 +361,7 @@ LEAF(mips_cps_get_bootcfg)
         END(mips_cps_get_bootcfg)
  
  LEAF(mips_cps_boot_vpes)
-       PTR_L   ta2, COREBOOTCFG_VPEMASK(a0)
+       lw      ta2, COREBOOTCFG_VPEMASK(a0)
         PTR_L   ta3, COREBOOTCFG_VPECONFIG(a0)
  
  #if defined(CONFIG_CPU_MIPSR6)
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c

index 07718bb5fc9d8612d98cc78d46a7ca10851f8245..12422fd4af2335dab2f90f1b0b47684a5b08e377 100644 (file)
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -1824,7 +1824,7 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
                 }
  
                 decode_configs(c);
-               c->options |= MIPS_CPU_TLBINV | MIPS_CPU_LDPTE;
+               c->options |= MIPS_CPU_FTLB | MIPS_CPU_TLBINV | MIPS_CPU_LDPTE;
                 c->writecombine = _CACHE_UNCACHED_ACCELERATED;
                 break;
         default:
diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c

index 6430bff21fff80a7bb6647c6af0198364cac7bdd..5c429d70e17f6f24cbcfd0fa912c67eccd11f28f 100644 (file)
--- a/arch/mips/kernel/elf.c
+++ b/arch/mips/kernel/elf.c
@@ -257,7 +257,7 @@ int arch_check_elf(void *_ehdr, bool has_interpreter, void *_interp_ehdr,
         else if ((prog_req.fr1 && prog_req.frdefault) ||
                  (prog_req.single && !prog_req.frdefault))
                 /* Make sure 64-bit MIPS III/IV/64R1 will not pick FR1 */
-               state->overall_fp_mode = ((current_cpu_data.fpu_id & MIPS_FPIR_F64) &&
+               state->overall_fp_mode = ((raw_current_cpu_data.fpu_id & MIPS_FPIR_F64) &&
                                           cpu_has_mips_r2_r6) ?
                                           FP_FR1 : FP_FR0;
         else if (prog_req.fr1)
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S

index 7ec9612cb0078a4c6ca9825f2e5f2a80e05d3508..ae810da4d499e6282638d8edadb722078b5f04cb 100644 (file)
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -215,9 +215,11 @@ NESTED(handle_int, PT_SIZE, sp)
         beq     t0, t1, 2f
  
         /* Switch to IRQ stack */
-       li      t1, _IRQ_STACK_SIZE
+       li      t1, _IRQ_STACK_START
         PTR_ADD sp, t0, t1
  
+       /* Save task's sp on IRQ stack so that unwinding can follow it */
+       LONG_S  s1, 0(sp)
  2:
         jal     plat_irq_dispatch
  
@@ -325,9 +327,11 @@ NESTED(except_vec_vi_handler, 0, sp)
         beq     t0, t1, 2f
  
         /* Switch to IRQ stack */
-       li      t1, _IRQ_STACK_SIZE
+       li      t1, _IRQ_STACK_START
         PTR_ADD sp, t0, t1
  
+       /* Save task's sp on IRQ stack so that unwinding can follow it */
+       LONG_S  s1, 0(sp)
  2:
         jalr    v0
  
@@ -519,7 +523,7 @@ NESTED(nmi_handler, PT_SIZE, sp)
         BUILD_HANDLER reserved reserved sti verbose     /* others */
  
         .align  5
-       LEAF(handle_ri_rdhwr_vivt)
+       LEAF(handle_ri_rdhwr_tlbp)
         .set    push
         .set    noat
         .set    noreorder
@@ -538,7 +542,7 @@ NESTED(nmi_handler, PT_SIZE, sp)
         .set    pop
         bltz    k1, handle_ri   /* slow path */
         /* fall thru */
-       END(handle_ri_rdhwr_vivt)
+       END(handle_ri_rdhwr_tlbp)
  
         LEAF(handle_ri_rdhwr)
         .set    push
diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c

index 1f4bd222ba765788fe8e9c1f14e382c59b60df62..eb6c0d582626b114fcb8d30f9fb28ee3472e8cc9 100644 (file)
--- a/arch/mips/kernel/kgdb.c
+++ b/arch/mips/kernel/kgdb.c
@@ -244,9 +244,6 @@ static int compute_signal(int tt)
  void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
  {
         int reg;
-       struct thread_info *ti = task_thread_info(p);
-       unsigned long ksp = (unsigned long)ti + THREAD_SIZE - 32;
-       struct pt_regs *regs = (struct pt_regs *)ksp - 1;
  #if (KGDB_GDB_REG_SIZE == 32)
         u32 *ptr = (u32 *)gdb_regs;
  #else
@@ -254,25 +251,46 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
  #endif
  
         for (reg = 0; reg < 16; reg++)
-               *(ptr++) = regs->regs[reg];
+               *(ptr++) = 0;
  
         /* S0 - S7 */
-       for (reg = 16; reg < 24; reg++)
-               *(ptr++) = regs->regs[reg];
+       *(ptr++) = p->thread.reg16;
+       *(ptr++) = p->thread.reg17;
+       *(ptr++) = p->thread.reg18;
+       *(ptr++) = p->thread.reg19;
+       *(ptr++) = p->thread.reg20;
+       *(ptr++) = p->thread.reg21;
+       *(ptr++) = p->thread.reg22;
+       *(ptr++) = p->thread.reg23;
  
         for (reg = 24; reg < 28; reg++)
                 *(ptr++) = 0;
  
         /* GP, SP, FP, RA */
-       for (reg = 28; reg < 32; reg++)
-               *(ptr++) = regs->regs[reg];
-
-       *(ptr++) = regs->cp0_status;
-       *(ptr++) = regs->lo;
-       *(ptr++) = regs->hi;
-       *(ptr++) = regs->cp0_badvaddr;
-       *(ptr++) = regs->cp0_cause;
-       *(ptr++) = regs->cp0_epc;
+       *(ptr++) = (long)p;
+       *(ptr++) = p->thread.reg29;
+       *(ptr++) = p->thread.reg30;
+       *(ptr++) = p->thread.reg31;
+
+       *(ptr++) = p->thread.cp0_status;
+
+       /* lo, hi */
+       *(ptr++) = 0;
+       *(ptr++) = 0;
+
+       /*
+        * BadVAddr, Cause
+        * Ideally these would come from the last exception frame up the stack
+        * but that requires unwinding, otherwise we can't know much for sure.
+        */
+       *(ptr++) = 0;
+       *(ptr++) = 0;
+
+       /*
+        * PC
+        * use return address (RA), i.e. the moment after return from resume()
+        */
+       *(ptr++) = p->thread.reg31;
  }
  
  void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c

index 8c35b3152e1eb35cab7f8f605fe48163310b5024..9452b02ce0797e7ac890d1785f50907d9e747b5d 100644 (file)
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -1446,6 +1446,11 @@ static int mipsxx_pmu_handle_shared_irq(void)
         HANDLE_COUNTER(0)
         }
  
+#ifdef CONFIG_MIPS_PERF_SHARED_TC_COUNTERS
+       read_unlock(&pmuint_rwlock);
+#endif
+       resume_local_counters();
+
         /*
          * Do all the work for the pending perf events. We can do this
          * in here because the performance counter interrupt is a regular
@@ -1454,10 +1459,6 @@ static int mipsxx_pmu_handle_shared_irq(void)
         if (handled == IRQ_HANDLED)
                 irq_work_run();
  
-#ifdef CONFIG_MIPS_PERF_SHARED_TC_COUNTERS
-       read_unlock(&pmuint_rwlock);
-#endif
-       resume_local_counters();
         return handled;
  }
  
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c

index fb6b6b650719adf6943c9e8ed6524d7ccfbd5983..b68e10fc453d113dbdc67f11f2a7e59cb582d5a1 100644 (file)
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -488,31 +488,52 @@ unsigned long notrace unwind_stack_by_address(unsigned long stack_page,
                                               unsigned long pc,
                                               unsigned long *ra)
  {
+       unsigned long low, high, irq_stack_high;
         struct mips_frame_info info;
         unsigned long size, ofs;
+       struct pt_regs *regs;
         int leaf;
-       extern void ret_from_irq(void);
-       extern void ret_from_exception(void);
  
         if (!stack_page)
                 return 0;
  
         /*
-        * If we reached the bottom of interrupt context,
-        * return saved pc in pt_regs.
+        * IRQ stacks start at IRQ_STACK_START
+        * task stacks at THREAD_SIZE - 32
          */
-       if (pc == (unsigned long)ret_from_irq ||
-           pc == (unsigned long)ret_from_exception) {
-               struct pt_regs *regs;
-               if (*sp >= stack_page &&
-                   *sp + sizeof(*regs) <= stack_page + THREAD_SIZE - 32) {
-                       regs = (struct pt_regs *)*sp;
-                       pc = regs->cp0_epc;
-                       if (!user_mode(regs) && __kernel_text_address(pc)) {
-                               *sp = regs->regs[29];
-                               *ra = regs->regs[31];
-                               return pc;
-                       }
+       low = stack_page;
+       if (!preemptible() && on_irq_stack(raw_smp_processor_id(), *sp)) {
+               high = stack_page + IRQ_STACK_START;
+               irq_stack_high = high;
+       } else {
+               high = stack_page + THREAD_SIZE - 32;
+               irq_stack_high = 0;
+       }
+
+       /*
+        * If we reached the top of the interrupt stack, start unwinding
+        * the interrupted task stack.
+        */
+       if (unlikely(*sp == irq_stack_high)) {
+               unsigned long task_sp = *(unsigned long *)*sp;
+
+               /*
+                * Check that the pointer saved in the IRQ stack head points to
+                * something within the stack of the current task
+                */
+               if (!object_is_on_stack((void *)task_sp))
+                       return 0;
+
+               /*
+                * Follow pointer to task's kernel stack frame where interrupted
+                * state was saved.
+                */
+               regs = (struct pt_regs *)task_sp;
+               pc = regs->cp0_epc;
+               if (!user_mode(regs) && __kernel_text_address(pc)) {
+                       *sp = regs->regs[29];
+                       *ra = regs->regs[31];
+                       return pc;
                 }
                 return 0;
         }
@@ -533,8 +554,7 @@ unsigned long notrace unwind_stack_by_address(unsigned long stack_page,
         if (leaf < 0)
                 return 0;
  
-       if (*sp < stack_page ||
-           *sp + info.frame_size > stack_page + THREAD_SIZE - 32)
+       if (*sp < low || *sp + info.frame_size > high)
                 return 0;
  
         if (leaf)
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c

index 339601267265394d19e92deef167167375600e82..6931fe722a0b54dcfa5078c8686160018c1e6c64 100644 (file)
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -456,7 +456,8 @@ static int fpr_set(struct task_struct *target,
                                           &target->thread.fpu,
                                           0, sizeof(elf_fpregset_t));
  
-       for (i = 0; i < NUM_FPU_REGS; i++) {
+       BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
+       for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) {
                 err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
                                          &fpr_val, i * sizeof(elf_fpreg_t),
                                          (i + 1) * sizeof(elf_fpreg_t));
diff --git a/arch/mips/kernel/relocate.c b/arch/mips/kernel/relocate.c

index 9103bebc9a8eef76e3d524b63b74aa91936f3663..2d1a0c4387713c7862e06bbf79e8c904a07b2279 100644 (file)
--- a/arch/mips/kernel/relocate.c
+++ b/arch/mips/kernel/relocate.c
@@ -18,7 +18,7 @@
  #include <linux/kernel.h>
  #include <linux/libfdt.h>
  #include <linux/of_fdt.h>
-#include <linux/sched.h>
+#include <linux/sched/task.h>
  #include <linux/start_kernel.h>
  #include <linux/string.h>
  #include <linux/printk.h>
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S

index c29d397eee86cf48a05d7945efe3cdf4a38a7273..80ed68b2c95e4161ffe6f8f6e6fd0fd5855efb73 100644 (file)
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -600,3 +600,4 @@ EXPORT(sys_call_table)
         PTR     sys_pkey_mprotect
         PTR     sys_pkey_alloc
         PTR     sys_pkey_free                   /* 4365 */
+       PTR     sys_statx
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S

index 0687f96ee912698285a92abde87a7376897f076b..49765b44aa9b3bfaf923710d1c95ea0390d6b0f5 100644 (file)
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -438,4 +438,5 @@ EXPORT(sys_call_table)
         PTR     sys_pkey_mprotect
         PTR     sys_pkey_alloc
         PTR     sys_pkey_free                   /* 5325 */
+       PTR     sys_statx
         .size   sys_call_table,.-sys_call_table
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S

index 0331ba39a065b8530818093d7b707921242a3672..90bad2d1b2d3e2f62afa505f14836d510a5c077b 100644 (file)
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -433,4 +433,5 @@ EXPORT(sysn32_call_table)
         PTR     sys_pkey_mprotect
         PTR     sys_pkey_alloc
         PTR     sys_pkey_free
+       PTR     sys_statx                       /* 6330 */
         .size   sysn32_call_table,.-sysn32_call_table
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S

index 5a47042dd25f7ae7f93cee8a596f311bf17a9382..2dd70bd104e1a0ff816f87a6d1ded9f852a95de6 100644 (file)
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -588,4 +588,5 @@ EXPORT(sys32_call_table)
         PTR     sys_pkey_mprotect
         PTR     sys_pkey_alloc
         PTR     sys_pkey_free                   /* 4365 */
+       PTR     sys_statx
         .size   sys32_call_table,.-sys32_call_table
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c

index 3daa2cae50b0b976942772c7c10656d8f4ffd3ab..1b070a76fcdd4c2e5f62a84bfff8e40ad0a8721b 100644 (file)
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -11,6 +11,7 @@
  #include <linux/init.h>
  #include <linux/sched.h>
  #include <linux/sched/hotplug.h>
+#include <linux/sched/task_stack.h>
  #include <linux/mm.h>
  #include <linux/delay.h>
  #include <linux/smp.h>
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c

index 6d45f05538c8b37db5fff707d50daa42ef8b097a..795b4aaf89277be344e2e3e512a3642fb3085f5a 100644 (file)
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -422,13 +422,12 @@ void play_dead(void)
         local_irq_disable();
         idle_task_exit();
         cpu = smp_processor_id();
+       core = cpu_data[cpu].core;
         cpu_death = CPU_DEATH_POWER;
  
         pr_debug("CPU%d going offline\n", cpu);
  
         if (cpu_has_mipsmt || cpu_has_vp) {
-               core = cpu_data[cpu].core;
-
                 /* Look for another online VPE within the core */
                 for_each_online_cpu(cpu_death_sibling) {
                         if (cpu_data[cpu_death_sibling].core != core)
diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c

index e077ea3e11fb36ee2d5f85f7e8415c97eeead1d9..e398cbc3d7767d50287d4c1e267b4e3e990efa4e 100644 (file)
--- a/arch/mips/kernel/smp-mt.c
+++ b/arch/mips/kernel/smp-mt.c
@@ -23,6 +23,7 @@
  #include <linux/interrupt.h>
  #include <linux/irqchip/mips-gic.h>
  #include <linux/compiler.h>
+#include <linux/sched/task_stack.h>
  #include <linux/smp.h>
  
  #include <linux/atomic.h>
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c

index c7d17cfb32f67877cfd62d10c2e88181dcb74014..b49e7bf9f95023fe5837b7d7ed317693ef8ac0a2 100644 (file)
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -83,7 +83,7 @@ extern asmlinkage void handle_dbe(void);
  extern asmlinkage void handle_sys(void);
  extern asmlinkage void handle_bp(void);
  extern asmlinkage void handle_ri(void);
-extern asmlinkage void handle_ri_rdhwr_vivt(void);
+extern asmlinkage void handle_ri_rdhwr_tlbp(void);
  extern asmlinkage void handle_ri_rdhwr(void);
  extern asmlinkage void handle_cpu(void);
  extern asmlinkage void handle_ov(void);
@@ -2408,9 +2408,18 @@ void __init trap_init(void)
  
         set_except_vector(EXCCODE_SYS, handle_sys);
         set_except_vector(EXCCODE_BP, handle_bp);
-       set_except_vector(EXCCODE_RI, rdhwr_noopt ? handle_ri :
-                         (cpu_has_vtag_icache ?
-                          handle_ri_rdhwr_vivt : handle_ri_rdhwr));
+
+       if (rdhwr_noopt)
+               set_except_vector(EXCCODE_RI, handle_ri);
+       else {
+               if (cpu_has_vtag_icache)
+                       set_except_vector(EXCCODE_RI, handle_ri_rdhwr_tlbp);
+               else if (current_cpu_type() == CPU_LOONGSON3)
+                       set_except_vector(EXCCODE_RI, handle_ri_rdhwr_tlbp);
+               else
+                       set_except_vector(EXCCODE_RI, handle_ri_rdhwr);
+       }
+
         set_except_vector(EXCCODE_CPU, handle_cpu);
         set_except_vector(EXCCODE_OV, handle_ov);
         set_except_vector(EXCCODE_TR, handle_tr);
diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c

index 3c3aa05891dd78dd336ca80524870e21df6380ff..95bec460b651fd1cdad1b1e262463408742c7795 100644 (file)
--- a/arch/mips/lantiq/xway/sysctrl.c
+++ b/arch/mips/lantiq/xway/sysctrl.c
@@ -467,7 +467,7 @@ void __init ltq_soc_init(void)
  
                 if (!np_xbar)
                         panic("Failed to load xbar nodes from devicetree");
-               if (of_address_to_resource(np_pmu, 0, &res_xbar))
+               if (of_address_to_resource(np_xbar, 0, &res_xbar))
                         panic("Failed to get xbar resources");
                 if (!request_mem_region(res_xbar.start, resource_size(&res_xbar),
                         res_xbar.name))
diff --git a/arch/mips/loongson64/loongson-3/cop2-ex.c b/arch/mips/loongson64/loongson-3/cop2-ex.c

index ea13764d0a035ccc5ec6cf443f021aa1f4f58103..621d6af5f6eb8ecc9e602ee085ceb39412678cc3 100644 (file)
--- a/arch/mips/loongson64/loongson-3/cop2-ex.c
+++ b/arch/mips/loongson64/loongson-3/cop2-ex.c
@@ -13,6 +13,7 @@
  #include <linux/init.h>
  #include <linux/sched.h>
  #include <linux/notifier.h>
+#include <linux/ptrace.h>
  
  #include <asm/fpu.h>
  #include <asm/cop2.h>
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c

index e7f798d55fbcca06eaa0fe07a6822c3aedd18cc8..3fe99cb271a9cad44c55dfbf841b9de0e9093974 100644 (file)
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -1562,6 +1562,7 @@ static void probe_vcache(void)
         vcache_size = c->vcache.sets * c->vcache.ways * c->vcache.linesz;
  
         c->vcache.waybit = 0;
+       c->vcache.waysize = vcache_size / c->vcache.ways;
  
         pr_info("Unified victim cache %ldkB %s, linesize %d bytes.\n",
                 vcache_size >> 10, way_string[c->vcache.ways], c->vcache.linesz);
@@ -1664,6 +1665,7 @@ static void __init loongson3_sc_init(void)
         /* Loongson-3 has 4 cores, 1MB scache for each. scaches are shared */
         scache_size *= 4;
         c->scache.waybit = 0;
+       c->scache.waysize = scache_size / c->scache.ways;
         pr_info("Unified secondary cache %ldkB %s, linesize %d bytes.\n",
                scache_size >> 10, way_string[c->scache.ways], c->scache.linesz);
         if (scache_size)
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c

index 9bfee8988eaf11a8618ca4f6ce101a8b70ad2823..4f642e07c2b198b3c9e46e3bc9f64bbe2bcb6606 100644 (file)
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -760,7 +760,8 @@ static void build_huge_update_entries(u32 **p, unsigned int pte,
  static void build_huge_handler_tail(u32 **p, struct uasm_reloc **r,
                                     struct uasm_label **l,
                                     unsigned int pte,
-                                   unsigned int ptr)
+                                   unsigned int ptr,
+                                   unsigned int flush)
  {
  #ifdef CONFIG_SMP
         UASM_i_SC(p, pte, 0, ptr);
@@ -769,6 +770,22 @@ static void build_huge_handler_tail(u32 **p, struct uasm_reloc **r,
  #else
         UASM_i_SW(p, pte, 0, ptr);
  #endif
+       if (cpu_has_ftlb && flush) {
+               BUG_ON(!cpu_has_tlbinv);
+
+               UASM_i_MFC0(p, ptr, C0_ENTRYHI);
+               uasm_i_ori(p, ptr, ptr, MIPS_ENTRYHI_EHINV);
+               UASM_i_MTC0(p, ptr, C0_ENTRYHI);
+               build_tlb_write_entry(p, l, r, tlb_indexed);
+
+               uasm_i_xori(p, ptr, ptr, MIPS_ENTRYHI_EHINV);
+               UASM_i_MTC0(p, ptr, C0_ENTRYHI);
+               build_huge_update_entries(p, pte, ptr);
+               build_huge_tlb_write_entry(p, l, r, pte, tlb_random, 0);
+
+               return;
+       }
+
         build_huge_update_entries(p, pte, ptr);
         build_huge_tlb_write_entry(p, l, r, pte, tlb_indexed, 0);
  }
@@ -2199,7 +2216,7 @@ static void build_r4000_tlb_load_handler(void)
                 uasm_l_tlbl_goaround2(&l, p);
         }
         uasm_i_ori(&p, wr.r1, wr.r1, (_PAGE_ACCESSED | _PAGE_VALID));
-       build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);
+       build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2, 1);
  #endif
  
         uasm_l_nopage_tlbl(&l, p);
@@ -2254,7 +2271,7 @@ static void build_r4000_tlb_store_handler(void)
         build_tlb_probe_entry(&p);
         uasm_i_ori(&p, wr.r1, wr.r1,
                    _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY);
-       build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);
+       build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2, 1);
  #endif
  
         uasm_l_nopage_tlbs(&l, p);
@@ -2310,7 +2327,7 @@ static void build_r4000_tlb_modify_handler(void)
         build_tlb_probe_entry(&p);
         uasm_i_ori(&p, wr.r1, wr.r1,
                    _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY);
-       build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);
+       build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2, 0);
  #endif
  
         uasm_l_nopage_tlbm(&l, p);
diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c

index cb675ec6f283ee9d08071e9b112845bddd5c1594..54f56d5a96c46ec8c3b2b1a6e8bcdc06dbf5e02a 100644 (file)
--- a/arch/mips/mti-malta/malta-int.c
+++ b/arch/mips/mti-malta/malta-int.c
@@ -232,6 +232,17 @@ void __init arch_init_irq(void)
  {
         int corehi_irq;
  
+       /*
+        * Preallocate the i8259's expected virq's here. Since irqchip_init()
+        * will probe the irqchips in hierarchical order, i8259 is probed last.
+        * If anything allocates a virq before the i8259 is probed, it will
+        * be given one of the i8259's expected range and consequently setup
+        * of the i8259 will fail.
+        */
+       WARN(irq_alloc_descs(I8259A_IRQ_BASE, I8259A_IRQ_BASE,
+                           16, numa_node_id()) < 0,
+               "Cannot reserve i8259 virqs at IRQ%d\n", I8259A_IRQ_BASE);
+
         i8259_set_poll(mips_pcibios_iack);
         irqchip_init();
  
diff --git a/arch/mips/netlogic/common/smp.c b/arch/mips/netlogic/common/smp.c

index 10d86d54880ab8541eecf01f8d1f0dd2b3d6ee18..bddf1ef553a4f695d320d4f88eb7f28b079cec94 100644 (file)
--- a/arch/mips/netlogic/common/smp.c
+++ b/arch/mips/netlogic/common/smp.c
@@ -35,6 +35,7 @@
  #include <linux/kernel.h>
  #include <linux/delay.h>
  #include <linux/init.h>
+#include <linux/sched/task_stack.h>
  #include <linux/smp.h>
  #include <linux/irq.h>
  
diff --git a/arch/mips/netlogic/xlp/cop2-ex.c b/arch/mips/netlogic/xlp/cop2-ex.c

index 52bc5de420052ca4e604cd39a8ff4bfcbbba2fa1..21e439b3db707f4119229954e1eced768dd5162d 100644 (file)
--- a/arch/mips/netlogic/xlp/cop2-ex.c
+++ b/arch/mips/netlogic/xlp/cop2-ex.c
@@ -9,11 +9,14 @@
   * Copyright (C) 2009 Wind River Systems,
   *   written by Ralf Baechle <ralf@linux-mips.org>
   */
+#include <linux/capability.h>
  #include <linux/init.h>
  #include <linux/irqflags.h>
  #include <linux/notifier.h>
  #include <linux/prefetch.h>
+#include <linux/ptrace.h>
  #include <linux/sched.h>
+#include <linux/sched/task_stack.h>
  
  #include <asm/cop2.h>
  #include <asm/current.h>
diff --git a/arch/mips/pci/pci-legacy.c b/arch/mips/pci/pci-legacy.c

index 014649be158d95f0cce86493ac7186b7ceec73d8..3a84f6c0c840569aa4f9db2d15b31c8079cfd39e 100644 (file)
--- a/arch/mips/pci/pci-legacy.c
+++ b/arch/mips/pci/pci-legacy.c
@@ -190,7 +190,7 @@ void register_pci_controller(struct pci_controller *hose)
         }
  
         INIT_LIST_HEAD(&hose->list);
-       list_add(&hose->list, &controllers);
+       list_add_tail(&hose->list, &controllers);
  
         /*
          * Do not panic here but later - this might happen before console init.
diff --git a/arch/mips/ralink/rt3883.c b/arch/mips/ralink/rt3883.c

index c4ffd43d3996ac26af1afa42c9c8a876f7fc564c..48ce701557a451826fba7c7116420b731b7874e6 100644 (file)
--- a/arch/mips/ralink/rt3883.c
+++ b/arch/mips/ralink/rt3883.c
@@ -35,7 +35,7 @@ static struct rt2880_pmx_func uartlite_func[] = { FUNC("uartlite", 0, 15, 2) };
  static struct rt2880_pmx_func jtag_func[] = { FUNC("jtag", 0, 17, 5) };
  static struct rt2880_pmx_func mdio_func[] = { FUNC("mdio", 0, 22, 2) };
  static struct rt2880_pmx_func lna_a_func[] = { FUNC("lna a", 0, 32, 3) };
-static struct rt2880_pmx_func lna_g_func[] = { FUNC("lna a", 0, 35, 3) };
+static struct rt2880_pmx_func lna_g_func[] = { FUNC("lna g", 0, 35, 3) };
  static struct rt2880_pmx_func pci_func[] = {
         FUNC("pci-dev", 0, 40, 32),
         FUNC("pci-host2", 1, 40, 32),
@@ -43,7 +43,7 @@ static struct rt2880_pmx_func pci_func[] = {
         FUNC("pci-fnc", 3, 40, 32)
  };
  static struct rt2880_pmx_func ge1_func[] = { FUNC("ge1", 0, 72, 12) };
-static struct rt2880_pmx_func ge2_func[] = { FUNC("ge1", 0, 84, 12) };
+static struct rt2880_pmx_func ge2_func[] = { FUNC("ge2", 0, 84, 12) };
  
  static struct rt2880_pmx_group rt3883_pinmux_data[] = {
         GRP("i2c", i2c_func, 1, RT3883_GPIO_MODE_I2C),
diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c

index 1f2a5bc4779e6a356a43f179608b98d605a8c343..75460e1e106b2c08799354139b869481d0b23abb 100644 (file)
--- a/arch/mips/sgi-ip22/ip28-berr.c
+++ b/arch/mips/sgi-ip22/ip28-berr.c
@@ -9,6 +9,7 @@
  #include <linux/kernel.h>
  #include <linux/sched.h>
  #include <linux/sched/debug.h>
+#include <linux/sched/signal.h>
  #include <linux/seq_file.h>
  
  #include <asm/addrspace.h>
diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c

index d12879eb2b1fa9f0b58cfc03fbd51d8ea2394134..83efe03d5c600f695d073279c1d594a7ca60b471 100644 (file)
--- a/arch/mips/sgi-ip27/ip27-berr.c
+++ b/arch/mips/sgi-ip27/ip27-berr.c
@@ -12,7 +12,9 @@
  #include <linux/signal.h>      /* for SIGBUS */
  #include <linux/sched.h>       /* schow_regs(), force_sig() */
  #include <linux/sched/debug.h>
+#include <linux/sched/signal.h>
  
+#include <asm/ptrace.h>
  #include <asm/sn/addrs.h>
  #include <asm/sn/arch.h>
  #include <asm/sn/sn0/hub.h>
diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c

index f5ed45e8f442565ce0f7435585cff684ece5d764..4cd47d23d81a76105b5a5fc21a768cc50d478879 100644 (file)
--- a/arch/mips/sgi-ip27/ip27-smp.c
+++ b/arch/mips/sgi-ip27/ip27-smp.c
@@ -8,10 +8,13 @@
   */
  #include <linux/init.h>
  #include <linux/sched.h>
+#include <linux/sched/task_stack.h>
  #include <linux/topology.h>
  #include <linux/nodemask.h>
+
  #include <asm/page.h>
  #include <asm/processor.h>
+#include <asm/ptrace.h>
  #include <asm/sn/arch.h>
  #include <asm/sn/gda.h>
  #include <asm/sn/intr.h>
diff --git a/arch/mips/sgi-ip32/ip32-berr.c b/arch/mips/sgi-ip32/ip32-berr.c

index 57d8c7486fe6b75b2f903fbd2673d869c9d63001..c1f12a9cf305f4196bd23cbe01b7dc73dc852b26 100644 (file)
--- a/arch/mips/sgi-ip32/ip32-berr.c
+++ b/arch/mips/sgi-ip32/ip32-berr.c
@@ -11,6 +11,7 @@
  #include <linux/kernel.h>
  #include <linux/sched.h>
  #include <linux/sched/debug.h>
+#include <linux/sched/signal.h>
  #include <asm/traps.h>
  #include <linux/uaccess.h>
  #include <asm/addrspace.h>
diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c

index 8bd415c8729f974a8e1e376d5a2b3e08c84e6053..b3b442def42383794289c6c07ea9a44ee5c4a873 100644 (file)
--- a/arch/mips/sgi-ip32/ip32-reset.c
+++ b/arch/mips/sgi-ip32/ip32-reset.c
@@ -13,6 +13,7 @@
  #include <linux/kernel.h>
  #include <linux/module.h>
  #include <linux/sched.h>
+#include <linux/sched/signal.h>
  #include <linux/notifier.h>
  #include <linux/delay.h>
  #include <linux/rtc/ds1685.h>
diff --git a/arch/mn10300/include/asm/page.h b/arch/mn10300/include/asm/page.h

index 3810a6f740fdf67ffa6622fcf4706c1c841aa09a..dfe730a5ede04a0f3f22a877dd8cd4f42baa2652 100644 (file)
--- a/arch/mn10300/include/asm/page.h
+++ b/arch/mn10300/include/asm/page.h
@@ -57,6 +57,7 @@ typedef struct page *pgtable_t;
  #define __pgd(x)       ((pgd_t) { (x) })
  #define __pgprot(x)    ((pgprot_t) { (x) })
  
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopmd.h>
  
  #endif /* !__ASSEMBLY__ */
diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h

index 298393c3cb426ffa7889589a637f342cae5b90e0..db4f7d179220782ab05e46ab46b02ffa09d4a998 100644 (file)
--- a/arch/nios2/include/asm/pgtable.h
+++ b/arch/nios2/include/asm/pgtable.h
@@ -22,6 +22,7 @@
  #include <asm/tlbflush.h>
  
  #include <asm/pgtable-bits.h>
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopmd.h>
  
  #define FIRST_USER_ADDRESS     0UL
diff --git a/arch/nios2/kernel/prom.c b/arch/nios2/kernel/prom.c

index 367c5426157ba14dfe8799664c3f11dd6eb9c8a2..3901b80d442021e17e96e29f33cea2b1c28ec345 100644 (file)
--- a/arch/nios2/kernel/prom.c
+++ b/arch/nios2/kernel/prom.c
@@ -48,6 +48,13 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
         return alloc_bootmem_align(size, align);
  }
  
+int __init early_init_dt_reserve_memory_arch(phys_addr_t base, phys_addr_t size,
+                                            bool nomap)
+{
+       reserve_bootmem(base, size, BOOTMEM_DEFAULT);
+       return 0;
+}
+
  void __init early_init_devtree(void *params)
  {
         __be32 *dtb = (u32 *)__dtb_start;
diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c

index 6e57ffa5db2769babe8c285f1e88e16fe13ed998..6044d9be28b4493323d362162e7e5ec6c56e1f33 100644 (file)
--- a/arch/nios2/kernel/setup.c
+++ b/arch/nios2/kernel/setup.c
@@ -201,6 +201,9 @@ void __init setup_arch(char **cmdline_p)
         }
  #endif /* CONFIG_BLK_DEV_INITRD */
  
+       early_init_fdt_reserve_self();
+       early_init_fdt_scan_reserved_mem();
+
         unflatten_and_copy_device_tree();
  
         setup_cpuinfo();
diff --git a/arch/openrisc/include/asm/cmpxchg.h b/arch/openrisc/include/asm/cmpxchg.h

index 5fcb9ac72693850f50060a4822445a09d81b8a80..f0a5d8b844d6b85b16eb6c170f8af86f73ad8440 100644 (file)
--- a/arch/openrisc/include/asm/cmpxchg.h
+++ b/arch/openrisc/include/asm/cmpxchg.h
@@ -77,7 +77,11 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
         return val;
  }
  
-#define xchg(ptr, with) \
-       ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), sizeof(*(ptr))))
+#define xchg(ptr, with)                                                \
+       ({                                                              \
+               (__typeof__(*(ptr))) __xchg((unsigned long)(with),      \
+                                           (ptr),                      \
+                                           sizeof(*(ptr)));            \
+       })
  
  #endif /* __ASM_OPENRISC_CMPXCHG_H */
diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h

index 3567aa7be55504d7b838b2e13b3d62d7929096c1..ff97374ca0693d526582b8c34e7f9f421f43ed48 100644 (file)
--- a/arch/openrisc/include/asm/pgtable.h
+++ b/arch/openrisc/include/asm/pgtable.h
@@ -25,6 +25,7 @@
  #ifndef __ASM_OPENRISC_PGTABLE_H
  #define __ASM_OPENRISC_PGTABLE_H
  
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopmd.h>
  
  #ifndef __ASSEMBLY__
diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h

index 140faa16685a2325f3a1b6cbf9cbb9c8e68fc913..1311e6b139916692bb5f81fbfd188a48b844d977 100644 (file)
--- a/arch/openrisc/include/asm/uaccess.h
+++ b/arch/openrisc/include/asm/uaccess.h
@@ -211,7 +211,7 @@ do {                                                                        \
         case 1: __get_user_asm(x, ptr, retval, "l.lbz"); break;         \
         case 2: __get_user_asm(x, ptr, retval, "l.lhz"); break;         \
         case 4: __get_user_asm(x, ptr, retval, "l.lwz"); break;         \
-       case 8: __get_user_asm2(x, ptr, retval);                        \
+       case 8: __get_user_asm2(x, ptr, retval); break;                 \
         default: (x) = __get_user_bad();                                \
         }                                                               \
  } while (0)
diff --git a/arch/openrisc/kernel/or32_ksyms.c b/arch/openrisc/kernel/or32_ksyms.c

index 5c4695d13542fc003054995b728ac468e18bd94c..ee3e604959e15c514bc91eb65118d8d04ea20b59 100644 (file)
--- a/arch/openrisc/kernel/or32_ksyms.c
+++ b/arch/openrisc/kernel/or32_ksyms.c
@@ -30,6 +30,7 @@
  #include <asm/hardirq.h>
  #include <asm/delay.h>
  #include <asm/pgalloc.h>
+#include <asm/pgtable.h>
  
  #define DECLARE_EXPORT(name) extern void name(void); EXPORT_SYMBOL(name)
  
@@ -42,6 +43,9 @@ DECLARE_EXPORT(__muldi3);
  DECLARE_EXPORT(__ashrdi3);
  DECLARE_EXPORT(__ashldi3);
  DECLARE_EXPORT(__lshrdi3);
+DECLARE_EXPORT(__ucmpdi2);
  
+EXPORT_SYMBOL(empty_zero_page);
  EXPORT_SYMBOL(__copy_tofrom_user);
+EXPORT_SYMBOL(__clear_user);
  EXPORT_SYMBOL(memset);
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c

index 828a29110459e8cb9f1e85b1b5033f30ef0348dd..f8da545854f979c33a7b3116d26d822caa46c494 100644 (file)
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -90,6 +90,7 @@ void arch_cpu_idle(void)
  }
  
  void (*pm_power_off) (void) = machine_power_off;
+EXPORT_SYMBOL(pm_power_off);
  
  /*
   * When a process does an "exec", machine state like FPU and debug
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h

index 19c9c3c5f267eac813edf6c5fc6f358301d2a639..c7e15cc5c6683b423d028b1557fc0dc9b7dd5a16 100644 (file)
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -43,28 +43,9 @@ static inline void flush_kernel_dcache_page(struct page *page)
  
  #define flush_kernel_dcache_range(start,size) \
         flush_kernel_dcache_range_asm((start), (start)+(size));
-/* vmap range flushes and invalidates.  Architecturally, we don't need
- * the invalidate, because the CPU should refuse to speculate once an
- * area has been flushed, so invalidate is left empty */
-static inline void flush_kernel_vmap_range(void *vaddr, int size)
-{
-       unsigned long start = (unsigned long)vaddr;
-
-       flush_kernel_dcache_range_asm(start, start + size);
-}
-static inline void invalidate_kernel_vmap_range(void *vaddr, int size)
-{
-       unsigned long start = (unsigned long)vaddr;
-       void *cursor = vaddr;
  
-       for ( ; cursor < vaddr + size; cursor += PAGE_SIZE) {
-               struct page *page = vmalloc_to_page(cursor);
-
-               if (test_and_clear_bit(PG_dcache_dirty, &page->flags))
-                       flush_kernel_dcache_page(page);
-       }
-       flush_kernel_dcache_range_asm(start, start + size);
-}
+void flush_kernel_vmap_range(void *vaddr, int size);
+void invalidate_kernel_vmap_range(void *vaddr, int size);
  
  #define flush_cache_vmap(start, end)           flush_cache_all()
  #define flush_cache_vunmap(start, end)         flush_cache_all()
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h

index fb4382c28259b3ff2f873014fce7e42f1373dac8..cbd4f4af8108bc8fa9ec36504589eac7107e6b1c 100644 (file)
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -32,16 +32,17 @@
   * that put_user is the same as __put_user, etc.
   */
  
-#define access_ok(type, uaddr, size) (1)
+#define access_ok(type, uaddr, size)   \
+       ( (uaddr) == (uaddr) )
  
  #define put_user __put_user
  #define get_user __get_user
  
  #if !defined(CONFIG_64BIT)
-#define LDD_USER(ptr)          __get_user_asm64(ptr)
+#define LDD_USER(val, ptr)     __get_user_asm64(val, ptr)
  #define STD_USER(x, ptr)       __put_user_asm64(x, ptr)
  #else
-#define LDD_USER(ptr)          __get_user_asm("ldd", ptr)
+#define LDD_USER(val, ptr)     __get_user_asm(val, "ldd", ptr)
  #define STD_USER(x, ptr)       __put_user_asm("std", x, ptr)
  #endif
  
@@ -63,6 +64,15 @@ struct exception_table_entry {
         ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \
         ".previous\n"
  
+/*
+ * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry
+ * (with lowest bit set) for which the fault handler in fixup_exception() will
+ * load -EFAULT into %r8 for a read or write fault, and zeroes the target
+ * register in case of a read fault in get_user().
+ */
+#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\
+       ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1)
+
  /*
   * The page fault handler stores, in a per-cpu area, the following information
   * if a fixup routine is available.
@@ -87,92 +97,116 @@ struct exception_data {
                 " mtsp %0,%%sr2\n\t"            \
                 : : "r"(get_fs()) : )
  
-#define __get_user(x, ptr)                               \
-({                                                       \
-       register long __gu_err __asm__ ("r8") = 0;       \
-       register long __gu_val __asm__ ("r9") = 0;       \
-                                                        \
-       load_sr2();                                      \
-       switch (sizeof(*(ptr))) {                        \
-           case 1: __get_user_asm("ldb", ptr); break;   \
-           case 2: __get_user_asm("ldh", ptr); break;   \
-           case 4: __get_user_asm("ldw", ptr); break;   \
-           case 8: LDD_USER(ptr);  break;               \
-           default: BUILD_BUG(); break;                 \
-       }                                                \
-                                                        \
-       (x) = (__force __typeof__(*(ptr))) __gu_val;     \
-       __gu_err;                                        \
+#define __get_user_internal(val, ptr)                  \
+({                                                     \
+       register long __gu_err __asm__ ("r8") = 0;      \
+                                                       \
+       switch (sizeof(*(ptr))) {                       \
+       case 1: __get_user_asm(val, "ldb", ptr); break; \
+       case 2: __get_user_asm(val, "ldh", ptr); break; \
+       case 4: __get_user_asm(val, "ldw", ptr); break; \
+       case 8: LDD_USER(val, ptr); break;              \
+       default: BUILD_BUG();                           \
+       }                                               \
+                                                       \
+       __gu_err;                                       \
  })
  
-#define __get_user_asm(ldx, ptr)                        \
-       __asm__("\n1:\t" ldx "\t0(%%sr2,%2),%0\n\t"     \
-               ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_1)\
+#define __get_user(val, ptr)                           \
+({                                                     \
+       load_sr2();                                     \
+       __get_user_internal(val, ptr);                  \
+})
+
+#define __get_user_asm(val, ldx, ptr)                  \
+{                                                      \
+       register long __gu_val;                         \
+                                                       \
+       __asm__("1: " ldx " 0(%%sr2,%2),%0\n"           \
+               "9:\n"                                  \
+               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
                 : "=r"(__gu_val), "=r"(__gu_err)        \
-               : "r"(ptr), "1"(__gu_err)               \
-               : "r1");
+               : "r"(ptr), "1"(__gu_err));             \
+                                                       \
+       (val) = (__force __typeof__(*(ptr))) __gu_val;  \
+}
  
  #if !defined(CONFIG_64BIT)
  
-#define __get_user_asm64(ptr)                          \
-       __asm__("\n1:\tldw 0(%%sr2,%2),%0"              \
-               "\n2:\tldw 4(%%sr2,%2),%R0\n\t"         \
-               ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_2)\
-               ASM_EXCEPTIONTABLE_ENTRY(2b, fixup_get_user_skip_1)\
-               : "=r"(__gu_val), "=r"(__gu_err)        \
-               : "r"(ptr), "1"(__gu_err)               \
-               : "r1");
+#define __get_user_asm64(val, ptr)                     \
+{                                                      \
+       union {                                         \
+               unsigned long long      l;              \
+               __typeof__(*(ptr))      t;              \
+       } __gu_tmp;                                     \
+                                                       \
+       __asm__("   copy %%r0,%R0\n"                    \
+               "1: ldw 0(%%sr2,%2),%0\n"               \
+               "2: ldw 4(%%sr2,%2),%R0\n"              \
+               "9:\n"                                  \
+               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
+               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \
+               : "=&r"(__gu_tmp.l), "=r"(__gu_err)     \
+               : "r"(ptr), "1"(__gu_err));             \
+                                                       \
+       (val) = __gu_tmp.t;                             \
+}
  
  #endif /* !defined(CONFIG_64BIT) */
  
  
-#define __put_user(x, ptr)                                      \
+#define __put_user_internal(x, ptr)                            \
  ({                                                             \
         register long __pu_err __asm__ ("r8") = 0;              \
          __typeof__(*(ptr)) __x = (__typeof__(*(ptr)))(x);      \
                                                                 \
-       load_sr2();                                             \
         switch (sizeof(*(ptr))) {                               \
-           case 1: __put_user_asm("stb", __x, ptr); break;     \
-           case 2: __put_user_asm("sth", __x, ptr); break;     \
-           case 4: __put_user_asm("stw", __x, ptr); break;     \
-           case 8: STD_USER(__x, ptr); break;                  \
-           default: BUILD_BUG(); break;                        \
-       }                                                       \
+       case 1: __put_user_asm("stb", __x, ptr); break;         \
+       case 2: __put_user_asm("sth", __x, ptr); break;         \
+       case 4: __put_user_asm("stw", __x, ptr); break;         \
+       case 8: STD_USER(__x, ptr); break;                      \
+       default: BUILD_BUG();                                   \
+       }                                                       \
                                                                 \
         __pu_err;                                               \
  })
  
+#define __put_user(x, ptr)                                     \
+({                                                             \
+       load_sr2();                                             \
+       __put_user_internal(x, ptr);                            \
+})
+
+
  /*
   * The "__put_user/kernel_asm()" macros tell gcc they read from memory
   * instead of writing. This is because they do not write to any memory
   * gcc knows about, so there are no aliasing issues. These macros must
- * also be aware that "fixup_put_user_skip_[12]" are executed in the
- * context of the fault, and any registers used there must be listed
- * as clobbers. In this case only "r1" is used by the current routines.
- * r8/r9 are already listed as err/val.
+ * also be aware that fixups are executed in the context of the fault,
+ * and any registers used there must be listed as clobbers.
+ * r8 is already listed as err.
   */
  
  #define __put_user_asm(stx, x, ptr)                         \
         __asm__ __volatile__ (                              \
-               "\n1:\t" stx "\t%2,0(%%sr2,%1)\n\t"         \
-               ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_put_user_skip_1)\
+               "1: " stx " %2,0(%%sr2,%1)\n"               \
+               "9:\n"                                      \
+               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)     \
                 : "=r"(__pu_err)                            \
-               : "r"(ptr), "r"(x), "0"(__pu_err)           \
-               : "r1")
+               : "r"(ptr), "r"(x), "0"(__pu_err))
  
  
  #if !defined(CONFIG_64BIT)
  
  #define __put_user_asm64(__val, ptr) do {                  \
         __asm__ __volatile__ (                              \
-               "\n1:\tstw %2,0(%%sr2,%1)"                  \
-               "\n2:\tstw %R2,4(%%sr2,%1)\n\t"             \
-               ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_put_user_skip_2)\
-               ASM_EXCEPTIONTABLE_ENTRY(2b, fixup_put_user_skip_1)\
+               "1: stw %2,0(%%sr2,%1)\n"                   \
+               "2: stw %R2,4(%%sr2,%1)\n"                  \
+               "9:\n"                                      \
+               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)     \
+               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b)     \
                 : "=r"(__pu_err)                            \
-               : "r"(ptr), "r"(__val), "0"(__pu_err) \
-               : "r1");                                    \
+               : "r"(ptr), "r"(__val), "0"(__pu_err));     \
  } while (0)
  
  #endif /* !defined(CONFIG_64BIT) */
diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h

index 6b0741e7a7ed3ee4060d619a8999b50dab12dac3..667c99421003e4dd07c6d204bef7db08fa905933 100644 (file)
--- a/arch/parisc/include/uapi/asm/unistd.h
+++ b/arch/parisc/include/uapi/asm/unistd.h
@@ -362,8 +362,9 @@
  #define __NR_copy_file_range   (__NR_Linux + 346)
  #define __NR_preadv2           (__NR_Linux + 347)
  #define __NR_pwritev2          (__NR_Linux + 348)
+#define __NR_statx             (__NR_Linux + 349)
  
-#define __NR_Linux_syscalls    (__NR_pwritev2 + 1)
+#define __NR_Linux_syscalls    (__NR_statx + 1)
  
  
  #define __IGNORE_select                /* newselect */
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c

index 0dc72d5de861539e5c16ff2ecd49f205e37775e6..c32a0909521665b5f08c22ef37fa8d8f9c654012 100644 (file)
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -616,3 +616,25 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long
                 __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
         }
  }
+
+void flush_kernel_vmap_range(void *vaddr, int size)
+{
+       unsigned long start = (unsigned long)vaddr;
+
+       if ((unsigned long)size > parisc_cache_flush_threshold)
+               flush_data_cache();
+       else
+               flush_kernel_dcache_range_asm(start, start + size);
+}
+EXPORT_SYMBOL(flush_kernel_vmap_range);
+
+void invalidate_kernel_vmap_range(void *vaddr, int size)
+{
+       unsigned long start = (unsigned long)vaddr;
+
+       if ((unsigned long)size > parisc_cache_flush_threshold)
+               flush_data_cache();
+       else
+               flush_kernel_dcache_range_asm(start, start + size);
+}
+EXPORT_SYMBOL(invalidate_kernel_vmap_range);
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c

index a0ecdb4abcc878b3805d7a2d0f845272b1fc372d..c66c943d93224f342cb71c97bd6a690bf8fb225b 100644 (file)
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -620,6 +620,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
                          */
                         *loc = fsel(val, addend); 
                         break;
+               case R_PARISC_SECREL32:
+                       /* 32-bit section relative address. */
+                       *loc = fsel(val, addend);
+                       break;
                 case R_PARISC_DPREL21L:
                         /* left 21 bit of relative address */
                         val = lrsel(val - dp, addend);
@@ -807,6 +811,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
                          */
                         *loc = fsel(val, addend); 
                         break;
+               case R_PARISC_SECREL32:
+                       /* 32-bit section relative address. */
+                       *loc = fsel(val, addend);
+                       break;
                 case R_PARISC_FPTR64:
                         /* 64-bit function address */
                         if(in_local(me, (void *)(val + addend))) {
diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c

index 7484b3d11e0dbf83e58de4b64b08079b4a346bd7..c6d6272a934f03823b655cf07b38e7bbc01ca12e 100644 (file)
--- a/arch/parisc/kernel/parisc_ksyms.c
+++ b/arch/parisc/kernel/parisc_ksyms.c
@@ -47,16 +47,6 @@ EXPORT_SYMBOL(__cmpxchg_u64);
  EXPORT_SYMBOL(lclear_user);
  EXPORT_SYMBOL(lstrnlen_user);
  
-/* Global fixups - defined as int to avoid creation of function pointers */
-extern int fixup_get_user_skip_1;
-extern int fixup_get_user_skip_2;
-extern int fixup_put_user_skip_1;
-extern int fixup_put_user_skip_2;
-EXPORT_SYMBOL(fixup_get_user_skip_1);
-EXPORT_SYMBOL(fixup_get_user_skip_2);
-EXPORT_SYMBOL(fixup_put_user_skip_1);
-EXPORT_SYMBOL(fixup_put_user_skip_2);
-
  #ifndef CONFIG_64BIT
  /* Needed so insmod can set dp value */
  extern int $global$;
diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c

index e282a5131d77e10f62d274d4be426b076f655017..6017a5af2e6e2c8feb45de54adfb36865b65d3ee 100644 (file)
--- a/arch/parisc/kernel/perf.c
+++ b/arch/parisc/kernel/perf.c
@@ -39,7 +39,7 @@
   *  the PDC INTRIGUE calls.  This is done to eliminate bugs introduced
   *  in various PDC revisions.  The code is much more maintainable
   *  and reliable this way vs having to debug on every version of PDC
- *  on every box. 
+ *  on every box.
   */
  
  #include <linux/capability.h>
@@ -195,8 +195,8 @@ static int perf_config(uint32_t *image_ptr);
  static int perf_release(struct inode *inode, struct file *file);
  static int perf_open(struct inode *inode, struct file *file);
  static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos);
-static ssize_t perf_write(struct file *file, const char __user *buf, size_t count, 
-       loff_t *ppos);
+static ssize_t perf_write(struct file *file, const char __user *buf,
+       size_t count, loff_t *ppos);
  static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
  static void perf_start_counters(void);
  static int perf_stop_counters(uint32_t *raddr);
@@ -222,7 +222,7 @@ extern void perf_intrigue_disable_perf_counters (void);
  /*
   * configure:
   *
- * Configure the cpu with a given data image.  First turn off the counters, 
+ * Configure the cpu with a given data image.  First turn off the counters,
   * then download the image, then turn the counters back on.
   */
  static int perf_config(uint32_t *image_ptr)
@@ -234,7 +234,7 @@ static int perf_config(uint32_t *image_ptr)
         error = perf_stop_counters(raddr);
         if (error != 0) {
                 printk("perf_config: perf_stop_counters = %ld\n", error);
-               return -EINVAL; 
+               return -EINVAL;
         }
  
  printk("Preparing to write image\n");
@@ -242,7 +242,7 @@ printk("Preparing to write image\n");
         error = perf_write_image((uint64_t *)image_ptr);
         if (error != 0) {
                 printk("perf_config: DOWNLOAD = %ld\n", error);
-               return -EINVAL; 
+               return -EINVAL;
         }
  
  printk("Preparing to start counters\n");
@@ -254,7 +254,7 @@ printk("Preparing to start counters\n");
  }
  
  /*
- * Open the device and initialize all of its memory.  The device is only 
+ * Open the device and initialize all of its memory.  The device is only
   * opened once, but can be "queried" by multiple processes that know its
   * file descriptor.
   */
@@ -298,19 +298,19 @@ static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t
   * called on the processor that the download should happen
   * on.
   */
-static ssize_t perf_write(struct file *file, const char __user *buf, size_t count, 
-       loff_t *ppos)
+static ssize_t perf_write(struct file *file, const char __user *buf,
+       size_t count, loff_t *ppos)
  {
         size_t image_size;
         uint32_t image_type;
         uint32_t interface_type;
         uint32_t test;
  
-       if (perf_processor_interface == ONYX_INTF) 
+       if (perf_processor_interface == ONYX_INTF)
                 image_size = PCXU_IMAGE_SIZE;
-       else if (perf_processor_interface == CUDA_INTF) 
+       else if (perf_processor_interface == CUDA_INTF)
                 image_size = PCXW_IMAGE_SIZE;
-       else 
+       else
                 return -EFAULT;
  
         if (!capable(CAP_SYS_ADMIN))
@@ -330,22 +330,22 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun
  
         /* First check the machine type is correct for
            the requested image */
-        if (((perf_processor_interface == CUDA_INTF) &&
-                      (interface_type != CUDA_INTF)) ||
-           ((perf_processor_interface == ONYX_INTF) &&
-                      (interface_type != ONYX_INTF))) 
+       if (((perf_processor_interface == CUDA_INTF) &&
+                       (interface_type != CUDA_INTF)) ||
+               ((perf_processor_interface == ONYX_INTF) &&
+                       (interface_type != ONYX_INTF)))
                 return -EINVAL;
  
         /* Next check to make sure the requested image
            is valid */
-       if (((interface_type == CUDA_INTF) && 
+       if (((interface_type == CUDA_INTF) &&
                        (test >= MAX_CUDA_IMAGES)) ||
-           ((interface_type == ONYX_INTF) && 
-                      (test >= MAX_ONYX_IMAGES))) 
+           ((interface_type == ONYX_INTF) &&
+                      (test >= MAX_ONYX_IMAGES)))
                 return -EINVAL;
  
         /* Copy the image into the processor */
-       if (interface_type == CUDA_INTF) 
+       if (interface_type == CUDA_INTF)
                 return perf_config(cuda_images[test]);
         else
                 return perf_config(onyx_images[test]);
@@ -359,7 +359,7 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun
  static void perf_patch_images(void)
  {
  #if 0 /* FIXME!! */
-/* 
+/*
   * NOTE:  this routine is VERY specific to the current TLB image.
   * If the image is changed, this routine might also need to be changed.
   */
@@ -367,9 +367,9 @@ static void perf_patch_images(void)
         extern void $i_dtlb_miss_2_0();
         extern void PA2_0_iva();
  
-       /* 
+       /*
          * We can only use the lower 32-bits, the upper 32-bits should be 0
-        * anyway given this is in the kernel 
+        * anyway given this is in the kernel
          */
         uint32_t itlb_addr  = (uint32_t)&($i_itlb_miss_2_0);
         uint32_t dtlb_addr  = (uint32_t)&($i_dtlb_miss_2_0);
@@ -377,21 +377,21 @@ static void perf_patch_images(void)
  
         if (perf_processor_interface == ONYX_INTF) {
                 /* clear last 2 bytes */
-               onyx_images[TLBMISS][15] &= 0xffffff00;  
+               onyx_images[TLBMISS][15] &= 0xffffff00;
                 /* set 2 bytes */
                 onyx_images[TLBMISS][15] |= (0x000000ff&((dtlb_addr) >> 24));
                 onyx_images[TLBMISS][16] = (dtlb_addr << 8)&0xffffff00;
                 onyx_images[TLBMISS][17] = itlb_addr;
  
                 /* clear last 2 bytes */
-               onyx_images[TLBHANDMISS][15] &= 0xffffff00;  
+               onyx_images[TLBHANDMISS][15] &= 0xffffff00;
                 /* set 2 bytes */
                 onyx_images[TLBHANDMISS][15] |= (0x000000ff&((dtlb_addr) >> 24));
                 onyx_images[TLBHANDMISS][16] = (dtlb_addr << 8)&0xffffff00;
                 onyx_images[TLBHANDMISS][17] = itlb_addr;
  
                 /* clear last 2 bytes */
-               onyx_images[BIG_CPI][15] &= 0xffffff00;  
+               onyx_images[BIG_CPI][15] &= 0xffffff00;
                 /* set 2 bytes */
                 onyx_images[BIG_CPI][15] |= (0x000000ff&((dtlb_addr) >> 24));
                 onyx_images[BIG_CPI][16] = (dtlb_addr << 8)&0xffffff00;
@@ -404,24 +404,24 @@ static void perf_patch_images(void)
  
         } else if (perf_processor_interface == CUDA_INTF) {
                 /* Cuda interface */
-               cuda_images[TLBMISS][16] =  
+               cuda_images[TLBMISS][16] =
                         (cuda_images[TLBMISS][16]&0xffff0000) |
                         ((dtlb_addr >> 8)&0x0000ffff);
-               cuda_images[TLBMISS][17] = 
+               cuda_images[TLBMISS][17] =
                         ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
                 cuda_images[TLBMISS][18] = (itlb_addr << 16)&0xffff0000;
  
-               cuda_images[TLBHANDMISS][16] = 
+               cuda_images[TLBHANDMISS][16] =
                         (cuda_images[TLBHANDMISS][16]&0xffff0000) |
                         ((dtlb_addr >> 8)&0x0000ffff);
-               cuda_images[TLBHANDMISS][17] = 
+               cuda_images[TLBHANDMISS][17] =
                         ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
                 cuda_images[TLBHANDMISS][18] = (itlb_addr << 16)&0xffff0000;
  
-               cuda_images[BIG_CPI][16] = 
+               cuda_images[BIG_CPI][16] =
                         (cuda_images[BIG_CPI][16]&0xffff0000) |
                         ((dtlb_addr >> 8)&0x0000ffff);
-               cuda_images[BIG_CPI][17] = 
+               cuda_images[BIG_CPI][17] =
                         ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
                 cuda_images[BIG_CPI][18] = (itlb_addr << 16)&0xffff0000;
         } else {
@@ -433,7 +433,7 @@ static void perf_patch_images(void)
  
  /*
   * ioctl routine
- * All routines effect the processor that they are executed on.  Thus you 
+ * All routines effect the processor that they are executed on.  Thus you
   * must be running on the processor that you wish to change.
   */
  
@@ -459,7 +459,7 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                         }
  
                         /* copy out the Counters */
-                       if (copy_to_user((void __user *)arg, raddr, 
+                       if (copy_to_user((void __user *)arg, raddr,
                                         sizeof (raddr)) != 0) {
                                 error =  -EFAULT;
                                 break;
@@ -487,7 +487,7 @@ static const struct file_operations perf_fops = {
         .open = perf_open,
         .release = perf_release
  };
-       
+
  static struct miscdevice perf_dev = {
         MISC_DYNAMIC_MINOR,
         PA_PERF_DEV,
@@ -595,7 +595,7 @@ static int perf_stop_counters(uint32_t *raddr)
                 /* OR sticky2 (bit 1496) to counter2 bit 32 */
                 tmp64 |= (userbuf[23] >> 8) & 0x0000000080000000;
                 raddr[2] = (uint32_t)tmp64;
-               
+
                 /* Counter3 is bits 1497 to 1528 */
                 tmp64 =  (userbuf[23] >> 7) & 0x00000000ffffffff;
                 /* OR sticky3 (bit 1529) to counter3 bit 32 */
@@ -617,7 +617,7 @@ static int perf_stop_counters(uint32_t *raddr)
                 userbuf[22] = 0;
                 userbuf[23] = 0;
  
-               /* 
+               /*
                  * Write back the zeroed bytes + the image given
                  * the read was destructive.
                  */
@@ -625,13 +625,13 @@ static int perf_stop_counters(uint32_t *raddr)
         } else {
  
                 /*
-                * Read RDR-15 which contains the counters and sticky bits 
+                * Read RDR-15 which contains the counters and sticky bits
                  */
                 if (!perf_rdr_read_ubuf(15, userbuf)) {
                         return -13;
                 }
  
-               /* 
+               /*
                  * Clear out the counters
                  */
                 perf_rdr_clear(15);
@@ -644,7 +644,7 @@ static int perf_stop_counters(uint32_t *raddr)
                 raddr[2] = (uint32_t)((userbuf[1] >> 32) & 0x00000000ffffffffUL);
                 raddr[3] = (uint32_t)(userbuf[1] & 0x00000000ffffffffUL);
         }
- 
+
         return 0;
  }
  
@@ -682,7 +682,7 @@ static int perf_rdr_read_ubuf(uint32_t      rdr_num, uint64_t *buffer)
         i = tentry->num_words;
         while (i--) {
                 buffer[i] = 0;
-       }       
+       }
  
         /* Check for bits an even number of 64 */
         if ((xbits = width & 0x03f) != 0) {
@@ -808,18 +808,22 @@ static int perf_write_image(uint64_t *memaddr)
         }
  
         runway = ioremap_nocache(cpu_device->hpa.start, 4096);
+       if (!runway) {
+               pr_err("perf_write_image: ioremap failed!\n");
+               return -ENOMEM;
+       }
  
         /* Merge intrigue bits into Runway STATUS 0 */
         tmp64 = __raw_readq(runway + RUNWAY_STATUS) & 0xffecfffffffffffful;
-       __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul), 
+       __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul),
                      runway + RUNWAY_STATUS);
-       
+
         /* Write RUNWAY DEBUG registers */
         for (i = 0; i < 8; i++) {
                 __raw_writeq(*memaddr++, runway + RUNWAY_DEBUG);
         }
  
-       return 0; 
+       return 0;
  }
  
  /*
@@ -843,7 +847,7 @@ printk("perf_rdr_write\n");
                         perf_rdr_shift_out_U(rdr_num, buffer[i]);
                 } else {
                         perf_rdr_shift_out_W(rdr_num, buffer[i]);
-               }       
+               }
         }
  printk("perf_rdr_write done\n");
  }
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c

index 06f7ca7fe70b616b4d68353ae10dd5d409bbbcab..4516a5b53f38ef651c038e4231effa00fd6db19d 100644 (file)
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -142,6 +142,10 @@ void machine_power_off(void)
  
         printk(KERN_EMERG "System shut down completed.\n"
                "Please power this system off now.");
+
+       /* prevent soft lockup/stalled CPU messages for endless loop. */
+       rcu_sysrq_start();
+       for (;;);
  }
  
  void (*pm_power_off)(void) = machine_power_off;
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S

index 3cfef1de8061af183820e98ca97467d674a8c463..44aeaa9c039fc421421a5b1b7524495e0d225eba 100644 (file)
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -444,6 +444,7 @@
         ENTRY_SAME(copy_file_range)
         ENTRY_COMP(preadv2)
         ENTRY_COMP(pwritev2)
+       ENTRY_SAME(statx)
  
  
  .ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b))
diff --git a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile

index 8fa92b8d839abb98efb59bb4ac7689a40aeaf602..f2dac4d73b1b309cb2fbc28b744f948be8a35d11 100644 (file)
--- a/arch/parisc/lib/Makefile
+++ b/arch/parisc/lib/Makefile
@@ -2,7 +2,7 @@
  # Makefile for parisc-specific library files
  #
  
-lib-y  := lusercopy.o bitops.o checksum.o io.o memset.o fixup.o memcpy.o \
+lib-y  := lusercopy.o bitops.o checksum.o io.o memset.o memcpy.o \
            ucmpdi2.o delay.o
  
  obj-y  := iomap.o
diff --git a/arch/parisc/lib/fixup.S b/arch/parisc/lib/fixup.S

deleted file mode 100644 (file)

index a5b72f2..0000000
--- a/arch/parisc/lib/fixup.S
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Linux/PA-RISC Project (http://www.parisc-linux.org/)
- *
- *  Copyright (C) 2004  Randolph Chung <tausq@debian.org>
- *
- *    This program is free software; you can redistribute it and/or modify
- *    it under the terms of the GNU General Public License as published by
- *    the Free Software Foundation; either version 2, or (at your option)
- *    any later version.
- *
- *    This program is distributed in the hope that it will be useful,
- *    but WITHOUT ANY WARRANTY; without even the implied warranty of
- *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *    GNU General Public License for more details.
- *
- *    You should have received a copy of the GNU General Public License
- *    along with this program; if not, write to the Free Software
- *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- * 
- * Fixup routines for kernel exception handling.
- */
-#include <asm/asm-offsets.h>
-#include <asm/assembly.h>
-#include <asm/errno.h>
-#include <linux/linkage.h>
-
-#ifdef CONFIG_SMP
-       .macro  get_fault_ip t1 t2
-       loadgp
-       addil LT%__per_cpu_offset,%r27
-       LDREG RT%__per_cpu_offset(%r1),\t1
-       /* t2 = smp_processor_id() */
-       mfctl 30,\t2
-       ldw TI_CPU(\t2),\t2
-#ifdef CONFIG_64BIT
-       extrd,u \t2,63,32,\t2
-#endif
-       /* t2 = &__per_cpu_offset[smp_processor_id()]; */
-       LDREGX \t2(\t1),\t2 
-       addil LT%exception_data,%r27
-       LDREG RT%exception_data(%r1),\t1
-       /* t1 = this_cpu_ptr(&exception_data) */
-       add,l \t1,\t2,\t1
-       /* %r27 = t1->fault_gp - restore gp */
-       LDREG EXCDATA_GP(\t1), %r27
-       /* t1 = t1->fault_ip */
-       LDREG EXCDATA_IP(\t1), \t1
-       .endm
-#else
-       .macro  get_fault_ip t1 t2
-       loadgp
-       /* t1 = this_cpu_ptr(&exception_data) */
-       addil LT%exception_data,%r27
-       LDREG RT%exception_data(%r1),\t2
-       /* %r27 = t2->fault_gp - restore gp */
-       LDREG EXCDATA_GP(\t2), %r27
-       /* t1 = t2->fault_ip */
-       LDREG EXCDATA_IP(\t2), \t1
-       .endm
-#endif
-
-       .level LEVEL
-
-       .text
-       .section .fixup, "ax"
-
-       /* get_user() fixups, store -EFAULT in r8, and 0 in r9 */
-ENTRY_CFI(fixup_get_user_skip_1)
-       get_fault_ip %r1,%r8
-       ldo 4(%r1), %r1
-       ldi -EFAULT, %r8
-       bv %r0(%r1)
-       copy %r0, %r9
-ENDPROC_CFI(fixup_get_user_skip_1)
-
-ENTRY_CFI(fixup_get_user_skip_2)
-       get_fault_ip %r1,%r8
-       ldo 8(%r1), %r1
-       ldi -EFAULT, %r8
-       bv %r0(%r1)
-       copy %r0, %r9
-ENDPROC_CFI(fixup_get_user_skip_2)
-
-       /* put_user() fixups, store -EFAULT in r8 */
-ENTRY_CFI(fixup_put_user_skip_1)
-       get_fault_ip %r1,%r8
-       ldo 4(%r1), %r1
-       bv %r0(%r1)
-       ldi -EFAULT, %r8
-ENDPROC_CFI(fixup_put_user_skip_1)
-
-ENTRY_CFI(fixup_put_user_skip_2)
-       get_fault_ip %r1,%r8
-       ldo 8(%r1), %r1
-       bv %r0(%r1)
-       ldi -EFAULT, %r8
-ENDPROC_CFI(fixup_put_user_skip_2)
-
diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S

index 56845de6b5dfc9ba21aec0b4840ffec6fb41fac5..85c28bb80fb7433dfcfac2fb10f6cc121448119f 100644 (file)
--- a/arch/parisc/lib/lusercopy.S
+++ b/arch/parisc/lib/lusercopy.S
@@ -5,6 +5,8 @@
   *    Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
   *    Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
   *    Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
+ *    Copyright (C) 2017 Helge Deller <deller@gmx.de>
+ *    Copyright (C) 2017 John David Anglin <dave.anglin@bell.net>
   *
   *
   *    This program is free software; you can redistribute it and/or modify
@@ -132,4 +134,321 @@ ENDPROC_CFI(lstrnlen_user)
  
         .procend
  
+
+
+/*
+ * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
+ *
+ * Inputs:
+ * - sr1 already contains space of source region
+ * - sr2 already contains space of destination region
+ *
+ * Returns:
+ * - number of bytes that could not be copied.
+ *   On success, this will be zero.
+ *
+ * This code is based on a C-implementation of a copy routine written by
+ * Randolph Chung, which in turn was derived from glibc.
+ *
+ * Several strategies are tried to get the best performance for various
+ * conditions. In the optimal case, we copy by loops that copy 32- or 16-bytes
+ * at a time using general registers.  Unaligned copies are handled either by
+ * aligning the destination and then using shift-and-write method, or in a few
+ * cases by falling back to a byte-at-a-time copy.
+ *
+ * Testing with various alignments and buffer sizes shows that this code is
+ * often >10x faster than a simple byte-at-a-time copy, even for strangely
+ * aligned operands. It is interesting to note that the glibc version of memcpy
+ * (written in C) is actually quite fast already. This routine is able to beat
+ * it by 30-40% for aligned copies because of the loop unrolling, but in some
+ * cases the glibc version is still slightly faster. This lends more
+ * credibility that gcc can generate very good code as long as we are careful.
+ *
+ * Possible optimizations:
+ * - add cache prefetching
+ * - try not to use the post-increment address modifiers; they may create
+ *   additional interlocks. Assumption is that those were only efficient on old
+ *   machines (pre PA8000 processors)
+ */
+
+       dst = arg0
+       src = arg1
+       len = arg2
+       end = arg3
+       t1  = r19
+       t2  = r20
+       t3  = r21
+       t4  = r22
+       srcspc = sr1
+       dstspc = sr2
+
+       t0 = r1
+       a1 = t1
+       a2 = t2
+       a3 = t3
+       a0 = t4
+
+       save_src = ret0
+       save_dst = ret1
+       save_len = r31
+
+ENTRY_CFI(pa_memcpy)
+       .proc
+       .callinfo NO_CALLS
+       .entry
+
+       /* Last destination address */
+       add     dst,len,end
+
+       /* short copy with less than 16 bytes? */
+       cmpib,COND(>>=),n 15,len,.Lbyte_loop
+
+       /* same alignment? */
+       xor     src,dst,t0
+       extru   t0,31,2,t1
+       cmpib,<>,n  0,t1,.Lunaligned_copy
+
+#ifdef CONFIG_64BIT
+       /* only do 64-bit copies if we can get aligned. */
+       extru   t0,31,3,t1
+       cmpib,<>,n  0,t1,.Lalign_loop32
+
+       /* loop until we are 64-bit aligned */
+.Lalign_loop64:
+       extru   dst,31,3,t1
+       cmpib,=,n       0,t1,.Lcopy_loop_16_start
+20:    ldb,ma  1(srcspc,src),t1
+21:    stb,ma  t1,1(dstspc,dst)
+       b       .Lalign_loop64
+       ldo     -1(len),len
+
+       ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+.Lcopy_loop_16_start:
+       ldi     31,t0
+.Lcopy_loop_16:
+       cmpb,COND(>>=),n t0,len,.Lword_loop
+
+10:    ldd     0(srcspc,src),t1
+11:    ldd     8(srcspc,src),t2
+       ldo     16(src),src
+12:    std,ma  t1,8(dstspc,dst)
+13:    std,ma  t2,8(dstspc,dst)
+14:    ldd     0(srcspc,src),t1
+15:    ldd     8(srcspc,src),t2
+       ldo     16(src),src
+16:    std,ma  t1,8(dstspc,dst)
+17:    std,ma  t2,8(dstspc,dst)
+
+       ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault)
+       ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault)
+       ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)
+
+       b       .Lcopy_loop_16
+       ldo     -32(len),len
+
+.Lword_loop:
+       cmpib,COND(>>=),n 3,len,.Lbyte_loop
+20:    ldw,ma  4(srcspc,src),t1
+21:    stw,ma  t1,4(dstspc,dst)
+       b       .Lword_loop
+       ldo     -4(len),len
+
+       ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+#endif /* CONFIG_64BIT */
+
+       /* loop until we are 32-bit aligned */
+.Lalign_loop32:
+       extru   dst,31,2,t1
+       cmpib,=,n       0,t1,.Lcopy_loop_8
+20:    ldb,ma  1(srcspc,src),t1
+21:    stb,ma  t1,1(dstspc,dst)
+       b       .Lalign_loop32
+       ldo     -1(len),len
+
+       ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+
+.Lcopy_loop_8:
+       cmpib,COND(>>=),n 15,len,.Lbyte_loop
+
+10:    ldw     0(srcspc,src),t1
+11:    ldw     4(srcspc,src),t2
+12:    stw,ma  t1,4(dstspc,dst)
+13:    stw,ma  t2,4(dstspc,dst)
+14:    ldw     8(srcspc,src),t1
+15:    ldw     12(srcspc,src),t2
+       ldo     16(src),src
+16:    stw,ma  t1,4(dstspc,dst)
+17:    stw,ma  t2,4(dstspc,dst)
+
+       ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault)
+       ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault)
+       ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)
+
+       b       .Lcopy_loop_8
+       ldo     -16(len),len
+
+.Lbyte_loop:
+       cmpclr,COND(<>) len,%r0,%r0
+       b,n     .Lcopy_done
+20:    ldb     0(srcspc,src),t1
+       ldo     1(src),src
+21:    stb,ma  t1,1(dstspc,dst)
+       b       .Lbyte_loop
+       ldo     -1(len),len
+
+       ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+.Lcopy_done:
+       bv      %r0(%r2)
+       sub     end,dst,ret0
+
+
+       /* src and dst are not aligned the same way. */
+       /* need to go the hard way */
+.Lunaligned_copy:
+       /* align until dst is 32bit-word-aligned */
+       extru   dst,31,2,t1
+       cmpib,=,n       0,t1,.Lcopy_dstaligned
+20:    ldb     0(srcspc,src),t1
+       ldo     1(src),src
+21:    stb,ma  t1,1(dstspc,dst)
+       b       .Lunaligned_copy
+       ldo     -1(len),len
+
+       ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+.Lcopy_dstaligned:
+
+       /* store src, dst and len in safe place */
+       copy    src,save_src
+       copy    dst,save_dst
+       copy    len,save_len
+
+       /* len now needs to give the number of words to copy */
+       SHRREG  len,2,len
+
+       /*
+        * Copy from a not-aligned src to an aligned dst using shifts.
+        * Handles 4 words per loop.
+        */
+
+       depw,z src,28,2,t0
+       subi 32,t0,t0
+       mtsar t0
+       extru len,31,2,t0
+       cmpib,= 2,t0,.Lcase2
+       /* Make src aligned by rounding it down.  */
+       depi 0,31,2,src
+
+       cmpiclr,<> 3,t0,%r0
+       b,n .Lcase3
+       cmpiclr,<> 1,t0,%r0
+       b,n .Lcase1
+.Lcase0:
+       cmpb,COND(=) %r0,len,.Lcda_finish
+       nop
+
+1:     ldw,ma 4(srcspc,src), a3
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+1:     ldw,ma 4(srcspc,src), a0
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+       b,n .Ldo3
+.Lcase1:
+1:     ldw,ma 4(srcspc,src), a2
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+1:     ldw,ma 4(srcspc,src), a3
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+       ldo -1(len),len
+       cmpb,COND(=),n %r0,len,.Ldo0
+.Ldo4:
+1:     ldw,ma 4(srcspc,src), a0
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+       shrpw a2, a3, %sar, t0
+1:     stw,ma t0, 4(dstspc,dst)
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+.Ldo3:
+1:     ldw,ma 4(srcspc,src), a1
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+       shrpw a3, a0, %sar, t0
+1:     stw,ma t0, 4(dstspc,dst)
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+.Ldo2:
+1:     ldw,ma 4(srcspc,src), a2
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+       shrpw a0, a1, %sar, t0
+1:     stw,ma t0, 4(dstspc,dst)
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+.Ldo1:
+1:     ldw,ma 4(srcspc,src), a3
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+       shrpw a1, a2, %sar, t0
+1:     stw,ma t0, 4(dstspc,dst)
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+       ldo -4(len),len
+       cmpb,COND(<>) %r0,len,.Ldo4
+       nop
+.Ldo0:
+       shrpw a2, a3, %sar, t0
+1:     stw,ma t0, 4(dstspc,dst)
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+
+.Lcda_rdfault:
+.Lcda_finish:
+       /* calculate new src, dst and len and jump to byte-copy loop */
+       sub     dst,save_dst,t0
+       add     save_src,t0,src
+       b       .Lbyte_loop
+       sub     save_len,t0,len
+
+.Lcase3:
+1:     ldw,ma 4(srcspc,src), a0
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+1:     ldw,ma 4(srcspc,src), a1
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+       b .Ldo2
+       ldo 1(len),len
+.Lcase2:
+1:     ldw,ma 4(srcspc,src), a1
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+1:     ldw,ma 4(srcspc,src), a2
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+       b .Ldo1
+       ldo 2(len),len
+
+
+       /* fault exception fixup handlers: */
+#ifdef CONFIG_64BIT
+.Lcopy16_fault:
+       b       .Lcopy_done
+10:    std,ma  t1,8(dstspc,dst)
+       ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
+#endif
+
+.Lcopy8_fault:
+       b       .Lcopy_done
+10:    stw,ma  t1,4(dstspc,dst)
+       ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
+
+       .exit
+ENDPROC_CFI(pa_memcpy)
+       .procend
+
         .end
diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c

index f82ff10ed974117a59f8713eba4281350feb44aa..b3d47ec1d80a241db8781518026d5a95e26ddb24 100644 (file)
--- a/arch/parisc/lib/memcpy.c
+++ b/arch/parisc/lib/memcpy.c
@@ -2,7 +2,7 @@
   *    Optimized memory copy routines.
   *
   *    Copyright (C) 2004 Randolph Chung <tausq@debian.org>
- *    Copyright (C) 2013 Helge Deller <deller@gmx.de>
+ *    Copyright (C) 2013-2017 Helge Deller <deller@gmx.de>
   *
   *    This program is free software; you can redistribute it and/or modify
   *    it under the terms of the GNU General Public License as published by
@@ -21,474 +21,21 @@
   *    Portions derived from the GNU C Library
   *    Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc.
   *
- * Several strategies are tried to try to get the best performance for various
- * conditions. In the optimal case, we copy 64-bytes in an unrolled loop using 
- * fp regs. This is followed by loops that copy 32- or 16-bytes at a time using
- * general registers.  Unaligned copies are handled either by aligning the 
- * destination and then using shift-and-write method, or in a few cases by 
- * falling back to a byte-at-a-time copy.
- *
- * I chose to implement this in C because it is easier to maintain and debug,
- * and in my experiments it appears that the C code generated by gcc (3.3/3.4
- * at the time of writing) is fairly optimal. Unfortunately some of the 
- * semantics of the copy routine (exception handling) is difficult to express
- * in C, so we have to play some tricks to get it to work.
- *
- * All the loads and stores are done via explicit asm() code in order to use
- * the right space registers. 
- * 
- * Testing with various alignments and buffer sizes shows that this code is 
- * often >10x faster than a simple byte-at-a-time copy, even for strangely
- * aligned operands. It is interesting to note that the glibc version
- * of memcpy (written in C) is actually quite fast already. This routine is 
- * able to beat it by 30-40% for aligned copies because of the loop unrolling, 
- * but in some cases the glibc version is still slightly faster. This lends 
- * more credibility that gcc can generate very good code as long as we are 
- * careful.
- *
- * TODO:
- * - cache prefetching needs more experimentation to get optimal settings
- * - try not to use the post-increment address modifiers; they create additional
- *   interlocks
- * - replace byte-copy loops with stybs sequences
   */
  
-#ifdef __KERNEL__
  #include <linux/module.h>
  #include <linux/compiler.h>
  #include <linux/uaccess.h>
-#define s_space "%%sr1"
-#define d_space "%%sr2"
-#else
-#include "memcpy.h"
-#define s_space "%%sr0"
-#define d_space "%%sr0"
-#define pa_memcpy new2_copy
-#endif
  
  DECLARE_PER_CPU(struct exception_data, exception_data);
  
-#define preserve_branch(label) do {                                    \
-       volatile int dummy = 0;                                         \
-       /* The following branch is never taken, it's just here to  */   \
-       /* prevent gcc from optimizing away our exception code. */      \
-       if (unlikely(dummy != dummy))                                   \
-               goto label;                                             \
-} while (0)
-
  #define get_user_space() (segment_eq(get_fs(), KERNEL_DS) ? 0 : mfsp(3))
  #define get_kernel_space() (0)
  
-#define MERGE(w0, sh_1, w1, sh_2)  ({                                  \
-       unsigned int _r;                                                \
-       asm volatile (                                                  \
-       "mtsar %3\n"                                                    \
-       "shrpw %1, %2, %%sar, %0\n"                                     \
-       : "=r"(_r)                                                      \
-       : "r"(w0), "r"(w1), "r"(sh_2)                                   \
-       );                                                              \
-       _r;                                                             \
-})
-#define THRESHOLD      16
-
-#ifdef DEBUG_MEMCPY
-#define DPRINTF(fmt, args...) do { printk(KERN_DEBUG "%s:%d:%s ", __FILE__, __LINE__, __func__ ); printk(KERN_DEBUG fmt, ##args ); } while (0)
-#else
-#define DPRINTF(fmt, args...)
-#endif
-
-#define def_load_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e)    \
-       __asm__ __volatile__ (                          \
-       "1:\t" #_insn ",ma " #_sz "(" _s ",%1), %0\n\t" \
-       ASM_EXCEPTIONTABLE_ENTRY(1b,_e)                 \
-       : _tt(_t), "+r"(_a)                             \
-       :                                               \
-       : "r8")
-
-#define def_store_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e)   \
-       __asm__ __volatile__ (                          \
-       "1:\t" #_insn ",ma %1, " #_sz "(" _s ",%0)\n\t" \
-       ASM_EXCEPTIONTABLE_ENTRY(1b,_e)                 \
-       : "+r"(_a)                                      \
-       : _tt(_t)                                       \
-       : "r8")
-
-#define ldbma(_s, _a, _t, _e) def_load_ai_insn(ldbs,1,"=r",_s,_a,_t,_e)
-#define stbma(_s, _t, _a, _e) def_store_ai_insn(stbs,1,"r",_s,_a,_t,_e)
-#define ldwma(_s, _a, _t, _e) def_load_ai_insn(ldw,4,"=r",_s,_a,_t,_e)
-#define stwma(_s, _t, _a, _e) def_store_ai_insn(stw,4,"r",_s,_a,_t,_e)
-#define flddma(_s, _a, _t, _e) def_load_ai_insn(fldd,8,"=f",_s,_a,_t,_e)
-#define fstdma(_s, _t, _a, _e) def_store_ai_insn(fstd,8,"f",_s,_a,_t,_e)
-
-#define def_load_insn(_insn,_tt,_s,_o,_a,_t,_e)        \
-       __asm__ __volatile__ (                          \
-       "1:\t" #_insn " " #_o "(" _s ",%1), %0\n\t"     \
-       ASM_EXCEPTIONTABLE_ENTRY(1b,_e)                 \
-       : _tt(_t)                                       \
-       : "r"(_a)                                       \
-       : "r8")
-
-#define def_store_insn(_insn,_tt,_s,_t,_o,_a,_e)       \
-       __asm__ __volatile__ (                          \
-       "1:\t" #_insn " %0, " #_o "(" _s ",%1)\n\t"     \
-       ASM_EXCEPTIONTABLE_ENTRY(1b,_e)                 \
-       :                                               \
-       : _tt(_t), "r"(_a)                              \
-       : "r8")
-
-#define ldw(_s,_o,_a,_t,_e)    def_load_insn(ldw,"=r",_s,_o,_a,_t,_e)
-#define stw(_s,_t,_o,_a,_e)    def_store_insn(stw,"r",_s,_t,_o,_a,_e)
-
-#ifdef  CONFIG_PREFETCH
-static inline void prefetch_src(const void *addr)
-{
-       __asm__("ldw 0(" s_space ",%0), %%r0" : : "r" (addr));
-}
-
-static inline void prefetch_dst(const void *addr)
-{
-       __asm__("ldd 0(" d_space ",%0), %%r0" : : "r" (addr));
-}
-#else
-#define prefetch_src(addr) do { } while(0)
-#define prefetch_dst(addr) do { } while(0)
-#endif
-
-#define PA_MEMCPY_OK           0
-#define PA_MEMCPY_LOAD_ERROR   1
-#define PA_MEMCPY_STORE_ERROR  2
-
-/* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words
- * per loop.  This code is derived from glibc. 
- */
-static noinline unsigned long copy_dstaligned(unsigned long dst,
-                                       unsigned long src, unsigned long len)
-{
-       /* gcc complains that a2 and a3 may be uninitialized, but actually
-        * they cannot be.  Initialize a2/a3 to shut gcc up.
-        */
-       register unsigned int a0, a1, a2 = 0, a3 = 0;
-       int sh_1, sh_2;
-
-       /* prefetch_src((const void *)src); */
-
-       /* Calculate how to shift a word read at the memory operation
-          aligned srcp to make it aligned for copy.  */
-       sh_1 = 8 * (src % sizeof(unsigned int));
-       sh_2 = 8 * sizeof(unsigned int) - sh_1;
-
-       /* Make src aligned by rounding it down.  */
-       src &= -sizeof(unsigned int);
-
-       switch (len % 4)
-       {
-               case 2:
-                       /* a1 = ((unsigned int *) src)[0];
-                          a2 = ((unsigned int *) src)[1]; */
-                       ldw(s_space, 0, src, a1, cda_ldw_exc);
-                       ldw(s_space, 4, src, a2, cda_ldw_exc);
-                       src -= 1 * sizeof(unsigned int);
-                       dst -= 3 * sizeof(unsigned int);
-                       len += 2;
-                       goto do1;
-               case 3:
-                       /* a0 = ((unsigned int *) src)[0];
-                          a1 = ((unsigned int *) src)[1]; */
-                       ldw(s_space, 0, src, a0, cda_ldw_exc);
-                       ldw(s_space, 4, src, a1, cda_ldw_exc);
-                       src -= 0 * sizeof(unsigned int);
-                       dst -= 2 * sizeof(unsigned int);
-                       len += 1;
-                       goto do2;
-               case 0:
-                       if (len == 0)
-                               return PA_MEMCPY_OK;
-                       /* a3 = ((unsigned int *) src)[0];
-                          a0 = ((unsigned int *) src)[1]; */
-                       ldw(s_space, 0, src, a3, cda_ldw_exc);
-                       ldw(s_space, 4, src, a0, cda_ldw_exc);
-                       src -=-1 * sizeof(unsigned int);
-                       dst -= 1 * sizeof(unsigned int);
-                       len += 0;
-                       goto do3;
-               case 1:
-                       /* a2 = ((unsigned int *) src)[0];
-                          a3 = ((unsigned int *) src)[1]; */
-                       ldw(s_space, 0, src, a2, cda_ldw_exc);
-                       ldw(s_space, 4, src, a3, cda_ldw_exc);
-                       src -=-2 * sizeof(unsigned int);
-                       dst -= 0 * sizeof(unsigned int);
-                       len -= 1;
-                       if (len == 0)
-                               goto do0;
-                       goto do4;                       /* No-op.  */
-       }
-
-       do
-       {
-               /* prefetch_src((const void *)(src + 4 * sizeof(unsigned int))); */
-do4:
-               /* a0 = ((unsigned int *) src)[0]; */
-               ldw(s_space, 0, src, a0, cda_ldw_exc);
-               /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
-               stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
-do3:
-               /* a1 = ((unsigned int *) src)[1]; */
-               ldw(s_space, 4, src, a1, cda_ldw_exc);
-               /* ((unsigned int *) dst)[1] = MERGE (a3, sh_1, a0, sh_2); */
-               stw(d_space, MERGE (a3, sh_1, a0, sh_2), 4, dst, cda_stw_exc);
-do2:
-               /* a2 = ((unsigned int *) src)[2]; */
-               ldw(s_space, 8, src, a2, cda_ldw_exc);
-               /* ((unsigned int *) dst)[2] = MERGE (a0, sh_1, a1, sh_2); */
-               stw(d_space, MERGE (a0, sh_1, a1, sh_2), 8, dst, cda_stw_exc);
-do1:
-               /* a3 = ((unsigned int *) src)[3]; */
-               ldw(s_space, 12, src, a3, cda_ldw_exc);
-               /* ((unsigned int *) dst)[3] = MERGE (a1, sh_1, a2, sh_2); */
-               stw(d_space, MERGE (a1, sh_1, a2, sh_2), 12, dst, cda_stw_exc);
-
-               src += 4 * sizeof(unsigned int);
-               dst += 4 * sizeof(unsigned int);
-               len -= 4;
-       }
-       while (len != 0);
-
-do0:
-       /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
-       stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
-
-       preserve_branch(handle_load_error);
-       preserve_branch(handle_store_error);
-
-       return PA_MEMCPY_OK;
-
-handle_load_error:
-       __asm__ __volatile__ ("cda_ldw_exc:\n");
-       return PA_MEMCPY_LOAD_ERROR;
-
-handle_store_error:
-       __asm__ __volatile__ ("cda_stw_exc:\n");
-       return PA_MEMCPY_STORE_ERROR;
-}
-
-
-/* Returns PA_MEMCPY_OK, PA_MEMCPY_LOAD_ERROR or PA_MEMCPY_STORE_ERROR.
- * In case of an access fault the faulty address can be read from the per_cpu
- * exception data struct. */
-static noinline unsigned long pa_memcpy_internal(void *dstp, const void *srcp,
-                                       unsigned long len)
-{
-       register unsigned long src, dst, t1, t2, t3;
-       register unsigned char *pcs, *pcd;
-       register unsigned int *pws, *pwd;
-       register double *pds, *pdd;
-       unsigned long ret;
-
-       src = (unsigned long)srcp;
-       dst = (unsigned long)dstp;
-       pcs = (unsigned char *)srcp;
-       pcd = (unsigned char *)dstp;
-
-       /* prefetch_src((const void *)srcp); */
-
-       if (len < THRESHOLD)
-               goto byte_copy;
-
-       /* Check alignment */
-       t1 = (src ^ dst);
-       if (unlikely(t1 & (sizeof(double)-1)))
-               goto unaligned_copy;
-
-       /* src and dst have same alignment. */
-
-       /* Copy bytes till we are double-aligned. */
-       t2 = src & (sizeof(double) - 1);
-       if (unlikely(t2 != 0)) {
-               t2 = sizeof(double) - t2;
-               while (t2 && len) {
-                       /* *pcd++ = *pcs++; */
-                       ldbma(s_space, pcs, t3, pmc_load_exc);
-                       len--;
-                       stbma(d_space, t3, pcd, pmc_store_exc);
-                       t2--;
-               }
-       }
-
-       pds = (double *)pcs;
-       pdd = (double *)pcd;
-
-#if 0
-       /* Copy 8 doubles at a time */
-       while (len >= 8*sizeof(double)) {
-               register double r1, r2, r3, r4, r5, r6, r7, r8;
-               /* prefetch_src((char *)pds + L1_CACHE_BYTES); */
-               flddma(s_space, pds, r1, pmc_load_exc);
-               flddma(s_space, pds, r2, pmc_load_exc);
-               flddma(s_space, pds, r3, pmc_load_exc);
-               flddma(s_space, pds, r4, pmc_load_exc);
-               fstdma(d_space, r1, pdd, pmc_store_exc);
-               fstdma(d_space, r2, pdd, pmc_store_exc);
-               fstdma(d_space, r3, pdd, pmc_store_exc);
-               fstdma(d_space, r4, pdd, pmc_store_exc);
-
-#if 0
-               if (L1_CACHE_BYTES <= 32)
-                       prefetch_src((char *)pds + L1_CACHE_BYTES);
-#endif
-               flddma(s_space, pds, r5, pmc_load_exc);
-               flddma(s_space, pds, r6, pmc_load_exc);
-               flddma(s_space, pds, r7, pmc_load_exc);
-               flddma(s_space, pds, r8, pmc_load_exc);
-               fstdma(d_space, r5, pdd, pmc_store_exc);
-               fstdma(d_space, r6, pdd, pmc_store_exc);
-               fstdma(d_space, r7, pdd, pmc_store_exc);
-               fstdma(d_space, r8, pdd, pmc_store_exc);
-               len -= 8*sizeof(double);
-       }
-#endif
-
-       pws = (unsigned int *)pds;
-       pwd = (unsigned int *)pdd;
-
-word_copy:
-       while (len >= 8*sizeof(unsigned int)) {
-               register unsigned int r1,r2,r3,r4,r5,r6,r7,r8;
-               /* prefetch_src((char *)pws + L1_CACHE_BYTES); */
-               ldwma(s_space, pws, r1, pmc_load_exc);
-               ldwma(s_space, pws, r2, pmc_load_exc);
-               ldwma(s_space, pws, r3, pmc_load_exc);
-               ldwma(s_space, pws, r4, pmc_load_exc);
-               stwma(d_space, r1, pwd, pmc_store_exc);
-               stwma(d_space, r2, pwd, pmc_store_exc);
-               stwma(d_space, r3, pwd, pmc_store_exc);
-               stwma(d_space, r4, pwd, pmc_store_exc);
-
-               ldwma(s_space, pws, r5, pmc_load_exc);
-               ldwma(s_space, pws, r6, pmc_load_exc);
-               ldwma(s_space, pws, r7, pmc_load_exc);
-               ldwma(s_space, pws, r8, pmc_load_exc);
-               stwma(d_space, r5, pwd, pmc_store_exc);
-               stwma(d_space, r6, pwd, pmc_store_exc);
-               stwma(d_space, r7, pwd, pmc_store_exc);
-               stwma(d_space, r8, pwd, pmc_store_exc);
-               len -= 8*sizeof(unsigned int);
-       }
-
-       while (len >= 4*sizeof(unsigned int)) {
-               register unsigned int r1,r2,r3,r4;
-               ldwma(s_space, pws, r1, pmc_load_exc);
-               ldwma(s_space, pws, r2, pmc_load_exc);
-               ldwma(s_space, pws, r3, pmc_load_exc);
-               ldwma(s_space, pws, r4, pmc_load_exc);
-               stwma(d_space, r1, pwd, pmc_store_exc);
-               stwma(d_space, r2, pwd, pmc_store_exc);
-               stwma(d_space, r3, pwd, pmc_store_exc);
-               stwma(d_space, r4, pwd, pmc_store_exc);
-               len -= 4*sizeof(unsigned int);
-       }
-
-       pcs = (unsigned char *)pws;
-       pcd = (unsigned char *)pwd;
-
-byte_copy:
-       while (len) {
-               /* *pcd++ = *pcs++; */
-               ldbma(s_space, pcs, t3, pmc_load_exc);
-               stbma(d_space, t3, pcd, pmc_store_exc);
-               len--;
-       }
-
-       return PA_MEMCPY_OK;
-
-unaligned_copy:
-       /* possibly we are aligned on a word, but not on a double... */
-       if (likely((t1 & (sizeof(unsigned int)-1)) == 0)) {
-               t2 = src & (sizeof(unsigned int) - 1);
-
-               if (unlikely(t2 != 0)) {
-                       t2 = sizeof(unsigned int) - t2;
-                       while (t2) {
-                               /* *pcd++ = *pcs++; */
-                               ldbma(s_space, pcs, t3, pmc_load_exc);
-                               stbma(d_space, t3, pcd, pmc_store_exc);
-                               len--;
-                               t2--;
-                       }
-               }
-
-               pws = (unsigned int *)pcs;
-               pwd = (unsigned int *)pcd;
-               goto word_copy;
-       }
-
-       /* Align the destination.  */
-       if (unlikely((dst & (sizeof(unsigned int) - 1)) != 0)) {
-               t2 = sizeof(unsigned int) - (dst & (sizeof(unsigned int) - 1));
-               while (t2) {
-                       /* *pcd++ = *pcs++; */
-                       ldbma(s_space, pcs, t3, pmc_load_exc);
-                       stbma(d_space, t3, pcd, pmc_store_exc);
-                       len--;
-                       t2--;
-               }
-               dst = (unsigned long)pcd;
-               src = (unsigned long)pcs;
-       }
-
-       ret = copy_dstaligned(dst, src, len / sizeof(unsigned int));
-       if (ret)
-               return ret;
-
-       pcs += (len & -sizeof(unsigned int));
-       pcd += (len & -sizeof(unsigned int));
-       len %= sizeof(unsigned int);
-
-       preserve_branch(handle_load_error);
-       preserve_branch(handle_store_error);
-
-       goto byte_copy;
-
-handle_load_error:
-       __asm__ __volatile__ ("pmc_load_exc:\n");
-       return PA_MEMCPY_LOAD_ERROR;
-
-handle_store_error:
-       __asm__ __volatile__ ("pmc_store_exc:\n");
-       return PA_MEMCPY_STORE_ERROR;
-}
-
-
  /* Returns 0 for success, otherwise, returns number of bytes not transferred. */
-static unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
-{
-       unsigned long ret, fault_addr, reference;
-       struct exception_data *d;
-
-       ret = pa_memcpy_internal(dstp, srcp, len);
-       if (likely(ret == PA_MEMCPY_OK))
-               return 0;
-
-       /* if a load or store fault occured we can get the faulty addr */
-       d = this_cpu_ptr(&exception_data);
-       fault_addr = d->fault_addr;
-
-       /* error in load or store? */
-       if (ret == PA_MEMCPY_LOAD_ERROR)
-               reference = (unsigned long) srcp;
-       else
-               reference = (unsigned long) dstp;
+extern unsigned long pa_memcpy(void *dst, const void *src,
+                               unsigned long len);
  
-       DPRINTF("pa_memcpy: fault type = %lu, len=%lu fault_addr=%lu ref=%lu\n",
-               ret, len, fault_addr, reference);
-
-       if (fault_addr >= reference)
-               return len - (fault_addr - reference);
-       else
-               return len;
-}
-
-#ifdef __KERNEL__
  unsigned long __copy_to_user(void __user *dst, const void *src,
                              unsigned long len)
  {
@@ -537,5 +84,3 @@ long probe_kernel_read(void *dst, const void *src, size_t size)
  
         return __probe_kernel_read(dst, src, size);
  }
-
-#endif
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c

index deab89a8915a108a3fd98bef581605b59c094ab8..32ec22146141e56f9436bfbdfaccf0256820a552 100644 (file)
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -150,6 +150,23 @@ int fixup_exception(struct pt_regs *regs)
                 d->fault_space = regs->isr;
                 d->fault_addr = regs->ior;
  
+               /*
+                * Fix up get_user() and put_user().
+                * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant
+                * bit in the relative address of the fixup routine to indicate
+                * that %r8 should be loaded with -EFAULT to report a userspace
+                * access error.
+                */
+               if (fix->fixup & 1) {
+                       regs->gr[8] = -EFAULT;
+
+                       /* zero target register for get_user() */
+                       if (parisc_acctyp(0, regs->iir) == VM_READ) {
+                               int treg = regs->iir & 0x1f;
+                               regs->gr[treg] = 0;
+                       }
+               }
+
                 regs->iaoq[0] = (unsigned long)&fix->fixup + fix->fixup;
                 regs->iaoq[0] &= ~3;
                 /*
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig

index 494091762bd7f3a43afe15945943343c4cb23fe0..97a8bc8a095ce4199ad2e4e0c88c2933fc3b6987 100644 (file)
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -80,93 +80,99 @@ config ARCH_HAS_DMA_SET_COHERENT_MASK
  config PPC
         bool
         default y
-       select BUILDTIME_EXTABLE_SORT
+       #
+       # Please keep this list sorted alphabetically.
+       #
+       select ARCH_HAS_DEVMEM_IS_ALLOWED
+       select ARCH_HAS_DMA_SET_COHERENT_MASK
+       select ARCH_HAS_ELF_RANDOMIZE
+       select ARCH_HAS_GCOV_PROFILE_ALL
+       select ARCH_HAS_SCALED_CPUTIME          if VIRT_CPU_ACCOUNTING_NATIVE
+       select ARCH_HAS_SG_CHAIN
+       select ARCH_HAS_TICK_BROADCAST          if GENERIC_CLOCKEVENTS_BROADCAST
+       select ARCH_HAS_UBSAN_SANITIZE_ALL
+       select ARCH_HAVE_NMI_SAFE_CMPXCHG
         select ARCH_MIGHT_HAVE_PC_PARPORT
         select ARCH_MIGHT_HAVE_PC_SERIO
+       select ARCH_SUPPORTS_ATOMIC_RMW
+       select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
+       select ARCH_USE_BUILTIN_BSWAP
+       select ARCH_USE_CMPXCHG_LOCKREF         if PPC64
+       select ARCH_WANT_IPC_PARSE_VERSION
         select BINFMT_ELF
-       select ARCH_HAS_ELF_RANDOMIZE
-       select OF
-       select OF_EARLY_FLATTREE
-       select OF_RESERVED_MEM
-       select HAVE_FTRACE_MCOUNT_RECORD
+       select BUILDTIME_EXTABLE_SORT
+       select CLONE_BACKWARDS
+       select DCACHE_WORD_ACCESS               if PPC64 && CPU_LITTLE_ENDIAN
+       select EDAC_ATOMIC_SCRUB
+       select EDAC_SUPPORT
+       select GENERIC_ATOMIC64                 if PPC32
+       select GENERIC_CLOCKEVENTS
+       select GENERIC_CLOCKEVENTS_BROADCAST    if SMP
+       select GENERIC_CMOS_UPDATE
+       select GENERIC_CPU_AUTOPROBE
+       select GENERIC_IRQ_SHOW
+       select GENERIC_IRQ_SHOW_LEVEL
+       select GENERIC_SMP_IDLE_THREAD
+       select GENERIC_STRNCPY_FROM_USER
+       select GENERIC_STRNLEN_USER
+       select GENERIC_TIME_VSYSCALL_OLD
+       select HAVE_ARCH_AUDITSYSCALL
+       select HAVE_ARCH_HARDENED_USERCOPY
+       select HAVE_ARCH_JUMP_LABEL
+       select HAVE_ARCH_KGDB
+       select HAVE_ARCH_SECCOMP_FILTER
+       select HAVE_ARCH_TRACEHOOK
+       select HAVE_CBPF_JIT                    if !PPC64
+       select HAVE_CONTEXT_TRACKING            if PPC64
+       select HAVE_DEBUG_KMEMLEAK
+       select HAVE_DEBUG_STACKOVERFLOW
+       select HAVE_DMA_API_DEBUG
         select HAVE_DYNAMIC_FTRACE
-       select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL
-       select HAVE_FUNCTION_TRACER
+       select HAVE_DYNAMIC_FTRACE_WITH_REGS    if MPROFILE_KERNEL
+       select HAVE_EBPF_JIT                    if PPC64
+       select HAVE_EFFICIENT_UNALIGNED_ACCESS  if !(CPU_LITTLE_ENDIAN && POWER7_CPU)
+       select HAVE_FTRACE_MCOUNT_RECORD
         select HAVE_FUNCTION_GRAPH_TRACER
+       select HAVE_FUNCTION_TRACER
         select HAVE_GCC_PLUGINS
-       select SYSCTL_EXCEPTION_TRACE
-       select VIRT_TO_BUS if !PPC64
+       select HAVE_GENERIC_RCU_GUP
+       select HAVE_HW_BREAKPOINT               if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
         select HAVE_IDE
         select HAVE_IOREMAP_PROT
-       select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU)
+       select HAVE_IRQ_EXIT_ON_IRQ_STACK
+       select HAVE_KERNEL_GZIP
         select HAVE_KPROBES
-       select HAVE_OPTPROBES if PPC64
-       select HAVE_ARCH_KGDB
         select HAVE_KRETPROBES
-       select HAVE_ARCH_TRACEHOOK
+       select HAVE_LIVEPATCH                   if HAVE_DYNAMIC_FTRACE_WITH_REGS
         select HAVE_MEMBLOCK
         select HAVE_MEMBLOCK_NODE_MAP
-       select HAVE_DMA_API_DEBUG
+       select HAVE_MOD_ARCH_SPECIFIC
+       select HAVE_NMI                         if PERF_EVENTS
         select HAVE_OPROFILE
-       select HAVE_DEBUG_KMEMLEAK
-       select ARCH_HAS_SG_CHAIN
-       select GENERIC_ATOMIC64 if PPC32
+       select HAVE_OPTPROBES                   if PPC64
         select HAVE_PERF_EVENTS
+       select HAVE_PERF_EVENTS_NMI             if PPC64
         select HAVE_PERF_REGS
         select HAVE_PERF_USER_STACK_DUMP
+       select HAVE_RCU_TABLE_FREE              if SMP
         select HAVE_REGS_AND_STACK_ACCESS_API
-       select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
-       select ARCH_WANT_IPC_PARSE_VERSION
-       select SPARSE_IRQ
+       select HAVE_SYSCALL_TRACEPOINTS
+       select HAVE_VIRT_CPU_ACCOUNTING
         select IRQ_DOMAIN
-       select GENERIC_IRQ_SHOW
-       select GENERIC_IRQ_SHOW_LEVEL
         select IRQ_FORCED_THREADING
-       select HAVE_RCU_TABLE_FREE if SMP
-       select HAVE_SYSCALL_TRACEPOINTS
-       select HAVE_CBPF_JIT if !PPC64
-       select HAVE_EBPF_JIT if PPC64
-       select HAVE_ARCH_JUMP_LABEL
-       select ARCH_HAVE_NMI_SAFE_CMPXCHG
-       select ARCH_HAS_GCOV_PROFILE_ALL
-       select GENERIC_SMP_IDLE_THREAD
-       select GENERIC_CMOS_UPDATE
-       select GENERIC_TIME_VSYSCALL_OLD
-       select GENERIC_CLOCKEVENTS
-       select GENERIC_CLOCKEVENTS_BROADCAST if SMP
-       select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
-       select GENERIC_STRNCPY_FROM_USER
-       select GENERIC_STRNLEN_USER
-       select HAVE_MOD_ARCH_SPECIFIC
         select MODULES_USE_ELF_RELA
-       select CLONE_BACKWARDS
-       select ARCH_USE_BUILTIN_BSWAP
-       select OLD_SIGSUSPEND
-       select OLD_SIGACTION if PPC32
-       select HAVE_DEBUG_STACKOVERFLOW
-       select HAVE_IRQ_EXIT_ON_IRQ_STACK
-       select ARCH_USE_CMPXCHG_LOCKREF if PPC64
-       select HAVE_ARCH_AUDITSYSCALL
-       select ARCH_SUPPORTS_ATOMIC_RMW
-       select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN
         select NO_BOOTMEM
-       select HAVE_GENERIC_RCU_GUP
-       select HAVE_PERF_EVENTS_NMI if PPC64
-       select HAVE_NMI if PERF_EVENTS
-       select EDAC_SUPPORT
-       select EDAC_ATOMIC_SCRUB
-       select ARCH_HAS_DMA_SET_COHERENT_MASK
-       select ARCH_HAS_DEVMEM_IS_ALLOWED
-       select HAVE_ARCH_SECCOMP_FILTER
-       select ARCH_HAS_UBSAN_SANITIZE_ALL
-       select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
-       select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS
-       select GENERIC_CPU_AUTOPROBE
-       select HAVE_VIRT_CPU_ACCOUNTING
-       select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE
-       select HAVE_ARCH_HARDENED_USERCOPY
-       select HAVE_KERNEL_GZIP
-       select HAVE_CONTEXT_TRACKING if PPC64
+       select OF
+       select OF_EARLY_FLATTREE
+       select OF_RESERVED_MEM
+       select OLD_SIGACTION                    if PPC32
+       select OLD_SIGSUSPEND
+       select SPARSE_IRQ
+       select SYSCTL_EXCEPTION_TRACE
+       select VIRT_TO_BUS                      if !PPC64
+       #
+       # Please keep this list sorted alphabetically.
+       #
  
  config GENERIC_CSUM
         def_bool n
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile

index 31286fa7873c1df915814b9bc62156b140006cd2..19b0d1a819593081bc22164b9da26188c613bca8 100644 (file)
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -72,8 +72,15 @@ GNUTARGET    := powerpc
  MULTIPLEWORD   := -mmultiple
  endif
  
-cflags-$(CONFIG_CPU_BIG_ENDIAN)                += $(call cc-option,-mbig-endian)
+ifdef CONFIG_PPC64
+cflags-$(CONFIG_CPU_BIG_ENDIAN)                += $(call cc-option,-mabi=elfv1)
+cflags-$(CONFIG_CPU_BIG_ENDIAN)                += $(call cc-option,-mcall-aixdesc)
+aflags-$(CONFIG_CPU_BIG_ENDIAN)                += $(call cc-option,-mabi=elfv1)
+aflags-$(CONFIG_CPU_LITTLE_ENDIAN)     += -mabi=elfv2
+endif
+
  cflags-$(CONFIG_CPU_LITTLE_ENDIAN)     += -mlittle-endian
+cflags-$(CONFIG_CPU_BIG_ENDIAN)                += $(call cc-option,-mbig-endian)
  ifneq ($(cc-name),clang)
    cflags-$(CONFIG_CPU_LITTLE_ENDIAN)   += -mno-strict-align
  endif
@@ -113,7 +120,9 @@ ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
  CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc))
  AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2)
  else
+CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1)
  CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc)
+AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1)
  endif
  CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc))
  CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions)
diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S

index 861e72109df2da0b54c98a94584b8b4ff853026b..f080abfc2f83fbd1e7d63846904a3a21ad820cee 100644 (file)
--- a/arch/powerpc/boot/zImage.lds.S
+++ b/arch/powerpc/boot/zImage.lds.S
@@ -68,6 +68,7 @@ SECTIONS
    }
  
  #ifdef CONFIG_PPC64_BOOT_WRAPPER
+  . = ALIGN(256);
    .got :
    {
      __toc_start = .;
diff --git a/arch/powerpc/configs/85xx-hw.config b/arch/powerpc/configs/85xx-hw.config

index 528ff0e714e6855a1e0450074ff4a7e61b57f036..c03d0fb166658720745aafc29af21bfbd22db5f1 100644 (file)
--- a/arch/powerpc/configs/85xx-hw.config
+++ b/arch/powerpc/configs/85xx-hw.config
@@ -16,9 +16,8 @@ CONFIG_DAVICOM_PHY=y
  CONFIG_DMADEVICES=y
  CONFIG_E1000E=y
  CONFIG_E1000=y
-CONFIG_EDAC_MM_EDAC=y
-CONFIG_EDAC_MPC85XX=y
  CONFIG_EDAC=y
+CONFIG_EDAC_MPC85XX=y
  CONFIG_EEPROM_AT24=y
  CONFIG_EEPROM_LEGACY=y
  CONFIG_FB_FSL_DIU=y
diff --git a/arch/powerpc/configs/85xx/ge_imp3a_defconfig b/arch/powerpc/configs/85xx/ge_imp3a_defconfig

index c79283be5680dc2b2037185af565b08590b5ba9a..a917f7afb4f9bd7af7bcf3d17ec7ad03a7150049 100644 (file)
--- a/arch/powerpc/configs/85xx/ge_imp3a_defconfig
+++ b/arch/powerpc/configs/85xx/ge_imp3a_defconfig
@@ -155,7 +155,6 @@ CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
  CONFIG_USB_OHCI_HCD_PPC_OF_LE=y
  CONFIG_USB_STORAGE=y
  CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=y
  CONFIG_EDAC_MPC85XX=y
  CONFIG_RTC_CLASS=y
  # CONFIG_RTC_INTF_PROC is not set
diff --git a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig

index dbd961de251e9195f6192eada9c952e2aba6ea0a..72900b84d3e05ae332fca49c0b2de241f578819f 100644 (file)
--- a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
+++ b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
@@ -116,7 +116,6 @@ CONFIG_LEDS_TRIGGERS=y
  CONFIG_LEDS_TRIGGER_TIMER=y
  CONFIG_LEDS_TRIGGER_HEARTBEAT=y
  CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=y
  CONFIG_RTC_CLASS=y
  CONFIG_RTC_DRV_DS1307=y
  CONFIG_RTC_DRV_CMOS=y
diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig

index 2d7fcbe047ac5b9699575e2bdd87b5f0c68f7480..aa564599e36885f2eadfbbf0b45599ae84d4f33e 100644 (file)
--- a/arch/powerpc/configs/cell_defconfig
+++ b/arch/powerpc/configs/cell_defconfig
@@ -179,7 +179,6 @@ CONFIG_INFINIBAND_MTHCA=m
  CONFIG_INFINIBAND_IPOIB=m
  CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=y
  CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=y
  CONFIG_EDAC_CELL=y
  CONFIG_UIO=m
  CONFIG_EXT2_FS=y
diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig

index 5553c5ce4274d3febcf0b1ad01ec7f163fe5ee1e..fe43ff47bd2f217631b98ec0851c62d7ef91fe10 100644 (file)
--- a/arch/powerpc/configs/pasemi_defconfig
+++ b/arch/powerpc/configs/pasemi_defconfig
@@ -142,7 +142,6 @@ CONFIG_USB_UHCI_HCD=y
  CONFIG_USB_SL811_HCD=y
  CONFIG_USB_STORAGE=y
  CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=y
  CONFIG_EDAC_PASEMI=y
  CONFIG_RTC_CLASS=y
  CONFIG_RTC_DRV_DS1307=y
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig

index 4f1288b04303d032f21a994399dcd9c04b070930..f2e03f0320412442f0843ec78eea43a2a673a0f8 100644 (file)
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -262,7 +262,6 @@ CONFIG_INFINIBAND_IPOIB_CM=y
  CONFIG_INFINIBAND_SRP=m
  CONFIG_INFINIBAND_ISER=m
  CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=y
  CONFIG_EDAC_PASEMI=y
  CONFIG_RTC_CLASS=y
  CONFIG_RTC_DRV_DS1307=y
diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig

index 11a3473f9e2ecf6674fc98d0e0ed424b88211340..6340e6c53c5446b5abb043ab1e185ab26ca650d9 100644 (file)
--- a/arch/powerpc/configs/ppc64e_defconfig
+++ b/arch/powerpc/configs/ppc64e_defconfig
@@ -173,7 +173,6 @@ CONFIG_INFINIBAND_MTHCA=m
  CONFIG_INFINIBAND_IPOIB=m
  CONFIG_INFINIBAND_ISER=m
  CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=y
  CONFIG_RTC_CLASS=y
  CONFIG_RTC_DRV_DS1307=y
  CONFIG_FS_DAX=y
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig

index 1d2d69dd6409036565b4d1e9bfbb0a81f37da7e9..18d0d60dadbf126fc10fcd40eb65edb2fa749775 100644 (file)
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -988,8 +988,7 @@ CONFIG_LEDS_TRIGGER_BACKLIGHT=m
  CONFIG_LEDS_TRIGGER_DEFAULT_ON=m
  CONFIG_ACCESSIBILITY=y
  CONFIG_A11Y_BRAILLE_CONSOLE=y
-CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=m
+CONFIG_EDAC=m
  CONFIG_RTC_CLASS=y
  # CONFIG_RTC_HCTOSYS is not set
  CONFIG_RTC_DRV_DS1307=m
diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c

index 9fa046d56ebadd6ad25e62b5a29a853b123cd30a..f058e0c3e4d4b4d1e22b84973ad8cc054958a3c2 100644 (file)
--- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c
+++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
@@ -33,10 +33,13 @@ static u32 crc32c_vpmsum(u32 crc, unsigned char const *p, size_t len)
         }
  
         if (len & ~VMX_ALIGN_MASK) {
+               preempt_disable();
                 pagefault_disable();
                 enable_kernel_altivec();
                 crc = __crc32c_vpmsum(crc, p, len & ~VMX_ALIGN_MASK);
+               disable_kernel_altivec();
                 pagefault_enable();
+               preempt_enable();
         }
  
         tail = len & VMX_ALIGN_MASK;
@@ -52,7 +55,7 @@ static int crc32c_vpmsum_cra_init(struct crypto_tfm *tfm)
  {
         u32 *key = crypto_tfm_ctx(tfm);
  
-       *key = 0;
+       *key = ~0;
  
         return 0;
  }
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h

index 73eb794d6163811c45729984f0d9fb06bbde31a9..bc5fdfd227886aa2cb359656faf2eef5eb6574b7 100644 (file)
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -51,6 +51,10 @@
  #define PPC_BIT(bit)           (1UL << PPC_BITLSHIFT(bit))
  #define PPC_BITMASK(bs, be)    ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
  
+/* Put a PPC bit into a "normal" bit position */
+#define PPC_BITEXTRACT(bits, ppc_bit, dst_bit)                 \
+       ((((bits) >> PPC_BITLSHIFT(ppc_bit)) & 1) << (dst_bit))
+
  #include <asm/barrier.h>
  
  /* Macro for generating the ***_bits() functions */
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h

index 012223638815569bb424e58021f3f9a7196ff0f0..26ed228d4dc6b7dd089fc84142dbc8af6adb31f2 100644 (file)
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -1,6 +1,7 @@
  #ifndef _ASM_POWERPC_BOOK3S_32_PGTABLE_H
  #define _ASM_POWERPC_BOOK3S_32_PGTABLE_H
  
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopmd.h>
  
  #include <asm/book3s/32/hash.h>
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h

index 1eeeb72c70158aa07775444e2fe160e4ef15223b..8f4d41936e5a90986c679876f66bfd15339de102 100644 (file)
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1,9 +1,12 @@
  #ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
  #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
  
+#include <asm-generic/5level-fixup.h>
+
  #ifndef __ASSEMBLY__
  #include <linux/mmdebug.h>
  #endif
+
  /*
   * Common bits between hash and Radix page table
   */
@@ -347,23 +350,58 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
         __r;                                                    \
  })
  
+static inline int __pte_write(pte_t pte)
+{
+       return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE));
+}
+
+#ifdef CONFIG_NUMA_BALANCING
+#define pte_savedwrite pte_savedwrite
+static inline bool pte_savedwrite(pte_t pte)
+{
+       /*
+        * Saved write ptes are prot none ptes that don't have the
+        * privileged bit set. We mark prot none as one which has the
+        * present and privileged bits set and RWX cleared. To mark a
+        * protnone pte which used to have _PAGE_WRITE set we clear
+        * the privileged bit.
+        */
+       return !(pte_raw(pte) & cpu_to_be64(_PAGE_RWX | _PAGE_PRIVILEGED));
+}
+#else
+#define pte_savedwrite pte_savedwrite
+static inline bool pte_savedwrite(pte_t pte)
+{
+       return false;
+}
+#endif
+
+static inline int pte_write(pte_t pte)
+{
+       return __pte_write(pte) || pte_savedwrite(pte);
+}
+
  #define __HAVE_ARCH_PTEP_SET_WRPROTECT
  static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
                                       pte_t *ptep)
  {
-       if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_WRITE)) == 0)
-               return;
-
-       pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
+       if (__pte_write(*ptep))
+               pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
+       else if (unlikely(pte_savedwrite(*ptep)))
+               pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0);
  }
  
  static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
                                            unsigned long addr, pte_t *ptep)
  {
-       if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_WRITE)) == 0)
-               return;
-
-       pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
+       /*
+        * We should not find protnone for hugetlb, but this completes the
+        * interface.
+        */
+       if (__pte_write(*ptep))
+               pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
+       else if (unlikely(pte_savedwrite(*ptep)))
+               pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 1);
  }
  
  #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
@@ -397,11 +435,6 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
         pte_update(mm, addr, ptep, ~0UL, 0, 0);
  }
  
-static inline int pte_write(pte_t pte)
-{
-       return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE));
-}
-
  static inline int pte_dirty(pte_t pte)
  {
         return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DIRTY));
@@ -465,19 +498,12 @@ static inline pte_t pte_clear_savedwrite(pte_t pte)
         VM_BUG_ON(!pte_protnone(pte));
         return __pte(pte_val(pte) | _PAGE_PRIVILEGED);
  }
-
-#define pte_savedwrite pte_savedwrite
-static inline bool pte_savedwrite(pte_t pte)
+#else
+#define pte_clear_savedwrite pte_clear_savedwrite
+static inline pte_t pte_clear_savedwrite(pte_t pte)
  {
-       /*
-        * Saved write ptes are prot none ptes that doesn't have
-        * privileged bit sit. We mark prot none as one which has
-        * present and pviliged bit set and RWX cleared. To mark
-        * protnone which used to have _PAGE_WRITE set we clear
-        * the privileged bit.
-        */
-       VM_BUG_ON(!pte_protnone(pte));
-       return !(pte_raw(pte) & cpu_to_be64(_PAGE_RWX | _PAGE_PRIVILEGED));
+       VM_WARN_ON(1);
+       return __pte(pte_val(pte) & ~_PAGE_WRITE);
  }
  #endif /* CONFIG_NUMA_BALANCING */
  
@@ -506,6 +532,8 @@ static inline unsigned long pte_pfn(pte_t pte)
  /* Generic modifiers for PTE bits */
  static inline pte_t pte_wrprotect(pte_t pte)
  {
+       if (unlikely(pte_savedwrite(pte)))
+               return pte_clear_savedwrite(pte);
         return __pte(pte_val(pte) & ~_PAGE_WRITE);
  }
  
@@ -926,6 +954,7 @@ static inline int pmd_protnone(pmd_t pmd)
  
  #define __HAVE_ARCH_PMD_WRITE
  #define pmd_write(pmd)         pte_write(pmd_pte(pmd))
+#define __pmd_write(pmd)       __pte_write(pmd_pte(pmd))
  #define pmd_savedwrite(pmd)    pte_savedwrite(pmd_pte(pmd))
  
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -982,11 +1011,10 @@ static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
  static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
                                       pmd_t *pmdp)
  {
-
-       if ((pmd_raw(*pmdp) & cpu_to_be64(_PAGE_WRITE)) == 0)
-               return;
-
-       pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
+       if (__pmd_write((*pmdp)))
+               pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
+       else if (unlikely(pmd_savedwrite(*pmdp)))
+               pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED);
  }
  
  static inline int pmd_trans_huge(pmd_t pmd)
diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h

index 4e63787dc3becfd6c9b832f50a8c2e1367bd4187..842124b199b5859f6d0f61cb6b7c09ed08854d96 100644 (file)
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -112,7 +112,7 @@ static inline __wsum csum_add(__wsum csum, __wsum addend)
  
  #ifdef __powerpc64__
         res += (__force u64)addend;
-       return (__force __wsum)((u32)res + (res >> 32));
+       return (__force __wsum) from64to32(res);
  #else
         asm("addc %0,%0,%1;"
             "addze %0,%0;"
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h

index fd321eb423cb44fef259cf47547a05ee1cc1f6a8..155731557c9bc08673881520c13d6db825fd91b0 100644 (file)
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -70,8 +70,8 @@ static inline void report_invalid_psscr_val(u64 psscr_val, int err)
         std     r0,0(r1);                                       \
         ptesync;                                                \
         ld      r0,0(r1);                                       \
-1:     cmpd    cr0,r0,r0;                                      \
-       bne     1b;                                             \
+236:   cmpd    cr0,r0,r0;                                      \
+       bne     236b;                                           \
         IDLE_INST;                                              \
  
  #define        IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST)                   \
diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h

index 93b9b84568e8175e4010b6544bb490685730408f..09bde6e34f5d524bd7b172f42b25484b1cca3f9d 100644 (file)
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -144,8 +144,8 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
  #define ARCH_DLINFO_CACHE_GEOMETRY                                     \
         NEW_AUX_ENT(AT_L1I_CACHESIZE, ppc64_caches.l1i.size);           \
         NEW_AUX_ENT(AT_L1I_CACHEGEOMETRY, get_cache_geometry(l1i));     \
-       NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1i.size);           \
-       NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1i));     \
+       NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1d.size);           \
+       NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1d));     \
         NEW_AUX_ENT(AT_L2_CACHESIZE, ppc64_caches.l2.size);             \
         NEW_AUX_ENT(AT_L2_CACHEGEOMETRY, get_cache_geometry(l2));       \
         NEW_AUX_ENT(AT_L3_CACHESIZE, ppc64_caches.l3.size);             \
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h

index 14752eee3d0c44816a61b395970186c1e56d4a21..ed3beadd2cc515d1b8a99b4836b3f06c1a97a87c 100644 (file)
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -236,9 +236,9 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
         mtctr   reg;                                                    \
         bctr
  
-#define BRANCH_LINK_TO_FAR(reg, label)                                 \
-       __LOAD_FAR_HANDLER(reg, label);                                 \
-       mtctr   reg;                                                    \
+#define BRANCH_LINK_TO_FAR(label)                                      \
+       __LOAD_FAR_HANDLER(r12, label);                                 \
+       mtctr   r12;                                                    \
         bctrl
  
  /*
@@ -265,7 +265,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
  #define BRANCH_TO_COMMON(reg, label)                                   \
         b       label
  
-#define BRANCH_LINK_TO_FAR(reg, label)                                 \
+#define BRANCH_LINK_TO_FAR(label)                                      \
         bl      label
  
  #define BRANCH_TO_KVM(reg, label)                                      \
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h

index f97d8cb6bdf64fd8e147035d659b71016c33ecd0..ed62efe01e49ed1a2e37c6bbd6efbf41fed3e925 100644 (file)
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -66,6 +66,55 @@
  
  #define P8_DSISR_MC_SLB_ERRORS         (P7_DSISR_MC_SLB_ERRORS | \
                                          P8_DSISR_MC_ERAT_MULTIHIT_SEC)
+
+/*
+ * Machine Check bits on power9
+ */
+#define P9_SRR1_MC_LOADSTORE(srr1)     (((srr1) >> PPC_BITLSHIFT(42)) & 1)
+
+#define P9_SRR1_MC_IFETCH(srr1)        (       \
+       PPC_BITEXTRACT(srr1, 45, 0) |   \
+       PPC_BITEXTRACT(srr1, 44, 1) |   \
+       PPC_BITEXTRACT(srr1, 43, 2) |   \
+       PPC_BITEXTRACT(srr1, 36, 3) )
+
+/* 0 is reserved */
+#define P9_SRR1_MC_IFETCH_UE                           1
+#define P9_SRR1_MC_IFETCH_SLB_PARITY                   2
+#define P9_SRR1_MC_IFETCH_SLB_MULTIHIT                 3
+#define P9_SRR1_MC_IFETCH_ERAT_MULTIHIT                        4
+#define P9_SRR1_MC_IFETCH_TLB_MULTIHIT                 5
+#define P9_SRR1_MC_IFETCH_UE_TLB_RELOAD                        6
+/* 7 is reserved */
+#define P9_SRR1_MC_IFETCH_LINK_TIMEOUT                 8
+#define P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT       9
+/* 10 ? */
+#define P9_SRR1_MC_IFETCH_RA                   11
+#define P9_SRR1_MC_IFETCH_RA_TABLEWALK         12
+#define P9_SRR1_MC_IFETCH_RA_ASYNC_STORE               13
+#define P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT     14
+#define P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN 15
+
+/* DSISR bits for machine check (On Power9) */
+#define P9_DSISR_MC_UE                                 (PPC_BIT(48))
+#define P9_DSISR_MC_UE_TABLEWALK                       (PPC_BIT(49))
+#define P9_DSISR_MC_LINK_LOAD_TIMEOUT                  (PPC_BIT(50))
+#define P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT             (PPC_BIT(51))
+#define P9_DSISR_MC_ERAT_MULTIHIT                      (PPC_BIT(52))
+#define P9_DSISR_MC_TLB_MULTIHIT_MFTLB                 (PPC_BIT(53))
+#define P9_DSISR_MC_USER_TLBIE                         (PPC_BIT(54))
+#define P9_DSISR_MC_SLB_PARITY_MFSLB                   (PPC_BIT(55))
+#define P9_DSISR_MC_SLB_MULTIHIT_MFSLB                 (PPC_BIT(56))
+#define P9_DSISR_MC_RA_LOAD                            (PPC_BIT(57))
+#define P9_DSISR_MC_RA_TABLEWALK                       (PPC_BIT(58))
+#define P9_DSISR_MC_RA_TABLEWALK_FOREIGN               (PPC_BIT(59))
+#define P9_DSISR_MC_RA_FOREIGN                         (PPC_BIT(60))
+
+/* SLB error bits */
+#define P9_DSISR_MC_SLB_ERRORS         (P9_DSISR_MC_ERAT_MULTIHIT | \
+                                        P9_DSISR_MC_SLB_PARITY_MFSLB | \
+                                        P9_DSISR_MC_SLB_MULTIHIT_MFSLB)
+
  enum MCE_Version {
         MCE_V1 = 1,
  };
@@ -93,6 +142,9 @@ enum MCE_ErrorType {
         MCE_ERROR_TYPE_SLB = 2,
         MCE_ERROR_TYPE_ERAT = 3,
         MCE_ERROR_TYPE_TLB = 4,
+       MCE_ERROR_TYPE_USER = 5,
+       MCE_ERROR_TYPE_RA = 6,
+       MCE_ERROR_TYPE_LINK = 7,
  };
  
  enum MCE_UeErrorType {
@@ -121,6 +173,32 @@ enum MCE_TlbErrorType {
         MCE_TLB_ERROR_MULTIHIT = 2,
  };
  
+enum MCE_UserErrorType {
+       MCE_USER_ERROR_INDETERMINATE = 0,
+       MCE_USER_ERROR_TLBIE = 1,
+};
+
+enum MCE_RaErrorType {
+       MCE_RA_ERROR_INDETERMINATE = 0,
+       MCE_RA_ERROR_IFETCH = 1,
+       MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH = 2,
+       MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN = 3,
+       MCE_RA_ERROR_LOAD = 4,
+       MCE_RA_ERROR_STORE = 5,
+       MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 6,
+       MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN = 7,
+       MCE_RA_ERROR_LOAD_STORE_FOREIGN = 8,
+};
+
+enum MCE_LinkErrorType {
+       MCE_LINK_ERROR_INDETERMINATE = 0,
+       MCE_LINK_ERROR_IFETCH_TIMEOUT = 1,
+       MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT = 2,
+       MCE_LINK_ERROR_LOAD_TIMEOUT = 3,
+       MCE_LINK_ERROR_STORE_TIMEOUT = 4,
+       MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT = 5,
+};
+
  struct machine_check_event {
         enum MCE_Version        version:8;      /* 0x00 */
         uint8_t                 in_use;         /* 0x01 */
@@ -166,6 +244,30 @@ struct machine_check_event {
                         uint64_t        effective_address;
                         uint8_t         reserved_2[16];
                 } tlb_error;
+
+               struct {
+                       enum MCE_UserErrorType user_error_type:8;
+                       uint8_t         effective_address_provided;
+                       uint8_t         reserved_1[6];
+                       uint64_t        effective_address;
+                       uint8_t         reserved_2[16];
+               } user_error;
+
+               struct {
+                       enum MCE_RaErrorType ra_error_type:8;
+                       uint8_t         effective_address_provided;
+                       uint8_t         reserved_1[6];
+                       uint64_t        effective_address;
+                       uint8_t         reserved_2[16];
+               } ra_error;
+
+               struct {
+                       enum MCE_LinkErrorType link_error_type:8;
+                       uint8_t         effective_address_provided;
+                       uint8_t         reserved_1[6];
+                       uint64_t        effective_address;
+                       uint8_t         reserved_2[16];
+               } link_error;
         } u;
  };
  
@@ -176,8 +278,12 @@ struct mce_error_info {
                 enum MCE_SlbErrorType slb_error_type:8;
                 enum MCE_EratErrorType erat_error_type:8;
                 enum MCE_TlbErrorType tlb_error_type:8;
+               enum MCE_UserErrorType user_error_type:8;
+               enum MCE_RaErrorType ra_error_type:8;
+               enum MCE_LinkErrorType link_error_type:8;
         } u;
-       uint8_t         reserved[2];
+       enum MCE_Severity       severity:8;
+       enum MCE_Initiator      initiator:8;
  };
  
  #define MAX_MC_EVT     100
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h

index ba9921bf202e0c7f2d8579dfc6f31f25ad7cebd7..5134ade2e850162c70d288c1b293b38a7aae6a1c 100644 (file)
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -1,6 +1,7 @@
  #ifndef _ASM_POWERPC_NOHASH_32_PGTABLE_H
  #define _ASM_POWERPC_NOHASH_32_PGTABLE_H
  
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopmd.h>
  
  #ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h

index d0db98793dd83d0ddf5e8d60be2688e697e74491..9f4de0a1035efb3e6d615a86f6cab1e29362d339 100644 (file)
--- a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h
@@ -1,5 +1,8 @@
  #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H
  #define _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H
+
+#include <asm-generic/5level-fixup.h>
+
  /*
   * Entries per page directory level.  The PTE level must use a 64b record
   * for each page table entry.  The PMD and PGD level use a 32b record for
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h

index 55b28ef3409af5494a521b8a948966947555a84d..1facb584dd2962faf8ff334b9ca90e2840ee6d1a 100644 (file)
--- a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h
@@ -1,6 +1,7 @@
  #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H
  #define _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H
  
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopud.h>
  
  
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h

index 0cd8a3852763292eabe905b33960f888e875c978..e5805ad78e127ba456ba305abf09d30fdc38c5f4 100644 (file)
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -230,7 +230,7 @@ static inline int hugepd_ok(hugepd_t hpd)
         return ((hpd_val(hpd) & 0x4) != 0);
  #else
         /* We clear the top bit to indicate hugepd */
-       return ((hpd_val(hpd) & PD_HUGE) ==  0);
+       return (hpd_val(hpd) && (hpd_val(hpd) & PD_HUGE) == 0);
  #endif
  }
  
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h

index d99bd442aacbe5747f605cd356de6aa3ae58e53a..e7d6d86563eeda924598b1079a491d0e5945c566 100644 (file)
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -284,6 +284,13 @@
  #define PPC_INST_BRANCH_COND           0x40800000
  #define PPC_INST_LBZCIX                        0x7c0006aa
  #define PPC_INST_STBCIX                        0x7c0007aa
+#define PPC_INST_LWZX                  0x7c00002e
+#define PPC_INST_LFSX                  0x7c00042e
+#define PPC_INST_STFSX                 0x7c00052e
+#define PPC_INST_LFDX                  0x7c0004ae
+#define PPC_INST_STFDX                 0x7c0005ae
+#define PPC_INST_LVX                   0x7c0000ce
+#define PPC_INST_STVX                  0x7c0001ce
  
  /* macros to insert fields into opcodes */
  #define ___PPC_RA(a)   (((a) & 0x1f) << 16)
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h

index 4a90634e83223c25bdf839f54ca9b3f91cb76330..35c00d7a0cf81a4773d0fad7dc2747923cd2c643 100644 (file)
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -160,12 +160,18 @@ struct of_drconf_cell {
  #define OV5_PFO_HW_ENCR                0x1120  /* PFO Encryption Accelerator */
  #define OV5_SUB_PROCESSORS     0x1501  /* 1,2,or 4 Sub-Processors supported */
  #define OV5_XIVE_EXPLOIT       0x1701  /* XIVE exploitation supported */
-#define OV5_MMU_RADIX_300      0x1880  /* ISA v3.00 radix MMU supported */
-#define OV5_MMU_HASH_300       0x1840  /* ISA v3.00 hash MMU supported */
-#define OV5_MMU_SEGM_RADIX     0x1820  /* radix mode (no segmentation) */
-#define OV5_MMU_PROC_TBL       0x1810  /* hcall selects SLB or proc table */
-#define OV5_MMU_SLB            0x1800  /* always use SLB */
-#define OV5_MMU_GTSE           0x1808  /* Guest translation shootdown */
+/* MMU Base Architecture */
+#define OV5_MMU_SUPPORT                0x18C0  /* MMU Mode Support Mask */
+#define OV5_MMU_HASH           0x1800  /* Hash MMU Only */
+#define OV5_MMU_RADIX          0x1840  /* Radix MMU Only */
+#define OV5_MMU_EITHER         0x1880  /* Hash or Radix Supported */
+#define OV5_MMU_DYNAMIC                0x18C0  /* Hash or Radix Can Switch Later */
+#define OV5_NMMU               0x1820  /* Nest MMU Available */
+/* Hash Table Extensions */
+#define OV5_HASH_SEG_TBL       0x1980  /* In Memory Segment Tables Available */
+#define OV5_HASH_GTSE          0x1940  /* Guest Translation Shoot Down Avail */
+/* Radix Table Extensions */
+#define OV5_RADIX_GTSE         0x1A40  /* Guest Translation Shoot Down Avail */
  
  /* Option Vector 6: IBM PAPR hints */
  #define OV6_LINUX              0x02    /* Linux is our OS */
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h

index 4b369d83fe9ce1ea72b3f2a93590fb132d534512..1c9470881c4abe249fd943294c99e94f1916893b 100644 (file)
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -387,3 +387,4 @@ SYSCALL(copy_file_range)
  COMPAT_SYS_SPU(preadv2)
  COMPAT_SYS_SPU(pwritev2)
  SYSCALL(kexec_file_load)
+SYSCALL(statx)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h

index eb1acee91a2034c30d4277fe040cd797279f13b4..9ba11dbcaca98f88c53ee46c3bd009b22f13df01 100644 (file)
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -12,7 +12,7 @@
  #include <uapi/asm/unistd.h>
  
  
-#define NR_syscalls            383
+#define NR_syscalls            384
  
  #define __NR__exit __NR_exit
  
diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h

index 2f26335a3c42a8141d29156f07105ca82761a98c..b85f1422885746d918131216fb45fd76bb99338a 100644 (file)
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -393,5 +393,6 @@
  #define __NR_preadv2           380
  #define __NR_pwritev2          381
  #define __NR_kexec_file_load   382
+#define __NR_statx             383
  
  #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c

index cbc7c42cdb7494be1fc10d67fcb49381f3b40d05..ec7a8b099dd960b43896ad2cd50b1e5f87f420f7 100644 (file)
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -807,14 +807,25 @@ int fix_alignment(struct pt_regs *regs)
         nb = aligninfo[instr].len;
         flags = aligninfo[instr].flags;
  
-       /* ldbrx/stdbrx overlap lfs/stfs in the DSISR unfortunately */
-       if (IS_XFORM(instruction) && ((instruction >> 1) & 0x3ff) == 532) {
-               nb = 8;
-               flags = LD+SW;
-       } else if (IS_XFORM(instruction) &&
-                  ((instruction >> 1) & 0x3ff) == 660) {
-               nb = 8;
-               flags = ST+SW;
+       /*
+        * Handle some cases which give overlaps in the DSISR values.
+        */
+       if (IS_XFORM(instruction)) {
+               switch (get_xop(instruction)) {
+               case 532:       /* ldbrx */
+                       nb = 8;
+                       flags = LD+SW;
+                       break;
+               case 660:       /* stdbrx */
+                       nb = 8;
+                       flags = ST+SW;
+                       break;
+               case 20:        /* lwarx */
+               case 84:        /* ldarx */
+               case 116:       /* lharx */
+               case 276:       /* lqarx */
+                       return 0;       /* not emulated ever */
+               }
         }
  
         /* Byteswap little endian loads and stores */
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c

index bb7a1890aeb7fb8e95cf8ca0c7aa53765e12eb45..e79b9daa873c1874485021676426ea47196a5a68 100644 (file)
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -77,6 +77,7 @@ extern void __flush_tlb_power8(unsigned int action);
  extern void __flush_tlb_power9(unsigned int action);
  extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
  extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
+extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
  #endif /* CONFIG_PPC64 */
  #if defined(CONFIG_E500)
  extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
@@ -540,6 +541,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
                 .cpu_setup              = __setup_cpu_power9,
                 .cpu_restore            = __restore_cpu_power9,
                 .flush_tlb              = __flush_tlb_power9,
+               .machine_check_early    = __machine_check_early_realmode_p9,
                 .platform               = "power9",
         },
         {       /* Power9 */
@@ -559,6 +561,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
                 .cpu_setup              = __setup_cpu_power9,
                 .cpu_restore            = __restore_cpu_power9,
                 .flush_tlb              = __flush_tlb_power9,
+               .machine_check_early    = __machine_check_early_realmode_p9,
                 .platform               = "power9",
         },
         {       /* Cell Broadband Engine */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S

index 6432d4bf08c889c128803cc5505546122118b964..767ef6d68c9ebf379dad6f065bcc0279a3021bb9 100644 (file)
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -689,7 +689,7 @@ resume_kernel:
  
         addi    r8,r1,INT_FRAME_SIZE    /* Get the kprobed function entry */
  
-       lwz     r3,GPR1(r1)
+       ld      r3,GPR1(r1)
         subi    r3,r3,INT_FRAME_SIZE    /* dst: Allocate a trampoline exception frame */
         mr      r4,r1                   /* src:  current exception frame */
         mr      r1,r3                   /* Reroute the trampoline frame to r1 */
@@ -703,8 +703,8 @@ resume_kernel:
         addi    r6,r6,8
         bdnz    2b
  
-       /* Do real store operation to complete stwu */
-       lwz     r5,GPR1(r1)
+       /* Do real store operation to complete stdu */
+       ld      r5,GPR1(r1)
         std     r8,0(r5)
  
         /* Clear _TIF_EMULATE_STACK_STORE flag */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S

index 857bf7c5b9465aff743b3e62094cc47d808c8c92..6353019966e6a3bd15afa32d7318c47810a1d8e9 100644 (file)
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -982,7 +982,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
         EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
         EXCEPTION_PROLOG_COMMON_3(0xe60)
         addi    r3,r1,STACK_FRAME_OVERHEAD
-       BRANCH_LINK_TO_FAR(r4, hmi_exception_realmode)
+       BRANCH_LINK_TO_FAR(hmi_exception_realmode) /* Function call ABI */
         /* Windup the stack. */
         /* Move original HSRR0 and HSRR1 into the respective regs */
         ld      r9,_MSR(r1)
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S

index 5f61cc0349c063f1abfd736c13104559cab9d2b0..6fd08219248db7485a6d5c8227dee83664d29b38 100644 (file)
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -276,19 +276,21 @@ power_enter_stop:
   */
         andis.   r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
         clrldi   r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
-       bne      1f
+       bne      .Lhandle_esl_ec_set
         IDLE_STATE_ENTER_SEQ(PPC_STOP)
         li      r3,0  /* Since we didn't lose state, return 0 */
         b       pnv_wakeup_noloss
+
+.Lhandle_esl_ec_set:
  /*
   * Check if the requested state is a deep idle state.
   */
-1:     LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
+       LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
         ld      r4,ADDROFF(pnv_first_deep_stop_state)(r5)
         cmpd    r3,r4
-       bge     2f
+       bge     .Lhandle_deep_stop
         IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP)
-2:
+.Lhandle_deep_stop:
  /*
   * Entering deep idle state.
   * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to
@@ -447,9 +449,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
  _GLOBAL(pnv_wakeup_tb_loss)
         ld      r1,PACAR1(r13)
         /*
-        * Before entering any idle state, the NVGPRs are saved in the stack
-        * and they are restored before switching to the process context. Hence
-        * until they are restored, they are free to be used.
+        * Before entering any idle state, the NVGPRs are saved in the stack.
+        * If there was a state loss, or PACA_NAPSTATELOST was set, then the
+        * NVGPRs are restored. If we are here, it is likely that state is lost,
+        * but not guaranteed -- neither ISA207 nor ISA300 tests to reach
+        * here are the same as the test to restore NVGPRS:
+        * PACA_THREAD_IDLE_STATE test for ISA207, PSSCR test for ISA300,
+        * and SRR1 test for restoring NVGPRs.
+        *
+        * We are about to clobber NVGPRs now, so set NAPSTATELOST to
+        * guarantee they will always be restored. This might be tightened
+        * with careful reading of specs (particularly for ISA300) but this
+        * is already a slow wakeup path and it's simpler to be safe.
+        */
+       li      r0,1
+       stb     r0,PACA_NAPSTATELOST(r13)
+
+       /*
          *
          * Save SRR1 and LR in NVGPRs as they might be clobbered in
          * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c

index c6923ff451311bfade14e7f68888f85bb69f7176..a1475e6aef3a519c70824d4dd432748097a7965e 100644 (file)
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -58,6 +58,15 @@ static void mce_set_error_info(struct machine_check_event *mce,
         case MCE_ERROR_TYPE_TLB:
                 mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
                 break;
+       case MCE_ERROR_TYPE_USER:
+               mce->u.user_error.user_error_type = mce_err->u.user_error_type;
+               break;
+       case MCE_ERROR_TYPE_RA:
+               mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
+               break;
+       case MCE_ERROR_TYPE_LINK:
+               mce->u.link_error.link_error_type = mce_err->u.link_error_type;
+               break;
         case MCE_ERROR_TYPE_UNKNOWN:
         default:
                 break;
@@ -90,13 +99,14 @@ void save_mce_event(struct pt_regs *regs, long handled,
         mce->gpr3 = regs->gpr[3];
         mce->in_use = 1;
  
-       mce->initiator = MCE_INITIATOR_CPU;
         /* Mark it recovered if we have handled it and MSR(RI=1). */
         if (handled && (regs->msr & MSR_RI))
                 mce->disposition = MCE_DISPOSITION_RECOVERED;
         else
                 mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
-       mce->severity = MCE_SEV_ERROR_SYNC;
+
+       mce->initiator = mce_err->initiator;
+       mce->severity = mce_err->severity;
  
         /*
          * Populate the mce error_type and type-specific error_type.
@@ -115,6 +125,15 @@ void save_mce_event(struct pt_regs *regs, long handled,
         } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
                 mce->u.erat_error.effective_address_provided = true;
                 mce->u.erat_error.effective_address = addr;
+       } else if (mce->error_type == MCE_ERROR_TYPE_USER) {
+               mce->u.user_error.effective_address_provided = true;
+               mce->u.user_error.effective_address = addr;
+       } else if (mce->error_type == MCE_ERROR_TYPE_RA) {
+               mce->u.ra_error.effective_address_provided = true;
+               mce->u.ra_error.effective_address = addr;
+       } else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
+               mce->u.link_error.effective_address_provided = true;
+               mce->u.link_error.effective_address = addr;
         } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
                 mce->u.ue_error.effective_address_provided = true;
                 mce->u.ue_error.effective_address = addr;
@@ -239,6 +258,29 @@ void machine_check_print_event_info(struct machine_check_event *evt)
                 "Parity",
                 "Multihit",
         };
+       static const char *mc_user_types[] = {
+               "Indeterminate",
+               "tlbie(l) invalid",
+       };
+       static const char *mc_ra_types[] = {
+               "Indeterminate",
+               "Instruction fetch (bad)",
+               "Page table walk ifetch (bad)",
+               "Page table walk ifetch (foreign)",
+               "Load (bad)",
+               "Store (bad)",
+               "Page table walk Load/Store (bad)",
+               "Page table walk Load/Store (foreign)",
+               "Load/Store (foreign)",
+       };
+       static const char *mc_link_types[] = {
+               "Indeterminate",
+               "Instruction fetch (timeout)",
+               "Page table walk ifetch (timeout)",
+               "Load (timeout)",
+               "Store (timeout)",
+               "Page table walk Load/Store (timeout)",
+       };
  
         /* Print things out */
         if (evt->version != MCE_V1) {
@@ -315,6 +357,36 @@ void machine_check_print_event_info(struct machine_check_event *evt)
                         printk("%s    Effective address: %016llx\n",
                                level, evt->u.tlb_error.effective_address);
                 break;
+       case MCE_ERROR_TYPE_USER:
+               subtype = evt->u.user_error.user_error_type <
+                       ARRAY_SIZE(mc_user_types) ?
+                       mc_user_types[evt->u.user_error.user_error_type]
+                       : "Unknown";
+               printk("%s  Error type: User [%s]\n", level, subtype);
+               if (evt->u.user_error.effective_address_provided)
+                       printk("%s    Effective address: %016llx\n",
+                              level, evt->u.user_error.effective_address);
+               break;
+       case MCE_ERROR_TYPE_RA:
+               subtype = evt->u.ra_error.ra_error_type <
+                       ARRAY_SIZE(mc_ra_types) ?
+                       mc_ra_types[evt->u.ra_error.ra_error_type]
+                       : "Unknown";
+               printk("%s  Error type: Real address [%s]\n", level, subtype);
+               if (evt->u.ra_error.effective_address_provided)
+                       printk("%s    Effective address: %016llx\n",
+                              level, evt->u.ra_error.effective_address);
+               break;
+       case MCE_ERROR_TYPE_LINK:
+               subtype = evt->u.link_error.link_error_type <
+                       ARRAY_SIZE(mc_link_types) ?
+                       mc_link_types[evt->u.link_error.link_error_type]
+                       : "Unknown";
+               printk("%s  Error type: Link [%s]\n", level, subtype);
+               if (evt->u.link_error.effective_address_provided)
+                       printk("%s    Effective address: %016llx\n",
+                              level, evt->u.link_error.effective_address);
+               break;
         default:
         case MCE_ERROR_TYPE_UNKNOWN:
                 printk("%s  Error type: Unknown\n", level);
@@ -341,6 +413,18 @@ uint64_t get_mce_fault_addr(struct machine_check_event *evt)
                 if (evt->u.tlb_error.effective_address_provided)
                         return evt->u.tlb_error.effective_address;
                 break;
+       case MCE_ERROR_TYPE_USER:
+               if (evt->u.user_error.effective_address_provided)
+                       return evt->u.user_error.effective_address;
+               break;
+       case MCE_ERROR_TYPE_RA:
+               if (evt->u.ra_error.effective_address_provided)
+                       return evt->u.ra_error.effective_address;
+               break;
+       case MCE_ERROR_TYPE_LINK:
+               if (evt->u.link_error.effective_address_provided)
+                       return evt->u.link_error.effective_address;
+               break;
         default:
         case MCE_ERROR_TYPE_UNKNOWN:
                 break;
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c

index 7353991c4ecee6d8a6ecacdce4ab96815ffdfb12..763d6f58caa8ca140c8afb1260555b0ea1c1d2a0 100644 (file)
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -116,6 +116,51 @@ static void flush_and_reload_slb(void)
  }
  #endif
  
+static void flush_erat(void)
+{
+       asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
+}
+
+#define MCE_FLUSH_SLB 1
+#define MCE_FLUSH_TLB 2
+#define MCE_FLUSH_ERAT 3
+
+static int mce_flush(int what)
+{
+#ifdef CONFIG_PPC_STD_MMU_64
+       if (what == MCE_FLUSH_SLB) {
+               flush_and_reload_slb();
+               return 1;
+       }
+#endif
+       if (what == MCE_FLUSH_ERAT) {
+               flush_erat();
+               return 1;
+       }
+       if (what == MCE_FLUSH_TLB) {
+               if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
+                       cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
+                       return 1;
+               }
+       }
+
+       return 0;
+}
+
+static int mce_handle_flush_derrors(uint64_t dsisr, uint64_t slb, uint64_t tlb, uint64_t erat)
+{
+       if ((dsisr & slb) && mce_flush(MCE_FLUSH_SLB))
+               dsisr &= ~slb;
+       if ((dsisr & erat) && mce_flush(MCE_FLUSH_ERAT))
+               dsisr &= ~erat;
+       if ((dsisr & tlb) && mce_flush(MCE_FLUSH_TLB))
+               dsisr &= ~tlb;
+       /* Any other errors we don't understand? */
+       if (dsisr)
+               return 0;
+       return 1;
+}
+
  static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
  {
         long handled = 1;
@@ -281,6 +326,9 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs)
         long handled = 1;
         struct mce_error_info mce_error_info = { 0 };
  
+       mce_error_info.severity = MCE_SEV_ERROR_SYNC;
+       mce_error_info.initiator = MCE_INITIATOR_CPU;
+
         srr1 = regs->msr;
         nip = regs->nip;
  
@@ -352,6 +400,9 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs)
         long handled = 1;
         struct mce_error_info mce_error_info = { 0 };
  
+       mce_error_info.severity = MCE_SEV_ERROR_SYNC;
+       mce_error_info.initiator = MCE_INITIATOR_CPU;
+
         srr1 = regs->msr;
         nip = regs->nip;
  
@@ -372,3 +423,189 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs)
         save_mce_event(regs, handled, &mce_error_info, nip, addr);
         return handled;
  }
+
+/*
+ * POWER9 load/store machine check: attempt recovery by flushing whichever
+ * of the SLB, TLB or ERAT the DSISR bits point at.
+ *
+ * Returns the result of mce_handle_flush_derrors().
+ */
+static int mce_handle_derror_p9(struct pt_regs *regs)
+{
+       const uint64_t slb_bits = P9_DSISR_MC_SLB_PARITY_MFSLB |
+                                 P9_DSISR_MC_SLB_MULTIHIT_MFSLB;
+       const uint64_t tlb_bits = P9_DSISR_MC_TLB_MULTIHIT_MFTLB;
+       const uint64_t erat_bits = P9_DSISR_MC_ERAT_MULTIHIT;
+
+       return mce_handle_flush_derrors(regs->dsisr, slb_bits, tlb_bits,
+                                       erat_bits);
+}
+
+/*
+ * POWER9 instruction-fetch machine check: pick the flush that matches the
+ * SRR1 ifetch error code and perform it.
+ *
+ * Returns the result of mce_flush(), or 0 for codes that no flush can fix.
+ */
+static int mce_handle_ierror_p9(struct pt_regs *regs)
+{
+       int what;
+
+       switch (P9_SRR1_MC_IFETCH(regs->msr)) {
+       case P9_SRR1_MC_IFETCH_SLB_PARITY:
+       case P9_SRR1_MC_IFETCH_SLB_MULTIHIT:
+               what = MCE_FLUSH_SLB;
+               break;
+       case P9_SRR1_MC_IFETCH_TLB_MULTIHIT:
+               what = MCE_FLUSH_TLB;
+               break;
+       case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT:
+               what = MCE_FLUSH_ERAT;
+               break;
+       default:
+               return 0;
+       }
+
+       return mce_flush(what);
+}
+
+static void mce_get_derror_p9(struct pt_regs *regs,
+               struct mce_error_info *mce_err, uint64_t *addr)
+{      /*
+        * Decode regs->dsisr for a load/store machine check: fill in the
+        * severity, initiator and error type/subtype of @mce_err and store
+        * the address to report in *addr.
+        */
+       uint64_t dsisr = regs->dsisr;
+
+       mce_err->severity = MCE_SEV_ERROR_SYNC;
+       mce_err->initiator = MCE_INITIATOR_CPU;
+
+       if (dsisr & P9_DSISR_MC_USER_TLBIE)
+               *addr = regs->nip; /* user tlbie: report the instruction address */
+       else
+               *addr = regs->dar; /* otherwise report the data address register */
+
+       if (dsisr & P9_DSISR_MC_UE) { /* first matching bit wins; no match leaves error_type untouched */
+               mce_err->error_type = MCE_ERROR_TYPE_UE;
+               mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
+       } else if (dsisr & P9_DSISR_MC_UE_TABLEWALK) {
+               mce_err->error_type = MCE_ERROR_TYPE_UE;
+               mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
+       } else if (dsisr & P9_DSISR_MC_LINK_LOAD_TIMEOUT) {
+               mce_err->error_type = MCE_ERROR_TYPE_LINK;
+               mce_err->u.link_error_type = MCE_LINK_ERROR_LOAD_TIMEOUT;
+       } else if (dsisr & P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT) {
+               mce_err->error_type = MCE_ERROR_TYPE_LINK;
+               mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT;
+       } else if (dsisr & P9_DSISR_MC_ERAT_MULTIHIT) {
+               mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+               mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+       } else if (dsisr & P9_DSISR_MC_TLB_MULTIHIT_MFTLB) {
+               mce_err->error_type = MCE_ERROR_TYPE_TLB;
+               mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+       } else if (dsisr & P9_DSISR_MC_USER_TLBIE) {
+               mce_err->error_type = MCE_ERROR_TYPE_USER;
+               mce_err->u.user_error_type = MCE_USER_ERROR_TLBIE;
+       } else if (dsisr & P9_DSISR_MC_SLB_PARITY_MFSLB) {
+               mce_err->error_type = MCE_ERROR_TYPE_SLB;
+               mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
+       } else if (dsisr & P9_DSISR_MC_SLB_MULTIHIT_MFSLB) {
+               mce_err->error_type = MCE_ERROR_TYPE_SLB;
+               mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+       } else if (dsisr & P9_DSISR_MC_RA_LOAD) {
+               mce_err->error_type = MCE_ERROR_TYPE_RA;
+               mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD;
+       } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK) {
+               mce_err->error_type = MCE_ERROR_TYPE_RA;
+               mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
+       } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK_FOREIGN) {
+               mce_err->error_type = MCE_ERROR_TYPE_RA;
+               mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN;
+       } else if (dsisr & P9_DSISR_MC_RA_FOREIGN) {
+               mce_err->error_type = MCE_ERROR_TYPE_RA;
+               mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD_STORE_FOREIGN;
+       }
+}
+
+/*
+ * Decode the SRR1 ifetch error code of a POWER9 machine check.
+ *
+ * Fills in the severity, initiator and error type/subtype of @mce_err and
+ * stores regs->nip in *addr.  The two asynchronous store codes are
+ * reported with fatal severity, every other code (including unknown ones)
+ * as a synchronous error.  Unknown codes leave the error type untouched.
+ */
+static void mce_get_ierror_p9(struct pt_regs *regs,
+               struct mce_error_info *mce_err, uint64_t *addr)
+{
+       /* Defaults; the asynchronous store cases below raise the severity. */
+       mce_err->severity = MCE_SEV_ERROR_SYNC;
+       mce_err->initiator = MCE_INITIATOR_CPU;
+       *addr = regs->nip;
+
+       switch (P9_SRR1_MC_IFETCH(regs->msr)) {
+       case P9_SRR1_MC_IFETCH_UE:
+               mce_err->error_type = MCE_ERROR_TYPE_UE;
+               mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
+               break;
+       case P9_SRR1_MC_IFETCH_UE_TLB_RELOAD:
+               mce_err->error_type = MCE_ERROR_TYPE_UE;
+               mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
+               break;
+       case P9_SRR1_MC_IFETCH_SLB_PARITY:
+               mce_err->error_type = MCE_ERROR_TYPE_SLB;
+               mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
+               break;
+       case P9_SRR1_MC_IFETCH_SLB_MULTIHIT:
+               mce_err->error_type = MCE_ERROR_TYPE_SLB;
+               mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+               break;
+       case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT:
+               mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+               mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+               break;
+       case P9_SRR1_MC_IFETCH_TLB_MULTIHIT:
+               mce_err->error_type = MCE_ERROR_TYPE_TLB;
+               mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+               break;
+       case P9_SRR1_MC_IFETCH_LINK_TIMEOUT:
+               mce_err->error_type = MCE_ERROR_TYPE_LINK;
+               mce_err->u.link_error_type = MCE_LINK_ERROR_IFETCH_TIMEOUT;
+               break;
+       case P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT:
+               mce_err->error_type = MCE_ERROR_TYPE_LINK;
+               mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT;
+               break;
+       case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT:
+               mce_err->severity = MCE_SEV_FATAL;
+               mce_err->error_type = MCE_ERROR_TYPE_LINK;
+               mce_err->u.link_error_type = MCE_LINK_ERROR_STORE_TIMEOUT;
+               break;
+       case P9_SRR1_MC_IFETCH_RA:
+               mce_err->error_type = MCE_ERROR_TYPE_RA;
+               mce_err->u.ra_error_type = MCE_RA_ERROR_IFETCH;
+               break;
+       case P9_SRR1_MC_IFETCH_RA_TABLEWALK:
+               mce_err->error_type = MCE_ERROR_TYPE_RA;
+               mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH;
+               break;
+       case P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN:
+               mce_err->error_type = MCE_ERROR_TYPE_RA;
+               mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN;
+               break;
+       case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE:
+               mce_err->severity = MCE_SEV_FATAL;
+               mce_err->error_type = MCE_ERROR_TYPE_RA;
+               mce_err->u.ra_error_type = MCE_RA_ERROR_STORE;
+               break;
+       default:
+               break;
+       }
+}
+
+/*
+ * POWER9 early machine check handler.
+ *
+ * Depending on SRR1, the event is treated as a load/store or as an
+ * instruction-fetch error: recovery is attempted first, then the error is
+ * decoded into an mce_error_info.  For UE errors the recovery result is
+ * replaced by that of mce_handle_ue_error().  The event is recorded with
+ * save_mce_event().
+ *
+ * Returns the "handled" value that was recorded with the event.
+ */
+long __machine_check_early_realmode_p9(struct pt_regs *regs)
+{
+       struct mce_error_info mce_error_info = { 0 };
+       uint64_t nip = regs->nip;
+       uint64_t addr;
+       long handled;
+
+       if (!P9_SRR1_MC_LOADSTORE(regs->msr)) {
+               handled = mce_handle_ierror_p9(regs);
+               mce_get_ierror_p9(regs, &mce_error_info, &addr);
+       } else {
+               handled = mce_handle_derror_p9(regs);
+               mce_get_derror_p9(regs, &mce_error_info, &addr);
+       }
+
+       /* UE errors get their own handling, overriding the result above. */
+       if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
+               handled = mce_handle_ue_error(regs);
+
+       save_mce_event(regs, handled, &mce_error_info, nip, addr);
+       return handled;
+}
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S

index ae179cb1bb3c02a21b19e2793ea71697c658c7cf..c119044cad0d58e94bb0d4a7557ed27da8f7a390 100644 (file)
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -67,7 +67,7 @@ PPC64_CACHES:
   *   flush all bytes from start through stop-1 inclusive
   */
  
-_GLOBAL(flush_icache_range)
+_GLOBAL_TOC(flush_icache_range)
  BEGIN_FTR_SECTION
         PURGE_PREFETCHED_INS
         blr
@@ -120,7 +120,7 @@ EXPORT_SYMBOL(flush_icache_range)
   *
   *    flush all bytes from start to stop-1 inclusive
   */
-_GLOBAL(flush_dcache_range)
+_GLOBAL_TOC(flush_dcache_range)
  
  /*
   * Flush the data cache to memory 
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c

index a3944540fe0d56b0245f65ffcc0c0719f3c57a54..1c1b44ec7642a531e116fa0d04b6269dd38e93b5 100644 (file)
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -168,6 +168,14 @@ static unsigned long __initdata prom_tce_alloc_start;
  static unsigned long __initdata prom_tce_alloc_end;
  #endif
  
+static bool __initdata prom_radix_disable;
+
+struct platform_support { /* what "ibm,arch-vec-5-platform-support" reported */
+       bool hash_mmu; /* hash MMU is available */
+       bool radix_mmu; /* radix MMU is available (and not disabled on the cmdline when optional) */
+       bool radix_gtse; /* radix GTSE extension is available */
+};
+
  /* Platforms codes are now obsolete in the kernel. Now only used within this
   * file and ultimately gone too. Feel free to change them if you need, they
   * are not shared with anything outside of this file anymore
@@ -626,6 +634,12 @@ static void __init early_cmdline_parse(void)
                 prom_memory_limit = ALIGN(prom_memory_limit, 0x1000000);
  #endif
         }
+
+       opt = strstr(prom_cmd_line, "disable_radix");
+       if (opt) {
+               prom_debug("Radix disabled from cmdline\n");
+               prom_radix_disable = true;
+       }
  }
  
  #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
@@ -695,6 +709,8 @@ struct option_vector5 {
         u8 byte22;
         u8 intarch;
         u8 mmu;
+       u8 hash_ext;
+       u8 radix_ext;
  } __packed;
  
  struct option_vector6 {
@@ -850,8 +866,9 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
                 .reserved3 = 0,
                 .subprocessors = 1,
                 .intarch = 0,
-               .mmu = OV5_FEAT(OV5_MMU_RADIX_300) | OV5_FEAT(OV5_MMU_HASH_300) |
-                       OV5_FEAT(OV5_MMU_PROC_TBL) | OV5_FEAT(OV5_MMU_GTSE),
+               .mmu = 0,
+               .hash_ext = 0,
+               .radix_ext = 0,
         },
  
         /* option vector 6: IBM PAPR hints */
@@ -990,6 +1007,92 @@ static int __init prom_count_smt_threads(void)
  
  }
  
+/*
+ * Translate the MMU-model value of the platform-support property into the
+ * hash_mmu/radix_mmu flags of @support.
+ *
+ * @val is compared against the OV5_FEAT() encodings of the MMU models.
+ * When the platform offers either model, radix is only accepted if it was
+ * not disabled on the command line; when radix is the only model, the
+ * command line option is ignored with a warning.  Unknown values leave
+ * @support unchanged.
+ */
+static void __init prom_parse_mmu_model(u8 val,
+                                       struct platform_support *support)
+{
+       switch (val) {
+       case OV5_FEAT(OV5_MMU_HASH):
+               prom_debug("MMU - hash only\n");
+               support->hash_mmu = true;
+               break;
+       case OV5_FEAT(OV5_MMU_RADIX): /* Only Radix */
+               prom_debug("MMU - radix only\n");
+               /*
+                * If we __have__ to do radix, we're better off ignoring
+                * the command line rather than not booting.
+                */
+               if (prom_radix_disable)
+                       prom_printf("WARNING: Ignoring cmdline option disable_radix\n");
+               support->radix_mmu = true;
+               break;
+       case OV5_FEAT(OV5_MMU_DYNAMIC):
+       case OV5_FEAT(OV5_MMU_EITHER): /* Either Available */
+               prom_debug("MMU - either supported\n");
+               support->hash_mmu = true;
+               support->radix_mmu = !prom_radix_disable;
+               break;
+       default:
+               prom_debug("Unknown mmu support option: 0x%x\n", val);
+               break;
+       }
+}
+
+/*
+ * Interpret one (index, value) pair of the platform-support property and
+ * record the result in @support.  Only the MMU-model byte and the radix
+ * extension byte are looked at; every other index is ignored.
+ */
+static void __init prom_parse_platform_support(u8 index, u8 val,
+                                              struct platform_support *support)
+{
+       switch (index) {
+       case OV5_INDX(OV5_MMU_SUPPORT): /* MMU Model */
+               prom_parse_mmu_model(val & OV5_FEAT(OV5_MMU_SUPPORT), support);
+               break;
+       case OV5_INDX(OV5_RADIX_GTSE): /* Radix Extensions */
+               if (!(val & OV5_FEAT(OV5_RADIX_GTSE)))
+                       break;
+               prom_debug("Radix - GTSE supported\n");
+               support->radix_gtse = true;
+               break;
+       default:
+               break;
+       }
+}
+
+/*
+ * Read "ibm,arch-vec-5-platform-support" from /chosen and choose the MMU
+ * model to request in option vector 5 of ibm_architecture_vec.
+ *
+ * The property is a list of (index, value) byte pairs, each of which is
+ * handed to prom_parse_platform_support().  Radix is requested only when
+ * both radix and GTSE are supported; otherwise hash is requested if it is
+ * supported; if neither is reported, vec5 is left untouched.
+ */
+static void __init prom_check_platform_support(void)
+{
+       struct platform_support supported = {
+               .hash_mmu = false,
+               .radix_mmu = false,
+               .radix_gtse = false
+       };
+       int prop_len = prom_getproplen(prom.chosen,
+                                      "ibm,arch-vec-5-platform-support");
+       if (prop_len > 1) {
+               int i;
+               u8 vec[prop_len];
+               prom_debug("Found ibm,arch-vec-5-platform-support, len: %d\n",
+                          prop_len);
+               prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support",
+                            &vec, sizeof(vec));
+               /*
+                * Only walk complete (index, value) pairs, so that an odd
+                * prop_len never reads vec[i + 1] past the end of vec.
+                */
+               for (i = 0; i + 1 < prop_len; i += 2) {
+                       prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2
+                                                                 , vec[i]
+                                                                 , vec[i + 1]);
+                       prom_parse_platform_support(vec[i], vec[i + 1],
+                                                   &supported);
+               }
+       }
+
+       if (supported.radix_mmu && supported.radix_gtse) {
+               /* Radix preferred - but we require GTSE for now */
+               prom_debug("Asking for radix with GTSE\n");
+               ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX);
+               ibm_architecture_vec.vec5.radix_ext = OV5_FEAT(OV5_RADIX_GTSE);
+       } else if (supported.hash_mmu) {
+               /* Default to hash mmu (if we can) */
+               prom_debug("Asking for hash\n");
+               ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_HASH);
+       } else {
+               /* We're probably on a legacy hypervisor */
+               prom_debug("Assuming legacy hash support\n");
+       }
+}
  
  static void __init prom_send_capabilities(void)
  {
@@ -997,6 +1100,9 @@ static void __init prom_send_capabilities(void)
         prom_arg_t ret;
         u32 cores;
  
+       /* Check ibm,arch-vec-5-platform-support and fixup vec5 if required */
+       prom_check_platform_support();
+
         root = call_prom("open", 1, 1, ADDR("/"));
         if (root != 0) {
                 /* We need to tell the FW about the number of cores we support.
@@ -2993,6 +3099,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
          */
         prom_check_initrd(r3, r4);
  
+       /*
+        * Do early parsing of command line
+        */
+       early_cmdline_parse();
+
  #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
         /*
          * On pSeries, inform the firmware about our capabilities
@@ -3008,11 +3119,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
         if (of_platform != PLATFORM_POWERMAC)
                 copy_and_flush(0, kbase, 0x100, 0);
  
-       /*
-        * Do early parsing of command line
-        */
-       early_cmdline_parse();
-
         /*
          * Initialize memory management within prom_init
          */
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c

index adf2084f214b2bd01d5aa3ef2a613e66b7b66a05..f997154dfc41bb9dc12d514890d6c425f197048b 100644 (file)
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -236,6 +236,15 @@ static void cpu_ready_for_interrupts(void)
                 mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
         }
  
+       /*
+        * Fixup HFSCR:TM based on CPU features. The bit is set by our
+        * early asm init because at that point we haven't updated our
+        * CPU features from firmware and device-tree. Here we have,
+        * so let's do it.
+        */
+       if (cpu_has_feature(CPU_FTR_HVMODE) && !cpu_has_feature(CPU_FTR_TM_COMP))
+               mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM);
+
         /* Set IR and DR in PACA MSR */
         get_paca()->kernel_msr = MSR_KERNEL;
  }
@@ -408,7 +417,10 @@ static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize,
         info->line_size = lsize;
         info->block_size = bsize;
         info->log_block_size = __ilog2(bsize);
-       info->blocks_per_page = PAGE_SIZE / bsize;
+       if (bsize)
+               info->blocks_per_page = PAGE_SIZE / bsize;
+       else
+               info->blocks_per_page = 0;
  
         if (sets == 0)
                 info->assoc = 0xffff;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c

index f3158fb16de34b69acdb593ce39de7d88437e094..710e491206ed0a11ff96b4994d8dbb91f264b94a 100644 (file)
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -601,7 +601,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                                                          hva, NULL, NULL);
                         if (ptep) {
                                 pte = kvmppc_read_update_linux_pte(ptep, 1);
-                               if (pte_write(pte))
+                               if (__pte_write(pte))
                                         write_ok = 1;
                         }
                         local_irq_restore(flags);
@@ -1487,6 +1487,10 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
         /* start new resize */
  
         resize = kzalloc(sizeof(*resize), GFP_KERNEL);
+       if (!resize) {
+               ret = -ENOMEM;
+               goto out;
+       }
         resize->order = shift;
         resize->kvm = kvm;
         INIT_WORK(&resize->work, resize_hpt_prepare_work);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c

index 6fca970373ee90eee718912c48d34a3ebab3ff37..ce6f2121fffe46857bf4b250c06ad3916ac24aeb 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -256,7 +256,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
                 }
                 pte = kvmppc_read_update_linux_pte(ptep, writing);
                 if (pte_present(pte) && !pte_protnone(pte)) {
-                       if (writing && !pte_write(pte))
+                       if (writing && !__pte_write(pte))
                                 /* make the actual HPTE be read-only */
                                 ptel = hpte_make_readonly(ptel);
                         is_ci = pte_ci(pte);
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile

index 0e649d72fe8d0d3a95f5b1c5216e4bf6f2a4bdd2..2b5e09020cfe379abfb3c56dd497215b07eb8931 100644 (file)
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -20,6 +20,7 @@ obj64-y       += copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \
  
  obj64-$(CONFIG_SMP)    += locks.o
  obj64-$(CONFIG_ALTIVEC)        += vmx-helper.o
+obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o
  
  obj-y                  += checksum_$(BITS).o checksum_wrappers.o
  
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c

index 846dba2c6360002b5343dc17d5b0744fb5525e85..9c542ec70c5bc8b77810e34f9ab8c96e8cd51522 100644 (file)
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1799,8 +1799,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
                 goto instr_done;
  
         case LARX:
-               if (regs->msr & MSR_LE)
-                       return 0;
                 if (op.ea & (size - 1))
                         break;          /* can't handle misaligned */
                 if (!address_ok(regs, op.ea, size))
@@ -1823,8 +1821,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
                 goto ldst_done;
  
         case STCX:
-               if (regs->msr & MSR_LE)
-                       return 0;
                 if (op.ea & (size - 1))
                         break;          /* can't handle misaligned */
                 if (!address_ok(regs, op.ea, size))
@@ -1849,8 +1845,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
                 goto ldst_done;
  
         case LOAD:
-               if (regs->msr & MSR_LE)
-                       return 0;
                 err = read_mem(&regs->gpr[op.reg], op.ea, size, regs);
                 if (!err) {
                         if (op.type & SIGNEXT)
@@ -1862,8 +1856,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
  
  #ifdef CONFIG_PPC_FPU
         case LOAD_FP:
-               if (regs->msr & MSR_LE)
-                       return 0;
                 if (size == 4)
                         err = do_fp_load(op.reg, do_lfs, op.ea, size, regs);
                 else
@@ -1872,15 +1864,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
  #endif
  #ifdef CONFIG_ALTIVEC
         case LOAD_VMX:
-               if (regs->msr & MSR_LE)
-                       return 0;
                 err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs);
                 goto ldst_done;
  #endif
  #ifdef CONFIG_VSX
         case LOAD_VSX:
-               if (regs->msr & MSR_LE)
-                       return 0;
                 err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs);
                 goto ldst_done;
  #endif
@@ -1903,8 +1891,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
                 goto instr_done;
  
         case STORE:
-               if (regs->msr & MSR_LE)
-                       return 0;
                 if ((op.type & UPDATE) && size == sizeof(long) &&
                     op.reg == 1 && op.update_reg == 1 &&
                     !(regs->msr & MSR_PR) &&
@@ -1917,8 +1903,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
  
  #ifdef CONFIG_PPC_FPU
         case STORE_FP:
-               if (regs->msr & MSR_LE)
-                       return 0;
                 if (size == 4)
                         err = do_fp_store(op.reg, do_stfs, op.ea, size, regs);
                 else
@@ -1927,15 +1911,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
  #endif
  #ifdef CONFIG_ALTIVEC
         case STORE_VMX:
-               if (regs->msr & MSR_LE)
-                       return 0;
                 err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs);
                 goto ldst_done;
  #endif
  #ifdef CONFIG_VSX
         case STORE_VSX:
-               if (regs->msr & MSR_LE)
-                       return 0;
                 err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs);
                 goto ldst_done;
  #endif
diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c

new file mode 100644 (file)

index 0000000..2534c14
--- /dev/null
+++ b/arch/powerpc/lib/test_emulate_step.c
@@ -0,0 +1,434 @@
+/*
+ * Simple sanity test for emulate_step load/store instructions.
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "emulate_step_test: " fmt
+
+#include <linux/ptrace.h>
+#include <asm/sstep.h>
+#include <asm/ppc-opcode.h>
+
+#define IMM_L(i)               ((uintptr_t)(i) & 0xffff)
+
+/*
+ * Defined with TEST_ prefix so it does not conflict with other
+ * definitions.
+ */
+#define TEST_LD(r, base, i)    (PPC_INST_LD | ___PPC_RT(r) |           \
+                                       ___PPC_RA(base) | IMM_L(i))
+#define TEST_LWZ(r, base, i)   (PPC_INST_LWZ | ___PPC_RT(r) |          \
+                                       ___PPC_RA(base) | IMM_L(i))
+#define TEST_LWZX(t, a, b)     (PPC_INST_LWZX | ___PPC_RT(t) |         \
+                                       ___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_STD(r, base, i)   (PPC_INST_STD | ___PPC_RS(r) |          \
+                                       ___PPC_RA(base) | ((i) & 0xfffc))
+#define TEST_LDARX(t, a, b, eh)        (PPC_INST_LDARX | ___PPC_RT(t) |        \
+                                       ___PPC_RA(a) | ___PPC_RB(b) |   \
+                                       __PPC_EH(eh))
+#define TEST_STDCX(s, a, b)    (PPC_INST_STDCX | ___PPC_RS(s) |        \
+                                       ___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_LFSX(t, a, b)     (PPC_INST_LFSX | ___PPC_RT(t) |         \
+                                       ___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_STFSX(s, a, b)    (PPC_INST_STFSX | ___PPC_RS(s) |        \
+                                       ___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_LFDX(t, a, b)     (PPC_INST_LFDX | ___PPC_RT(t) |         \
+                                       ___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_STFDX(s, a, b)    (PPC_INST_STFDX | ___PPC_RS(s) |        \
+                                       ___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_LVX(t, a, b)      (PPC_INST_LVX | ___PPC_RT(t) |          \
+                                       ___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_STVX(s, a, b)     (PPC_INST_STVX | ___PPC_RS(s) |         \
+                                       ___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_LXVD2X(s, a, b)   (PPC_INST_LXVD2X | VSX_XX1((s), R##a, R##b))
+#define TEST_STXVD2X(s, a, b)  (PPC_INST_STXVD2X | VSX_XX1((s), R##a, R##b))
+
+
+/*
+ * Zero @regs and give it an MSR value suitable for the tests.
+ *
+ * On the first call the MSR is read with mfmsr, and MSR_FP, MSR_VEC and
+ * MSR_VSX are set in it; the result is cached and reused by later calls.
+ */
+static void __init init_pt_regs(struct pt_regs *regs)
+{
+       static unsigned long msr;
+       static bool msr_cached;
+
+       memset(regs, 0, sizeof(*regs));
+
+       if (unlikely(!msr_cached)) {
+               asm volatile("mfmsr %0" : "=r"(msr));
+               msr |= MSR_FP | MSR_VEC | MSR_VSX;
+               msr_cached = true;
+       }
+
+       regs->msr = msr;
+}
+
+static void __init show_result(char *ins, char *result)
+{ /* Log one "<instruction> : <result>" line via pr_info(). */
+       pr_info("%-14s : %s\n", ins, result); /* name is left-justified in a 14-column field */
+}
+
+/* Emulate "ld r5, 0(r3)" and check that r5 receives the doubleword at r3. */
+static void __init test_ld(void)
+{
+       unsigned long a = 0x23;
+       struct pt_regs regs;
+       int stepped;
+
+       init_pt_regs(&regs);
+       regs.gpr[3] = (unsigned long) &a;
+
+       /* ld r5, 0(r3) */
+       stepped = emulate_step(&regs, TEST_LD(5, 3, 0));
+
+       show_result("ld", (stepped == 1 && regs.gpr[5] == a) ? "PASS" : "FAIL");
+}
+
+/* Emulate "lwz r5, 0(r3)" and check that r5 receives the word at r3. */
+static void __init test_lwz(void)
+{
+       unsigned int a = 0x4545;
+       struct pt_regs regs;
+       int stepped;
+
+       init_pt_regs(&regs);
+       regs.gpr[3] = (unsigned long) &a;
+
+       /* lwz r5, 0(r3) */
+       stepped = emulate_step(&regs, TEST_LWZ(5, 3, 0));
+
+       show_result("lwz", (stepped == 1 && regs.gpr[5] == a) ? "PASS" : "FAIL");
+}
+
+/*
+ * Emulate "lwzx r5, r3, r4" with r3 pointing at an array and r4 holding a
+ * byte offset of 8, and check that r5 receives the third array element.
+ */
+static void __init test_lwzx(void)
+{
+       unsigned int a[3] = {0x0, 0x0, 0x1234};
+       struct pt_regs regs;
+       int stepped;
+
+       init_pt_regs(&regs);
+       regs.gpr[3] = (unsigned long) a;
+       regs.gpr[4] = 8;
+       regs.gpr[5] = 0x8765;
+
+       /* lwzx r5, r3, r4 */
+       stepped = emulate_step(&regs, TEST_LWZX(5, 3, 4));
+
+       show_result("lwzx", (stepped == 1 && regs.gpr[5] == a[2]) ? "PASS" : "FAIL");
+}
+
+/*
+ * Emulate "std r5, 0(r3)" and verify both that the instruction was
+ * emulated and that the doubleword at r3 now holds the value of r5.
+ */
+static void __init test_std(void)
+{
+       struct pt_regs regs;
+       unsigned long a = 0x1234;
+       int stepped = -1;
+
+       init_pt_regs(&regs);
+       regs.gpr[3] = (unsigned long) &a;
+       regs.gpr[5] = 0x5678;
+
+       /* std r5, 0(r3) */
+       stepped = emulate_step(&regs, TEST_STD(5, 3, 0));
+
+       /*
+        * Both conditions are required: a successful return value alone
+        * does not prove that the store reached memory.
+        */
+       if (stepped == 1 && regs.gpr[5] == a)
+               show_result("std", "PASS");
+       else
+               show_result("std", "FAIL");
+}
+
+static void __init test_ldarx_stdcx(void)
+{ /* Emulate ldarx followed by stdcx. on the same doubleword and check both steps. */
+       struct pt_regs regs;
+       unsigned long a = 0x1234;
+       int stepped = -1;
+       unsigned long cr0_eq = 0x1 << 29; /* eq bit of CR0 */
+
+       init_pt_regs(&regs);
+       asm volatile("mfcr %0" : "=r"(regs.ccr)); /* start from the current CR value */
+
+
+       /*** ldarx ***/
+
+       regs.gpr[3] = (unsigned long) &a;
+       regs.gpr[4] = 0;
+       regs.gpr[5] = 0x5678;
+
+       /* ldarx r5, r3, r4, 0 */
+       stepped = emulate_step(&regs, TEST_LDARX(5, 3, 4, 0));
+
+       /*
+        * Don't touch 'a' here. Touching 'a' may generate a load/store
+        * of 'a', which would make the subsequent stdcx. fail.
+        * Instead, compare against a hardcoded value.
+        */
+       if (stepped <= 0 || regs.gpr[5] != 0x1234) {
+               show_result("ldarx / stdcx.", "FAIL (ldarx)");
+               return;
+       }
+
+
+       /*** stdcx. ***/
+
+       regs.gpr[5] = 0x9ABC;
+
+       /* stdcx. r5, r3, r4 */
+       stepped = emulate_step(&regs, TEST_STDCX(5, 3, 4));
+
+       /*
+        * Two possible scenarios indicate successful emulation
+        * of stdcx. :
+        *  1. Reservation is active and store is performed. In this
+        *     case cr0.eq bit will be set to 1.
+        *  2. Reservation is not active and store is not performed.
+        *     In this case cr0.eq bit will be set to 0.
+        */
+       if (stepped == 1 && ((regs.gpr[5] == a && (regs.ccr & cr0_eq))
+                       || (regs.gpr[5] != a && !(regs.ccr & cr0_eq))))
+               show_result("ldarx / stdcx.", "PASS");
+       else
+               show_result("ldarx / stdcx.", "FAIL (stdcx.)");
+}
+
+#ifdef CONFIG_PPC_FPU
+static void __init test_lfsx_stfsx(void)
+{ /* Emulate lfsx into FPR 10, then stfsx from FPR 10 back to the same address. */
+       struct pt_regs regs;
+       union {
+               float a;
+               int b;
+       } c;
+       int cached_b;
+       int stepped = -1;
+
+       init_pt_regs(&regs);
+
+
+       /*** lfsx ***/
+
+       c.a = 123.45;
+       cached_b = c.b; /* raw bits of the value about to be loaded */
+
+       regs.gpr[3] = (unsigned long) &c.a;
+       regs.gpr[4] = 0;
+
+       /* lfsx frt10, r3, r4 */
+       stepped = emulate_step(&regs, TEST_LFSX(10, 3, 4));
+
+       if (stepped == 1) /* the loaded value itself is only checked through the stfsx below */
+               show_result("lfsx", "PASS");
+       else
+               show_result("lfsx", "FAIL");
+
+
+       /*** stfsx ***/
+
+       c.a = 678.91; /* overwrite memory; the store below must bring the cached bits back */
+
+       /* stfsx frs10, r3, r4 */
+       stepped = emulate_step(&regs, TEST_STFSX(10, 3, 4));
+
+       if (stepped == 1 && c.b == cached_b)
+               show_result("stfsx", "PASS");
+       else
+               show_result("stfsx", "FAIL");
+}
+
+static void __init test_lfdx_stfdx(void)
+{ /* Emulate lfdx into FPR 10, then stfdx from FPR 10 back to the same address. */
+       struct pt_regs regs;
+       union {
+               double a;
+               long b;
+       } c;
+       long cached_b;
+       int stepped = -1;
+
+       init_pt_regs(&regs);
+
+
+       /*** lfdx ***/
+
+       c.a = 123456.78;
+       cached_b = c.b; /* raw bits of the value about to be loaded */
+
+       regs.gpr[3] = (unsigned long) &c.a;
+       regs.gpr[4] = 0;
+
+       /* lfdx frt10, r3, r4 */
+       stepped = emulate_step(&regs, TEST_LFDX(10, 3, 4));
+
+       if (stepped == 1) /* the loaded value itself is only checked through the stfdx below */
+               show_result("lfdx", "PASS");
+       else
+               show_result("lfdx", "FAIL");
+
+
+       /*** stfdx ***/
+
+       c.a = 987654.32; /* overwrite memory; the store below must bring the cached bits back */
+
+       /* stfdx frs10, r3, r4 */
+       stepped = emulate_step(&regs, TEST_STFDX(10, 3, 4));
+
+       if (stepped == 1 && c.b == cached_b)
+               show_result("stfdx", "PASS");
+       else
+               show_result("stfdx", "FAIL");
+}
+#else
+static void __init test_lfsx_stfsx(void)
+{ /* Built without CONFIG_PPC_FPU: only report both tests as skipped. */
+       show_result("lfsx", "SKIP (CONFIG_PPC_FPU is not set)");
+       show_result("stfsx", "SKIP (CONFIG_PPC_FPU is not set)");
+}
+
+static void __init test_lfdx_stfdx(void)
+{ /* Built without CONFIG_PPC_FPU: only report both tests as skipped. */
+       show_result("lfdx", "SKIP (CONFIG_PPC_FPU is not set)");
+       show_result("stfdx", "SKIP (CONFIG_PPC_FPU is not set)");
+}
+#endif /* CONFIG_PPC_FPU */
+
+#ifdef CONFIG_ALTIVEC
+static void __init test_lvx_stvx(void)
+{ /* Emulate lvx into VR 10, then stvx from VR 10 back to the same address. */
+       struct pt_regs regs;
+       union {
+               vector128 a;
+               u32 b[4];
+       } c;
+       u32 cached_b[4];
+       int stepped = -1;
+
+       init_pt_regs(&regs);
+
+
+       /*** lvx ***/
+
+       cached_b[0] = c.b[0] = 923745; /* cached_b keeps a copy of the words being loaded */
+       cached_b[1] = c.b[1] = 2139478;
+       cached_b[2] = c.b[2] = 9012;
+       cached_b[3] = c.b[3] = 982134;
+
+       regs.gpr[3] = (unsigned long) &c.a;
+       regs.gpr[4] = 0;
+
+       /* lvx vrt10, r3, r4 */
+       stepped = emulate_step(&regs, TEST_LVX(10, 3, 4));
+
+       if (stepped == 1) /* the loaded value itself is only checked through the stvx below */
+               show_result("lvx", "PASS");
+       else
+               show_result("lvx", "FAIL");
+
+
+       /*** stvx ***/
+
+       c.b[0] = 4987513; /* overwrite memory; the store below must bring cached_b back */
+       c.b[1] = 84313948;
+       c.b[2] = 71;
+       c.b[3] = 498532;
+
+       /* stvx vrs10, r3, r4 */
+       stepped = emulate_step(&regs, TEST_STVX(10, 3, 4));
+
+       if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] &&
+           cached_b[2] == c.b[2] && cached_b[3] == c.b[3])
+               show_result("stvx", "PASS");
+       else
+               show_result("stvx", "FAIL");
+}
+#else
+static void __init test_lvx_stvx(void)
+{ /* Built without CONFIG_ALTIVEC: only report both tests as skipped. */
+       show_result("lvx", "SKIP (CONFIG_ALTIVEC is not set)");
+       show_result("stvx", "SKIP (CONFIG_ALTIVEC is not set)");
+}
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_VSX
+static void __init test_lxvd2x_stxvd2x(void)
+{ /* Emulate lxvd2x into VSR 39, then stxvd2x from VSR 39 back to the same address. */
+       struct pt_regs regs;
+       union {
+               vector128 a;
+               u32 b[4];
+       } c;
+       u32 cached_b[4];
+       int stepped = -1;
+
+       init_pt_regs(&regs);
+
+
+       /*** lxvd2x ***/
+
+       cached_b[0] = c.b[0] = 18233; /* cached_b keeps a copy of the words being loaded */
+       cached_b[1] = c.b[1] = 34863571;
+       cached_b[2] = c.b[2] = 834;
+       cached_b[3] = c.b[3] = 6138911;
+
+       regs.gpr[3] = (unsigned long) &c.a;
+       regs.gpr[4] = 0;
+
+       /* lxvd2x vsr39, r3, r4 */
+       stepped = emulate_step(&regs, TEST_LXVD2X(39, 3, 4));
+
+       if (stepped == 1) /* the loaded value itself is only checked through the stxvd2x below */
+               show_result("lxvd2x", "PASS");
+       else
+               show_result("lxvd2x", "FAIL");
+
+
+       /*** stxvd2x ***/
+
+       c.b[0] = 21379463; /* overwrite memory; the store below must bring cached_b back */
+       c.b[1] = 87;
+       c.b[2] = 374234;
+       c.b[3] = 4;
+
+       /* stxvd2x vsr39, r3, r4 */
+       stepped = emulate_step(&regs, TEST_STXVD2X(39, 3, 4));
+
+       if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] &&
+           cached_b[2] == c.b[2] && cached_b[3] == c.b[3])
+               show_result("stxvd2x", "PASS");
+       else
+               show_result("stxvd2x", "FAIL");
+}
+#else
+static void __init test_lxvd2x_stxvd2x(void)
+{ /* Built without CONFIG_VSX: only report both tests as skipped. */
+       show_result("lxvd2x", "SKIP (CONFIG_VSX is not set)");
+       show_result("stxvd2x", "SKIP (CONFIG_VSX is not set)");
+}
+#endif /* CONFIG_VSX */
+
+static int __init test_emulate_step(void)
+{ /* Run every emulate_step() sanity test in turn; results are only logged. */
+       test_ld();
+       test_lwz();
+       test_lwzx();
+       test_std();
+       test_ldarx_stdcx();
+       test_lfsx_stfsx();
+       test_lfdx_stfdx();
+       test_lvx_stvx();
+       test_lxvd2x_stxvd2x();
+
+       return 0; /* always 0: a failing test does not fail the initcall */
+}
+late_initcall(test_emulate_step);
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c

index cc332608e65664f40f337cd4418c70a164b6259d..65bb8f33b399bf13bcedd115f4ce86ed1c0dad5b 100644 (file)
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -638,6 +638,10 @@ static void native_flush_hash_range(unsigned long number, int local)
         unsigned long psize = batch->psize;
         int ssize = batch->ssize;
         int i;
+       unsigned int use_local;
+
+       use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) &&
+               mmu_psize_defs[psize].tlbiel && !cxl_ctx_in_use();
  
         local_irq_save(flags);
  
@@ -667,8 +671,7 @@ static void native_flush_hash_range(unsigned long number, int local)
                 } pte_iterate_hashed_end();
         }
  
-       if (mmu_has_feature(MMU_FTR_TLBIEL) &&
-           mmu_psize_defs[psize].tlbiel && local) {
+       if (use_local) {
                 asm volatile("ptesync":::"memory");
                 for (i = 0; i < number; i++) {
                         vpn = batch->vpn[i];
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c

index 6aa3b76aa0d66b0b0d53b7e30685ab26745e2603..c22f207aa6564ba93df40ecc03713de04cb20f99 100644 (file)
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -356,25 +356,48 @@ static void early_check_vec5(void)
         unsigned long root, chosen;
         int size;
         const u8 *vec5;
+       u8 mmu_supported;
  
         root = of_get_flat_dt_root();
         chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
-       if (chosen == -FDT_ERR_NOTFOUND)
+       if (chosen == -FDT_ERR_NOTFOUND) {
+               cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
                 return;
+       }
         vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size);
-       if (!vec5)
+       if (!vec5) {
+               cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
                 return;
-       if (size <= OV5_INDX(OV5_MMU_RADIX_300) ||
-           !(vec5[OV5_INDX(OV5_MMU_RADIX_300)] & OV5_FEAT(OV5_MMU_RADIX_300)))
-               /* Hypervisor doesn't support radix */
+       }
+       if (size <= OV5_INDX(OV5_MMU_SUPPORT)) {
                 cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+               return;
+       }
+
+       /* Check for supported configuration */
+       mmu_supported = vec5[OV5_INDX(OV5_MMU_SUPPORT)] &
+                       OV5_FEAT(OV5_MMU_SUPPORT);
+       if (mmu_supported == OV5_FEAT(OV5_MMU_RADIX)) {
+               /* Hypervisor only supports radix - check enabled && GTSE */
+               if (!early_radix_enabled()) {
+                       pr_warn("WARNING: Ignoring cmdline option disable_radix\n");
+               }
+               if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] &
+                                               OV5_FEAT(OV5_RADIX_GTSE))) {
+                       pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n");
+               }
+               /* Do radix anyway - the hypervisor said we had to */
+               cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
+       } else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) {
+               /* Hypervisor only supports hash - disable radix */
+               cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+       }
  }
  
  void __init mmu_early_init_devtree(void)
  {
         /* Disable radix mode based on kernel command line. */
-       /* We don't yet have the machinery to do radix as a guest. */
-       if (disable_radix || !(mfmsr() & MSR_HV))
+       if (disable_radix)
                 cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
  
         /*
@@ -383,7 +406,7 @@ void __init mmu_early_init_devtree(void)
          * even though the ibm,architecture-vec-5 property created by
          * skiboot doesn't have the necessary bits set.
          */
-       if (early_radix_enabled() && !(mfmsr() & MSR_HV))
+       if (!(mfmsr() & MSR_HV))
                 early_check_vec5();
  
         if (early_radix_enabled())
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c

index 2a590a98e65215a8fceb413dd0c0b693a1adc9aa..c28165d8970b64de6bef88fa14a7945a6a75abc5 100644 (file)
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -186,6 +186,10 @@ static void __init radix_init_pgtable(void)
          */
         register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12);
         pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd);
+       asm volatile("ptesync" : : : "memory");
+       asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
+                    "r" (TLBIEL_INVAL_SET_LPID), "r" (0));
+       asm volatile("eieio; tlbsync; ptesync" : : : "memory");
  }
  
  static void __init radix_init_partition_table(void)
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c

index 595dd718ea8718b010fed1ca5c08f5f121f674c0..2ff13249f87a61759f015d7fff93bd014dba6347 100644 (file)
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -188,6 +188,8 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
                         sdsync = POWER7P_MMCRA_SDAR_VALID;
                 else if (ppmu->flags & PPMU_ALT_SIPR)
                         sdsync = POWER6_MMCRA_SDSYNC;
+               else if (ppmu->flags & PPMU_NO_SIAR)
+                       sdsync = MMCRA_SAMPLE_ENABLE;
                 else
                         sdsync = MMCRA_SDSYNC;
  
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c

index e79fb5fb817dbe21cd19f633d89ca3bbbf51ad0c..cd951fd231c4040ba653f32cf485eebb22d1d805 100644 (file)
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -65,12 +65,41 @@ static bool is_event_valid(u64 event)
         return !(event & ~valid_mask);
  }
  
-static u64 mmcra_sdar_mode(u64 event)
+static inline bool is_event_marked(u64 event)
  {
-       if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1))
-               return p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
+       if (event & EVENT_IS_MARKED)
+               return true;
+
+       return false;
+}
  
-       return MMCRA_SDAR_MODE_TLB;
+static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
+{
+       /*
+        * MMCRA[SDAR_MODE] specifies how the SDAR should be updated in
+        * continuous sampling mode.
+        *
+        * In case of Power8:
+        * MMCRA[SDAR_MODE] will be programmed as "0b01" for continuous sampling
+        * mode and will be unchanged when setting MMCRA[63] (Marked events).
+        *
+        * In case of Power9:
+        * Marked event: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates'),
+        *               also when the group already has any marked events.
+        * Non-Marked events (for DD1):
+        *      MMCRA[SDAR_MODE] will be set to 0b01
+        * For the rest:
+        *      MMCRA[SDAR_MODE] will be set from the event code.
+        */
+       if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+               if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE))
+                       *mmcra &= MMCRA_SDAR_MODE_NO_UPDATES;
+               else if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
+                       *mmcra |=  p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
+               else if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+                       *mmcra |= MMCRA_SDAR_MODE_TLB;
+       } else
+               *mmcra |= MMCRA_SDAR_MODE_TLB;
  }
  
  static u64 thresh_cmp_val(u64 value)
@@ -180,7 +209,7 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
                 value |= CNST_L1_QUAL_VAL(cache);
         }
  
-       if (event & EVENT_IS_MARKED) {
+       if (is_event_marked(event)) {
                 mask  |= CNST_SAMPLE_MASK;
                 value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT);
         }
@@ -276,7 +305,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
                 }
  
                 /* In continuous sampling mode, update SDAR on TLB miss */
-               mmcra |= mmcra_sdar_mode(event[i]);
+               mmcra_sdar_mode(event[i], &mmcra);
  
                 if (event[i] & EVENT_IS_L1) {
                         cache = event[i] >> EVENT_CACHE_SEL_SHIFT;
@@ -285,7 +314,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
                         mmcr1 |= (cache & 1) << MMCR1_DC_QUAL_SHIFT;
                 }
  
-               if (event[i] & EVENT_IS_MARKED) {
+               if (is_event_marked(event[i])) {
                         mmcra |= MMCRA_SAMPLE_ENABLE;
  
                         val = (event[i] >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h

index cf9bd89901595cc38b793bc916a2096d873054eb..899210f14ee432ea4b63cc7de6f7ea1a6da7a404 100644 (file)
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -246,6 +246,7 @@
  #define MMCRA_THR_CMP_SHIFT            32
  #define MMCRA_SDAR_MODE_SHIFT          42
  #define MMCRA_SDAR_MODE_TLB            (1ull << MMCRA_SDAR_MODE_SHIFT)
+#define MMCRA_SDAR_MODE_NO_UPDATES     ~(0x3ull << MMCRA_SDAR_MODE_SHIFT)
  #define MMCRA_IFM_SHIFT                        30
  
  /* MMCR1 Threshold Compare bit constant for power9 */
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S

index 6693f75e93d1629b51cd3104f24b84528a00969f..da8a0f7a035c1026b3403542c7ec2d97efa50376 100644 (file)
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -39,8 +39,8 @@ opal_tracepoint_refcount:
  BEGIN_FTR_SECTION;                                             \
         b       1f;                                             \
  END_FTR_SECTION(0, 1);                                         \
-       ld      r12,opal_tracepoint_refcount@toc(r2);           \
-       cmpdi   r12,0;                                          \
+       ld      r11,opal_tracepoint_refcount@toc(r2);           \
+       cmpdi   r11,0;                                          \
         bne-    LABEL;                                          \
  1:
  
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c

index 86d9fde93c175f86dac6f40de0d68aff2455b0c6..e0f856bfbfe8f3c6ecfa70e737b1b2496725d563 100644 (file)
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -395,7 +395,6 @@ static int opal_recover_mce(struct pt_regs *regs,
                                         struct machine_check_event *evt)
  {
         int recovered = 0;
-       uint64_t ea = get_mce_fault_addr(evt);
  
         if (!(regs->msr & MSR_RI)) {
                 /* If MSR_RI isn't set, we cannot recover */
@@ -404,26 +403,18 @@ static int opal_recover_mce(struct pt_regs *regs,
         } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
                 /* Platform corrected itself */
                 recovered = 1;
-       } else if (ea && !is_kernel_addr(ea)) {
+       } else if (evt->severity == MCE_SEV_FATAL) {
+               /* Fatal machine check */
+               pr_err("Machine check interrupt is fatal\n");
+               recovered = 0;
+       } else if ((evt->severity == MCE_SEV_ERROR_SYNC) &&
+                       (user_mode(regs) && !is_global_init(current))) {
                 /*
-                * Faulting address is not in kernel text. We should be fine.
-                * We need to find which process uses this address.
                  * For now, kill the task if we have received exception when
                  * in userspace.
                  *
                  * TODO: Queue up this address for hwpoisioning later.
                  */
-               if (user_mode(regs) && !is_global_init(current)) {
-                       _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
-                       recovered = 1;
-               } else
-                       recovered = 0;
-       } else if (user_mode(regs) && !is_global_init(current) &&
-               evt->severity == MCE_SEV_ERROR_SYNC) {
-               /*
-                * If we have received a synchronous error when in userspace
-                * kill the task.
-                */
                 _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
                 recovered = 1;
         }
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c

index 6901a06da2f90bddf70386667eaea5c4fe3505a7..e36738291c320575523422e139d4642e04142bd5 100644 (file)
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1775,17 +1775,20 @@ static u64 pnv_pci_ioda_dma_get_required_mask(struct pci_dev *pdev)
  }
  
  static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
-                                  struct pci_bus *bus)
+                                  struct pci_bus *bus,
+                                  bool add_to_group)
  {
         struct pci_dev *dev;
  
         list_for_each_entry(dev, &bus->devices, bus_list) {
                 set_iommu_table_base(&dev->dev, pe->table_group.tables[0]);
                 set_dma_offset(&dev->dev, pe->tce_bypass_base);
-               iommu_add_device(&dev->dev);
+               if (add_to_group)
+                       iommu_add_device(&dev->dev);
  
                 if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
-                       pnv_ioda_setup_bus_dma(pe, dev->subordinate);
+                       pnv_ioda_setup_bus_dma(pe, dev->subordinate,
+                                       add_to_group);
         }
  }
  
@@ -2191,7 +2194,7 @@ found:
                 set_iommu_table_base(&pe->pdev->dev, tbl);
                 iommu_add_device(&pe->pdev->dev);
         } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
-               pnv_ioda_setup_bus_dma(pe, pe->pbus);
+               pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
  
         return;
   fail:
@@ -2426,6 +2429,8 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
  
         pnv_pci_ioda2_set_bypass(pe, false);
         pnv_pci_ioda2_unset_window(&pe->table_group, 0);
+       if (pe->pbus)
+               pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
         pnv_ioda2_table_free(tbl);
  }
  
@@ -2435,6 +2440,8 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
                                                 table_group);
  
         pnv_pci_ioda2_setup_default_config(pe);
+       if (pe->pbus)
+               pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
  }
  
  static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
@@ -2624,6 +2631,9 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
         level_shift = entries_shift + 3;
         level_shift = max_t(unsigned, level_shift, PAGE_SHIFT);
  
+       if ((level_shift - 3) * levels + page_shift >= 60)
+               return -EINVAL;
+
         /* Allocate TCE table */
         addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
                         levels, tce_table_size, &offset, &total_allocated);
@@ -2728,7 +2738,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
         if (pe->flags & PNV_IODA_PE_DEV)
                 iommu_add_device(&pe->pdev->dev);
         else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
-               pnv_ioda_setup_bus_dma(pe, pe->pbus);
+               pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
  }
  
  #ifdef CONFIG_PCI_MSI
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c

index 251060cf171364f1dd4dd08452189133dd713265..8b1fe895daa3f076bf57b6b9fe3283a6ba686fad 100644 (file)
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -751,7 +751,9 @@ void __init hpte_init_pseries(void)
         mmu_hash_ops.flush_hash_range    = pSeries_lpar_flush_hash_range;
         mmu_hash_ops.hpte_clear_all      = pseries_hpte_clear_all;
         mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
-       mmu_hash_ops.resize_hpt          = pseries_lpar_resize_hpt;
+
+       if (firmware_has_feature(FW_FEATURE_HPT_RESIZE))
+               mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
  }
  
  void radix_init_pseries(void)
diff --git a/arch/powerpc/purgatory/trampoline.S b/arch/powerpc/purgatory/trampoline.S

index f9760ccf40323674cecb3f4f14cc98aa8e615cfe..3696ea6c4826b9740398113d207b1679318db2f8 100644 (file)
--- a/arch/powerpc/purgatory/trampoline.S
+++ b/arch/powerpc/purgatory/trampoline.S
@@ -116,13 +116,13 @@ dt_offset:
  
         .data
         .balign 8
-.globl sha256_digest
-sha256_digest:
+.globl purgatory_sha256_digest
+purgatory_sha256_digest:
         .skip   32
-       .size sha256_digest, . - sha256_digest
+       .size purgatory_sha256_digest, . - purgatory_sha256_digest
  
         .balign 8
-.globl sha_regions
-sha_regions:
+.globl purgatory_sha_regions
+purgatory_sha_regions:
         .skip   8 * 2 * 16
-       .size sha_regions, . - sha_regions
+       .size purgatory_sha_regions, . - purgatory_sha_regions
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c

index ada29eaed6e280c08f6d3ee5671c58da9eb06e38..f523ac88315070873eede1c978312569d48953a7 100644 (file)
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -274,7 +274,9 @@ failed:
                         if (bank->disk->major > 0)
                                 unregister_blkdev(bank->disk->major,
                                                 bank->disk->disk_name);
-                       del_gendisk(bank->disk);
+                       if (bank->disk->flags & GENHD_FL_UP)
+                               del_gendisk(bank->disk);
+                       put_disk(bank->disk);
                 }
                 device->dev.platform_data = NULL;
                 if (bank->io_addr != 0)
@@ -299,6 +301,7 @@ axon_ram_remove(struct platform_device *device)
         device_remove_file(&device->dev, &dev_attr_ecc);
         free_irq(bank->irq_id, device);
         del_gendisk(bank->disk);
+       put_disk(bank->disk);
         iounmap((void __iomem *) bank->io_addr);
         kfree(bank);
  
diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c

index f9670eabfcfa70ca338aa0c5f2e10217803c7162..b53f80f0b4d822b8ecc77271ee7ece8b734bee5a 100644 (file)
--- a/arch/powerpc/sysdev/xics/icp-opal.c
+++ b/arch/powerpc/sysdev/xics/icp-opal.c
@@ -91,6 +91,16 @@ static unsigned int icp_opal_get_irq(void)
  
  static void icp_opal_set_cpu_priority(unsigned char cppr)
  {
+       /*
+        * Here be dragons. The caller has asked to allow only IPIs and not
+        * external interrupts. But OPAL XIVE doesn't support that. So instead
+        * of allowing no interrupts allow all. That's still not right, but
+        * currently the only caller who does this is xics_migrate_irqs_away()
+        * and it works in that case.
+        */
+       if (cppr >= DEFAULT_PRIORITY)
+               cppr = LOWEST_PRIORITY;
+
         xics_set_base_cppr(cppr);
         opal_int_set_cppr(cppr);
         iosync();
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c

index 69d858e51ac76f121741337031465cb1fd1ccebb..23efe4e42172210ee7784a9e8db07d6e2f087f7b 100644 (file)
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -20,6 +20,7 @@
  #include <linux/of.h>
  #include <linux/slab.h>
  #include <linux/spinlock.h>
+#include <linux/delay.h>
  
  #include <asm/prom.h>
  #include <asm/io.h>
@@ -198,9 +199,6 @@ void xics_migrate_irqs_away(void)
         /* Remove ourselves from the global interrupt queue */
         xics_set_cpu_giq(xics_default_distrib_server, 0);
  
-       /* Allow IPIs again... */
-       icp_ops->set_priority(DEFAULT_PRIORITY);
-
         for_each_irq_desc(virq, desc) {
                 struct irq_chip *chip;
                 long server;
@@ -255,6 +253,19 @@ void xics_migrate_irqs_away(void)
  unlock:
                 raw_spin_unlock_irqrestore(&desc->lock, flags);
         }
+
+       /* Allow "sufficient" time to drop any in-flight IRQs */
+       mdelay(5);
+
+       /*
+        * Allow IPIs again. This is done at the very end, after migrating all
+        * interrupts, the expectation is that we'll only get woken up by an IPI
+        * interrupt beyond this point, but leave externals masked just to be
+        * safe. If we're using icp-opal this may actually allow all
+        * interrupts anyway, but that should be OK.
+        */
+       icp_ops->set_priority(DEFAULT_PRIORITY);
+
  }
  #endif /* CONFIG_HOTPLUG_CPU */
  
diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c

index fa95041fa9f6844a2ab13ac4dee90967a6a0f84b..33ca29333e1808ae4dc0f9e8875902ae2bb307f5 100644 (file)
--- a/arch/s390/boot/compressed/misc.c
+++ b/arch/s390/boot/compressed/misc.c
@@ -141,31 +141,34 @@ static void check_ipl_parmblock(void *start, unsigned long size)
  
  unsigned long decompress_kernel(void)
  {
-       unsigned long output_addr;
-       unsigned char *output;
+       void *output, *kernel_end;
  
-       output_addr = ((unsigned long) &_end + HEAP_SIZE + 4095UL) & -4096UL;
-       check_ipl_parmblock((void *) 0, output_addr + SZ__bss_start);
-       memset(&_bss, 0, &_ebss - &_bss);
-       free_mem_ptr = (unsigned long)&_end;
-       free_mem_end_ptr = free_mem_ptr + HEAP_SIZE;
-       output = (unsigned char *) output_addr;
+       output = (void *) ALIGN((unsigned long) &_end + HEAP_SIZE, PAGE_SIZE);
+       kernel_end = output + SZ__bss_start;
+       check_ipl_parmblock((void *) 0, (unsigned long) kernel_end);
  
  #ifdef CONFIG_BLK_DEV_INITRD
         /*
          * Move the initrd right behind the end of the decompressed
-        * kernel image.
+        * kernel image. This also prevents initrd corruption caused by
+        * bss clearing since kernel_end will always be located behind the
+        * current bss section.
          */
-       if (INITRD_START && INITRD_SIZE &&
-           INITRD_START < (unsigned long) output + SZ__bss_start) {
-               check_ipl_parmblock(output + SZ__bss_start,
-                                   INITRD_START + INITRD_SIZE);
-               memmove(output + SZ__bss_start,
-                       (void *) INITRD_START, INITRD_SIZE);
-               INITRD_START = (unsigned long) output + SZ__bss_start;
+       if (INITRD_START && INITRD_SIZE && kernel_end > (void *) INITRD_START) {
+               check_ipl_parmblock(kernel_end, INITRD_SIZE);
+               memmove(kernel_end, (void *) INITRD_START, INITRD_SIZE);
+               INITRD_START = (unsigned long) kernel_end;
         }
  #endif
  
+       /*
+        * Clear bss section. free_mem_ptr and free_mem_end_ptr need to be
+        * initialized afterwards since they reside in bss.
+        */
+       memset(&_bss, 0, &_ebss - &_bss);
+       free_mem_ptr = (unsigned long) &_end;
+       free_mem_end_ptr = free_mem_ptr + HEAP_SIZE;
+
         puts("Uncompressing Linux... ");
         __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error);
         puts("Ok, booting the kernel.\n");
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig

index 143b1e00b818493f4cb683c251e1d90ef6a5aa9e..4b176fe83da4c6abeeaec2144635c87337cb3388 100644 (file)
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -609,7 +609,7 @@ CONFIG_SCHED_TRACER=y
  CONFIG_FTRACE_SYSCALLS=y
  CONFIG_STACK_TRACER=y
  CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
+CONFIG_UPROBE_EVENTS=y
  CONFIG_FUNCTION_PROFILER=y
  CONFIG_HIST_TRIGGERS=y
  CONFIG_TRACE_ENUM_MAP_FILE=y
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig

index f05d2d6e10872a417cfb67a9624d7d74f56e5cc6..0de46cc397f6fe7a89f7e26c569287a41dc8a7fb 100644 (file)
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -560,7 +560,7 @@ CONFIG_SCHED_TRACER=y
  CONFIG_FTRACE_SYSCALLS=y
  CONFIG_STACK_TRACER=y
  CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
+CONFIG_UPROBE_EVENTS=y
  CONFIG_FUNCTION_PROFILER=y
  CONFIG_HIST_TRIGGERS=y
  CONFIG_TRACE_ENUM_MAP_FILE=y
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig

index 2358bf33c5efcf2790643f0b8bbd2a8c80a2fc8f..e167557b434c201e421c5bda3d263849859d7609 100644 (file)
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -558,7 +558,7 @@ CONFIG_SCHED_TRACER=y
  CONFIG_FTRACE_SYSCALLS=y
  CONFIG_STACK_TRACER=y
  CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
+CONFIG_UPROBE_EVENTS=y
  CONFIG_FUNCTION_PROFILER=y
  CONFIG_HIST_TRIGGERS=y
  CONFIG_TRACE_ENUM_MAP_FILE=y
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c

index d69ea495c4d748748618b27d7414671529e1f41a..716b17238599f63107b27b6860c030b63dd757ba 100644 (file)
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -474,8 +474,11 @@ static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
                         ret = blkcipher_walk_done(desc, walk, nbytes - n);
                 }
                 if (k < n) {
-                       if (__ctr_paes_set_key(ctx) != 0)
+                       if (__ctr_paes_set_key(ctx) != 0) {
+                               if (locked)
+                                       spin_unlock(&ctrblk_lock);
                                 return blkcipher_walk_done(desc, walk, -EIO);
+                       }
                 }
         }
         if (locked)
diff --git a/arch/s390/defconfig b/arch/s390/defconfig

index 68bfd09f1b02ec23dad7ba4931db828f1286d890..97189dbaf34b2a36dade0738a64eb65bae38aafc 100644 (file)
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -179,7 +179,7 @@ CONFIG_FTRACE_SYSCALLS=y
  CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
  CONFIG_STACK_TRACER=y
  CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
+CONFIG_UPROBE_EVENTS=y
  CONFIG_FUNCTION_PROFILER=y
  CONFIG_TRACE_ENUM_MAP_FILE=y
  CONFIG_KPROBES_SANITY_TEST=y
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h

index d1c407ddf7032de5a43d08aa48438abda7ab1e91..9072bf63a846148c008da47a5ed3a73313b382a3 100644 (file)
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -8,31 +8,27 @@
  #define _S390_CPUTIME_H
  
  #include <linux/types.h>
-#include <asm/div64.h>
+#include <asm/timex.h>
  
  #define CPUTIME_PER_USEC 4096ULL
  #define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC)
  
  /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
  
-typedef unsigned long long __nocast cputime_t;
-typedef unsigned long long __nocast cputime64_t;
-
  #define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new)
  
-static inline unsigned long __div(unsigned long long n, unsigned long base)
-{
-       return n / base;
-}
-
  /*
- * Convert cputime to microseconds and back.
+ * Convert cputime to microseconds.
   */
-static inline unsigned int cputime_to_usecs(const cputime_t cputime)
+static inline u64 cputime_to_usecs(const u64 cputime)
  {
-       return (__force unsigned long long) cputime >> 12;
+       return cputime >> 12;
  }
  
+/*
+ * Convert cputime to nanoseconds.
+ */
+#define cputime_to_nsecs(cputime) tod_to_ns(cputime)
  
  u64 arch_cpu_idle_time(int cpu);
  
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h

index 7ed1972b1920eb45e8544f5b495db75bafa20636..ecec682bb5166a41d9c86025fd3f4e9461f6ec71 100644 (file)
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -24,6 +24,7 @@
   * the S390 page table tree.
   */
  #ifndef __ASSEMBLY__
+#include <asm-generic/5level-fixup.h>
  #include <linux/sched.h>
  #include <linux/mm_types.h>
  #include <linux/page-flags.h>
@@ -1050,6 +1051,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
  {
         if (!MACHINE_HAS_NX)
                 pte_val(entry) &= ~_PAGE_NOEXEC;
+       if (pte_present(entry))
+               pte_val(entry) &= ~_PAGE_UNUSED;
         if (mm_has_pgste(mm))
                 ptep_set_pte_at(mm, addr, ptep, entry);
         else
diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h

index 5ce29fe100baaa0ee0dcaa987901691fb76e4044..fbd9116eb17bf2c73d80952af2108eecdf4db1ed 100644 (file)
--- a/arch/s390/include/asm/sections.h
+++ b/arch/s390/include/asm/sections.h
@@ -4,6 +4,5 @@
  #include <asm-generic/sections.h>
  
  extern char _eshared[], _ehead[];
-extern char __start_ro_after_init[], __end_ro_after_init[];
  
  #endif
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h

index 354344dcc19898bb647722db24f49733b28793c6..118535123f346d9b32bfb140d45884870a99fd2f 100644 (file)
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -206,20 +206,16 @@ static inline unsigned long long get_tod_clock_monotonic(void)
   *    ns = (todval * 125) >> 9;
   *
   * In order to avoid an overflow with the multiplication we can rewrite this.
- * With a split todval == 2^32 * th + tl (th upper 32 bits, tl lower 32 bits)
+ * With a split todval == 2^9 * th + tl (th upper 55 bits, tl lower 9 bits)
   * we end up with
   *
- *    ns = ((2^32 * th + tl) * 125 ) >> 9;
- * -> ns = (2^23 * th * 125) + ((tl * 125) >> 9);
+ *    ns = ((2^9 * th + tl) * 125 ) >> 9;
+ * -> ns = (th * 125) + ((tl * 125) >> 9);
   *
   */
  static inline unsigned long long tod_to_ns(unsigned long long todval)
  {
-       unsigned long long ns;
-
-       ns = ((todval >> 32) << 23) * 125;
-       ns += ((todval & 0xffffffff) * 125) >> 9;
-       return ns;
+       return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9);
  }
  
  #endif
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h

index 136932ff42502027820a94702a924d65b3049622..3ea1554d04b3776e90fa1c311ff227a9a201925c 100644 (file)
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -147,7 +147,7 @@ unsigned long __must_check __copy_to_user(void __user *to, const void *from,
                 "       jg      2b\n"                           \
                 ".popsection\n"                                 \
                 EX_TABLE(0b,3b) EX_TABLE(1b,3b)                 \
-               : "=d" (__rc), "=Q" (*(to))                     \
+               : "=d" (__rc), "+Q" (*(to))                     \
                 : "d" (size), "Q" (*(from)),                    \
                   "d" (__reg0), "K" (-EFAULT)                   \
                 : "cc");                                        \
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h

index 4384bc797a54f9d77dd593123f0cfc567124f792..152de9b796e149ed3745f41351a5cc5e637bb55e 100644 (file)
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -313,7 +313,9 @@
  #define __NR_copy_file_range   375
  #define __NR_preadv2           376
  #define __NR_pwritev2          377
-#define NR_syscalls 378
+/* Number 378 is reserved for guarded storage */
+#define __NR_statx             379
+#define NR_syscalls 380
  
  /* 
   * There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c

index ae2cda5eee5a99b35b73e5b7868edd44cba1c6d2..e89cc2e71db1693c4c03f6e6ccc37ba9297b4012 100644 (file)
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -178,3 +178,4 @@ COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
  COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len);
  COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
  COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
+COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer);
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S

index dff2152350a7ebaaf3df6c8b000eb36b03afd19e..6a7d737d514c4c0064ddd8ef1ca80b824ae60c0c 100644 (file)
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -490,7 +490,7 @@ ENTRY(pgm_check_handler)
         jnz     .Lpgm_svcper            # -> single stepped svc
  1:     CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
         aghi    %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-       j       3f
+       j       4f
  2:     UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
         lg      %r15,__LC_KERNEL_STACK
         lgr     %r14,%r12
@@ -499,8 +499,8 @@ ENTRY(pgm_check_handler)
         tm      __LC_PGM_ILC+2,0x02     # check for transaction abort
         jz      3f
         mvc     __THREAD_trap_tdb(256,%r14),0(%r13)
-3:     la      %r11,STACK_FRAME_OVERHEAD(%r15)
-       stg     %r10,__THREAD_last_break(%r14)
+3:     stg     %r10,__THREAD_last_break(%r14)
+4:     la      %r11,STACK_FRAME_OVERHEAD(%r15)
         stmg    %r0,%r7,__PT_R0(%r11)
         mvc     __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
         stmg    %r8,%r9,__PT_PSW(%r11)
@@ -509,14 +509,14 @@ ENTRY(pgm_check_handler)
         xc      __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
         stg     %r10,__PT_ARGS(%r11)
         tm      __LC_PGM_ILC+3,0x80     # check for per exception
-       jz      4f
+       jz      5f
         tmhh    %r8,0x0001              # kernel per event ?
         jz      .Lpgm_kprobe
         oi      __PT_FLAGS+7(%r11),_PIF_PER_TRAP
         mvc     __THREAD_per_address(8,%r14),__LC_PER_ADDRESS
         mvc     __THREAD_per_cause(2,%r14),__LC_PER_CODE
         mvc     __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
-4:     REENABLE_IRQS
+5:     REENABLE_IRQS
         xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
         larl    %r1,pgm_check_table
         llgh    %r10,__PT_INT_CODE+2(%r11)
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c

index b67dafb7b7cfc58221d786ee9f97b2adc5a61217..e545ffe5155ab0179327cfe4f9f66e677c604041 100644 (file)
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -564,6 +564,8 @@ static struct kset *ipl_kset;
  
  static void __ipl_run(void *unused)
  {
+       if (MACHINE_IS_LPAR && ipl_info.type == IPL_TYPE_CCW)
+               diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
         diag308(DIAG308_LOAD_CLEAR, NULL);
         if (MACHINE_IS_VM)
                 __cpcmd("IPL", NULL, 0, NULL);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c

index 20cd339e11aefc9e190e7c98c5671b94dac46d37..f29e41c5e2ecf6d28018463cf89a2db677dffccc 100644 (file)
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -124,7 +124,10 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp,
         clear_tsk_thread_flag(p, TIF_SINGLE_STEP);
         /* Initialize per thread user and system timer values */
         p->thread.user_timer = 0;
+       p->thread.guest_timer = 0;
         p->thread.system_timer = 0;
+       p->thread.hardirq_timer = 0;
+       p->thread.softirq_timer = 0;
  
         frame->sf.back_chain = 0;
         /* new return point is ret_from_fork */
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c

index 47a973b5b4f184adfa3855828d042bd73d33e61c..5dab859b0d543be205eaa5a176728e87f5e3bfc6 100644 (file)
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -909,13 +909,11 @@ void __init smp_prepare_boot_cpu(void)
  {
         struct pcpu *pcpu = pcpu_devices;
  
+       WARN_ON(!cpu_present(0) || !cpu_online(0));
         pcpu->state = CPU_STATE_CONFIGURED;
-       pcpu->address = stap();
         pcpu->lowcore = (struct lowcore *)(unsigned long) store_prefix();
         S390_lowcore.percpu_offset = __per_cpu_offset[0];
         smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
-       set_cpu_present(0, true);
-       set_cpu_online(0, true);
  }
  
  void __init smp_cpus_done(unsigned int max_cpus)
@@ -924,6 +922,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
  
  void __init smp_setup_processor_id(void)
  {
+       pcpu_devices[0].address = stap();
         S390_lowcore.cpu_nr = 0;
         S390_lowcore.spinlock_lockval = arch_spin_lockval(0);
  }
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S

index 9b59e6212d8fd22cadbc35f9e3546f7aa47e540c..2659b5cfeddba4cd294e71e356d1149cca68314f 100644 (file)
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -386,3 +386,5 @@ SYSCALL(sys_mlock2,compat_sys_mlock2)
  SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */
  SYSCALL(sys_preadv2,compat_sys_preadv2)
  SYSCALL(sys_pwritev2,compat_sys_pwritev2)
+NI_SYSCALL
+SYSCALL(sys_statx,compat_sys_statx)
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S

index 5ccf953962518294e2fc0d1b6e6b633e589f223b..72307f108c40387fd718e9ca1e07ee5cb6ef9cb8 100644 (file)
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -63,11 +63,9 @@ SECTIONS
  
         . = ALIGN(PAGE_SIZE);
         __start_ro_after_init = .;
-       __start_data_ro_after_init = .;
         .data..ro_after_init : {
                  *(.data..ro_after_init)
         }
-       __end_data_ro_after_init = .;
         EXCEPTION_TABLE(16)
         . = ALIGN(PAGE_SIZE);
         __end_ro_after_init = .;
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c

index c14fc902991272be4d761b5f6fe506a64e12ace8..072d84ba42a3725ae1b1bff009bc5e241a264717 100644 (file)
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -111,7 +111,7 @@ static inline u64 scale_vtime(u64 vtime)
  }
  
  static void account_system_index_scaled(struct task_struct *p,
-                                       cputime_t cputime, cputime_t scaled,
+                                       u64 cputime, u64 scaled,
                                         enum cpu_usage_stat index)
  {
         p->stimescaled += cputime_to_nsecs(scaled);
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c

index d55c829a5944c28449e734e227ec4621bcd67d9e..ddbffb715b40fd27a705396ffcd2221af74bab26 100644 (file)
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -168,8 +168,7 @@ union page_table_entry {
                 unsigned long z  : 1; /* Zero Bit */
                 unsigned long i  : 1; /* Page-Invalid Bit */
                 unsigned long p  : 1; /* DAT-Protection Bit */
-               unsigned long co : 1; /* Change-Recording Override */
-               unsigned long    : 8;
+               unsigned long    : 9;
         };
  };
  
@@ -745,8 +744,6 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
                 return PGM_PAGE_TRANSLATION;
         if (pte.z)
                 return PGM_TRANSLATION_SPEC;
-       if (pte.co && !edat1)
-               return PGM_TRANSLATION_SPEC;
         dat_protection |= pte.p;
         raddr.pfra = pte.pfra;
  real_address:
@@ -1182,7 +1179,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
                 rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
         if (!rc && pte.i)
                 rc = PGM_PAGE_TRANSLATION;
-       if (!rc && (pte.z || (pte.co && sg->edat_level < 1)))
+       if (!rc && pte.z)
                 rc = PGM_TRANSLATION_SPEC;
  shadow_page:
         pte.p |= dat_protection;
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c

index b48dc5f1900b5122f62f98669d3f1ffd97955d99..463e5ef02304bb99c352c8468c7cf0ce57f0ba4e 100644 (file)
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -608,12 +608,29 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
  bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
  {
         spinlock_t *ptl;
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
         pgste_t pgste;
         pte_t *ptep;
         pte_t pte;
         bool dirty;
  
-       ptep = get_locked_pte(mm, addr, &ptl);
+       pgd = pgd_offset(mm, addr);
+       pud = pud_alloc(mm, pgd, addr);
+       if (!pud)
+               return false;
+       pmd = pmd_alloc(mm, pud, addr);
+       if (!pmd)
+               return false;
+       /* We can't run guests backed by huge pages, but userspace can
+        * still set them up and then try to migrate them without any
+        * migration support.
+        */
+       if (pmd_large(*pmd))
+               return true;
+
+       ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
         if (unlikely(!ptep))
                 return false;
  
diff --git a/arch/score/include/asm/pgtable.h b/arch/score/include/asm/pgtable.h

index 0553e5cd5985a0a634864a3402a889491a52da91..46ff8fd678a75cd1cf28111961ffec22375be6ac 100644 (file)
--- a/arch/score/include/asm/pgtable.h
+++ b/arch/score/include/asm/pgtable.h
@@ -2,6 +2,7 @@
  #define _ASM_SCORE_PGTABLE_H
  
  #include <linux/const.h>
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopmd.h>
  
  #include <asm/fixmap.h>
diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c

index e359ec67586982d3a47dd3bf0eb38418471836c2..12daf45369b44274a1ba299ecbf8be37311dfe55 100644 (file)
--- a/arch/score/kernel/traps.c
+++ b/arch/score/kernel/traps.c
@@ -24,6 +24,7 @@
   */
  
  #include <linux/extable.h>
+#include <linux/ptrace.h>
  #include <linux/sched/mm.h>
  #include <linux/sched/signal.h>
  #include <linux/sched/debug.h>
diff --git a/arch/score/mm/extable.c b/arch/score/mm/extable.c

index ec871355fc2d60498cee6b245c44476f0dc59604..6736a3ad6286093dd1c5ef957a2a60cf82801d97 100644 (file)
--- a/arch/score/mm/extable.c
+++ b/arch/score/mm/extable.c
@@ -24,6 +24,8 @@
   */
  
  #include <linux/extable.h>
+#include <linux/ptrace.h>
+#include <asm/extable.h>
  
  int fixup_exception(struct pt_regs *regs)
  {
diff --git a/arch/sh/boards/mach-cayman/setup.c b/arch/sh/boards/mach-cayman/setup.c

index 340fd40b381dc348b2bf8e86e48a2f8ee52efdaa..9c292c27e0d7114768a7bf8379df7be3fc157257 100644 (file)
--- a/arch/sh/boards/mach-cayman/setup.c
+++ b/arch/sh/boards/mach-cayman/setup.c
@@ -128,7 +128,6 @@ static int __init smsc_superio_setup(void)
         SMSC_SUPERIO_WRITE_INDEXED(1, SMSC_PRIMARY_INT_INDEX);
         SMSC_SUPERIO_WRITE_INDEXED(12, SMSC_SECONDARY_INT_INDEX);
  
-#ifdef CONFIG_IDE
         /*
          * Only IDE1 exists on the Cayman
          */
@@ -158,7 +157,6 @@ static int __init smsc_superio_setup(void)
         SMSC_SUPERIO_WRITE_INDEXED(0x01, 0xc5); /* GP45 = IDE1_IRQ */
         SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc6); /* GP46 = nIOROP */
         SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc7); /* GP47 = nIOWOP */
-#endif
  
         /* Exit the configuration state */
         outb(SMSC_EXIT_CONFIG_KEY, SMSC_CONFIG_PORT_ADDR);
diff --git a/arch/sh/include/asm/pgtable-2level.h b/arch/sh/include/asm/pgtable-2level.h

index 19bd89db17e71749b1e7bb07355e2152e4c92408..f75cf438725766d2b7340f38ce74f1ea63f0690e 100644 (file)
--- a/arch/sh/include/asm/pgtable-2level.h
+++ b/arch/sh/include/asm/pgtable-2level.h
@@ -1,6 +1,7 @@
  #ifndef __ASM_SH_PGTABLE_2LEVEL_H
  #define __ASM_SH_PGTABLE_2LEVEL_H
  
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopmd.h>
  
  /*
diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h

index 249a985d96482e54bb604daaf129552ba8835257..9b1e776eca31bec7ea936633528011e48b6374ba 100644 (file)
--- a/arch/sh/include/asm/pgtable-3level.h
+++ b/arch/sh/include/asm/pgtable-3level.h
@@ -1,6 +1,7 @@
  #ifndef __ASM_SH_PGTABLE_3LEVEL_H
  #define __ASM_SH_PGTABLE_3LEVEL_H
  
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopud.h>
  
  /*
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig

index 68ac5c7cd982619581dfa65b75ac89aa8e359554..3db2543733a5874f7a8a5e76275eb505cc1c3db3 100644 (file)
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -43,7 +43,7 @@ config SPARC
         select ARCH_HAS_SG_CHAIN
         select CPU_NO_EFFICIENT_FFS
         select HAVE_ARCH_HARDENED_USERCOPY
-       select PROVE_LOCKING_SMALL if PROVE_LOCKING
+       select LOCKDEP_SMALL if LOCKDEP
         select ARCH_WANT_RELAX_ORDER
  
  config SPARC32
@@ -82,6 +82,7 @@ config SPARC64
         select HAVE_ARCH_AUDITSYSCALL
         select ARCH_SUPPORTS_ATOMIC_RMW
         select HAVE_NMI
+       select HAVE_REGS_AND_STACK_ACCESS_API
  
  config ARCH_DEFCONFIG
         string
diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h

index f294dd42fc7d3833ccc5b92fa6077e3b7d869d26..5961b2d8398a9cdfa359483f009f0c30cfcca9a8 100644 (file)
--- a/arch/sparc/include/asm/page_64.h
+++ b/arch/sparc/include/asm/page_64.h
@@ -17,6 +17,7 @@
  
  #define HPAGE_SHIFT            23
  #define REAL_HPAGE_SHIFT       22
+#define HPAGE_2GB_SHIFT                31
  #define HPAGE_256MB_SHIFT      28
  #define HPAGE_64K_SHIFT                16
  #define REAL_HPAGE_SIZE                (_AC(1,UL) << REAL_HPAGE_SHIFT)
@@ -27,7 +28,7 @@
  #define HUGETLB_PAGE_ORDER     (HPAGE_SHIFT - PAGE_SHIFT)
  #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
  #define REAL_HPAGE_PER_HPAGE   (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))
-#define HUGE_MAX_HSTATE                3
+#define HUGE_MAX_HSTATE                4
  #endif
  
  #ifndef __ASSEMBLY__
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h

index 56e49c8f770d6b620eb5811101d8548eb80d5850..6fbd931f0570021fd297db9163ed170fc21aafe2 100644 (file)
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -12,6 +12,7 @@
   * the SpitFire page tables.
   */
  
+#include <asm-generic/5level-fixup.h>
  #include <linux/compiler.h>
  #include <linux/const.h>
  #include <asm/types.h>
@@ -678,26 +679,27 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
         return pte_pfn(pte);
  }
  
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline unsigned long pmd_dirty(pmd_t pmd)
+#define __HAVE_ARCH_PMD_WRITE
+static inline unsigned long pmd_write(pmd_t pmd)
  {
         pte_t pte = __pte(pmd_val(pmd));
  
-       return pte_dirty(pte);
+       return pte_write(pte);
  }
  
-static inline unsigned long pmd_young(pmd_t pmd)
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline unsigned long pmd_dirty(pmd_t pmd)
  {
         pte_t pte = __pte(pmd_val(pmd));
  
-       return pte_young(pte);
+       return pte_dirty(pte);
  }
  
-static inline unsigned long pmd_write(pmd_t pmd)
+static inline unsigned long pmd_young(pmd_t pmd)
  {
         pte_t pte = __pte(pmd_val(pmd));
  
-       return pte_write(pte);
+       return pte_young(pte);
  }
  
  static inline unsigned long pmd_trans_huge(pmd_t pmd)
diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h

index 365d4cb267b4397c8a108eaff279be292c4e52e5..dd27159819ebedce4d0479ec800e91d56706311f 100644 (file)
--- a/arch/sparc/include/asm/processor_32.h
+++ b/arch/sparc/include/asm/processor_32.h
@@ -18,12 +18,6 @@
  #include <asm/signal.h>
  #include <asm/page.h>
  
-/*
- * The sparc has no problems with write protection
- */
-#define wp_works_ok 1
-#define wp_works_ok__is_a_macro /* for versions in ksyms.c */
-
  /* Whee, this is STACK_TOP + PAGE_SIZE and the lowest kernel address too...
   * That one page is used to protect kernel from intruders, so that
   * we can make our access_ok test faster
diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h

index 6448cfc8292f72d329704c92c993e7864ee6ae3d..b58ee90184334224b756360e769c47a0d10e088a 100644 (file)
--- a/arch/sparc/include/asm/processor_64.h
+++ b/arch/sparc/include/asm/processor_64.h
@@ -18,10 +18,6 @@
  #include <asm/ptrace.h>
  #include <asm/page.h>
  
-/* The sparc has no problems with write protection */
-#define wp_works_ok 1
-#define wp_works_ok__is_a_macro /* for versions in ksyms.c */
-
  /*
   * User lives in his very own context, and cannot reference us. Note
   * that TASK_SIZE is a misnomer, it really gives maximum user virtual
diff --git a/arch/sparc/include/asm/ptrace.h b/arch/sparc/include/asm/ptrace.h

index ca57f08bd3dba57e5e06e17de20eeee856e97bed..d73428e4333c980486561799aba8fc31ccc123b9 100644 (file)
--- a/arch/sparc/include/asm/ptrace.h
+++ b/arch/sparc/include/asm/ptrace.h
@@ -83,7 +83,8 @@ unsigned long profile_pc(struct pt_regs *);
  
  #define MAX_REG_OFFSET (offsetof(struct pt_regs, magic))
  
-extern int regs_query_register_offset(const char *name);
+int regs_query_register_offset(const char *name);
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n);
  
  /**
   * regs_get_register() - get register value from its offset
diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h

index 36eee8132c22bac329e99fb7284211e11310ff26..ae77df75bffadd1e376a1baec7cb26286497ffb6 100644 (file)
--- a/arch/sparc/include/uapi/asm/unistd.h
+++ b/arch/sparc/include/uapi/asm/unistd.h
@@ -425,8 +425,9 @@
  #define __NR_copy_file_range   357
  #define __NR_preadv2           358
  #define __NR_pwritev2          359
+#define __NR_statx             360
  
-#define NR_syscalls            360
+#define NR_syscalls            361
  
  /* Bitmask values returned from kern_features system call.  */
  #define KERN_FEATURE_MIXED_MODE_STACK  0x00000001
@@ -442,4 +443,9 @@
  #define __IGNORE_getresgid
  #endif
  
+/* Sparc doesn't have protection keys. */
+#define __IGNORE_pkey_mprotect
+#define __IGNORE_pkey_alloc
+#define __IGNORE_pkey_free
+
  #endif /* _UAPI_SPARC_UNISTD_H */
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S

index 6aa3da152c20008a08e752c4f9708ff6a89e3d72..44101196d02b5dacb5e3c67248b3c3de77c4a9b4 100644 (file)
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -96,6 +96,7 @@ sparc64_boot:
         andn    %g1, PSTATE_AM, %g1
         wrpr    %g1, 0x0, %pstate
         ba,a,pt %xcc, 1f
+        nop
  
         .globl  prom_finddev_name, prom_chosen_path, prom_root_node
         .globl  prom_getprop_name, prom_mmu_name, prom_peer_name
@@ -613,6 +614,7 @@ niagara_tlb_fixup:
          nop
  
         ba,a,pt %xcc, 80f
+        nop
  niagara4_patch:
         call    niagara4_patch_copyops
          nop
@@ -622,6 +624,7 @@ niagara4_patch:
          nop
  
         ba,a,pt %xcc, 80f
+        nop
  
  niagara2_patch:
         call    niagara2_patch_copyops
@@ -632,6 +635,7 @@ niagara2_patch:
          nop
  
         ba,a,pt %xcc, 80f
+        nop
  
  niagara_patch:
         call    niagara_patch_copyops
diff --git a/arch/sparc/kernel/misctrap.S b/arch/sparc/kernel/misctrap.S

index 34b4933900bf7665b30e791565795d5782351b60..9276d2f0dd8684edffa5a2a4f2f72ac4488a8c99 100644 (file)
--- a/arch/sparc/kernel/misctrap.S
+++ b/arch/sparc/kernel/misctrap.S
@@ -82,6 +82,7 @@ do_stdfmna:
         call            handle_stdfmna
          add            %sp, PTREGS_OFF, %o0
         ba,a,pt         %xcc, rtrap
+        nop
         .size           do_stdfmna,.-do_stdfmna
  
         .type           breakpoint_trap,#function
diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c

index df9e731a76f51b923098501ac11ae3b0303c923a..e1d965e90e1697a8205ca505dbf428cb37b30312 100644 (file)
--- a/arch/sparc/kernel/ptrace_64.c
+++ b/arch/sparc/kernel/ptrace_64.c
@@ -351,7 +351,7 @@ static int genregs64_set(struct task_struct *target,
         }
  
         if (!ret) {
-               unsigned long y;
+               unsigned long y = regs->y;
  
                 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
                                          &y,
@@ -1162,3 +1162,39 @@ int regs_query_register_offset(const char *name)
                         return roff->offset;
         return -EINVAL;
  }
+
+/**
+ * regs_within_kernel_stack() - check the address in the stack
+ * @regs:      pt_regs which contains kernel stack pointer.
+ * @addr:      address which is checked.
+ *
+ * regs_within_kernel_stack() checks @addr is within the kernel stack page(s).
+ * If @addr is within the kernel stack, it returns true. If not, returns false.
+ */
+static inline int regs_within_kernel_stack(struct pt_regs *regs,
+                                          unsigned long addr)
+{
+       unsigned long ksp = kernel_stack_pointer(regs) + STACK_BIAS;
+       return ((addr & ~(THREAD_SIZE - 1))  ==
+               (ksp & ~(THREAD_SIZE - 1)));
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs:      pt_regs which contains kernel stack pointer.
+ * @n:         stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
+ * is specified by @regs. If the @n th entry is NOT in the kernel stack,
+ * this returns 0.
+ */
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
+{
+       unsigned long ksp = kernel_stack_pointer(regs) + STACK_BIAS;
+       unsigned long *addr = (unsigned long *)ksp;
+       addr += n;
+       if (regs_within_kernel_stack(regs, (unsigned long)addr))
+               return *addr;
+       else
+               return 0;
+}
diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S

index 216948ca43829d0c0a5be837ff183f86d03dfb49..709a82ebd294c07bd4b87698c695325e5093a7e4 100644 (file)
--- a/arch/sparc/kernel/rtrap_64.S
+++ b/arch/sparc/kernel/rtrap_64.S
@@ -237,6 +237,7 @@ rt_continue:        ldx                     [%sp + PTREGS_OFF + PT_V9_G1], %g1
                 bne,pt                  %xcc, user_rtt_fill_32bit
                  wrpr                   %g1, %cwp
                 ba,a,pt                 %xcc, user_rtt_fill_64bit
+                nop
  
  user_rtt_fill_fixup_dax:
                 ba,pt   %xcc, user_rtt_fill_fixup_common
diff --git a/arch/sparc/kernel/spiterrs.S b/arch/sparc/kernel/spiterrs.S

index 4a73009f66a5727e43ad45dbe1a2f7bded8cd48f..d7e5408428098d1138b0c132baa2d43dff3196a4 100644 (file)
--- a/arch/sparc/kernel/spiterrs.S
+++ b/arch/sparc/kernel/spiterrs.S
@@ -86,6 +86,7 @@ __spitfire_cee_trap_continue:
          rd             %pc, %g7
  
         ba,a,pt         %xcc, 2f
+        nop
  
  1:     ba,pt           %xcc, etrap_irq
          rd             %pc, %g7
diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S

index 6179e19bc9b98ea4542b59bb4953c1f9f2718330..c19f352f46c7e2d6c08f203e4e926982fd00423c 100644 (file)
--- a/arch/sparc/kernel/sun4v_tlb_miss.S
+++ b/arch/sparc/kernel/sun4v_tlb_miss.S
@@ -352,6 +352,7 @@ sun4v_mna:
         call    sun4v_do_mna
          add    %sp, PTREGS_OFF, %o0
         ba,a,pt %xcc, rtrap
+        nop
  
         /* Privileged Action.  */
  sun4v_privact:
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S

index eac7f0db5c8c6269a913152a11941f66f5f3e8d0..5253e895b81b7214626e1afaf0c6157ed00519aa 100644 (file)
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -89,3 +89,4 @@ sys_call_table:
  /*345*/        .long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
  /*350*/        .long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
  /*355*/        .long sys_setsockopt, sys_mlock2, sys_copy_file_range, sys_preadv2, sys_pwritev2
+/*360*/        .long sys_statx
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S

index b0f17ff2ddba2daa75a8e8a646b5661dfe0d36e9..82339f6be0b2d56427a2b9e0f0e75ef5c8ab211a 100644 (file)
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -90,6 +90,7 @@ sys_call_table32:
         .word sys32_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
  /*350*/        .word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
         .word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range, compat_sys_preadv2, compat_sys_pwritev2
+/*360*/        .word sys_statx
  
  #endif /* CONFIG_COMPAT */
  
@@ -171,3 +172,4 @@ sys_call_table:
         .word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
  /*350*/        .word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
         .word sys_setsockopt, sys_mlock2, sys_copy_file_range, sys_preadv2, sys_pwritev2
+/*360*/        .word sys_statx
diff --git a/arch/sparc/kernel/urtt_fill.S b/arch/sparc/kernel/urtt_fill.S

index 5604a2b051d46bb822873a31b9b1f88a85253562..364af3250646817e58ce37c96f6c12faf8b37dde 100644 (file)
--- a/arch/sparc/kernel/urtt_fill.S
+++ b/arch/sparc/kernel/urtt_fill.S
@@ -92,6 +92,7 @@ user_rtt_fill_fixup_common:
                 call    sun4v_data_access_exception
                  nop
                 ba,a,pt %xcc, rtrap
+                nop
  
  1:             call    spitfire_data_access_exception
                  nop
diff --git a/arch/sparc/kernel/winfixup.S b/arch/sparc/kernel/winfixup.S

index 855019a8590ea5d556b71c9ae9356f7dcb629588..1ee173cc3c3943e4883670fd56537c63d199257b 100644 (file)
--- a/arch/sparc/kernel/winfixup.S
+++ b/arch/sparc/kernel/winfixup.S
@@ -152,6 +152,8 @@ fill_fixup_dax:
         call    sun4v_data_access_exception
          nop
         ba,a,pt %xcc, rtrap
+        nop
  1:     call    spitfire_data_access_exception
          nop
         ba,a,pt %xcc, rtrap
+        nop
diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S

index c629dbd121b6e4fe64494c62bcad656747f05ef2..64dcd6cdb606c819e48efe0e5d755edcc816dbea 100644 (file)
--- a/arch/sparc/lib/NG2memcpy.S
+++ b/arch/sparc/lib/NG2memcpy.S
@@ -326,11 +326,13 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
         blu             170f
          nop
         ba,a,pt         %xcc, 180f
+        nop
  
  4:     /* 32 <= low bits < 48 */
         blu             150f
          nop
         ba,a,pt         %xcc, 160f
+        nop
  5:     /* 0 < low bits < 32 */
         blu,a           6f
          cmp            %g2, 8
@@ -338,6 +340,7 @@ FUNC_NAME:  /* %o0=dst, %o1=src, %o2=len */
         blu             130f
          nop
         ba,a,pt         %xcc, 140f
+        nop
  6:     /* 0 < low bits < 16 */
         bgeu            120f
          nop
@@ -475,6 +478,7 @@ FUNC_NAME:  /* %o0=dst, %o1=src, %o2=len */
         brz,pt          %o2, 85f
          sub            %o0, %o1, GLOBAL_SPARE
         ba,a,pt         %XCC, 90f
+        nop
  
         .align          64
  75: /* 16 < len <= 64 */
diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S

index 75bb93b1437f7f6f29ab17c96bc0b4c322f2a373..78ea962edcbee4c974481520de516e751d78a61d 100644 (file)
--- a/arch/sparc/lib/NG4memcpy.S
+++ b/arch/sparc/lib/NG4memcpy.S
@@ -530,4 +530,5 @@ FUNC_NAME:  /* %o0=dst, %o1=src, %o2=len */
         bne,pt          %icc, 1b
          EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1)
         ba,a,pt         %icc, .Lexit
+        nop
         .size           FUNC_NAME, .-FUNC_NAME
diff --git a/arch/sparc/lib/NG4memset.S b/arch/sparc/lib/NG4memset.S

index 41da4bdd95cbff451889992b06c8dd6893dfed30..7c0c81f18837e11902f8b473b7b480bfc13ab5a7 100644 (file)
--- a/arch/sparc/lib/NG4memset.S
+++ b/arch/sparc/lib/NG4memset.S
@@ -102,4 +102,5 @@ NG4bzero:
         bne,pt          %icc, 1b
          add            %o0, 0x30, %o0
         ba,a,pt         %icc, .Lpostloop
+        nop
         .size           NG4bzero,.-NG4bzero
diff --git a/arch/sparc/lib/NGmemcpy.S b/arch/sparc/lib/NGmemcpy.S

index d88c4ed50a0023cd8e2daa06689abc0d46d79f46..cd654a719b278b6c925a730a63195462d6e076df 100644 (file)
--- a/arch/sparc/lib/NGmemcpy.S
+++ b/arch/sparc/lib/NGmemcpy.S
@@ -394,6 +394,7 @@ FUNC_NAME:  /* %i0=dst, %i1=src, %i2=len */
         brz,pt          %i2, 85f
          sub            %o0, %i1, %i3
         ba,a,pt         %XCC, 90f
+        nop
  
         .align          64
  70: /* 16 < len <= 64 */
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c

index 323bc6b6e3ad0eceb0ea4cdb6a1cb6da898227b8..7c29d38e6b99c68c5ac746eb3d3a3fe1da8394a5 100644 (file)
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -143,6 +143,10 @@ static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
         pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V;
  
         switch (shift) {
+       case HPAGE_2GB_SHIFT:
+               hugepage_size = _PAGE_SZ2GB_4V;
+               pte_val(entry) |= _PAGE_PMD_HUGE;
+               break;
         case HPAGE_256MB_SHIFT:
                 hugepage_size = _PAGE_SZ256MB_4V;
                 pte_val(entry) |= _PAGE_PMD_HUGE;
@@ -183,6 +187,9 @@ static unsigned int sun4v_huge_tte_to_shift(pte_t entry)
         unsigned int shift;
  
         switch (tte_szbits) {
+       case _PAGE_SZ2GB_4V:
+               shift = HPAGE_2GB_SHIFT;
+               break;
         case _PAGE_SZ256MB_4V:
                 shift = HPAGE_256MB_SHIFT;
                 break;
@@ -261,7 +268,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
                 if (!pmd)
                         return NULL;
  
-               if (sz == PMD_SHIFT)
+               if (sz >= PMD_SIZE)
                         pte = (pte_t *)pmd;
                 else
                         pte = pte_alloc_map(mm, pmd, addr);
@@ -454,6 +461,22 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
         pgd_t *pgd;
         unsigned long next;
  
+       addr &= PMD_MASK;
+       if (addr < floor) {
+               addr += PMD_SIZE;
+               if (!addr)
+                       return;
+       }
+       if (ceiling) {
+               ceiling &= PMD_MASK;
+               if (!ceiling)
+                       return;
+       }
+       if (end - 1 > ceiling - 1)
+               end -= PMD_SIZE;
+       if (addr > end - 1)
+               return;
+
         pgd = pgd_offset(tlb->mm, addr);
         do {
                 next = pgd_addr_end(addr, end);
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c

index ccd4553289899ee2e585e409189206f67c7e77f2..0cda653ae007645fa01f05b4c40518332159a6ac 100644 (file)
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -337,6 +337,10 @@ static int __init setup_hugepagesz(char *string)
         hugepage_shift = ilog2(hugepage_size);
  
         switch (hugepage_shift) {
+       case HPAGE_2GB_SHIFT:
+               hv_pgsz_mask = HV_PGSZ_MASK_2GB;
+               hv_pgsz_idx = HV_PGSZ_IDX_2GB;
+               break;
         case HPAGE_256MB_SHIFT:
                 hv_pgsz_mask = HV_PGSZ_MASK_256MB;
                 hv_pgsz_idx = HV_PGSZ_IDX_256MB;
@@ -1563,7 +1567,7 @@ bool kern_addr_valid(unsigned long addr)
         if ((long)addr < 0L) {
                 unsigned long pa = __pa(addr);
  
-               if ((addr >> max_phys_bits) != 0UL)
+               if ((pa >> max_phys_bits) != 0UL)
                         return false;
  
                 return pfn_valid(pa >> PAGE_SHIFT);
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c

index def82f6d626f774772807427c6fe6e67fd343bf1..8e76ebba29863ecd18dd2b9ba29147072f3c4518 100644 (file)
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -54,6 +54,7 @@
  enum mbus_module srmmu_modtype;
  static unsigned int hwbug_bitmask;
  int vac_cache_size;
+EXPORT_SYMBOL(vac_cache_size);
  int vac_line_size;
  
  extern struct resource sparc_iomap;
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c

index afda3bbf78542a0297849d65fe7470a5e73716f1..ee8066c3d96c95cea2d174dedf856c429aa6dbb9 100644 (file)
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -154,7 +154,7 @@ static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr,
                 if (pte_val(*pte) & _PAGE_VALID) {
                         bool exec = pte_exec(*pte);
  
-                       tlb_batch_add_one(mm, vaddr, exec, false);
+                       tlb_batch_add_one(mm, vaddr, exec, PAGE_SHIFT);
                 }
                 pte++;
                 vaddr += PAGE_SIZE;
@@ -209,9 +209,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
                         pte_t orig_pte = __pte(pmd_val(orig));
                         bool exec = pte_exec(orig_pte);
  
-                       tlb_batch_add_one(mm, addr, exec, true);
+                       tlb_batch_add_one(mm, addr, exec, REAL_HPAGE_SHIFT);
                         tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec,
-                                       true);
+                                         REAL_HPAGE_SHIFT);
                 } else {
                         tlb_batch_pmd_scan(mm, addr, orig);
                 }
diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c

index 0a04811f06b78ceb7851d70b56de63e4d40b6bf4..bedf08b22a4773c5a104b56f7de8b44c461630c1 100644 (file)
--- a/arch/sparc/mm/tsb.c
+++ b/arch/sparc/mm/tsb.c
@@ -122,7 +122,7 @@ void flush_tsb_user(struct tlb_batch *tb)
  
         spin_lock_irqsave(&mm->context.lock, flags);
  
-       if (tb->hugepage_shift < HPAGE_SHIFT) {
+       if (tb->hugepage_shift < REAL_HPAGE_SHIFT) {
                 base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
                 nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
                 if (tlb_type == cheetah_plus || tlb_type == hypervisor)
@@ -155,7 +155,7 @@ void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr,
  
         spin_lock_irqsave(&mm->context.lock, flags);
  
-       if (hugepage_shift < HPAGE_SHIFT) {
+       if (hugepage_shift < REAL_HPAGE_SHIFT) {
                 base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
                 nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
                 if (tlb_type == cheetah_plus || tlb_type == hypervisor)
diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig

index fd122ef45b0043ce0c091dbe617040c95721cd49..0d925fa0f0c1f270c7bb6ffcacd8f548ede225ec 100644 (file)
--- a/arch/tile/configs/tilegx_defconfig
+++ b/arch/tile/configs/tilegx_defconfig
@@ -249,7 +249,6 @@ CONFIG_USB_EHCI_HCD=y
  CONFIG_USB_OHCI_HCD=y
  CONFIG_USB_STORAGE=y
  CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=y
  CONFIG_RTC_CLASS=y
  CONFIG_RTC_DRV_TILE=y
  CONFIG_EXT2_FS=y
diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig

index eb6a55944191bc2ea03c834f1e034beb8d14d036..149d8e8eacb83c223d186f86d342de9db21f6063 100644 (file)
--- a/arch/tile/configs/tilepro_defconfig
+++ b/arch/tile/configs/tilepro_defconfig
@@ -358,7 +358,6 @@ CONFIG_WATCHDOG_NOWAYOUT=y
  # CONFIG_VGA_ARB is not set
  # CONFIG_USB_SUPPORT is not set
  CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=y
  CONFIG_RTC_CLASS=y
  CONFIG_RTC_DRV_TILE=y
  CONFIG_EXT2_FS=y
diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h

index d26a42279036837b760ea4b93593b45fe4394f83..5f8c615cb5e9bda9a3c1ef6028e553d5e54c3615 100644 (file)
--- a/arch/tile/include/asm/pgtable_32.h
+++ b/arch/tile/include/asm/pgtable_32.h
@@ -74,6 +74,7 @@ extern unsigned long VMALLOC_RESERVE /* = CONFIG_VMALLOC_RESERVE */;
  #define MAXMEM         (_VMALLOC_START - PAGE_OFFSET)
  
  /* We have no pmd or pud since we are strictly a two-level page table */
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopmd.h>
  
  static inline int pud_huge_page(pud_t pud)     { return 0; }
diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h

index e96cec52f6d8aa86c0f9a89fccf4d1081db98f56..96fe58b451188a3f3a31d560036ff3031823f05b 100644 (file)
--- a/arch/tile/include/asm/pgtable_64.h
+++ b/arch/tile/include/asm/pgtable_64.h
@@ -59,6 +59,7 @@
  #ifndef __ASSEMBLY__
  
  /* We have no pud since we are a three-level page table. */
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopud.h>
  
  /*
diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h

index cfbe597524698c9234effb606aadd9ff74299085..179c0ea87a0c3b48e93821d2d1158259c0454b1b 100644 (file)
--- a/arch/um/include/asm/pgtable-2level.h
+++ b/arch/um/include/asm/pgtable-2level.h
@@ -8,6 +8,7 @@
  #ifndef __UM_PGTABLE_2LEVEL_H
  #define __UM_PGTABLE_2LEVEL_H
  
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopmd.h>
  
  /* PGDIR_SHIFT determines what a third-level page table entry can map */
diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h

index bae8523a162fd3b80067260ddfad400bdf480e5b..c4d876dfb9acd14bc11ff6b4230bbff5bbe070fe 100644 (file)
--- a/arch/um/include/asm/pgtable-3level.h
+++ b/arch/um/include/asm/pgtable-3level.h
@@ -7,6 +7,7 @@
  #ifndef __UM_PGTABLE_3LEVEL_H
  #define __UM_PGTABLE_3LEVEL_H
  
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopud.h>
  
  /* PGDIR_SHIFT determines what a third-level page table entry can map */
diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h

index 818d0f5598e3247666de004a1ff004abc5fd23cf..a4f2bef37e70697f215e916118775da8dbc4aad6 100644 (file)
--- a/arch/unicore32/include/asm/pgtable.h
+++ b/arch/unicore32/include/asm/pgtable.h
@@ -12,6 +12,7 @@
  #ifndef __UNICORE_PGTABLE_H__
  #define __UNICORE_PGTABLE_H__
  
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopmd.h>
  #include <asm/cpu-single.h>
  
diff --git a/arch/x86/Makefile b/arch/x86/Makefile

index 2d449337a36051183c8468f469a8816e6c1e9e7c..49d160b781f09609f1c7917a8ee8853c40d6f99f 100644 (file)
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -120,10 +120,6 @@ else
          # -funit-at-a-time shrinks the kernel .text considerably
          # unfortunately it makes reading oopses harder.
          KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)
-
-        # this works around some issues with generating unwind tables in older gccs
-        # newer gccs do it by default
-        KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args)
  endif
  
  ifdef CONFIG_X86_X32
@@ -147,6 +143,45 @@ ifeq ($(CONFIG_KMEMCHECK),y)
         KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy)
  endif
  
+#
+# If the function graph tracer is used with mcount instead of fentry,
+# '-maccumulate-outgoing-args' is needed to prevent a GCC bug
+# (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=42109)
+#
+ifdef CONFIG_FUNCTION_GRAPH_TRACER
+  ifndef CONFIG_HAVE_FENTRY
+       ACCUMULATE_OUTGOING_ARGS := 1
+  else
+    ifeq ($(call cc-option-yn, -mfentry), n)
+       ACCUMULATE_OUTGOING_ARGS := 1
+
+       # GCC ignores '-maccumulate-outgoing-args' when used with '-Os'.
+       # If '-Os' is enabled, disable it and print a warning.
+        ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
+          undefine CONFIG_CC_OPTIMIZE_FOR_SIZE
+         $(warning Disabling CONFIG_CC_OPTIMIZE_FOR_SIZE.  Your compiler does not have -mfentry so you cannot optimize for size with CONFIG_FUNCTION_GRAPH_TRACER.)
+        endif
+
+    endif
+  endif
+endif
+
+#
+# Jump labels need '-maccumulate-outgoing-args' for gcc < 4.5.2 to prevent a
+# GCC bug (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46226).  There's no way
+# to test for this bug at compile-time because the test case needs to execute,
+# which is a no-go for cross compilers.  So check the GCC version instead.
+#
+ifdef CONFIG_JUMP_LABEL
+  ifneq ($(ACCUMULATE_OUTGOING_ARGS), 1)
+       ACCUMULATE_OUTGOING_ARGS = $(call cc-if-fullversion, -lt, 040502, 1)
+  endif
+endif
+
+ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1)
+       KBUILD_CFLAGS += -maccumulate-outgoing-args
+endif
+
  # Stackpointer is addressed different for 32 bit and 64 bit x86
  sp-$(CONFIG_X86_32) := esp
  sp-$(CONFIG_X86_64) := rsp
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu

index 6647ed49c66c9789b7e0d37286653021a6fc7d4c..a45eb15b7cf290a176d287c1255b9f9412495831 100644 (file)
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -45,24 +45,6 @@ cflags-$(CONFIG_MGEODE_LX)   += $(call cc-option,-march=geode,-march=pentium-mmx)
  # cpu entries
  cflags-$(CONFIG_X86_GENERIC)   += $(call tune,generic,$(call tune,i686))
  
-# Work around the pentium-mmx code generator madness of gcc4.4.x which
-# does stack alignment by generating horrible code _before_ the mcount
-# prologue (push %ebp, mov %esp, %ebp) which breaks the function graph
-# tracer assumptions. For i686, generic, core2 this is set by the
-# compiler anyway
-ifeq ($(CONFIG_FUNCTION_GRAPH_TRACER), y)
-ADD_ACCUMULATE_OUTGOING_ARGS := y
-endif
-
-# Work around to a bug with asm goto with first implementations of it
-# in gcc causing gcc to mess up the push and pop of the stack in some
-# uses of asm goto.
-ifeq ($(CONFIG_JUMP_LABEL), y)
-ADD_ACCUMULATE_OUTGOING_ARGS := y
-endif
-
-cflags-$(ADD_ACCUMULATE_OUTGOING_ARGS) += $(call cc-option,-maccumulate-outgoing-args)
-
  # Bug fix for binutils: this option is required in order to keep
  # binutils from generating NOPL instructions against our will.
  ifneq ($(CONFIG_X86_P6_NOP),y)
diff --git a/arch/x86/boot/compressed/error.c b/arch/x86/boot/compressed/error.c

index 6248740b68b5a0c71bddf6eb29e4c6898902d471..31922023de49281d44f7fe893099935a594f5456 100644 (file)
--- a/arch/x86/boot/compressed/error.c
+++ b/arch/x86/boot/compressed/error.c
@@ -4,6 +4,7 @@
   * memcpy() and memmove() are defined for the compressed boot environment.
   */
  #include "misc.h"
+#include "error.h"
  
  void warn(char *m)
  {
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig

index 7ef4a099defcda7f2d4e70fb7b3edec77361f2ec..6205d3b81e6d117b4116c2fa38ea40cea4a46fb8 100644 (file)
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -176,6 +176,7 @@ CONFIG_E1000E=y
  CONFIG_SKY2=y
  CONFIG_FORCEDETH=y
  CONFIG_8139TOO=y
+CONFIG_R8169=y
  CONFIG_FDDI=y
  CONFIG_INPUT_POLLDEV=y
  # CONFIG_INPUT_MOUSEDEV_PSAUX is not set
diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c

index 7853b53959cd35a8d555a5ddebfbb4af1f40562a..3f9d1a83891adf9f47cba058078b447fb1de7ecf 100644 (file)
--- a/arch/x86/entry/vdso/vdso32-setup.c
+++ b/arch/x86/entry/vdso/vdso32-setup.c
@@ -30,8 +30,10 @@ static int __init vdso32_setup(char *s)
  {
         vdso32_enabled = simple_strtoul(s, NULL, 0);
  
-       if (vdso32_enabled > 1)
+       if (vdso32_enabled > 1) {
                 pr_warn("vdso32 values other than 0 and 1 are no longer allowed; vdso disabled\n");
+               vdso32_enabled = 0;
+       }
  
         return 1;
  }
@@ -62,13 +64,18 @@ subsys_initcall(sysenter_setup);
  /* Register vsyscall32 into the ABI table */
  #include <linux/sysctl.h>
  
+static const int zero;
+static const int one = 1;
+
  static struct ctl_table abi_table2[] = {
         {
                 .procname       = "vsyscall32",
                 .data           = &vdso32_enabled,
                 .maxlen         = sizeof(int),
                 .mode           = 0644,
-               .proc_handler   = proc_dointvec
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = (int *)&zero,
+               .extra2         = (int *)&one,
         },
         {}
  };
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c

index afb222b63caeb0217ef34d9b2b193b6b59bd190d..c84584bb940280b56f3b7d6d5365803ec4364505 100644 (file)
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -604,7 +604,7 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
                         return &amd_f15_PMC20;
                 }
         case AMD_EVENT_NB:
-               /* moved to perf_event_amd_uncore.c */
+               /* moved to uncore.c */
                 return &emptyconstraint;
         default:
                 return &emptyconstraint;
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c

index 349d4d17aa7fbd3a6268be3bd6e7bea909e76ccf..580b60f5ac83cea46a75a11185c8ef0a8c2da516 100644 (file)
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2101,8 +2101,8 @@ static int x86_pmu_event_init(struct perf_event *event)
  
  static void refresh_pce(void *ignored)
  {
-       if (current->mm)
-               load_mm_cr4(current->mm);
+       if (current->active_mm)
+               load_mm_cr4(current->active_mm);
  }
  
  static void x86_pmu_event_mapped(struct perf_event *event)
@@ -2110,6 +2110,18 @@ static void x86_pmu_event_mapped(struct perf_event *event)
         if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
                 return;
  
+       /*
+        * This function relies on not being called concurrently in two
+        * tasks in the same mm.  Otherwise one task could observe
+        * perf_rdpmc_allowed > 1 and return all the way back to
+        * userspace with CR4.PCE clear while another task is still
+        * doing on_each_cpu_mask() to propagate CR4.PCE.
+        *
+        * For now, this can't happen because all callers hold mmap_sem
+        * for write.  If this changes, we'll need a different solution.
+        */
+       lockdep_assert_held_exclusive(&current->mm->mmap_sem);
+
         if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
                 on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
  }
@@ -2244,6 +2256,7 @@ void arch_perf_update_userpage(struct perf_event *event,
                                struct perf_event_mmap_page *userpg, u64 now)
  {
         struct cyc2ns_data *data;
+       u64 offset;
  
         userpg->cap_user_time = 0;
         userpg->cap_user_time_zero = 0;
@@ -2251,11 +2264,13 @@ void arch_perf_update_userpage(struct perf_event *event,
                 !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
         userpg->pmc_width = x86_pmu.cntval_bits;
  
-       if (!sched_clock_stable())
+       if (!using_native_sched_clock() || !sched_clock_stable())
                 return;
  
         data = cyc2ns_read_begin();
  
+       offset = data->cyc2ns_offset + __sched_clock_offset;
+
         /*
          * Internal timekeeping for enabled/running/stopped times
          * is always in the local_clock domain.
@@ -2263,7 +2278,7 @@ void arch_perf_update_userpage(struct perf_event *event,
         userpg->cap_user_time = 1;
         userpg->time_mult = data->cyc2ns_mul;
         userpg->time_shift = data->cyc2ns_shift;
-       userpg->time_offset = data->cyc2ns_offset - now;
+       userpg->time_offset = offset - now;
  
         /*
          * cap_user_time_zero doesn't make sense when we're using a different
@@ -2271,7 +2286,7 @@ void arch_perf_update_userpage(struct perf_event *event,
          */
         if (!event->attr.use_clockid) {
                 userpg->cap_user_time_zero = 1;
-               userpg->time_zero = data->cyc2ns_offset;
+               userpg->time_zero = offset;
         }
  
         cyc2ns_read_end(data);
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c

index aff4b5b69d4021aeb0ad4356833ca3c2380a7960..238ae3248ba5594265f14ef37ee6fde5c320675a 100644 (file)
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -1,5 +1,5 @@
  /*
- * perf_event_intel_cstate.c: support cstate residency counters
+ * Support cstate residency counters
   *
   * Copyright (C) 2015, Intel Corp.
   * Author: Kan Liang (kan.liang@intel.com)
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c

index 81b321ace8e0194d3ce18b29fcb42c20b834a918..f924629836a8ec23eefe0642f48c2f898ea84f2b 100644 (file)
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -507,6 +507,9 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
                 cpuc->lbr_entries[i].to         = msr_lastbranch.to;
                 cpuc->lbr_entries[i].mispred    = 0;
                 cpuc->lbr_entries[i].predicted  = 0;
+               cpuc->lbr_entries[i].in_tx      = 0;
+               cpuc->lbr_entries[i].abort      = 0;
+               cpuc->lbr_entries[i].cycles     = 0;
                 cpuc->lbr_entries[i].reserved   = 0;
         }
         cpuc->lbr_stack.nr = i;
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c

index 22054ca49026511f6cbe2ee9ce30ca0f4281c78c..9d05c7e67f6073e3441c164d1bdc6db390507ef0 100644 (file)
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -1,5 +1,5 @@
  /*
- * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters
+ * Support Intel RAPL energy consumption counters
   * Copyright (C) 2013 Google, Inc., Stephane Eranian
   *
   * Intel RAPL interface is specified in the IA-32 Manual Vol3b
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h

index ad986c1e29bccd7d5303d94bcbf2caa9223fcf74..df5989f27b1b6508404af774c55953f5c9b082c4 100644 (file)
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -360,7 +360,7 @@ extern struct list_head pci2phy_map_head;
  extern struct pci_extra_dev *uncore_extra_pci_dev;
  extern struct event_constraint uncore_constraint_empty;
  
-/* perf_event_intel_uncore_snb.c */
+/* uncore_snb.c */
  int snb_uncore_pci_init(void);
  int ivb_uncore_pci_init(void);
  int hsw_uncore_pci_init(void);
@@ -371,7 +371,7 @@ void nhm_uncore_cpu_init(void);
  void skl_uncore_cpu_init(void);
  int snb_pci2phy_map_init(int devid);
  
-/* perf_event_intel_uncore_snbep.c */
+/* uncore_snbep.c */
  int snbep_uncore_pci_init(void);
  void snbep_uncore_cpu_init(void);
  int ivbep_uncore_pci_init(void);
@@ -385,5 +385,5 @@ void knl_uncore_cpu_init(void);
  int skx_uncore_pci_init(void);
  void skx_uncore_cpu_init(void);
  
-/* perf_event_intel_uncore_nhmex.c */
+/* uncore_nhmex.c */
  void nhmex_uncore_cpu_init(void);
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c

index db64baf0e500b4d2172bb2b5980a4e222fbdc84e..8bef70e7f3cc6d242e7841bfa2404d182e5235cd 100644 (file)
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -158,13 +158,13 @@ void hyperv_init(void)
                 clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
                 return;
         }
+register_msr_cs:
  #endif
         /*
          * For 32 bit guests just use the MSR based mechanism for reading
          * the partition counter.
          */
  
-register_msr_cs:
         hyperv_cs = &hyperv_cs_msr;
         if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
                 clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h

index 4e7772387c6e92efc365d271f08ec3fef6208c9d..b04bb6dfed7f8464c1425df50c0fa9d1481dcee2 100644 (file)
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -289,7 +289,8 @@
  #define X86_FEATURE_PKU                (16*32+ 3) /* Protection Keys for Userspace */
  #define X86_FEATURE_OSPKE      (16*32+ 4) /* OS Protection Keys Enable */
  #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
-#define X86_FEATURE_RDPID      (16*32+ 22) /* RDPID instruction */
+#define X86_FEATURE_LA57       (16*32+16) /* 5-level page tables */
+#define X86_FEATURE_RDPID      (16*32+22) /* RDPID instruction */
  
  /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
  #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h

index 9d49c18b5ea9360feb5e5bb1fe378914154f34d5..3762536619f8cb83c17777e60bbd56830d792cde 100644 (file)
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -287,7 +287,7 @@ struct task_struct;
  
  #define        ARCH_DLINFO_IA32                                                \
  do {                                                                   \
-       if (vdso32_enabled) {                                           \
+       if (VDSO_CURRENT_BASE) {                                        \
                 NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY);                    \
                 NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE);        \
         }                                                               \
diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h

index d74747b031ecd2e20dcf437944195a37e7c6bb3b..c4eda791f877b6c67808546ce072da07b9bb8002 100644 (file)
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -46,6 +46,7 @@ struct kvm_page_track_notifier_node {
  };
  
  void kvm_page_track_init(struct kvm *kvm);
+void kvm_page_track_cleanup(struct kvm *kvm);
  
  void kvm_page_track_free_memslot(struct kvm_memory_slot *free,
                                  struct kvm_memory_slot *dont);
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h

index 72277b1028a5f54551962555fa56bfd5aebab15c..50d35e3185f553b92ce1eeba2700f13e33e49258 100644 (file)
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -121,12 +121,9 @@ static inline void native_pmd_clear(pmd_t *pmd)
         *(tmp + 1) = 0;
  }
  
-#if !defined(CONFIG_SMP) || (defined(CONFIG_HIGHMEM64G) && \
-               defined(CONFIG_PARAVIRT))
  static inline void native_pud_clear(pud_t *pudp)
  {
  }
-#endif
  
  static inline void pud_clear(pud_t *pudp)
  {
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h

index 1cfb36b8c024ab07b8334121fc56ac79f2a35371..585ee0d42d18fc162601ff0d8a53827f0d011f5e 100644 (file)
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -62,7 +62,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
  # define set_pud(pudp, pud)            native_set_pud(pudp, pud)
  #endif
  
-#ifndef __PAGETABLE_PMD_FOLDED
+#ifndef __PAGETABLE_PUD_FOLDED
  #define pud_clear(pud)                 native_pud_clear(pud)
  #endif
  
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h

index 8b4de22d64299e8997e8b12270e5c23112f85597..62484333673d98c251d52d1eccc10e762b38478b 100644 (file)
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -273,6 +273,8 @@ static inline pgdval_t pgd_flags(pgd_t pgd)
  }
  
  #if CONFIG_PGTABLE_LEVELS > 3
+#include <asm-generic/5level-fixup.h>
+
  typedef struct { pudval_t pud; } pud_t;
  
  static inline pud_t native_make_pud(pmdval_t val)
@@ -285,6 +287,7 @@ static inline pudval_t native_pud_val(pud_t pud)
         return pud.pud;
  }
  #else
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopud.h>
  
  static inline pudval_t native_pud_val(pud_t pud)
@@ -306,6 +309,7 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
         return pmd.pmd;
  }
  #else
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopmd.h>
  
  static inline pmdval_t native_pmd_val(pmd_t pmd)
diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h

index 34684adb6899ad132e44e159be621d92370b99c0..b3b09b98896d528d2ef7d425a10784e4ebe86106 100644 (file)
--- a/arch/x86/include/asm/pkeys.h
+++ b/arch/x86/include/asm/pkeys.h
@@ -46,6 +46,15 @@ extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
  static inline
  bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
  {
+       /*
+        * "Allocated" pkeys are those that have been returned
+        * from pkey_alloc().  pkey 0 is special, and never
+        * returned from pkey_alloc().
+        */
+       if (pkey <= 0)
+               return false;
+       if (pkey >= arch_max_pkey())
+               return false;
         return mm_pkey_allocation_map(mm) & (1U << pkey);
  }
  
@@ -82,12 +91,6 @@ int mm_pkey_alloc(struct mm_struct *mm)
  static inline
  int mm_pkey_free(struct mm_struct *mm, int pkey)
  {
-       /*
-        * pkey 0 is special, always allocated and can never
-        * be freed.
-        */
-       if (!pkey)
-               return -EINVAL;
         if (!mm_pkey_is_allocated(mm, pkey))
                 return -EINVAL;
  
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h

index 2c1ebeb4d7376db6350b7266048a1d37d800ac86..529bb4a6487a9e42d07d0c4ceb226e41b5808f0f 100644 (file)
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -55,7 +55,8 @@ static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
   * @size:      number of bytes to write back
   *
   * Write back a cache range using the CLWB (cache line write back)
- * instruction.
+ * instruction. Note that @size is internally rounded up to be cache
+ * line size aligned.
   */
  static inline void arch_wb_cache_pmem(void *addr, size_t size)
  {
@@ -69,15 +70,6 @@ static inline void arch_wb_cache_pmem(void *addr, size_t size)
                 clwb(p);
  }
  
-/*
- * copy_from_iter_nocache() on x86 only uses non-temporal stores for iovec
- * iterators, so for other types (bvec & kvec) we must do a cache write-back.
- */
-static inline bool __iter_needs_pmem_wb(struct iov_iter *i)
-{
-       return iter_is_iovec(i) == false;
-}
-
  /**
   * arch_copy_from_iter_pmem - copy data from an iterator to PMEM
   * @addr:      PMEM destination address
@@ -94,7 +86,35 @@ static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
         /* TODO: skip the write-back by always using non-temporal stores */
         len = copy_from_iter_nocache(addr, bytes, i);
  
-       if (__iter_needs_pmem_wb(i))
+       /*
+        * In the iovec case on x86_64 copy_from_iter_nocache() uses
+        * non-temporal stores for the bulk of the transfer, but we need
+        * to manually flush if the transfer is unaligned. A cached
+        * memory copy is used when destination or size is not naturally
+        * aligned. That is:
+        *   - Require 8-byte alignment when size is 8 bytes or larger.
+        *   - Require 4-byte alignment when size is 4 bytes.
+        *
+        * In the non-iovec case the entire destination needs to be
+        * flushed.
+        */
+       if (iter_is_iovec(i)) {
+               unsigned long flushed, dest = (unsigned long) addr;
+
+               if (bytes < 8) {
+                       if (!IS_ALIGNED(dest, 4) || (bytes != 4))
+                               arch_wb_cache_pmem(addr, 1);
+               } else {
+                       if (!IS_ALIGNED(dest, 8)) {
+                               dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
+                               arch_wb_cache_pmem(addr, 1);
+                       }
+
+                       flushed = dest - (unsigned long) addr;
+                       if (bytes > flushed && !IS_ALIGNED(bytes - flushed, 8))
+                               arch_wb_cache_pmem(addr + bytes - 1, 1);
+               }
+       } else
                 arch_wb_cache_pmem(addr, bytes);
  
         return len;
diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h

new file mode 100644 (file)

index 0000000..d7da272
--- /dev/null
+++ b/arch/x86/include/asm/purgatory.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_X86_PURGATORY_H
+#define _ASM_X86_PURGATORY_H
+
+#ifndef __ASSEMBLY__
+#include <linux/purgatory.h>
+
+extern void purgatory(void);
+/*
+ * These forward declarations serve two purposes:
+ *
+ * 1) Make sparse happy when checking arch/purgatory
+ * 2) Document that these are required to be global so the symbol
+ *    lookup in kexec works
+ */
+extern unsigned long purgatory_backup_dest;
+extern unsigned long purgatory_backup_src;
+extern unsigned long purgatory_backup_sz;
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_X86_PURGATORY_H */
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h

index a04eabd43d06621dfadb38d01869f2f22adce783..27e9f9d769b892ef27fa3cf13cb95a7c9563b559 100644 (file)
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -12,6 +12,8 @@ extern int recalibrate_cpu_khz(void);
  
  extern int no_timer_check;
  
+extern bool using_native_sched_clock(void);
+
  /*
   * We use the full linear equation: f(x) = a + b*x, in order to allow
   * a continuous function in the face of dynamic freq changes.
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h

index 6fa85944af83d8ddbbad3a344a31a7920e64e6d0..fc5abff9b7fd63d6b3a01a18061be8b3f752d109 100644 (file)
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -188,7 +188,7 @@ static inline void __native_flush_tlb_single(unsigned long addr)
  
  static inline void __flush_tlb_all(void)
  {
-       if (static_cpu_has(X86_FEATURE_PGE))
+       if (boot_cpu_has(X86_FEATURE_PGE))
                 __flush_tlb_global();
         else
                 __flush_tlb();
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h

index 72e8300b1e8a6a96eef10a918abb5b3b020014aa..9cffb44a3cf5dfedb122c7b31c2f690177e68604 100644 (file)
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -485,15 +485,17 @@ static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr)
  
         if (paddr < uv_hub_info->lowmem_remap_top)
                 paddr |= uv_hub_info->lowmem_remap_base;
-       paddr |= uv_hub_info->gnode_upper;
-       if (m_val)
+
+       if (m_val) {
+               paddr |= uv_hub_info->gnode_upper;
                 paddr = ((paddr << uv_hub_info->m_shift)
                                                 >> uv_hub_info->m_shift) |
                         ((paddr >> uv_hub_info->m_val)
                                                 << uv_hub_info->n_lshift);
-       else
+       } else {
                 paddr |= uv_soc_phys_ram_to_nasid(paddr)
                                                 << uv_hub_info->gpa_shift;
+       }
         return paddr;
  }
  
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h

index 5138dacf8bb8360511f7b3514f8ab0ac1a1e88f8..07244ea16765a6bc77e1107233c65fc7286f0e8b 100644 (file)
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -58,7 +58,7 @@ struct setup_header {
         __u32   header;
         __u16   version;
         __u32   realmode_swtch;
-       __u16   start_sys;
+       __u16   start_sys_seg;
         __u16   kernel_version;
         __u8    type_of_loader;
         __u8    loadflags;
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c

index ae32838cac5fd2251e1ffa0bbb8b8c629e399a84..b2879cc23db470ec8cc2cbeacdea4ff2b94ec1e3 100644 (file)
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -179,10 +179,15 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled)
                 return -EINVAL;
         }
  
+       if (!enabled) {
+               ++disabled_cpus;
+               return -EINVAL;
+       }
+
         if (boot_cpu_physical_apicid != -1U)
                 ver = boot_cpu_apic_version;
  
-       cpu = __generic_processor_info(id, ver, enabled);
+       cpu = generic_processor_info(id, ver);
         if (cpu >= 0)
                 early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid;
  
@@ -710,7 +715,7 @@ static void __init acpi_set_irq_model_ioapic(void)
  #ifdef CONFIG_ACPI_HOTPLUG_CPU
  #include <acpi/processor.h>
  
-int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
+static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
  {
  #ifdef CONFIG_ACPI_NUMA
         int nid;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c

index 4261b3282ad99dd87799683e33b2945bcfb20746..8ccb7ef512e05dd9edaa6a3d7a852f70639a54d2 100644 (file)
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1610,24 +1610,15 @@ static inline void try_to_enable_x2apic(int remap_mode) { }
  static inline void __x2apic_enable(void) { }
  #endif /* !CONFIG_X86_X2APIC */
  
-static int __init try_to_enable_IR(void)
-{
-#ifdef CONFIG_X86_IO_APIC
-       if (!x2apic_enabled() && skip_ioapic_setup) {
-               pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
-               return -1;
-       }
-#endif
-       return irq_remapping_enable();
-}
-
  void __init enable_IR_x2apic(void)
  {
         unsigned long flags;
         int ret, ir_stat;
  
-       if (skip_ioapic_setup)
+       if (skip_ioapic_setup) {
+               pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
                 return;
+       }
  
         ir_stat = irq_remapping_prepare();
         if (ir_stat < 0 && !x2apic_supported())
@@ -1645,7 +1636,7 @@ void __init enable_IR_x2apic(void)
  
         /* If irq_remapping_prepare() succeeded, try to enable it */
         if (ir_stat >= 0)
-               ir_stat = try_to_enable_IR();
+               ir_stat = irq_remapping_enable();
         /* ir_stat contains the remap mode or an error code */
         try_to_enable_x2apic(ir_stat);
  
@@ -2062,17 +2053,17 @@ static int allocate_logical_cpuid(int apicid)
  
         /* Allocate a new cpuid. */
         if (nr_logical_cpuids >= nr_cpu_ids) {
-               WARN_ONCE(1, "Only %d processors supported."
+               WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %i reached. "
                              "Processor %d/0x%x and the rest are ignored.\n",
-                            nr_cpu_ids - 1, nr_logical_cpuids, apicid);
-               return -1;
+                            nr_cpu_ids, nr_logical_cpuids, apicid);
+               return -EINVAL;
         }
  
         cpuid_to_apicid[nr_logical_cpuids] = apicid;
         return nr_logical_cpuids++;
  }
  
-int __generic_processor_info(int apicid, int version, bool enabled)
+int generic_processor_info(int apicid, int version)
  {
         int cpu, max = nr_cpu_ids;
         bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid,
@@ -2130,11 +2121,9 @@ int __generic_processor_info(int apicid, int version, bool enabled)
         if (num_processors >= nr_cpu_ids) {
                 int thiscpu = max + disabled_cpus;
  
-               if (enabled) {
-                       pr_warning("APIC: NR_CPUS/possible_cpus limit of %i "
-                                  "reached. Processor %d/0x%x ignored.\n",
-                                  max, thiscpu, apicid);
-               }
+               pr_warning("APIC: NR_CPUS/possible_cpus limit of %i "
+                          "reached. Processor %d/0x%x ignored.\n",
+                          max, thiscpu, apicid);
  
                 disabled_cpus++;
                 return -EINVAL;
@@ -2186,23 +2175,13 @@ int __generic_processor_info(int apicid, int version, bool enabled)
                 apic->x86_32_early_logical_apicid(cpu);
  #endif
         set_cpu_possible(cpu, true);
-
-       if (enabled) {
-               num_processors++;
-               physid_set(apicid, phys_cpu_present_map);
-               set_cpu_present(cpu, true);
-       } else {
-               disabled_cpus++;
-       }
+       physid_set(apicid, phys_cpu_present_map);
+       set_cpu_present(cpu, true);
+       num_processors++;
  
         return cpu;
  }
  
-int generic_processor_info(int apicid, int version)
-{
-       return __generic_processor_info(apicid, version, true);
-}
-
  int hard_smp_processor_id(void)
  {
         return read_apic_id();
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c

index e9f8f8cdd57085db85dee8247b67150250308a42..86f20cc0a65e2240b2eb5a05fa77af3e866b40d6 100644 (file)
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -1105,7 +1105,8 @@ void __init uv_init_hub_info(struct uv_hub_info_s *hi)
         node_id.v               = uv_read_local_mmr(UVH_NODE_ID);
         uv_cpuid.gnode_shift    = max_t(unsigned int, uv_cpuid.gnode_shift, mn.n_val);
         hi->gnode_extra         = (node_id.s.node_id & ~((1 << uv_cpuid.gnode_shift) - 1)) >> 1;
-       hi->gnode_upper         = (unsigned long)hi->gnode_extra << mn.m_val;
+       if (mn.m_val)
+               hi->gnode_upper = (u64)hi->gnode_extra << mn.m_val;
  
         if (uv_gp_table) {
                 hi->global_mmr_base     = uv_gp_table->mmr_base;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c

index 35a5d5dca2fae5fb68d522658824440f3b736d8b..c36140d788fe215aadb3a8f27a8de040f2c44c06 100644 (file)
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -556,10 +556,6 @@ static void early_init_amd(struct cpuinfo_x86 *c)
         if (c->x86_power & (1 << 8)) {
                 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
                 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
-               if (check_tsc_unstable())
-                       clear_sched_clock_stable();
-       } else {
-               clear_sched_clock_stable();
         }
  
         /* Bit 12 of 8000_0007 edx is accumulated power mechanism. */
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c

index adc0ebd8bed0e17be1716f3fb7c3eab51b7fa0c4..43955ee6715b1876b89ebd615b8eb171bbde1dd0 100644 (file)
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -105,8 +105,6 @@ static void early_init_centaur(struct cpuinfo_x86 *c)
  #ifdef CONFIG_X86_64
         set_cpu_cap(c, X86_FEATURE_SYSENTER32);
  #endif
-
-       clear_sched_clock_stable();
  }
  
  static void init_centaur(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c

index b11b38c3b0bde194b9139ebfa0d3bc251b96f803..58094a1f9e9d301e11d2c93a1ecc126e1715002e 100644 (file)
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -88,7 +88,6 @@ static void default_init(struct cpuinfo_x86 *c)
                         strcpy(c->x86_model_id, "386");
         }
  #endif
-       clear_sched_clock_stable();
  }
  
  static const struct cpu_dev default_cpu = {
@@ -1077,8 +1076,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
          */
         if (this_cpu->c_init)
                 this_cpu->c_init(c);
-       else
-               clear_sched_clock_stable();
  
         /* Disable the PN if appropriate */
         squash_the_stupid_serial_number(c);
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c

index 0a3bc19de0177e93f81ae24c58264e7205406fd6..a70fd61095f8a73baa5eb7c486afd6ff19cd4fd1 100644 (file)
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -185,7 +185,6 @@ static void early_init_cyrix(struct cpuinfo_x86 *c)
                 set_cpu_cap(c, X86_FEATURE_CYRIX_ARR);
                 break;
         }
-       clear_sched_clock_stable();
  }
  
  static void init_cyrix(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c

index fe0a615a051b19a99f9388a7b245b326cee86e11..063197771b8d7ba08f2eafe474cacb0efe9e79d3 100644 (file)
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -162,10 +162,6 @@ static void early_init_intel(struct cpuinfo_x86 *c)
         if (c->x86_power & (1 << 8)) {
                 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
                 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
-               if (check_tsc_unstable())
-                       clear_sched_clock_stable();
-       } else {
-               clear_sched_clock_stable();
         }
  
         /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c

index 0bbe0f3a039f6412f55863195cbb3a36b84ceb22..9ac2a5cdd9c206e83f171847ac04d5bf4f2a3152 100644 (file)
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -28,7 +28,6 @@
  #include <linux/sched/signal.h>
  #include <linux/sched/task.h>
  #include <linux/slab.h>
-#include <linux/cpu.h>
  #include <linux/task_work.h>
  
  #include <uapi/linux/magic.h>
@@ -728,7 +727,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn)
         if (atomic_dec_and_test(&rdtgrp->waitcount) &&
             (rdtgrp->flags & RDT_DELETED)) {
                 kernfs_unbreak_active_protection(kn);
-               kernfs_put(kn);
+               kernfs_put(rdtgrp->kn);
                 kfree(rdtgrp);
         } else {
                 kernfs_unbreak_active_protection(kn);
diff --git a/arch/x86/kernel/cpu/intel_rdt_schemata.c b/arch/x86/kernel/cpu/intel_rdt_schemata.c

index f369cb8db0d5b4332125745dd5a587148f109597..badd2b31a5605f3df24dd250d1f08c9df022b7b2 100644 (file)
--- a/arch/x86/kernel/cpu/intel_rdt_schemata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_schemata.c
@@ -200,11 +200,11 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
         }
  
  out:
-       rdtgroup_kn_unlock(of->kn);
         for_each_enabled_rdt_resource(r) {
                 kfree(r->tmp_cbms);
                 r->tmp_cbms = NULL;
         }
+       rdtgroup_kn_unlock(of->kn);
         return ret ?: nbytes;
  }
  
diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c

index 1e5a50c11d3c3546a59d286cbb84c402682cfb2b..217cd4449bc9db3aedfd6367529bc9ca99fb8443 100644 (file)
--- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
@@ -85,7 +85,7 @@ void mce_gen_pool_process(struct work_struct *__unused)
         head = llist_reverse_order(head);
         llist_for_each_entry_safe(node, tmp, head, llnode) {
                 mce = &node->mce;
-               atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
+               blocking_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
                 gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node));
         }
  }
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h

index 903043e6a62b36a2c395c7aab52da6f85e34fc11..19592ba1a320030a4cbdc59aa8194f14bcefae69 100644 (file)
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -13,7 +13,7 @@ enum severity_level {
         MCE_PANIC_SEVERITY,
  };
  
-extern struct atomic_notifier_head x86_mce_decoder_chain;
+extern struct blocking_notifier_head x86_mce_decoder_chain;
  
  #define ATTR_LEN               16
  #define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c

index 8e9725c607ea6acb7a91deed9b72b2c9a873803e..af44ebeb593fb60f70bb497de3e8f09b4b988333 100644 (file)
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -54,6 +54,8 @@
  
  static DEFINE_MUTEX(mce_chrdev_read_mutex);
  
+static int mce_chrdev_open_count;      /* #times opened */
+
  #define mce_log_get_idx_check(p) \
  ({ \
         RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
@@ -121,7 +123,7 @@ static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
   * CPU/chipset specific EDAC code can register a notifier call here to print
   * MCE errors in a human-readable form.
   */
-ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
+BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain);
  
  /* Do initial initialization of a struct mce */
  void mce_setup(struct mce *m)
@@ -218,7 +220,7 @@ void mce_register_decode_chain(struct notifier_block *nb)
  
         WARN_ON(nb->priority > MCE_PRIO_LOWEST && nb->priority < MCE_PRIO_EDAC);
  
-       atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
+       blocking_notifier_chain_register(&x86_mce_decoder_chain, nb);
  }
  EXPORT_SYMBOL_GPL(mce_register_decode_chain);
  
@@ -226,7 +228,7 @@ void mce_unregister_decode_chain(struct notifier_block *nb)
  {
         atomic_dec(&num_notifiers);
  
-       atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
+       blocking_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
  }
  EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
  
@@ -319,18 +321,7 @@ static void __print_mce(struct mce *m)
  
  static void print_mce(struct mce *m)
  {
-       int ret = 0;
-
         __print_mce(m);
-
-       /*
-        * Print out human-readable details about the MCE error,
-        * (if the CPU has an implementation for that)
-        */
-       ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
-       if (ret == NOTIFY_STOP)
-               return;
-
         pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
  }
  
@@ -598,6 +589,10 @@ static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
         if (atomic_read(&num_notifiers) > 2)
                 return NOTIFY_DONE;
  
+       /* Don't print when mcelog is running */
+       if (mce_chrdev_open_count > 0)
+               return NOTIFY_DONE;
+
         __print_mce(m);
  
         return NOTIFY_DONE;
@@ -1828,7 +1823,6 @@ void mcheck_cpu_clear(struct cpuinfo_x86 *c)
   */
  
  static DEFINE_SPINLOCK(mce_chrdev_state_lock);
-static int mce_chrdev_open_count;      /* #times opened */
  static int mce_chrdev_open_exclu;      /* already open exclusive? */
  
  static int mce_chrdev_open(struct inode *inode, struct file *file)
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c

index 524cc5780a779630d3203d834b0a508097340c67..6e4a047e4b684b0feeeeba851b850c9baebb552f 100644 (file)
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -60,7 +60,7 @@ static const char * const th_names[] = {
         "load_store",
         "insn_fetch",
         "combined_unit",
-       "",
+       "decode_unit",
         "northbridge",
         "execution_unit",
  };
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c

index 8457b49786686f74a737429ec47d04c2f92bab35..d77d07ab310b4317d33e44de10896297bb2a6654 100644 (file)
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -16,8 +16,6 @@ static void early_init_transmeta(struct cpuinfo_x86 *c)
                 if (xlvl >= 0x80860001)
                         c->x86_capability[CPUID_8086_0001_EDX] = cpuid_edx(0x80860001);
         }
-
-       clear_sched_clock_stable();
  }
  
  static void init_transmeta(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c

index 891f4dad7b2c49c81518e15ecee61bc8d4694ff2..22403a28caf52226163ae96779c3f2a79fc3431d 100644 (file)
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -30,7 +30,6 @@
  #include <asm/hypervisor.h>
  #include <asm/timer.h>
  #include <asm/apic.h>
-#include <asm/timer.h>
  
  #undef pr_fmt
  #define pr_fmt(fmt)    "vmware: " fmt
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c

index 8639bb2ae05868ab65d88e44683f44c8651121f3..5b71535407279f3c4d70ce628f9bf5dc8561abea 100644 (file)
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -535,7 +535,7 @@ static void run_sync(void)
  {
         int enable_irqs = irqs_disabled();
  
-       /* We may be called with interrupts disbled (on bootup). */
+       /* We may be called with interrupts disabled (on bootup). */
         if (enable_irqs)
                 local_irq_enable();
         on_each_cpu(do_sync_core, NULL, 1);
@@ -983,6 +983,18 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
         unsigned long return_hooker = (unsigned long)
                                 &return_to_handler;
  
+       /*
+        * When resuming from suspend-to-ram, this function can be indirectly
+        * called from early CPU startup code while the CPU is in real mode,
+        * which would fail miserably.  Make sure the stack pointer is a
+        * virtual address.
+        *
+        * This check isn't as accurate as virt_addr_valid(), but it should be
+        * good enough for this purpose, and it's fast.
+        */
+       if (unlikely((long)__builtin_frame_address(0) >= 0))
+               return;
+
         if (unlikely(ftrace_graph_is_dead()))
                 return;
  
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c

index 54a2372f5dbb1eb0598788e944ad28708b638671..b5785c197e534796d5e477b6cd86a502d229db7c 100644 (file)
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -4,6 +4,7 @@
   *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
   */
  
+#define DISABLE_BRANCH_PROFILING
  #include <linux/init.h>
  #include <linux/linkage.h>
  #include <linux/types.h>
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c

index dc6ba5bda9fc83630c773a80c4adea6871db0a59..89ff7af2de508ba0c34c2ce24c227dea5bf6d973 100644 (file)
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -354,7 +354,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer)
  
                 irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq));
                 irq_domain_activate_irq(irq_get_irq_data(hdev->irq));
-               disable_irq(hdev->irq);
+               disable_hardirq(hdev->irq);
                 irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
                 enable_irq(hdev->irq);
         }
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c

index bdb83e431d8976086e3c17328f01a9bdac2c1c8b..38b64587b31be5611a763df6dafe8434db2a66b5 100644 (file)
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -167,7 +167,7 @@ static int __init boot_params_kdebugfs_init(void)
         struct dentry *dbp, *version, *data;
         int error = -ENOMEM;
  
-       dbp = debugfs_create_dir("boot_params", NULL);
+       dbp = debugfs_create_dir("boot_params", arch_debugfs_dir);
         if (!dbp)
                 return -ENOMEM;
  
diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h

index c6ee63f927ab721dd542b016bcfb22d65a55f114..d688826e5736a18c9f9343ebe278ec2b04bff66d 100644 (file)
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -67,7 +67,7 @@
  #endif
  
  /* Ensure if the instruction can be boostable */
-extern int can_boost(kprobe_opcode_t *instruction);
+extern int can_boost(kprobe_opcode_t *instruction, void *addr);
  /* Recover instruction if given address is probed */
  extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
                                          unsigned long addr);
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c

index 6384eb754a58302a18406c3a9587034bbff3c2c3..993fa4fe4f68694a3fa75406b2e762cfadbbf745 100644 (file)
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -167,12 +167,12 @@ NOKPROBE_SYMBOL(skip_prefixes);
   * Returns non-zero if opcode is boostable.
   * RIP relative instructions are adjusted at copying time in 64 bits mode
   */
-int can_boost(kprobe_opcode_t *opcodes)
+int can_boost(kprobe_opcode_t *opcodes, void *addr)
  {
         kprobe_opcode_t opcode;
         kprobe_opcode_t *orig_opcodes = opcodes;
  
-       if (search_exception_tables((unsigned long)opcodes))
+       if (search_exception_tables((unsigned long)addr))
                 return 0;       /* Page fault may occur on this address. */
  
  retry:
@@ -417,7 +417,7 @@ static int arch_copy_kprobe(struct kprobe *p)
          * __copy_instruction can modify the displacement of the instruction,
          * but it doesn't affect boostable check.
          */
-       if (can_boost(p->ainsn.insn))
+       if (can_boost(p->ainsn.insn, p->addr))
                 p->ainsn.boostable = 0;
         else
                 p->ainsn.boostable = -1;
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c

index 3d1bee9d6a728fd50645d0179986cd1dda6629d0..3e7c6e5a08ffde197c192ab57f3bb38aac590969 100644 (file)
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -178,7 +178,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src)
  
         while (len < RELATIVEJUMP_SIZE) {
                 ret = __copy_instruction(dest + len, src + len);
-               if (!ret || !can_boost(dest + len))
+               if (!ret || !can_boost(dest + len, src + len))
                         return -EINVAL;
                 len += ret;
         }
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c

index 307b1f4543de4bc96c6759c5f81a7faf5c9f443c..857cdbd028675716afad71c0b48974399889e622 100644 (file)
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -194,19 +194,22 @@ static int arch_update_purgatory(struct kimage *image)
  
         /* Setup copying of backup region */
         if (image->type == KEXEC_TYPE_CRASH) {
-               ret = kexec_purgatory_get_set_symbol(image, "backup_dest",
+               ret = kexec_purgatory_get_set_symbol(image,
+                               "purgatory_backup_dest",
                                 &image->arch.backup_load_addr,
                                 sizeof(image->arch.backup_load_addr), 0);
                 if (ret)
                         return ret;
  
-               ret = kexec_purgatory_get_set_symbol(image, "backup_src",
+               ret = kexec_purgatory_get_set_symbol(image,
+                               "purgatory_backup_src",
                                 &image->arch.backup_src_start,
                                 sizeof(image->arch.backup_src_start), 0);
                 if (ret)
                         return ret;
  
-               ret = kexec_purgatory_get_set_symbol(image, "backup_sz",
+               ret = kexec_purgatory_get_set_symbol(image,
+                               "purgatory_backup_sz",
                                 &image->arch.backup_src_sz,
                                 sizeof(image->arch.backup_src_sz), 0);
                 if (ret)
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c

index f088ea4c66e72e5787e6c2052b09bc95291cf131..446c8aa09b9bc1935627579926c0e9b22f7aa12f 100644 (file)
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -166,11 +166,9 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
         spin_lock_irqsave(&desc->lock, flags);
  
         /*
-        * most handlers of type NMI_UNKNOWN never return because
-        * they just assume the NMI is theirs.  Just a sanity check
-        * to manage expectations
+        * Indicate if there are multiple registrations on the
+        * internal NMI handler call chains (SERR and IO_CHECK).
          */
-       WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head));
         WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head));
         WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head));
  
@@ -224,17 +222,6 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs)
         pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
                  reason, smp_processor_id());
  
-       /*
-        * On some machines, PCI SERR line is used to report memory
-        * errors. EDAC makes use of it.
-        */
-#if defined(CONFIG_EDAC)
-       if (edac_handler_set()) {
-               edac_atomic_assert_error();
-               return;
-       }
-#endif
-
         if (panic_on_unrecovered_nmi)
                 nmi_panic(regs, "NMI: Not continuing");
  
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c

index e244c19a2451aa7d4dfa8bf34b1834e48f68ec54..067f9813fd2cf7c15d5a1d297b537eedf6ca7959 100644 (file)
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -223,6 +223,22 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
                         DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
                 },
         },
+       {       /* Handle problems with rebooting on ASUS EeeBook X205TA */
+               .callback = set_acpi_reboot,
+               .ident = "ASUS EeeBook X205TA",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "X205TA"),
+               },
+       },
+       {       /* Handle problems with rebooting on ASUS EeeBook X205TAW */
+               .callback = set_acpi_reboot,
+               .ident = "ASUS EeeBook X205TAW",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"),
+               },
+       },
  
         /* Certec */
         {       /* Handle problems with rebooting on Certec BPC600 */
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c

index 396c042e9d0ee58873de5c68e6e7e22186070147..cc30a74e4adb2c3499b52488c49498dea1851ae7 100644 (file)
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -846,7 +846,7 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
                        task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
                        me->comm, me->pid, where, frame,
                        regs->ip, regs->sp, regs->orig_ax);
-               print_vma_addr(" in ", regs->ip);
+               print_vma_addr(KERN_CONT " in ", regs->ip);
                 pr_cont("\n");
         }
  
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c

index ec1f756f9dc9ace1badccd544b6032d64e520360..71beb28600d4531d93a9966401a1ca082a9f0c2e 100644 (file)
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -151,8 +151,8 @@ int __copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from,
  
                                 if (from->si_signo == SIGSEGV) {
                                         if (from->si_code == SEGV_BNDERR) {
-                                               compat_uptr_t lower = (unsigned long)&to->si_lower;
-                                               compat_uptr_t upper = (unsigned long)&to->si_upper;
+                                               compat_uptr_t lower = (unsigned long)from->si_lower;
+                                               compat_uptr_t upper = (unsigned long)from->si_upper;
                                                 put_user_ex(lower, &to->si_lower);
                                                 put_user_ex(upper, &to->si_upper);
                                         }
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c

index 948443e115c147c28a6445eca725fad698d1a56f..4e496379a871687281ddc0c69c0e10d7ec036f09 100644 (file)
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -255,7 +255,7 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
                 pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx",
                         tsk->comm, tsk->pid, str,
                         regs->ip, regs->sp, error_code);
-               print_vma_addr(" in ", regs->ip);
+               print_vma_addr(KERN_CONT " in ", regs->ip);
                 pr_cont("\n");
         }
  
@@ -519,7 +519,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
                 pr_info("%s[%d] general protection ip:%lx sp:%lx error:%lx",
                         tsk->comm, task_pid_nr(tsk),
                         regs->ip, regs->sp, error_code);
-               print_vma_addr(" in ", regs->ip);
+               print_vma_addr(KERN_CONT " in ", regs->ip);
                 pr_cont("\n");
         }
  
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c

index 46bcda4cb1c2f84762bc409bf4c89b1528ba1c52..714dfba6a1e713fb6b5f4268c318f913bb266628 100644 (file)
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -327,9 +327,16 @@ unsigned long long sched_clock(void)
  {
         return paravirt_sched_clock();
  }
+
+bool using_native_sched_clock(void)
+{
+       return pv_time_ops.sched_clock == native_sched_clock;
+}
  #else
  unsigned long long
  sched_clock(void) __attribute__((alias("native_sched_clock")));
+
+bool using_native_sched_clock(void) { return true; }
  #endif
  
  int check_tsc_unstable(void)
@@ -1112,8 +1119,10 @@ static void tsc_cs_mark_unstable(struct clocksource *cs)
  {
         if (tsc_unstable)
                 return;
+
         tsc_unstable = 1;
-       clear_sched_clock_stable();
+       if (using_native_sched_clock())
+               clear_sched_clock_stable();
         disable_sched_clock_irqtime();
         pr_info("Marking TSC unstable due to clocksource watchdog\n");
  }
@@ -1135,18 +1144,20 @@ static struct clocksource clocksource_tsc = {
  
  void mark_tsc_unstable(char *reason)
  {
-       if (!tsc_unstable) {
-               tsc_unstable = 1;
+       if (tsc_unstable)
+               return;
+
+       tsc_unstable = 1;
+       if (using_native_sched_clock())
                 clear_sched_clock_stable();
-               disable_sched_clock_irqtime();
-               pr_info("Marking TSC unstable due to %s\n", reason);
-               /* Change only the rating, when not registered */
-               if (clocksource_tsc.mult)
-                       clocksource_mark_unstable(&clocksource_tsc);
-               else {
-                       clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE;
-                       clocksource_tsc.rating = 0;
-               }
+       disable_sched_clock_irqtime();
+       pr_info("Marking TSC unstable due to %s\n", reason);
+       /* Change only the rating, when not registered */
+       if (clocksource_tsc.mult) {
+               clocksource_mark_unstable(&clocksource_tsc);
+       } else {
+               clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE;
+               clocksource_tsc.rating = 0;
         }
  }
  
@@ -1322,6 +1333,8 @@ static int __init init_tsc_clocksource(void)
          * the refined calibration and directly register it as a clocksource.
          */
         if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) {
+               if (boot_cpu_has(X86_FEATURE_ART))
+                       art_related_clocksource = &clocksource_tsc;
                 clocksource_register_khz(&clocksource_tsc, tsc_khz);
                 return 0;
         }
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c

index 478d15dbaee41b251c8bb28b59183e2b6c733326..08339262b666e56f2623406a10c42f3184c83e29 100644 (file)
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -82,19 +82,43 @@ static size_t regs_size(struct pt_regs *regs)
         return sizeof(*regs);
  }
  
+#ifdef CONFIG_X86_32
+#define GCC_REALIGN_WORDS 3
+#else
+#define GCC_REALIGN_WORDS 1
+#endif
+
  static bool is_last_task_frame(struct unwind_state *state)
  {
-       unsigned long bp = (unsigned long)state->bp;
-       unsigned long regs = (unsigned long)task_pt_regs(state->task);
+       unsigned long *last_bp = (unsigned long *)task_pt_regs(state->task) - 2;
+       unsigned long *aligned_bp = last_bp - GCC_REALIGN_WORDS;
  
         /*
          * We have to check for the last task frame at two different locations
          * because gcc can occasionally decide to realign the stack pointer and
-        * change the offset of the stack frame by a word in the prologue of a
-        * function called by head/entry code.
+        * change the offset of the stack frame in the prologue of a function
+        * called by head/entry code.  Examples:
+        *
+        * <start_secondary>:
+        *      push   %edi
+        *      lea    0x8(%esp),%edi
+        *      and    $0xfffffff8,%esp
+        *      pushl  -0x4(%edi)
+        *      push   %ebp
+        *      mov    %esp,%ebp
+        *
+        * <x86_64_start_kernel>:
+        *      lea    0x8(%rsp),%r10
+        *      and    $0xfffffffffffffff0,%rsp
+        *      pushq  -0x8(%r10)
+        *      push   %rbp
+        *      mov    %rsp,%rbp
+        *
+        * Note that after aligning the stack, it pushes a duplicate copy of
+        * the return address before pushing the frame pointer.
          */
-       return bp == regs - FRAME_HEADER_SIZE ||
-              bp == regs - FRAME_HEADER_SIZE - sizeof(long);
+       return (state->bp == last_bp ||
+               (state->bp == aligned_bp && *(aligned_bp+1) == *(last_bp+1)));
  }
  
  /*
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c

index 73ea24d4f119c8dce2a0d3e5bfc24ef3d7562d3a..047b17a26269610b9cc083899cafaa6ca236eb5b 100644 (file)
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -657,6 +657,9 @@ void kvm_pic_destroy(struct kvm *kvm)
  {
         struct kvm_pic *vpic = kvm->arch.vpic;
  
+       if (!vpic)
+               return;
+
         kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master);
         kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave);
         kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr);
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c

index 6e219e5c07d27c5dc41786953b1114b1e475e346..289270a6aecbb478ea14cc786c72fcfdf5058350 100644 (file)
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -635,6 +635,9 @@ void kvm_ioapic_destroy(struct kvm *kvm)
  {
         struct kvm_ioapic *ioapic = kvm->arch.vioapic;
  
+       if (!ioapic)
+               return;
+
         cancel_delayed_work_sync(&ioapic->eoi_inject);
         kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
         kvm->arch.vioapic = NULL;
diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c

index 37942e419c32e599a4ba05d3b75a77680f0065d9..60168cdd05463e2e18c993e20dfdeed7986808ce 100644 (file)
--- a/arch/x86/kvm/page_track.c
+++ b/arch/x86/kvm/page_track.c
@@ -160,6 +160,14 @@ bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn,
         return !!ACCESS_ONCE(slot->arch.gfn_track[mode][index]);
  }
  
+void kvm_page_track_cleanup(struct kvm *kvm)
+{
+       struct kvm_page_track_notifier_head *head;
+
+       head = &kvm->arch.track_notifier_head;
+       cleanup_srcu_struct(&head->track_srcu);
+}
+
  void kvm_page_track_init(struct kvm *kvm)
  {
         struct kvm_page_track_notifier_head *head;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c

index d1efe2c62b3f8d0db7392970cdfd8e018dd3ac06..5fba70646c327941d231ab6dfa459e8a8536a115 100644 (file)
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1379,6 +1379,9 @@ static void avic_vm_destroy(struct kvm *kvm)
         unsigned long flags;
         struct kvm_arch *vm_data = &kvm->arch;
  
+       if (!avic)
+               return;
+
         avic_free_vm_id(vm_data->avic_vm_id);
  
         if (vm_data->avic_logical_id_table_page)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c

index 283aa8601833509b9cf792b919dd3f243a78f389..259e9b28ccf8e7ff2434d6911ffcfbfc4ef89f42 100644 (file)
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1239,6 +1239,11 @@ static inline bool cpu_has_vmx_invvpid_global(void)
         return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
  }
  
+static inline bool cpu_has_vmx_invvpid(void)
+{
+       return vmx_capability.vpid & VMX_VPID_INVVPID_BIT;
+}
+
  static inline bool cpu_has_vmx_ept(void)
  {
         return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -2753,7 +2758,6 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
                 SECONDARY_EXEC_RDTSCP |
                 SECONDARY_EXEC_DESC |
                 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
-               SECONDARY_EXEC_ENABLE_VPID |
                 SECONDARY_EXEC_APIC_REGISTER_VIRT |
                 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
                 SECONDARY_EXEC_WBINVD_EXITING |
@@ -2781,10 +2785,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
          * though it is treated as global context.  The alternative is
          * not failing the single-context invvpid, and it is worse.
          */
-       if (enable_vpid)
+       if (enable_vpid) {
+               vmx->nested.nested_vmx_secondary_ctls_high |=
+                       SECONDARY_EXEC_ENABLE_VPID;
                 vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT |
                         VMX_VPID_EXTENT_SUPPORTED_MASK;
-       else
+       } else
                 vmx->nested.nested_vmx_vpid_caps = 0;
  
         if (enable_unrestricted_guest)
@@ -4024,6 +4030,12 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
         __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid);
  }
  
+static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu)
+{
+       if (enable_ept)
+               vmx_flush_tlb(vcpu);
+}
+
  static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
  {
         ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
@@ -6517,8 +6529,10 @@ static __init int hardware_setup(void)
         if (boot_cpu_has(X86_FEATURE_NX))
                 kvm_enable_efer_bits(EFER_NX);
  
-       if (!cpu_has_vmx_vpid())
+       if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
+               !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
                 enable_vpid = 0;
+
         if (!cpu_has_vmx_shadow_vmcs())
                 enable_shadow_vmcs = 0;
         if (enable_shadow_vmcs)
@@ -7258,9 +7272,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu)
  static int handle_vmclear(struct kvm_vcpu *vcpu)
  {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
+       u32 zero = 0;
         gpa_t vmptr;
-       struct vmcs12 *vmcs12;
-       struct page *page;
  
         if (!nested_vmx_check_permission(vcpu))
                 return 1;
@@ -7271,22 +7284,9 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
         if (vmptr == vmx->nested.current_vmptr)
                 nested_release_vmcs12(vmx);
  
-       page = nested_get_page(vcpu, vmptr);
-       if (page == NULL) {
-               /*
-                * For accurate processor emulation, VMCLEAR beyond available
-                * physical memory should do nothing at all. However, it is
-                * possible that a nested vmx bug, not a guest hypervisor bug,
-                * resulted in this case, so let's shut down before doing any
-                * more damage:
-                */
-               kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
-               return 1;
-       }
-       vmcs12 = kmap(page);
-       vmcs12->launch_state = 0;
-       kunmap(page);
-       nested_release_page(page);
+       kvm_vcpu_write_guest(vcpu,
+                       vmptr + offsetof(struct vmcs12, launch_state),
+                       &zero, sizeof(zero));
  
         nested_free_vmcs02(vmx, vmptr);
  
@@ -8198,6 +8198,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
         case EXIT_REASON_PREEMPTION_TIMER:
                 return false;
+       case EXIT_REASON_PML_FULL:
+               /* We don't expose PML support to L1. */
+               return false;
         default:
                 return true;
         }
@@ -8515,7 +8518,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
             && kvm_vmx_exit_handlers[exit_reason])
                 return kvm_vmx_exit_handlers[exit_reason](vcpu);
         else {
-               WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason);
+               vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
+                               exit_reason);
                 kvm_queue_exception(vcpu, UD_VECTOR);
                 return 1;
         }
@@ -8561,6 +8565,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
         } else {
                 sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
                 sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+               vmx_flush_tlb_ept_only(vcpu);
         }
         vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
  
@@ -8586,8 +8591,10 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
          */
         if (!is_guest_mode(vcpu) ||
             !nested_cpu_has2(get_vmcs12(&vmx->vcpu),
-                            SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
+                            SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
                 vmcs_write64(APIC_ACCESS_ADDR, hpa);
+               vmx_flush_tlb_ept_only(vcpu);
+       }
  }
  
  static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
@@ -9694,10 +9701,8 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
                 return false;
  
         page = nested_get_page(vcpu, vmcs12->msr_bitmap);
-       if (!page) {
-               WARN_ON(1);
+       if (!page)
                 return false;
-       }
         msr_bitmap_l1 = (unsigned long *)kmap(page);
  
         memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
@@ -9990,7 +9995,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
  {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
         u32 exec_control;
-       bool nested_ept_enabled = false;
  
         vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
         vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -10137,8 +10141,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
                                 vmcs12->guest_intr_status);
                 }
  
-               nested_ept_enabled = (exec_control & SECONDARY_EXEC_ENABLE_EPT) != 0;
-
                 /*
                  * Write an illegal value to APIC_ACCESS_ADDR. Later,
                  * nested_get_vmcs12_pages will either fix it up or
@@ -10268,9 +10270,24 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
  
         }
  
+       if (enable_pml) {
+               /*
+                * Conceptually we want to copy the PML address and index from
+                * vmcs01 here, and then back to vmcs01 on nested vmexit. But,
+                * since we always flush the log on each vmexit, this happens
+                * to be equivalent to simply resetting the fields in vmcs02.
+                */
+               ASSERT(vmx->pml_pg);
+               vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
+               vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+       }
+
         if (nested_cpu_has_ept(vmcs12)) {
                 kvm_mmu_unload(vcpu);
                 nested_ept_init_mmu_context(vcpu);
+       } else if (nested_cpu_has2(vmcs12,
+                                  SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+               vmx_flush_tlb_ept_only(vcpu);
         }
  
         /*
@@ -10298,12 +10315,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
         vmx_set_efer(vcpu, vcpu->arch.efer);
  
         /* Shadow page tables on either EPT or shadow page tables. */
-       if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_ept_enabled,
+       if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
                                 entry_failure_code))
                 return 1;
  
-       kvm_mmu_reset_context(vcpu);
-
         if (!enable_ept)
                 vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
  
@@ -11072,6 +11087,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
                 vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
                 vmx_set_virtual_x2apic_mode(vcpu,
                                 vcpu->arch.apic_base & X2APIC_ENABLE);
+       } else if (!nested_cpu_has_ept(vmcs12) &&
+                  nested_cpu_has2(vmcs12,
+                                  SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+               vmx_flush_tlb_ept_only(vcpu);
         }
  
         /* This is needed for same reason as it was needed in prepare_vmcs02 */
@@ -11121,8 +11140,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
   */
  static void vmx_leave_nested(struct kvm_vcpu *vcpu)
  {
-       if (is_guest_mode(vcpu))
+       if (is_guest_mode(vcpu)) {
+               to_vmx(vcpu)->nested.nested_run_pending = 0;
                 nested_vmx_vmexit(vcpu, -1, 0, 0);
+       }
         free_nested(to_vmx(vcpu));
  }
  
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c

index 1faf620a6fdc206705a03357d3a8ec5814b2c790..ccbd45ecd41a3fa5c850cf924cccbb2723aecc92 100644 (file)
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8153,11 +8153,12 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
         if (kvm_x86_ops->vm_destroy)
                 kvm_x86_ops->vm_destroy(kvm);
         kvm_iommu_unmap_guest(kvm);
-       kfree(kvm->arch.vpic);
-       kfree(kvm->arch.vioapic);
+       kvm_pic_destroy(kvm);
+       kvm_ioapic_destroy(kvm);
         kvm_free_vcpus(kvm);
         kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
         kvm_mmu_uninit_vm(kvm);
+       kvm_page_track_cleanup(kvm);
  }
  
  void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
@@ -8566,11 +8567,11 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
  {
         struct x86_exception fault;
  
-       trace_kvm_async_pf_ready(work->arch.token, work->gva);
         if (work->wakeup_all)
                 work->arch.token = ~0; /* broadcast wakeup */
         else
                 kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
+       trace_kvm_async_pf_ready(work->arch.token, work->gva);
  
         if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
             !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c

index a8e91ae89fb3048c78aa4a279565070bb1958902..29df077cb0899be78cf604a3e21eb7262a729159 100644 (file)
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -93,6 +93,13 @@ static void delay_mwaitx(unsigned long __loops)
  {
         u64 start, end, delay, loops = __loops;
  
+       /*
+        * Timer value of 0 causes MWAITX to wait indefinitely, unless there
+        * is a store on the memory monitored by MONITORX.
+        */
+       if (loops == 0)
+               return;
+
         start = rdtsc_ordered();
  
         for (;;) {
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S

index 779782f5832476582becc24e5a0f0f5b10ea0b53..9a53a06e5a3efcb62f9563a6161fd98bbc22d617 100644 (file)
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -290,7 +290,7 @@ EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled)
         _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
         _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
         _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
-       _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
+       _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
         _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
         _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
         _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c

index 99c7805a96937c17fffa7b92eb72a8b8c776ccbb..1f3b6ef105cda5732146fa6121c35f75ada9c0f5 100644 (file)
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -106,32 +106,35 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
                 unsigned long end, int write, struct page **pages, int *nr)
  {
         struct dev_pagemap *pgmap = NULL;
-       int nr_start = *nr;
-       pte_t *ptep;
+       int nr_start = *nr, ret = 0;
+       pte_t *ptep, *ptem;
  
-       ptep = pte_offset_map(&pmd, addr);
+       /*
+        * Keep the original mapped PTE pointer (ptem) around since we
+        * might increment ptep off the end of the page when finishing
+        * our loop iteration.
+        */
+       ptem = ptep = pte_offset_map(&pmd, addr);
         do {
                 pte_t pte = gup_get_pte(ptep);
                 struct page *page;
  
                 /* Similar to the PMD case, NUMA hinting must take slow path */
-               if (pte_protnone(pte)) {
-                       pte_unmap(ptep);
-                       return 0;
-               }
+               if (pte_protnone(pte))
+                       break;
+
+               if (!pte_allows_gup(pte_val(pte), write))
+                       break;
  
                 if (pte_devmap(pte)) {
                         pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
                         if (unlikely(!pgmap)) {
                                 undo_dev_pagemap(nr, nr_start, pages);
-                               pte_unmap(ptep);
-                               return 0;
+                               break;
                         }
-               } else if (!pte_allows_gup(pte_val(pte), write) ||
-                          pte_special(pte)) {
-                       pte_unmap(ptep);
-                       return 0;
-               }
+               } else if (pte_special(pte))
+                       break;
+
                 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
                 page = pte_page(pte);
                 get_page(page);
@@ -141,9 +144,11 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
                 (*nr)++;
  
         } while (ptep++, addr += PAGE_SIZE, addr != end);
-       pte_unmap(ptep - 1);
+       if (addr == end)
+               ret = 1;
+       pte_unmap(ptem);
  
-       return 1;
+       return ret;
  }
  
  static inline void get_head_page_multiple(struct page *page, int nr)
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c

index 22af912d66d258f413414c7355ac82da2814b074..889e7619a0914d87ad49dbb5b960933e5dd316ad 100644 (file)
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -643,21 +643,40 @@ void __init init_mem_mapping(void)
   * devmem_is_allowed() checks to see if /dev/mem access to a certain address
   * is valid. The argument is a physical page number.
   *
- *
- * On x86, access has to be given to the first megabyte of ram because that area
- * contains BIOS code and data regions used by X and dosemu and similar apps.
- * Access has to be given to non-kernel-ram areas as well, these contain the PCI
- * mmio resources as well as potential bios/acpi data regions.
+ * On x86, access has to be given to the first megabyte of RAM because that
+ * area traditionally contains BIOS code and data regions used by X, dosemu,
+ * and similar apps. Since they map the entire memory range, the whole range
+ * must be allowed (for mapping), but any areas that would otherwise be
+ * disallowed are flagged as being "zero filled" instead of rejected.
+ * Access has to be given to non-kernel-ram areas as well; these contain the
+ * PCI mmio resources as well as potential bios/acpi data regions.
   */
  int devmem_is_allowed(unsigned long pagenr)
  {
-       if (pagenr < 256)
-               return 1;
-       if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
+       if (page_is_ram(pagenr)) {
+               /*
+                * For disallowed memory regions in the low 1MB range,
+                * request that the page be shown as all zeros.
+                */
+               if (pagenr < 256)
+                       return 2;
+
+               return 0;
+       }
+
+       /*
+        * This must follow RAM test, since System RAM is considered a
+        * restricted resource under CONFIG_STRICT_IOMEM.
+        */
+       if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) {
+               /* Low 1MB bypasses iomem restrictions. */
+               if (pagenr < 256)
+                       return 1;
+
                 return 0;
-       if (!page_is_ram(pagenr))
-               return 1;
-       return 0;
+       }
+
+       return 1;
  }
  
  void free_init_pages(char *what, unsigned long begin, unsigned long end)
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c

index 8d63d7a104c3c445805dcf24a59fff2756a17b01..4c90cfdc128b832c6065cdb8830f89d16bff63dd 100644 (file)
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -1,3 +1,4 @@
+#define DISABLE_BRANCH_PROFILING
  #define pr_fmt(fmt) "kasan: " fmt
  #include <linux/bootmem.h>
  #include <linux/kasan.h>
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c

index 887e57182716828b7f4f4946fe7145d106ec5bea..aed206475aa7c04892443646efa6e88a9e5f4d24 100644 (file)
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -48,7 +48,7 @@ static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
  #if defined(CONFIG_X86_ESPFIX64)
  static const unsigned long vaddr_end = ESPFIX_BASE_ADDR;
  #elif defined(CONFIG_EFI)
-static const unsigned long vaddr_end = EFI_VA_START;
+static const unsigned long vaddr_end = EFI_VA_END;
  #else
  static const unsigned long vaddr_end = __START_KERNEL_map;
  #endif
@@ -105,7 +105,7 @@ void __init kernel_randomize_memory(void)
          */
         BUILD_BUG_ON(vaddr_start >= vaddr_end);
         BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) &&
-                    vaddr_end >= EFI_VA_START);
+                    vaddr_end >= EFI_VA_END);
         BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) ||
                       IS_ENABLED(CONFIG_EFI)) &&
                      vaddr_end >= __START_KERNEL_map);
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c

index 5126dfd52b182dd66471a49a0464eb2411fbc7cd..cd44ae727df7f48ceba7fad00591c48cec151896 100644 (file)
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -590,7 +590,7 @@ static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm,
   * we might run off the end of the bounds table if we are on
   * a 64-bit kernel and try to get 8 bytes.
   */
-int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret,
+static int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret,
                 long __user *bd_entry_ptr)
  {
         u32 bd_entry_32;
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c

index 0cb52ae0a8f07521ee1cdf6a1075a4221f920884..190e718694b1720df737afdd9688ded962be6014 100644 (file)
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -735,6 +735,15 @@ void pcibios_disable_device (struct pci_dev *dev)
                 pcibios_disable_irq(dev);
  }
  
+#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
+void pcibios_release_device(struct pci_dev *dev)
+{
+       if (atomic_dec_return(&dev->enable_cnt) >= 0)
+               pcibios_disable_device(dev);
+
+}
+#endif
+
  int pci_ext_cfg_avail(void)
  {
         if (raw_pci_ext_ops)
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c

index e1fb269c87af7b39f1445e01734e76f431982f58..292ab0364a89af9aa6bc93a2ad79a88d00fbad9d 100644 (file)
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -234,23 +234,14 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
                 return 1;
  
         for_each_pci_msi_entry(msidesc, dev) {
-               __pci_read_msi_msg(msidesc, &msg);
-               pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) |
-                       ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff);
-               if (msg.data != XEN_PIRQ_MSI_DATA ||
-                   xen_irq_from_pirq(pirq) < 0) {
-                       pirq = xen_allocate_pirq_msi(dev, msidesc);
-                       if (pirq < 0) {
-                               irq = -ENODEV;
-                               goto error;
-                       }
-                       xen_msi_compose_msg(dev, pirq, &msg);
-                       __pci_write_msi_msg(msidesc, &msg);
-                       dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
-               } else {
-                       dev_dbg(&dev->dev,
-                               "xen: msi already bound to pirq=%d\n", pirq);
+               pirq = xen_allocate_pirq_msi(dev, msidesc);
+               if (pirq < 0) {
+                       irq = -ENODEV;
+                       goto error;
                 }
+               xen_msi_compose_msg(dev, pirq, &msg);
+               __pci_write_msi_msg(msidesc, &msg);
+               dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
                 irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq,
                                                (type == PCI_CAP_ID_MSI) ? nvec : 1,
                                                (type == PCI_CAP_ID_MSIX) ?
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c

index 30031d5293c483202c526d5045cda23be6617359..cdfe8c62895981029b8f69218b717b05d0b8961a 100644 (file)
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -201,6 +201,10 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size)
                 return;
         }
  
+       /* No need to reserve regions that will never be freed. */
+       if (md.attribute & EFI_MEMORY_RUNTIME)
+               return;
+
         size += addr % EFI_PAGE_SIZE;
         size = round_up(size, EFI_PAGE_SIZE);
         addr = round_down(addr, EFI_PAGE_SIZE);
diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile

index a7dbec4dce2758261c6e1680b7ed825e5e44a9d1..3dbde04febdccab382bc47ccba53b422ac7c72ea 100644 (file)
--- a/arch/x86/platform/intel-mid/device_libs/Makefile
+++ b/arch/x86/platform/intel-mid/device_libs/Makefile
@@ -26,5 +26,6 @@ obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_pcal9555a.o
  obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o
  # MISC Devices
  obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o
+obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_mrfld_power_btn.o
  obj-$(subst m,y,$(CONFIG_RTC_DRV_CMOS)) += platform_mrfld_rtc.o
  obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_mrfld_wdt.o
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c

new file mode 100644 (file)

index 0000000..a6c3705
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c
@@ -0,0 +1,82 @@
+/*
+ * Intel Merrifield power button support
+ *
+ * (C) Copyright 2017 Intel Corporation
+ *
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/sfi.h>
+
+#include <asm/intel-mid.h>
+#include <asm/intel_scu_ipc.h>
+
+static struct resource mrfld_power_btn_resources[] = {
+       {
+               .flags          = IORESOURCE_IRQ,
+       },
+};
+
+static struct platform_device mrfld_power_btn_dev = {
+       .name           = "msic_power_btn",
+       .id             = PLATFORM_DEVID_NONE,
+       .num_resources  = ARRAY_SIZE(mrfld_power_btn_resources),
+       .resource       = mrfld_power_btn_resources,
+};
+
+static int mrfld_power_btn_scu_status_change(struct notifier_block *nb,
+                                            unsigned long code, void *data)
+{
+       if (code == SCU_DOWN) {
+               platform_device_unregister(&mrfld_power_btn_dev);
+               return 0;
+       }
+
+       return platform_device_register(&mrfld_power_btn_dev);
+}
+
+static struct notifier_block mrfld_power_btn_scu_notifier = {
+       .notifier_call  = mrfld_power_btn_scu_status_change,
+};
+
+static int __init register_mrfld_power_btn(void)
+{
+       if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER)
+               return -ENODEV;
+
+       /*
+        * We need to be sure that the SCU IPC is ready before
+        * PMIC power button device can be registered:
+        */
+       intel_scu_notifier_add(&mrfld_power_btn_scu_notifier);
+
+       return 0;
+}
+arch_initcall(register_mrfld_power_btn);
+
+static void __init *mrfld_power_btn_platform_data(void *info)
+{
+       struct resource *res = mrfld_power_btn_resources;
+       struct sfi_device_table_entry *pentry = info;
+
+       res->start = res->end = pentry->irq;
+       return NULL;
+}
+
+static const struct devs_id mrfld_power_btn_dev_id __initconst = {
+       .name                   = "bcove_power_btn",
+       .type                   = SFI_DEV_TYPE_IPC,
+       .delay                  = 1,
+       .msic                   = 1,
+       .get_platform_data      = &mrfld_power_btn_platform_data,
+};
+
+sfi_device(mrfld_power_btn_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c

index 86edd1e941eb07bc46187024ae332409c6924073..9e304e2ea4f55c456e7f0037a8963f6586ad2b19 100644 (file)
--- a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
@@ -19,7 +19,7 @@
  #include <asm/intel_scu_ipc.h>
  #include <asm/io_apic.h>
  
-#define TANGIER_EXT_TIMER0_MSI 15
+#define TANGIER_EXT_TIMER0_MSI 12
  
  static struct platform_device wdt_dev = {
         .name = "intel_mid_wdt",
diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c

index e793fe509971f49fb2cfa6a12f8b365a937ae206..e42978d4deafeb184ea8595eb0cf3ef54ceb62bc 100644 (file)
--- a/arch/x86/platform/intel-mid/mfld.c
+++ b/arch/x86/platform/intel-mid/mfld.c
@@ -17,16 +17,6 @@
  
  #include "intel_mid_weak_decls.h"
  
-static void penwell_arch_setup(void);
-/* penwell arch ops */
-static struct intel_mid_ops penwell_ops = {
-       .arch_setup = penwell_arch_setup,
-};
-
-static void mfld_power_off(void)
-{
-}
-
  static unsigned long __init mfld_calibrate_tsc(void)
  {
         unsigned long fast_calibrate;
@@ -63,9 +53,12 @@ static unsigned long __init mfld_calibrate_tsc(void)
  static void __init penwell_arch_setup(void)
  {
         x86_platform.calibrate_tsc = mfld_calibrate_tsc;
-       pm_power_off = mfld_power_off;
  }
  
+static struct intel_mid_ops penwell_ops = {
+       .arch_setup = penwell_arch_setup,
+};
+
  void *get_penwell_ops(void)
  {
         return &penwell_ops;
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c

index 766d4d3529a1d946e36d4186e4c48da5d90b75cc..f25982cdff9006960d9e354d132ff01df717ad9b 100644 (file)
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1847,7 +1847,6 @@ static void pq_init(int node, int pnode)
  
         ops.write_payload_first(pnode, first);
         ops.write_payload_last(pnode, last);
-       ops.write_g_sw_ack(pnode, 0xffffUL);
  
         /* in effect, all msg_type's are set to MSG_NOOP */
         memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE);
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile

index 555b9fa0ad43cbd4148b2fb268692d4b2de167c4..7dbdb780264df9258d98829f8a11496cd58bb7ab 100644 (file)
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -8,6 +8,7 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
  LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib
  targets += purgatory.ro
  
+KASAN_SANITIZE := n
  KCOV_INSTRUMENT := n
  
  # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That
diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c

index 25e068ba338214826413265b26ccd9179e10b7f4..470edad96bb9560a218affd4c0922888f9200dba 100644 (file)
--- a/arch/x86/purgatory/purgatory.c
+++ b/arch/x86/purgatory/purgatory.c
@@ -10,21 +10,19 @@
   * Version 2.  See the file COPYING for more details.
   */
  
+#include <linux/bug.h>
+#include <asm/purgatory.h>
+
  #include "sha256.h"
  #include "../boot/string.h"
  
-struct sha_region {
-       unsigned long start;
-       unsigned long len;
-};
-
-unsigned long backup_dest = 0;
-unsigned long backup_src = 0;
-unsigned long backup_sz = 0;
+unsigned long purgatory_backup_dest __section(.kexec-purgatory);
+unsigned long purgatory_backup_src __section(.kexec-purgatory);
+unsigned long purgatory_backup_sz __section(.kexec-purgatory);
  
-u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 };
+u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section(.kexec-purgatory);
  
-struct sha_region sha_regions[16] = {};
+struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX] __section(.kexec-purgatory);
  
  /*
   * On x86, second kernel requires first 640K of memory to boot. Copy
@@ -33,26 +31,28 @@ struct sha_region sha_regions[16] = {};
   */
  static int copy_backup_region(void)
  {
-       if (backup_dest)
-               memcpy((void *)backup_dest, (void *)backup_src, backup_sz);
-
+       if (purgatory_backup_dest) {
+               memcpy((void *)purgatory_backup_dest,
+                      (void *)purgatory_backup_src, purgatory_backup_sz);
+       }
         return 0;
  }
  
-int verify_sha256_digest(void)
+static int verify_sha256_digest(void)
  {
-       struct sha_region *ptr, *end;
+       struct kexec_sha_region *ptr, *end;
         u8 digest[SHA256_DIGEST_SIZE];
         struct sha256_state sctx;
  
         sha256_init(&sctx);
-       end = &sha_regions[sizeof(sha_regions)/sizeof(sha_regions[0])];
-       for (ptr = sha_regions; ptr < end; ptr++)
+       end = purgatory_sha_regions + ARRAY_SIZE(purgatory_sha_regions);
+
+       for (ptr = purgatory_sha_regions; ptr < end; ptr++)
                 sha256_update(&sctx, (uint8_t *)(ptr->start), ptr->len);
  
         sha256_final(&sctx, digest);
  
-       if (memcmp(digest, sha256_digest, sizeof(digest)))
+       if (memcmp(digest, purgatory_sha256_digest, sizeof(digest)))
                 return 1;
  
         return 0;
diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S

index fe3c91ba1bd0c6fd0fe0c880510364dbda0fa5c5..dfae9b9e60b5ba01e62d92d442bd1276eee45ae7 100644 (file)
--- a/arch/x86/purgatory/setup-x86_64.S
+++ b/arch/x86/purgatory/setup-x86_64.S
@@ -9,6 +9,7 @@
   * This source code is licensed under the GNU General Public License,
   * Version 2.  See the file COPYING for more details.
   */
+#include <asm/purgatory.h>
  
         .text
         .globl purgatory_start
diff --git a/arch/x86/purgatory/sha256.h b/arch/x86/purgatory/sha256.h

index bd15a4127735e5f6ed9560b8dfb2503126f56e47..2867d9825a57e5f1f734bfb4a5777bc31810b090 100644 (file)
--- a/arch/x86/purgatory/sha256.h
+++ b/arch/x86/purgatory/sha256.h
@@ -10,7 +10,6 @@
  #ifndef SHA256_H
  #define SHA256_H
  
-
  #include <linux/types.h>
  #include <crypto/sha.h>
  
diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h

index 976b1d70edbc0a2d77016409fec93ec597df9cad..4ddbfd57a7c824c7d05aaa3d6d8824a849d03628 100644 (file)
--- a/arch/xtensa/include/asm/page.h
+++ b/arch/xtensa/include/asm/page.h
@@ -164,8 +164,21 @@ void copy_user_highpage(struct page *to, struct page *from,
  
  #define ARCH_PFN_OFFSET                (PHYS_OFFSET >> PAGE_SHIFT)
  
+#ifdef CONFIG_MMU
+static inline unsigned long ___pa(unsigned long va)
+{
+       unsigned long off = va - PAGE_OFFSET;
+
+       if (off >= XCHAL_KSEG_SIZE)
+               off -= XCHAL_KSEG_SIZE;
+
+       return off + PHYS_OFFSET;
+}
+#define __pa(x)        ___pa((unsigned long)(x))
+#else
  #define __pa(x)        \
         ((unsigned long) (x) - PAGE_OFFSET + PHYS_OFFSET)
+#endif
  #define __va(x)        \
         ((void *)((unsigned long) (x) - PHYS_OFFSET + PAGE_OFFSET))
  #define pfn_valid(pfn) \
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h

index 8aa0e0d9cbb21f0c3703192a828dbbeaf4d475ca..30dd5b2e4ad5af403bdf794e57a58b1c3beef2e7 100644 (file)
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -11,6 +11,7 @@
  #ifndef _XTENSA_PGTABLE_H
  #define _XTENSA_PGTABLE_H
  
+#define __ARCH_USE_5LEVEL_HACK
  #include <asm-generic/pgtable-nopmd.h>
  #include <asm/page.h>
  #include <asm/kmem_layout.h>
diff --git a/arch/xtensa/include/uapi/asm/unistd.h b/arch/xtensa/include/uapi/asm/unistd.h

index cd400af4a6b25597756cda04826278fea75ecf33..6be7eb27fd29d68b7c3a13643f628b7f2ae40c57 100644 (file)
--- a/arch/xtensa/include/uapi/asm/unistd.h
+++ b/arch/xtensa/include/uapi/asm/unistd.h
@@ -774,7 +774,10 @@ __SYSCALL(349, sys_pkey_alloc, 2)
  #define __NR_pkey_free                         350
  __SYSCALL(350, sys_pkey_free, 1)
  
-#define __NR_syscall_count                     351
+#define __NR_statx                             351
+__SYSCALL(351, sys_statx, 5)
+
+#define __NR_syscall_count                     352
  
  /*
   * sysxtensa syscall handler
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c

index c82c43bff2968cd3bab83688bc8a362671792150..bae697a06a984536bc51ce21cb3e402d5cfbd065 100644 (file)
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -483,10 +483,8 @@ void show_regs(struct pt_regs * regs)
  
  static int show_trace_cb(struct stackframe *frame, void *data)
  {
-       if (kernel_text_address(frame->pc)) {
-               pr_cont(" [<%08lx>]", frame->pc);
-               print_symbol(" %s\n", frame->pc);
-       }
+       if (kernel_text_address(frame->pc))
+               pr_cont(" [<%08lx>] %pB\n", frame->pc, (void *)frame->pc);
         return 0;
  }
  
diff --git a/block/Kconfig b/block/Kconfig

index e9f780f815f5d70e094b5d13778c5752e8c3d504..89cd28f8d05129680aa531dd462fc013966c9654 100644 (file)
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -115,6 +115,18 @@ config BLK_DEV_THROTTLING
  
         See Documentation/cgroups/blkio-controller.txt for more information.
  
+config BLK_DEV_THROTTLING_LOW
+       bool "Block throttling .low limit interface support (EXPERIMENTAL)"
+       depends on BLK_DEV_THROTTLING
+       default n
+       ---help---
+       Add .low limit interface for block throttling. The low limit is a best
+       effort limit to prioritize cgroups. Depending on the setting, the limit
+       can be used to protect cgroups in terms of bandwidth/iops and better
+       utilize disk resource.
+
+       Note, this is an experimental interface and could be changed someday.
+
  config BLK_CMDLINE_PARSER
         bool "Block device command line partition parser"
         default n
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched

index 58fc8684788d1f9fe7894c5afefa46c05692bf27..fd2cefa47d354fae842f1596176b52f261dc00a4 100644 (file)
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -40,6 +40,7 @@ config CFQ_GROUP_IOSCHED
           Enable group IO scheduling in CFQ.
  
  choice
+
         prompt "Default I/O scheduler"
         default DEFAULT_CFQ
         help
@@ -69,6 +70,35 @@ config MQ_IOSCHED_DEADLINE
         ---help---
           MQ version of the deadline IO scheduler.
  
+config MQ_IOSCHED_KYBER
+       tristate "Kyber I/O scheduler"
+       default y
+       ---help---
+         The Kyber I/O scheduler is a low-overhead scheduler suitable for
+         multiqueue and other fast devices. Given target latencies for reads and
+         synchronous writes, it will self-tune queue depths to achieve that
+         goal.
+
+config IOSCHED_BFQ
+       tristate "BFQ I/O scheduler"
+       default n
+       ---help---
+       BFQ I/O scheduler for BLK-MQ. BFQ distributes the bandwidth of
+       the device among all processes according to their weights,
+       regardless of the device parameters and with any workload. It
+       also guarantees a low latency to interactive and soft
+       real-time applications.  Details in
+       Documentation/block/bfq-iosched.txt
+
+config BFQ_GROUP_IOSCHED
+       bool "BFQ hierarchical scheduling support"
+       depends on IOSCHED_BFQ && BLK_CGROUP
+       default n
+       ---help---
+
+       Enable hierarchical scheduling in BFQ, using the blkio
+       (cgroups-v1) or io (cgroups-v2) controller.
+
  endmenu
  
  endif
diff --git a/block/Makefile b/block/Makefile

index 081bb680789bc8f191d22950a1ab71cc97443e07..2b281cf258a0abdae62bf92c36955422a0b58b1d 100644 (file)
--- a/block/Makefile
+++ b/block/Makefile
@@ -20,6 +20,9 @@ obj-$(CONFIG_IOSCHED_NOOP)    += noop-iosched.o
  obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
  obj-$(CONFIG_IOSCHED_CFQ)      += cfq-iosched.o
  obj-$(CONFIG_MQ_IOSCHED_DEADLINE)      += mq-deadline.o
+obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o
+bfq-y                          := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
+obj-$(CONFIG_IOSCHED_BFQ)      += bfq.o
  
  obj-$(CONFIG_BLOCK_COMPAT)     += compat_ioctl.o
  obj-$(CONFIG_BLK_CMDLINE_PARSER)       += cmdline-parser.o
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c

new file mode 100644 (file)

index 0000000..c8a32fb
--- /dev/null
+++ b/block/bfq-cgroup.c
@@ -0,0 +1,1139 @@
+/*
+ * cgroups support for the BFQ I/O scheduler.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License as
+ *  published by the Free Software Foundation; either version 2 of the
+ *  License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ */
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/cgroup.h>
+#include <linux/elevator.h>
+#include <linux/ktime.h>
+#include <linux/rbtree.h>
+#include <linux/ioprio.h>
+#include <linux/sbitmap.h>
+#include <linux/delay.h>
+
+#include "bfq-iosched.h"
+
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+
+/* bfqg stats flags */
+enum bfqg_stats_flags {
+       BFQG_stats_waiting = 0,
+       BFQG_stats_idling,
+       BFQG_stats_empty,
+};
+
+#define BFQG_FLAG_FNS(name)                                            \
+static void bfqg_stats_mark_##name(struct bfqg_stats *stats)   \
+{                                                                      \
+       stats->flags |= (1 << BFQG_stats_##name);                       \
+}                                                                      \
+static void bfqg_stats_clear_##name(struct bfqg_stats *stats)  \
+{                                                                      \
+       stats->flags &= ~(1 << BFQG_stats_##name);                      \
+}                                                                      \
+static int bfqg_stats_##name(struct bfqg_stats *stats)         \
+{                                                                      \
+       return (stats->flags & (1 << BFQG_stats_##name)) != 0;          \
+}                                                                      \
+
+BFQG_FLAG_FNS(waiting)
+BFQG_FLAG_FNS(idling)
+BFQG_FLAG_FNS(empty)
+#undef BFQG_FLAG_FNS
+
+/* This should be called with the queue_lock held. */
+static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
+{
+       unsigned long long now;
+
+       if (!bfqg_stats_waiting(stats))
+               return;
+
+       now = sched_clock();
+       if (time_after64(now, stats->start_group_wait_time))
+               blkg_stat_add(&stats->group_wait_time,
+                             now - stats->start_group_wait_time);
+       bfqg_stats_clear_waiting(stats);
+}
+
+/* This should be called with the queue_lock held. */
+static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
+                                                struct bfq_group *curr_bfqg)
+{
+       struct bfqg_stats *stats = &bfqg->stats;
+
+       if (bfqg_stats_waiting(stats))
+               return;
+       if (bfqg == curr_bfqg)
+               return;
+       stats->start_group_wait_time = sched_clock();
+       bfqg_stats_mark_waiting(stats);
+}
+
+/* This should be called with the queue_lock held. */
+static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
+{
+       unsigned long long now;
+
+       if (!bfqg_stats_empty(stats))
+               return;
+
+       now = sched_clock();
+       if (time_after64(now, stats->start_empty_time))
+               blkg_stat_add(&stats->empty_time,
+                             now - stats->start_empty_time);
+       bfqg_stats_clear_empty(stats);
+}
+
+void bfqg_stats_update_dequeue(struct bfq_group *bfqg)
+{
+       blkg_stat_add(&bfqg->stats.dequeue, 1);
+}
+
+void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg)
+{
+       struct bfqg_stats *stats = &bfqg->stats;
+
+       if (blkg_rwstat_total(&stats->queued))
+               return;
+
+       /*
+        * group is already marked empty. This can happen if bfqq got new
+        * request in parent group and moved to this group while being added
+        * to service tree. Just ignore the event and move on.
+        */
+       if (bfqg_stats_empty(stats))
+               return;
+
+       stats->start_empty_time = sched_clock();
+       bfqg_stats_mark_empty(stats);
+}
+
+void bfqg_stats_update_idle_time(struct bfq_group *bfqg)
+{
+       struct bfqg_stats *stats = &bfqg->stats;
+
+       if (bfqg_stats_idling(stats)) {
+               unsigned long long now = sched_clock();
+
+               if (time_after64(now, stats->start_idle_time))
+                       blkg_stat_add(&stats->idle_time,
+                                     now - stats->start_idle_time);
+               bfqg_stats_clear_idling(stats);
+       }
+}
+
+void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg)
+{
+       struct bfqg_stats *stats = &bfqg->stats;
+
+       stats->start_idle_time = sched_clock();
+       bfqg_stats_mark_idling(stats);
+}
+
+void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg)
+{
+       struct bfqg_stats *stats = &bfqg->stats;
+
+       blkg_stat_add(&stats->avg_queue_size_sum,
+                     blkg_rwstat_total(&stats->queued));
+       blkg_stat_add(&stats->avg_queue_size_samples, 1);
+       bfqg_stats_update_group_wait_time(stats);
+}
+
+/*
+ * blk-cgroup policy-related handlers
+ * The following functions help in converting between blk-cgroup
+ * internal structures and BFQ-specific structures.
+ */
+
+static struct bfq_group *pd_to_bfqg(struct blkg_policy_data *pd)
+{
+       return pd ? container_of(pd, struct bfq_group, pd) : NULL;
+}
+
+struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg)
+{
+       return pd_to_blkg(&bfqg->pd);
+}
+
+static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg)
+{
+       return pd_to_bfqg(blkg_to_pd(blkg, &blkcg_policy_bfq));
+}
+
+/*
+ * bfq_group handlers
+ * The following functions help in navigating the bfq_group hierarchy
+ * by allowing to find the parent of a bfq_group or the bfq_group
+ * associated to a bfq_queue.
+ */
+
+static struct bfq_group *bfqg_parent(struct bfq_group *bfqg)
+{
+       struct blkcg_gq *pblkg = bfqg_to_blkg(bfqg)->parent;
+
+       return pblkg ? blkg_to_bfqg(pblkg) : NULL;
+}
+
+struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
+{
+       struct bfq_entity *group_entity = bfqq->entity.parent;
+
+       return group_entity ? container_of(group_entity, struct bfq_group,
+                                          entity) :
+                             bfqq->bfqd->root_group;
+}
+
+/*
+ * The following two functions handle get and put of a bfq_group by
+ * wrapping the related blk-cgroup hooks.
+ */
+
+static void bfqg_get(struct bfq_group *bfqg)
+{
+       blkg_get(bfqg_to_blkg(bfqg));
+}
+
+void bfqg_put(struct bfq_group *bfqg)
+{
+       blkg_put(bfqg_to_blkg(bfqg));
+}
+
+void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
+                             unsigned int op)
+{
+       blkg_rwstat_add(&bfqg->stats.queued, op, 1);
+       bfqg_stats_end_empty_time(&bfqg->stats);
+       if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue))
+               bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq));
+}
+
+void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op)
+{
+       blkg_rwstat_add(&bfqg->stats.queued, op, -1);
+}
+
+void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op)
+{
+       blkg_rwstat_add(&bfqg->stats.merged, op, 1);
+}
+
+void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
+                                 uint64_t io_start_time, unsigned int op)
+{
+       struct bfqg_stats *stats = &bfqg->stats;
+       unsigned long long now = sched_clock();
+
+       if (time_after64(now, io_start_time))
+               blkg_rwstat_add(&stats->service_time, op,
+                               now - io_start_time);
+       if (time_after64(io_start_time, start_time))
+               blkg_rwstat_add(&stats->wait_time, op,
+                               io_start_time - start_time);
+}
+
+/* @stats = 0 */
+static void bfqg_stats_reset(struct bfqg_stats *stats)
+{
+       /* queued stats shouldn't be cleared */
+       blkg_rwstat_reset(&stats->merged);
+       blkg_rwstat_reset(&stats->service_time);
+       blkg_rwstat_reset(&stats->wait_time);
+       blkg_stat_reset(&stats->time);
+       blkg_stat_reset(&stats->avg_queue_size_sum);
+       blkg_stat_reset(&stats->avg_queue_size_samples);
+       blkg_stat_reset(&stats->dequeue);
+       blkg_stat_reset(&stats->group_wait_time);
+       blkg_stat_reset(&stats->idle_time);
+       blkg_stat_reset(&stats->empty_time);
+}
+
+/* @to += @from (queued excluded); no-op when either pointer is NULL */
+static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from)
+{
+       if (!to || !from)
+               return;
+
+       /* queued stats are not transferred */
+       blkg_rwstat_add_aux(&to->merged, &from->merged);
+       blkg_rwstat_add_aux(&to->service_time, &from->service_time);
+       blkg_rwstat_add_aux(&to->wait_time, &from->wait_time);
+       blkg_stat_add_aux(&to->time, &from->time);
+       blkg_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
+       blkg_stat_add_aux(&to->avg_queue_size_samples,
+                         &from->avg_queue_size_samples);
+       blkg_stat_add_aux(&to->dequeue, &from->dequeue);
+       blkg_stat_add_aux(&to->group_wait_time, &from->group_wait_time);
+       blkg_stat_add_aux(&to->idle_time, &from->idle_time);
+       blkg_stat_add_aux(&to->empty_time, &from->empty_time);
+}
+
+/*
+ * Transfer @bfqg's stats to its parent's aux counts so that the ancestors'
+ * recursive stats can still account for the amount used by this bfqg after
+ * it's gone.
+ */
+static void bfqg_stats_xfer_dead(struct bfq_group *bfqg)
+{
+       struct bfq_group *parent;
+
+       if (!bfqg) /* root_group */
+               return;
+
+       parent = bfqg_parent(bfqg);
+
+       lockdep_assert_held(bfqg_to_blkg(bfqg)->q->queue_lock);
+
+       if (unlikely(!parent))
+               return;
+
+       bfqg_stats_add_aux(&parent->stats, &bfqg->stats);
+       bfqg_stats_reset(&bfqg->stats);
+}
+
+void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg)
+{
+       struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+       entity->weight = entity->new_weight;
+       entity->orig_weight = entity->new_weight;
+       if (bfqq) {
+               bfqq->ioprio = bfqq->new_ioprio;
+               bfqq->ioprio_class = bfqq->new_ioprio_class;
+               bfqg_get(bfqg);
+       }
+       entity->parent = bfqg->my_entity; /* NULL for root group */
+       entity->sched_data = &bfqg->sched_data;
+}
+
+static void bfqg_stats_exit(struct bfqg_stats *stats)
+{
+       blkg_rwstat_exit(&stats->merged);
+       blkg_rwstat_exit(&stats->service_time);
+       blkg_rwstat_exit(&stats->wait_time);
+       blkg_rwstat_exit(&stats->queued);
+       blkg_stat_exit(&stats->time);
+       blkg_stat_exit(&stats->avg_queue_size_sum);
+       blkg_stat_exit(&stats->avg_queue_size_samples);
+       blkg_stat_exit(&stats->dequeue);
+       blkg_stat_exit(&stats->group_wait_time);
+       blkg_stat_exit(&stats->idle_time);
+       blkg_stat_exit(&stats->empty_time);
+}
+
+static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
+{
+       if (blkg_rwstat_init(&stats->merged, gfp) ||
+           blkg_rwstat_init(&stats->service_time, gfp) ||
+           blkg_rwstat_init(&stats->wait_time, gfp) ||
+           blkg_rwstat_init(&stats->queued, gfp) ||
+           blkg_stat_init(&stats->time, gfp) ||
+           blkg_stat_init(&stats->avg_queue_size_sum, gfp) ||
+           blkg_stat_init(&stats->avg_queue_size_samples, gfp) ||
+           blkg_stat_init(&stats->dequeue, gfp) ||
+           blkg_stat_init(&stats->group_wait_time, gfp) ||
+           blkg_stat_init(&stats->idle_time, gfp) ||
+           blkg_stat_init(&stats->empty_time, gfp)) {
+               bfqg_stats_exit(stats);
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd)
+{
+       return cpd ? container_of(cpd, struct bfq_group_data, pd) : NULL;
+}
+
+static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg)
+{
+       return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq));
+}
+
+struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
+{
+       struct bfq_group_data *bgd;
+
+       bgd = kzalloc(sizeof(*bgd), gfp);
+       if (!bgd)
+               return NULL;
+       return &bgd->pd;
+}
+
+void bfq_cpd_init(struct blkcg_policy_data *cpd)
+{
+       struct bfq_group_data *d = cpd_to_bfqgd(cpd);
+
+       d->weight = cgroup_subsys_on_dfl(io_cgrp_subsys) ?
+               CGROUP_WEIGHT_DFL : BFQ_WEIGHT_LEGACY_DFL;
+}
+
+void bfq_cpd_free(struct blkcg_policy_data *cpd)
+{
+       kfree(cpd_to_bfqgd(cpd));
+}
+
+struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node)
+{
+       struct bfq_group *bfqg;
+
+       bfqg = kzalloc_node(sizeof(*bfqg), gfp, node);
+       if (!bfqg)
+               return NULL;
+
+       if (bfqg_stats_init(&bfqg->stats, gfp)) {
+               kfree(bfqg);
+               return NULL;
+       }
+
+       return &bfqg->pd;
+}
+
+void bfq_pd_init(struct blkg_policy_data *pd)
+{
+       struct blkcg_gq *blkg = pd_to_blkg(pd);
+       struct bfq_group *bfqg = blkg_to_bfqg(blkg);
+       struct bfq_data *bfqd = blkg->q->elevator->elevator_data;
+       struct bfq_entity *entity = &bfqg->entity;
+       struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg);
+
+       entity->orig_weight = entity->weight = entity->new_weight = d->weight;
+       entity->my_sched_data = &bfqg->sched_data;
+       bfqg->my_entity = entity; /*
+                                  * the root_group's will be set to NULL
+                                  * in bfq_init_queue()
+                                  */
+       bfqg->bfqd = bfqd;
+       bfqg->active_entities = 0;
+       bfqg->rq_pos_tree = RB_ROOT;
+}
+
+void bfq_pd_free(struct blkg_policy_data *pd)
+{
+       struct bfq_group *bfqg = pd_to_bfqg(pd);
+
+       bfqg_stats_exit(&bfqg->stats);
+       kfree(bfqg);
+}
+
+void bfq_pd_reset_stats(struct blkg_policy_data *pd)
+{
+       struct bfq_group *bfqg = pd_to_bfqg(pd);
+
+       bfqg_stats_reset(&bfqg->stats);
+}
+
+static void bfq_group_set_parent(struct bfq_group *bfqg,
+                                       struct bfq_group *parent)
+{
+       struct bfq_entity *entity;
+
+       entity = &bfqg->entity;
+       entity->parent = parent->my_entity;
+       entity->sched_data = &parent->sched_data;
+}
+
+static struct bfq_group *bfq_lookup_bfqg(struct bfq_data *bfqd,
+                                        struct blkcg *blkcg)
+{
+       struct blkcg_gq *blkg;
+
+       blkg = blkg_lookup(blkcg, bfqd->queue);
+       if (likely(blkg))
+               return blkg_to_bfqg(blkg);
+       return NULL;
+}
+
+struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
+                                    struct blkcg *blkcg)
+{
+       struct bfq_group *bfqg, *parent;
+       struct bfq_entity *entity;
+
+       bfqg = bfq_lookup_bfqg(bfqd, blkcg);
+
+       if (unlikely(!bfqg))
+               return NULL;
+
+       /*
+        * Update chain of bfq_groups as we might be handling a leaf group
+        * which, along with some of its relatives, has not been hooked yet
+        * to the private hierarchy of BFQ.
+        */
+       entity = &bfqg->entity;
+       for_each_entity(entity) {
+               bfqg = container_of(entity, struct bfq_group, entity);
+               if (bfqg != bfqd->root_group) {
+                       parent = bfqg_parent(bfqg);
+                       if (!parent)
+                               parent = bfqd->root_group;
+                       bfq_group_set_parent(bfqg, parent);
+               }
+       }
+
+       return bfqg;
+}
+
+/**
+ * bfq_bfqq_move - migrate @bfqq to @bfqg.
+ * @bfqd: queue descriptor.
+ * @bfqq: the queue to move.
+ * @bfqg: the group to move to.
+ *
+ * Move @bfqq to @bfqg, deactivating it from its old group and reactivating
+ * it on the new one.  Avoid putting the entity on the old group idle tree.
+ *
+ * Must be called under the queue lock; the cgroup owning @bfqg must
+ * not disappear (by now this just means that we are called under
+ * rcu_read_lock()).
+ */
+void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+                  struct bfq_group *bfqg)
+{
+       struct bfq_entity *entity = &bfqq->entity;
+
+       /* If bfqq is empty, then bfq_bfqq_expire also invokes
+        * bfq_del_bfqq_busy, thereby removing bfqq and its entity
+        * from data structures related to current group. Otherwise we
+        * need to remove bfqq explicitly with bfq_deactivate_bfqq, as
+        * we do below.
+        */
+       if (bfqq == bfqd->in_service_queue)
+               bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
+                               false, BFQQE_PREEMPTED);
+
+       if (bfq_bfqq_busy(bfqq))
+               bfq_deactivate_bfqq(bfqd, bfqq, false, false);
+       else if (entity->on_st)
+               bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
+       bfqg_put(bfqq_group(bfqq));
+
+       /*
+        * Here we use a reference to bfqg.  We don't need a refcounter
+        * as the cgroup reference will not be dropped, so that its
+        * destroy() callback will not be invoked.
+        */
+       entity->parent = bfqg->my_entity;
+       entity->sched_data = &bfqg->sched_data;
+       bfqg_get(bfqg);
+
+       if (bfq_bfqq_busy(bfqq)) {
+               bfq_pos_tree_add_move(bfqd, bfqq);
+               bfq_activate_bfqq(bfqd, bfqq);
+       }
+
+       if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
+               bfq_schedule_dispatch(bfqd);
+}
+
+/**
+ * __bfq_bic_change_cgroup - move @bic to @cgroup.
+ * @bfqd: the queue descriptor.
+ * @bic: the bic to move.
+ * @blkcg: the blk-cgroup to move to.
+ *
+ * Move bic to blkcg, assuming that bfqd->queue is locked; the caller
+ * has to make sure that the reference to cgroup is valid across the call.
+ *
+ * NOTE: an alternative approach might have been to store the current
+ * cgroup in bfqq and getting a reference to it, reducing the lookup
+ * time here, at the price of slightly more complex code.
+ */
+static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
+                                               struct bfq_io_cq *bic,
+                                               struct blkcg *blkcg)
+{
+       struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
+       struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
+       struct bfq_group *bfqg;
+       struct bfq_entity *entity;
+
+       bfqg = bfq_find_set_group(bfqd, blkcg);
+
+       if (unlikely(!bfqg))
+               bfqg = bfqd->root_group;
+
+       if (async_bfqq) {
+               entity = &async_bfqq->entity;
+
+               if (entity->sched_data != &bfqg->sched_data) {
+                       bic_set_bfqq(bic, NULL, 0);
+                       bfq_log_bfqq(bfqd, async_bfqq,
+                                    "bic_change_group: %p %d",
+                                    async_bfqq, async_bfqq->ref);
+                       bfq_put_queue(async_bfqq);
+               }
+       }
+
+       if (sync_bfqq) {
+               entity = &sync_bfqq->entity;
+               if (entity->sched_data != &bfqg->sched_data)
+                       bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+       }
+
+       return bfqg;
+}
+
+void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
+{
+       struct bfq_data *bfqd = bic_to_bfqd(bic);
+       struct bfq_group *bfqg = NULL;
+       uint64_t serial_nr;
+
+       rcu_read_lock();
+       serial_nr = bio_blkcg(bio)->css.serial_nr;
+
+       /*
+        * Check whether blkcg has changed.  The condition may trigger
+        * spuriously on a newly created cic but there's no harm.
+        */
+       if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
+               goto out;
+
+       bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio));
+       bic->blkcg_serial_nr = serial_nr;
+out:
+       rcu_read_unlock();
+}
+
+/**
+ * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st.
+ * @st: the service tree being flushed.
+ */
+static void bfq_flush_idle_tree(struct bfq_service_tree *st)
+{
+       struct bfq_entity *entity = st->first_idle;
+
+       for (; entity ; entity = st->first_idle)
+               __bfq_deactivate_entity(entity, false);
+}
+
+/**
+ * bfq_reparent_leaf_entity - move leaf entity to the root_group.
+ * @bfqd: the device data structure with the root group.
+ * @entity: the entity to move.
+ */
+static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
+                                    struct bfq_entity *entity)
+{
+       struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+       bfq_bfqq_move(bfqd, bfqq, bfqd->root_group);
+}
+
+/**
+ * bfq_reparent_active_entities - move to the root group all active
+ *                                entities.
+ * @bfqd: the device data structure with the root group.
+ * @bfqg: the group to move from.
+ * @st: the service tree with the entities.
+ *
+ * Needs queue_lock to be taken and reference to be valid over the call.
+ */
+static void bfq_reparent_active_entities(struct bfq_data *bfqd,
+                                        struct bfq_group *bfqg,
+                                        struct bfq_service_tree *st)
+{
+       struct rb_root *active = &st->active;
+       struct bfq_entity *entity = NULL;
+
+       if (!RB_EMPTY_ROOT(&st->active))
+               entity = bfq_entity_of(rb_first(active));
+
+       for (; entity ; entity = bfq_entity_of(rb_first(active)))
+               bfq_reparent_leaf_entity(bfqd, entity);
+
+       if (bfqg->sched_data.in_service_entity)
+               bfq_reparent_leaf_entity(bfqd,
+                       bfqg->sched_data.in_service_entity);
+}
+
+/**
+ * bfq_pd_offline - deactivate the entity associated with @pd,
+ *                 and reparent its children entities.
+ * @pd: descriptor of the policy going offline.
+ *
+ * Nothing is done for the root group, which is recognized by its
+ * NULL my_entity. For any other group, the service trees are emptied,
+ * the group entity is deactivated and bfq_put_async_queues() is
+ * invoked, all with bfqd->lock held and interrupts disabled. The
+ * statistics are then transferred through bfqg_stats_xfer_dead(),
+ * after the lock has been released.
+ *
+ * NOTE(review): this comment used to state that blkio already grabs
+ * the queue_lock for us; the code below takes bfqd->lock by itself,
+ * so that statement looks stale - confirm.
+ */
+void bfq_pd_offline(struct blkg_policy_data *pd)
+{
+       struct bfq_service_tree *st;
+       struct bfq_group *bfqg = pd_to_bfqg(pd);
+       struct bfq_data *bfqd = bfqg->bfqd;
+       struct bfq_entity *entity = bfqg->my_entity;
+       unsigned long flags;
+       int i;
+
+       if (!entity) /* root group */
+               return;
+
+       spin_lock_irqsave(&bfqd->lock, flags);
+       /*
+        * Empty all service_trees belonging to this group before
+        * deactivating the group itself.
+        */
+       for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) {
+               st = bfqg->sched_data.service_tree + i;
+
+               /*
+                * The idle tree may still contain bfq_queues belonging
+                * to exited tasks because they never migrated to a
+                * different cgroup from the one being destroyed now.
+                * No one else can access them so it's safe to act
+                * without any lock.
+                */
+               bfq_flush_idle_tree(st);
+
+               /*
+                * It may happen that some queues are still active
+                * (busy) upon group destruction (if the corresponding
+                * processes have been forced to terminate). We move
+                * all the leaf entities corresponding to these queues
+                * to the root_group.
+                * Also, it may happen that the group has an entity
+                * in service, which is disconnected from the active
+                * tree: it must be moved, too.
+                * There is no need to put the sync queues, as the
+                * scheduler has taken no reference.
+                */
+               bfq_reparent_active_entities(bfqd, bfqg, st);
+       }
+
+       __bfq_deactivate_entity(entity, false);
+       bfq_put_async_queues(bfqd, bfqg);
+
+       spin_unlock_irqrestore(&bfqd->lock, flags);
+       /*
+        * @blkg is going offline and will be ignored by
+        * blkg_[rw]stat_recursive_sum().  Transfer stats to the parent so
+        * that they don't get lost.  If IOs complete after this point, the
+        * stats for them will be lost.  Oh well...
+        */
+       bfqg_stats_xfer_dead(bfqg);
+}
+
+/*
+ * Invoke bfq_end_wr_async_queues() on the bfq_group of every blkg
+ * linked in the blkg_list of the request queue of @bfqd, and then on
+ * the root group of @bfqd.
+ */
+void bfq_end_wr_async(struct bfq_data *bfqd)
+{
+       struct blkcg_gq *blkg;
+
+       list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node)
+               bfq_end_wr_async_queues(bfqd, blkg_to_bfqg(blkg));
+
+       bfq_end_wr_async_queues(bfqd, bfqd->root_group);
+}
+
+/*
+ * seq_file show handler of the weight attribute: print the weight
+ * stored in the bfq_group_data of the blkcg behind @sf, or 0 if the
+ * blkcg has no bfq_group_data. Always returns 0.
+ */
+static int bfq_io_show_weight(struct seq_file *sf, void *v)
+{
+       struct bfq_group_data *bfqgd =
+               blkcg_to_bfqgd(css_to_blkcg(seq_css(sf)));
+       unsigned int weight = 0;
+
+       if (bfqgd)
+               weight = bfqgd->weight;
+
+       seq_printf(sf, "%u\n", weight);
+
+       return 0;
+}
+
+/*
+ * Set to @val the weight of the blkcg associated with @css.
+ *
+ * Returns -ERANGE, without changing anything, if @val lies outside
+ * [BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT], and 0 otherwise. In the latter
+ * case, and with blkcg->lock held, the new weight is recorded in the
+ * bfq_group_data of the blkcg and propagated to the entity of every
+ * bfq_group reachable from the blkg_list of the blkcg. @cftype is
+ * not used.
+ */
+static int bfq_io_set_weight_legacy(struct cgroup_subsys_state *css,
+                                   struct cftype *cftype,
+                                   u64 val)
+{
+       struct blkcg *blkcg = css_to_blkcg(css);
+       struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
+       struct blkcg_gq *blkg;
+       int ret = -ERANGE;
+
+       if (val < BFQ_MIN_WEIGHT || val > BFQ_MAX_WEIGHT)
+               return ret;
+
+       ret = 0;
+       spin_lock_irq(&blkcg->lock);
+       bfqgd->weight = (unsigned short)val;
+       hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
+               struct bfq_group *bfqg = blkg_to_bfqg(blkg);
+
+               /* blkgs with no bfq_group attached are skipped */
+               if (!bfqg)
+                       continue;
+               /*
+                * Setting the prio_changed flag of the entity
+                * to 1 with new_weight == weight would re-set
+                * the value of the weight to its ioprio mapping.
+                * Set the flag only if necessary.
+                */
+               if ((unsigned short)val != bfqg->entity.new_weight) {
+                       bfqg->entity.new_weight = (unsigned short)val;
+                       /*
+                        * Make sure that the above new value has been
+                        * stored in bfqg->entity.new_weight before
+                        * setting the prio_changed flag. In fact,
+                        * this flag may be read asynchronously (in
+                        * critical sections protected by a different
+                        * lock than that held here), and finding this
+                        * flag set may cause the execution of the code
+                        * for updating parameters whose value may
+                        * depend also on bfqg->entity.new_weight (in
+                        * __bfq_entity_update_weight_prio).
+                        * This barrier makes sure that the new value
+                        * of bfqg->entity.new_weight is correctly
+                        * seen in that code.
+                        */
+                       smp_wmb();
+                       bfqg->entity.prio_changed = 1;
+               }
+       }
+       spin_unlock_irq(&blkcg->lock);
+
+       return ret;
+}
+
+/*
+ * Write handler of the weight attribute in the default hierarchy.
+ *
+ * The content of @buf, stripped of leading and trailing white space,
+ * is parsed as an unsigned 64-bit integer (base auto-detected) and
+ * handed to bfq_io_set_weight_legacy().
+ *
+ * Returns a negative error code if either the parsing or the setting
+ * of the weight fails. On success, returns @nbytes: a cftype ->write()
+ * method has to report the number of bytes it consumed, and returning
+ * 0 would tell the writer that nothing has been written.
+ */
+static ssize_t bfq_io_set_weight(struct kernfs_open_file *of,
+                                char *buf, size_t nbytes,
+                                loff_t off)
+{
+       u64 weight;
+       int ret;
+
+       ret = kstrtoull(strim(buf), 0, &weight);
+       if (ret)
+               return ret;
+
+       ret = bfq_io_set_weight_legacy(of_css(of), NULL, weight);
+       if (ret)
+               return ret;
+
+       return nbytes;
+}
+
+/*
+ * seq_file show handler: run blkg_prfill_stat, through
+ * blkcg_print_blkgs(), for the blkcg behind @sf, passing it the value
+ * stored in the private field of the cftype being shown. Always
+ * returns 0.
+ */
+static int bfqg_print_stat(struct seq_file *sf, void *v)
+{
+       struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+
+       blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat, &blkcg_policy_bfq,
+                         seq_cft(sf)->private, false);
+
+       return 0;
+}
+
+/*
+ * seq_file show handler: run blkg_prfill_rwstat, through
+ * blkcg_print_blkgs(), for the blkcg behind @sf, passing it the value
+ * stored in the private field of the cftype being shown. Always
+ * returns 0.
+ */
+static int bfqg_print_rwstat(struct seq_file *sf, void *v)
+{
+       struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+
+       blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat, &blkcg_policy_bfq,
+                         seq_cft(sf)->private, true);
+
+       return 0;
+}
+
+/*
+ * Print, via __blkg_prfill_u64(), the value computed by
+ * blkg_stat_recursive_sum() for the blkg of @pd and the stat at
+ * offset @off. Returns what __blkg_prfill_u64() returns.
+ */
+static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
+                                     struct blkg_policy_data *pd, int off)
+{
+       struct blkcg_gq *blkg = pd_to_blkg(pd);
+
+       return __blkg_prfill_u64(sf, pd,
+                                blkg_stat_recursive_sum(blkg,
+                                                        &blkcg_policy_bfq,
+                                                        off));
+}
+
+/*
+ * Print, via __blkg_prfill_rwstat(), the blkg_rwstat computed by
+ * blkg_rwstat_recursive_sum() for the blkg of @pd and the rwstat at
+ * offset @off. Returns what __blkg_prfill_rwstat() returns.
+ */
+static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
+                                       struct blkg_policy_data *pd, int off)
+{
+       struct blkg_rwstat sum;
+
+       sum = blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq,
+                                       off);
+
+       return __blkg_prfill_rwstat(sf, pd, &sum);
+}
+
+/*
+ * seq_file show handler: run bfqg_prfill_stat_recursive, through
+ * blkcg_print_blkgs(), for the blkcg behind @sf, passing it the value
+ * stored in the private field of the cftype being shown. Always
+ * returns 0.
+ */
+static int bfqg_print_stat_recursive(struct seq_file *sf, void *v)
+{
+       struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+
+       blkcg_print_blkgs(sf, blkcg, bfqg_prfill_stat_recursive,
+                         &blkcg_policy_bfq, seq_cft(sf)->private, false);
+
+       return 0;
+}
+
+/*
+ * seq_file show handler: run bfqg_prfill_rwstat_recursive, through
+ * blkcg_print_blkgs(), for the blkcg behind @sf, passing it the value
+ * stored in the private field of the cftype being shown. Always
+ * returns 0.
+ */
+static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
+{
+       struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+
+       blkcg_print_blkgs(sf, blkcg, bfqg_prfill_rwstat_recursive,
+                         &blkcg_policy_bfq, seq_cft(sf)->private, true);
+
+       return 0;
+}
+
+/*
+ * Print, via __blkg_prfill_u64(), the total of the stat_bytes rwstat
+ * of the blkg of @pd, shifted right by 9 bits (i.e., divided by 512).
+ * @off is not used.
+ */
+static u64 bfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd,
+                              int off)
+{
+       u64 bytes = blkg_rwstat_total(&pd->blkg->stat_bytes);
+       u64 sectors = bytes >> 9;
+
+       return __blkg_prfill_u64(sf, pd, sectors);
+}
+
+/*
+ * seq_file show handler: run bfqg_prfill_sectors, through
+ * blkcg_print_blkgs(), for the blkcg behind @sf. Always returns 0.
+ */
+static int bfqg_print_stat_sectors(struct seq_file *sf, void *v)
+{
+       struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+
+       blkcg_print_blkgs(sf, blkcg, bfqg_prfill_sectors, &blkcg_policy_bfq,
+                         0, false);
+
+       return 0;
+}
+
+/*
+ * Print, via __blkg_prfill_u64(), the sum of the read and write
+ * counters of the blkg_rwstat that blkg_rwstat_recursive_sum()
+ * computes for the stat_bytes field of the blkg of @pd, shifted
+ * right by 9 bits (i.e., divided by 512). @off is not used.
+ */
+static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf,
+                                        struct blkg_policy_data *pd, int off)
+{
+       struct blkg_rwstat tmp;
+       u64 sum;
+
+       tmp = blkg_rwstat_recursive_sum(pd->blkg, NULL,
+                                       offsetof(struct blkcg_gq, stat_bytes));
+       sum = atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) +
+               atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]);
+
+       return __blkg_prfill_u64(sf, pd, sum >> 9);
+}
+
+/*
+ * seq_file show handler: run bfqg_prfill_sectors_recursive, through
+ * blkcg_print_blkgs(), for the blkcg behind @sf. Always returns 0.
+ */
+static int bfqg_print_stat_sectors_recursive(struct seq_file *sf, void *v)
+{
+       struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+
+       blkcg_print_blkgs(sf, blkcg, bfqg_prfill_sectors_recursive,
+                         &blkcg_policy_bfq, 0, false);
+
+       return 0;
+}
+
+/*
+ * Print, via __blkg_prfill_u64(), the ratio between the
+ * avg_queue_size_sum and the avg_queue_size_samples stats of the
+ * group of @pd, or 0 if no sample has been collected. The value
+ * returned by __blkg_prfill_u64() is discarded and 0 is returned.
+ * @off is not used.
+ */
+static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf,
+                                     struct blkg_policy_data *pd, int off)
+{
+       struct bfq_group *bfqg = pd_to_bfqg(pd);
+       u64 nr_samples = blkg_stat_read(&bfqg->stats.avg_queue_size_samples);
+       u64 avg = 0;
+
+       if (nr_samples != 0)
+               avg = div64_u64(blkg_stat_read(&bfqg->stats.avg_queue_size_sum),
+                               nr_samples);
+
+       __blkg_prfill_u64(sf, pd, avg);
+
+       return 0;
+}
+
+/*
+ * seq_file show handler of avg_queue_size: run
+ * bfqg_prfill_avg_queue_size, through blkcg_print_blkgs(), for the
+ * blkcg behind @sf. Always returns 0.
+ */
+static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v)
+{
+       struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+
+       blkcg_print_blkgs(sf, blkcg, bfqg_prfill_avg_queue_size,
+                         &blkcg_policy_bfq, 0, false);
+
+       return 0;
+}
+
+/*
+ * Activate the BFQ blkcg policy on the request queue of @bfqd.
+ * Returns the bfq_group of the root blkg of that queue, or NULL if
+ * the activation fails. @node is not used in this variant.
+ */
+struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
+{
+       if (blkcg_activate_policy(bfqd->queue, &blkcg_policy_bfq))
+               return NULL;
+
+       return blkg_to_bfqg(bfqd->queue->root_blkg);
+}
+
+/*
+ * Control files and callbacks that BFQ hands over to the blkcg core.
+ * Note that bfq_cpd_init is installed both as cpd_init_fn and as
+ * cpd_bind_fn.
+ */
+struct blkcg_policy blkcg_policy_bfq = {
+       .dfl_cftypes            = bfq_blkg_files,
+       .legacy_cftypes         = bfq_blkcg_legacy_files,
+
+       .cpd_alloc_fn           = bfq_cpd_alloc,
+       .cpd_init_fn            = bfq_cpd_init,
+       .cpd_bind_fn            = bfq_cpd_init,
+       .cpd_free_fn            = bfq_cpd_free,
+
+       .pd_alloc_fn            = bfq_pd_alloc,
+       .pd_init_fn             = bfq_pd_init,
+       .pd_offline_fn          = bfq_pd_offline,
+       .pd_free_fn             = bfq_pd_free,
+       .pd_reset_stats_fn      = bfq_pd_reset_stats,
+};
+
+/*
+ * Control files installed as legacy_cftypes of blkcg_policy_bfq. For
+ * the entries shown through the bfqg_print_* handlers, .private holds
+ * the offset, inside struct bfq_group, of the stat to print; for the
+ * entries shown through the blkg_print_* handlers, it holds the
+ * address of blkcg_policy_bfq.
+ */
+struct cftype bfq_blkcg_legacy_files[] = {
+       {
+               .name = "bfq.weight",
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .seq_show = bfq_io_show_weight,
+               .write_u64 = bfq_io_set_weight_legacy,
+       },
+
+       /* statistics, covers only the tasks in the bfqg */
+       {
+               .name = "bfq.time",
+               .private = offsetof(struct bfq_group, stats.time),
+               .seq_show = bfqg_print_stat,
+       },
+       {
+               .name = "bfq.sectors",
+               .seq_show = bfqg_print_stat_sectors,
+       },
+       {
+               .name = "bfq.io_service_bytes",
+               .private = (unsigned long)&blkcg_policy_bfq,
+               .seq_show = blkg_print_stat_bytes,
+       },
+       {
+               .name = "bfq.io_serviced",
+               .private = (unsigned long)&blkcg_policy_bfq,
+               .seq_show = blkg_print_stat_ios,
+       },
+       {
+               .name = "bfq.io_service_time",
+               .private = offsetof(struct bfq_group, stats.service_time),
+               .seq_show = bfqg_print_rwstat,
+       },
+       {
+               .name = "bfq.io_wait_time",
+               .private = offsetof(struct bfq_group, stats.wait_time),
+               .seq_show = bfqg_print_rwstat,
+       },
+       {
+               .name = "bfq.io_merged",
+               .private = offsetof(struct bfq_group, stats.merged),
+               .seq_show = bfqg_print_rwstat,
+       },
+       {
+               .name = "bfq.io_queued",
+               .private = offsetof(struct bfq_group, stats.queued),
+               .seq_show = bfqg_print_rwstat,
+       },
+
+       /* the same statistics, covering the bfqg and its descendants */
+       {
+               .name = "bfq.time_recursive",
+               .private = offsetof(struct bfq_group, stats.time),
+               .seq_show = bfqg_print_stat_recursive,
+       },
+       {
+               .name = "bfq.sectors_recursive",
+               .seq_show = bfqg_print_stat_sectors_recursive,
+       },
+       {
+               .name = "bfq.io_service_bytes_recursive",
+               .private = (unsigned long)&blkcg_policy_bfq,
+               .seq_show = blkg_print_stat_bytes_recursive,
+       },
+       {
+               .name = "bfq.io_serviced_recursive",
+               .private = (unsigned long)&blkcg_policy_bfq,
+               .seq_show = blkg_print_stat_ios_recursive,
+       },
+       {
+               .name = "bfq.io_service_time_recursive",
+               .private = offsetof(struct bfq_group, stats.service_time),
+               .seq_show = bfqg_print_rwstat_recursive,
+       },
+       {
+               .name = "bfq.io_wait_time_recursive",
+               .private = offsetof(struct bfq_group, stats.wait_time),
+               .seq_show = bfqg_print_rwstat_recursive,
+       },
+       {
+               .name = "bfq.io_merged_recursive",
+               .private = offsetof(struct bfq_group, stats.merged),
+               .seq_show = bfqg_print_rwstat_recursive,
+       },
+       {
+               .name = "bfq.io_queued_recursive",
+               .private = offsetof(struct bfq_group, stats.queued),
+               .seq_show = bfqg_print_rwstat_recursive,
+       },
+       {
+               .name = "bfq.avg_queue_size",
+               .seq_show = bfqg_print_avg_queue_size,
+       },
+       {
+               .name = "bfq.group_wait_time",
+               .private = offsetof(struct bfq_group, stats.group_wait_time),
+               .seq_show = bfqg_print_stat,
+       },
+       {
+               .name = "bfq.idle_time",
+               .private = offsetof(struct bfq_group, stats.idle_time),
+               .seq_show = bfqg_print_stat,
+       },
+       {
+               .name = "bfq.empty_time",
+               .private = offsetof(struct bfq_group, stats.empty_time),
+               .seq_show = bfqg_print_stat,
+       },
+       {
+               .name = "bfq.dequeue",
+               .private = offsetof(struct bfq_group, stats.dequeue),
+               .seq_show = bfqg_print_stat,
+       },
+       { }     /* terminate */
+};
+
+/*
+ * Control files installed as dfl_cftypes of blkcg_policy_bfq: only
+ * the weight attribute, written through the ->write() method
+ * bfq_io_set_weight() and shown by bfq_io_show_weight().
+ */
+struct cftype bfq_blkg_files[] = {
+       {
+               .name = "bfq.weight",
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .seq_show = bfq_io_show_weight,
+               .write = bfq_io_set_weight,
+       },
+       {} /* terminate */
+};
+
+#else  /* CONFIG_BFQ_GROUP_IOSCHED */
+
+/*
+ * Without CONFIG_BFQ_GROUP_IOSCHED, the group-statistics hooks and
+ * bfq_bfqq_move() are empty functions.
+ */
+void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
+                             unsigned int op) { }
+void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { }
+void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { }
+void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
+                                 uint64_t io_start_time, unsigned int op) { }
+void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
+void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
+void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { }
+void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { }
+void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { }
+
+void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+                  struct bfq_group *bfqg) {}
+
+/*
+ * Initialize @entity for scheduling inside @bfqg: weight and
+ * orig_weight are both set to new_weight, and the entity is attached
+ * to the sched_data of @bfqg. If @entity belongs to a bfq_queue, the
+ * ioprio and ioprio_class of the queue are set to their new_* values.
+ */
+void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg)
+{
+       struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+       if (bfqq) {
+               bfqq->ioprio = bfqq->new_ioprio;
+               bfqq->ioprio_class = bfqq->new_ioprio_class;
+       }
+
+       entity->weight = entity->new_weight;
+       entity->orig_weight = entity->new_weight;
+       entity->sched_data = &bfqg->sched_data;
+}
+
+/* Empty without CONFIG_BFQ_GROUP_IOSCHED. */
+void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) {}
+
+/*
+ * Without CONFIG_BFQ_GROUP_IOSCHED, only the root group of @bfqd is
+ * passed to bfq_end_wr_async_queues().
+ */
+void bfq_end_wr_async(struct bfq_data *bfqd)
+{
+       bfq_end_wr_async_queues(bfqd, bfqd->root_group);
+}
+
+/*
+ * Without CONFIG_BFQ_GROUP_IOSCHED, always returns the root group of
+ * @bfqd; @blkcg is ignored.
+ */
+struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, struct blkcg *blkcg)
+{
+       return bfqd->root_group;
+}
+
+/*
+ * Without CONFIG_BFQ_GROUP_IOSCHED, always returns the root group of
+ * the bfq_data that @bfqq points to.
+ */
+struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
+{
+       return bfqq->bfqd->root_group;
+}
+
+/*
+ * Without CONFIG_BFQ_GROUP_IOSCHED: allocate, on NUMA node @node, a
+ * zeroed bfq_group and set each of its BFQ_IOPRIO_CLASSES service
+ * trees to BFQ_SERVICE_TREE_INIT. Returns the new group, or NULL if
+ * the allocation fails. @bfqd is not used.
+ */
+struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
+{
+       struct bfq_group *bfqg = kzalloc_node(sizeof(*bfqg), GFP_KERNEL, node);
+       int class;
+
+       if (!bfqg)
+               return NULL;
+
+       for (class = 0; class < BFQ_IOPRIO_CLASSES; class++)
+               bfqg->sched_data.service_tree[class] = BFQ_SERVICE_TREE_INIT;
+
+       return bfqg;
+}
+#endif /* CONFIG_BFQ_GROUP_IOSCHED */
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c

new file mode 100644 (file)

index 0000000..bd8499e
--- /dev/null
+++ b/block/bfq-iosched.c
@@ -0,0 +1,5047 @@
+/*
+ * Budget Fair Queueing (BFQ) I/O scheduler.
+ *
+ * Based on ideas and code from CFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+ *
+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
+ *                   Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
+ *                    Arianna Avanzini <avanzini@google.com>
+ *
+ * Copyright (C) 2017 Paolo Valente <paolo.valente@linaro.org>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License as
+ *  published by the Free Software Foundation; either version 2 of the
+ *  License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ * BFQ is a proportional-share I/O scheduler, with some extra
+ * low-latency capabilities. BFQ also supports full hierarchical
+ * scheduling through cgroups. The next paragraphs provide an
+ * introduction to BFQ's inner workings. Details on BFQ benefits, usage
+ * and limitations can be found in Documentation/block/bfq-iosched.txt.
+ *
+ * BFQ is a proportional-share storage-I/O scheduling algorithm based
+ * on the slice-by-slice service scheme of CFQ. But BFQ assigns
+ * budgets, measured in number of sectors, to processes instead of
+ * time slices. The device is not granted to the in-service process
+ * for a given time slice, but until it has exhausted its assigned
+ * budget. This change from the time to the service domain enables BFQ
+ * to distribute the device throughput among processes as desired,
+ * without any distortion due to throughput fluctuations, or to device
+ * internal queueing. BFQ uses an ad hoc internal scheduler, called
+ * B-WF2Q+, to schedule processes according to their budgets. More
+ * precisely, BFQ schedules queues associated with processes. Each
+ * process/queue is assigned a user-configurable weight, and B-WF2Q+
+ * guarantees that each queue receives a fraction of the throughput
+ * proportional to its weight. Thanks to the accurate policy of
+ * B-WF2Q+, BFQ can afford to assign high budgets to I/O-bound
+ * processes issuing sequential requests (to boost the throughput),
+ * and yet guarantee a low latency to interactive and soft real-time
+ * applications.
+ *
+ * In particular, to provide these low-latency guarantees, BFQ
+ * explicitly privileges the I/O of two classes of time-sensitive
+ * applications: interactive and soft real-time. This feature enables
+ * BFQ to provide applications in these classes with a very low
+ * latency. Finally, BFQ also features additional heuristics for
+ * preserving both a low latency and a high throughput on NCQ-capable,
+ * rotational or flash-based devices, and for getting the job done
+ * quickly for applications consisting of many I/O-bound processes.
+ *
+ * BFQ is described in [1], where a reference to the initial, more
+ * theoretical paper on BFQ can also be found. The interested reader can find
+ * in the latter paper full details on the main algorithm, as well as
+ * formulas of the guarantees and formal proofs of all the properties.
+ * With respect to the version of BFQ presented in these papers, this
+ * implementation adds a few more heuristics, such as the one that
+ * guarantees a low latency to soft real-time applications, and a
+ * hierarchical extension based on H-WF2Q+.
+ *
+ * B-WF2Q+ is based on WF2Q+, which is described in [2], together with
+ * H-WF2Q+, while the augmented tree used here to implement B-WF2Q+
+ * with O(log N) complexity derives from the one introduced with EEVDF
+ * in [3].
+ *
+ * [1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O
+ *     Scheduler", Proceedings of the First Workshop on Mobile System
+ *     Technologies (MST-2015), May 2015.
+ *     http://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf
+ *
+ * [2] Jon C.R. Bennett and H. Zhang, "Hierarchical Packet Fair Queueing
+ *     Algorithms", IEEE/ACM Transactions on Networking, 5(5):675-689,
+ *     Oct 1997.
+ *
+ * http://www.cs.cmu.edu/~hzhang/papers/TON-97-Oct.ps.gz
+ *
+ * [3] I. Stoica and H. Abdel-Wahab, "Earliest Eligible Virtual Deadline
+ *     First: A Flexible and Accurate Mechanism for Proportional Share
+ *     Resource Allocation", technical report.
+ *
+ * http://www.cs.berkeley.edu/~istoica/papers/eevdf-tr-95.pdf
+ */
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/cgroup.h>
+#include <linux/elevator.h>
+#include <linux/ktime.h>
+#include <linux/rbtree.h>
+#include <linux/ioprio.h>
+#include <linux/sbitmap.h>
+#include <linux/delay.h>
+
+#include "blk.h"
+#include "blk-mq.h"
+#include "blk-mq-tag.h"
+#include "blk-mq-sched.h"
+#include "bfq-iosched.h"
+
+/*
+ * Generate, for the queue flag BFQQF_<name>, the three helpers
+ * bfq_mark_bfqq_<name>(), bfq_clear_bfqq_<name>() and
+ * bfq_bfqq_<name>(), which respectively set, clear and test the
+ * corresponding bit in bfqq->flags. Setting and clearing are done
+ * with the non-atomic __set_bit()/__clear_bit().
+ */
+#define BFQ_BFQQ_FNS(name)                                             \
+void bfq_mark_bfqq_##name(struct bfq_queue *bfqq)                      \
+{                                                                      \
+       __set_bit(BFQQF_##name, &(bfqq)->flags);                        \
+}                                                                      \
+void bfq_clear_bfqq_##name(struct bfq_queue *bfqq)                     \
+{                                                                      \
+       __clear_bit(BFQQF_##name, &(bfqq)->flags);              \
+}                                                                      \
+int bfq_bfqq_##name(const struct bfq_queue *bfqq)                      \
+{                                                                      \
+       return test_bit(BFQQF_##name, &(bfqq)->flags);          \
+}
+
+BFQ_BFQQ_FNS(just_created);
+BFQ_BFQQ_FNS(busy);
+BFQ_BFQQ_FNS(wait_request);
+BFQ_BFQQ_FNS(non_blocking_wait_rq);
+BFQ_BFQQ_FNS(fifo_expire);
+BFQ_BFQQ_FNS(idle_window);
+BFQ_BFQQ_FNS(sync);
+BFQ_BFQQ_FNS(IO_bound);
+BFQ_BFQQ_FNS(in_large_burst);
+BFQ_BFQQ_FNS(coop);
+BFQ_BFQQ_FNS(split_coop);
+BFQ_BFQQ_FNS(softrt_update);
+/* no line continuation here: the directive ends on this very line */
+#undef BFQ_BFQQ_FNS
+
+/*
+ * Expiration time of sync (0) and async (1) requests, in ns: a
+ * quarter and an eighth of a second, respectively.
+ */
+static const u64 bfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 };
+
+/* Maximum backwards seek (magic number lifted from CFQ), in KiB. */
+static const int bfq_back_max = 16 * 1024;
+
+/* Penalty of a backwards seek, in number of sectors. */
+static const int bfq_back_penalty = 2;
+
+/* Idling period duration, in ns (i.e., 8 ms). */
+static u64 bfq_slice_idle = NSEC_PER_SEC / 125;
+
+/* Minimum number of assigned budgets for which stats are safe to compute. */
+static const int bfq_stats_min_budgets = 194;
+
+/* Default maximum budget values, in sectors and number of requests. */
+static const int bfq_default_max_budget = 16 * 1024;
+
+/*
+ * Async to sync throughput distribution is controlled as follows:
+ * when an async request is served, the entity is charged the number
+ * of sectors of the request, multiplied by the factor below
+ */
+static const int bfq_async_charge_factor = 10;
+
+/*
+ * Default timeout values, in jiffies, approximating CFQ defaults.
+ * Not static, unlike the constants above.
+ */
+const int bfq_timeout = HZ / 8;
+
+/* NOTE(review): presumably the slab cache of the bfq_queues - confirm. */
+static struct kmem_cache *bfq_pool;
+
+/* Below this threshold (in ns), we consider thinktime immediate. */
+#define BFQ_MIN_TT             (2 * NSEC_PER_MSEC)
+
+/* hw_tag detection: parallel requests threshold and min samples needed. */
+#define BFQ_HW_QUEUE_THRESHOLD 4
+#define BFQ_HW_QUEUE_SAMPLES   32
+
+/*
+ * Thresholds of type sector_t and seekiness test. Both the expansions
+ * and the macro argument are parenthesized, so that these macros
+ * behave as expected inside any expression and with any argument.
+ */
+#define BFQQ_SEEK_THR          ((sector_t)(8 * 100))
+#define BFQQ_SECT_THR_NONROT   ((sector_t)(2 * 32))
+#define BFQQ_CLOSE_THR         ((sector_t)(8 * 1024))
+/* True if more than 32/8 bits are set in the seek_history of @bfqq. */
+#define BFQQ_SEEKY(bfqq)       (hweight32((bfqq)->seek_history) > 32/8)
+
+/* Min number of samples required to perform peak-rate update */
+#define BFQ_RATE_MIN_SAMPLES   32
+/* Min observation time interval required to perform a peak-rate update (ns) */
+#define BFQ_RATE_MIN_INTERVAL  (300*NSEC_PER_MSEC)
+/* Target observation time interval for a peak-rate update (ns) */
+#define BFQ_RATE_REF_INTERVAL  NSEC_PER_SEC
+
+/* Shift used for peak rate fixed precision calculations. */
+#define BFQ_RATE_SHIFT         16
+
+/*
+ * By default, BFQ computes the duration of the weight raising for
+ * interactive applications automatically, using the following formula:
+ * duration = (R / r) * T, where r is the peak rate of the device, and
+ * R and T are two reference parameters.
+ * In particular, R is the peak rate of the reference device (see below),
+ * and T is a reference time: given the systems that are likely to be
+ * installed on the reference device according to its speed class, T is
+ * about the maximum time needed, under BFQ and while reading two files in
+ * parallel, to load typical large applications on these systems.
+ * In practice, the slower/faster the device at hand is, the more/less it
+ * takes to load applications with respect to the reference device.
+ * Accordingly, the longer/shorter BFQ grants weight raising to interactive
+ * applications.
+ *
+ * BFQ uses four different reference pairs (R, T), depending on:
+ * . whether the device is rotational or non-rotational;
+ * . whether the device is slow, such as old or portable HDDs, as well as
+ *   SD cards, or fast, such as newer HDDs and SSDs.
+ *
+ * The device's speed class is dynamically (re)detected in
+ * bfq_update_peak_rate() every time the estimated peak rate is updated.
+ *
+ * In the following definitions, R_slow[0]/R_fast[0] and
+ * T_slow[0]/T_fast[0] are the reference values for a slow/fast
+ * rotational device, whereas R_slow[1]/R_fast[1] and
+ * T_slow[1]/T_fast[1] are the reference values for a slow/fast
+ * non-rotational device. Finally, device_speed_thresh are the
+ * thresholds used to switch between speed classes. The reference
+ * rates are not the actual peak rates of the devices used as a
+ * reference, but slightly lower values. The reason for using these
+ * slightly lower values is that the peak-rate estimator tends to
+ * yield slightly lower values than the actual peak rate (it can yield
+ * the actual peak rate only if there is only one process doing I/O,
+ * and the process does sequential I/O).
+ *
+ * Both the reference peak rates and the thresholds are measured in
+ * sectors/usec, left-shifted by BFQ_RATE_SHIFT.
+ */
+static int R_slow[2] = {1000, 10700};  /* [0] rotational, [1] non-rotational */
+static int R_fast[2] = {14000, 33000}; /* [0] rotational, [1] non-rotational */
+/*
+ * To improve readability, a conversion function is used to initialize the
+ * following arrays, which entails that they can be initialized only in a
+ * function. Until then they hold zeroes, as any static object without
+ * an initializer.
+ */
+static int T_slow[2];
+static int T_fast[2];
+static int device_speed_thresh[2];
+
+/*
+ * Accessors for the two elevator-private pointers of a request:
+ * elv.priv[0], cast to a bfq_io_cq pointer, and elv.priv[1], used
+ * uncast (so that RQ_BFQQ() can also be assigned to).
+ */
+#define RQ_BIC(rq)             ((struct bfq_io_cq *) (rq)->elv.priv[0])
+#define RQ_BFQQ(rq)            ((rq)->elv.priv[1])
+
+/*
+ * Return the queue stored in @bic for the given kind of I/O:
+ * bic->bfqq[1] if @is_sync is true, bic->bfqq[0] otherwise.
+ */
+struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync)
+{
+       return bic->bfqq[is_sync];
+}
+
+/*
+ * Store @bfqq in @bic as the queue for the given kind of I/O:
+ * bic->bfqq[1] if @is_sync is true, bic->bfqq[0] otherwise.
+ */
+void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync)
+{
+       bic->bfqq[is_sync] = bfqq;
+}
+
+/*
+ * Return the elevator_data of the elevator attached to the request
+ * queue that the icq embedded in @bic refers to.
+ */
+struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic)
+{
+       return bic->icq.q->elevator->elevator_data;
+}
+
+/**
+ * icq_to_bic - convert iocontext queue structure to bfq_io_cq.
+ * @icq: the iocontext queue.
+ *
+ * Returns the bfq_io_cq that embeds @icq, obtained with container_of().
+ */
+static struct bfq_io_cq *icq_to_bic(struct io_cq *icq)
+{
+       /* bic->icq is the first member, %NULL will convert to %NULL */
+       return container_of(icq, struct bfq_io_cq, icq);
+}
+
+/**
+ * bfq_bic_lookup - search into @ioc a bic associated to @bfqd.
+ * @bfqd: the lookup key (not accessed by this function).
+ * @ioc: the io_context of the process doing I/O.
+ * @q: the request queue.
+ *
+ * The lookup is performed through ioc_lookup_icq(), with the
+ * queue_lock of @q held and interrupts disabled. Returns NULL if
+ * @ioc is NULL, and the result of the lookup, converted with
+ * icq_to_bic(), otherwise.
+ */
+static struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd,
+                                       struct io_context *ioc,
+                                       struct request_queue *q)
+{
+       struct bfq_io_cq *bic;
+       unsigned long flags;
+
+       if (!ioc)
+               return NULL;
+
+       spin_lock_irqsave(q->queue_lock, flags);
+       bic = icq_to_bic(ioc_lookup_icq(ioc, q));
+       spin_unlock_irqrestore(q->queue_lock, flags);
+
+       return bic;
+}
+
+/*
+ * Ask blk-mq to run the hardware queues of the request queue of
+ * @bfqd (second argument of blk_mq_run_hw_queues() set to true), but
+ * only if the queued counter of @bfqd is not zero.
+ */
+void bfq_schedule_dispatch(struct bfq_data *bfqd)
+{
+       if (bfqd->queued == 0)
+               return;
+
+       bfq_log(bfqd, "schedule dispatch");
+       blk_mq_run_hw_queues(bfqd->queue, true);
+}
+
+/* Tests on the I/O priority class currently set for a queue. */
+#define bfq_class_idle(bfqq)   ((bfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
+#define bfq_class_rt(bfqq)     ((bfqq)->ioprio_class == IOPRIO_CLASS_RT)
+
+/* A sample count is deemed valid only if it is greater than 80. */
+#define bfq_sample_valid(samples)      ((samples) > 80)
+
+/*
+ * Lifted from AS - choose which of rq1 and rq2 is best served now.
+ * We choose the request that is closer to the head right now.  Distance
+ * behind the head is penalized and only allowed to a certain extent.
+ *
+ * @last is the sector taken as current head position. If one of the
+ * two requests is NULL, the other one is returned (NULL if both are
+ * NULL). Before positions are compared, a sync request is preferred
+ * to an async one and, if that does not decide, a REQ_META request
+ * is preferred to a non-REQ_META one.
+ */
+static struct request *bfq_choose_req(struct bfq_data *bfqd,
+                                     struct request *rq1,
+                                     struct request *rq2,
+                                     sector_t last)
+{
+       sector_t s1, s2, d1 = 0, d2 = 0;
+       unsigned long back_max;
+#define BFQ_RQ1_WRAP   0x01 /* request 1 wraps */
+#define BFQ_RQ2_WRAP   0x02 /* request 2 wraps */
+       unsigned int wrap = 0; /* bit mask: requests behind the disk head? */
+
+       if (!rq1 || rq1 == rq2)
+               return rq2;
+       if (!rq2)
+               return rq1;
+
+       if (rq_is_sync(rq1) && !rq_is_sync(rq2))
+               return rq1;
+       else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
+               return rq2;
+       if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META))
+               return rq1;
+       else if ((rq2->cmd_flags & REQ_META) && !(rq1->cmd_flags & REQ_META))
+               return rq2;
+
+       s1 = blk_rq_pos(rq1);
+       s2 = blk_rq_pos(rq2);
+
+       /*
+        * By definition, 1KiB is 2 sectors.
+        */
+       back_max = bfqd->bfq_back_max * 2;
+
+       /*
+        * Strict one way elevator _except_ in the case where we allow
+        * short backward seeks which are biased as twice the cost of a
+        * similar forward seek.
+        */
+       if (s1 >= last)
+               d1 = s1 - last;
+       else if (s1 + back_max >= last)
+               d1 = (last - s1) * bfqd->bfq_back_penalty;
+       else
+               wrap |= BFQ_RQ1_WRAP;
+
+       if (s2 >= last)
+               d2 = s2 - last;
+       else if (s2 + back_max >= last)
+               d2 = (last - s2) * bfqd->bfq_back_penalty;
+       else
+               wrap |= BFQ_RQ2_WRAP;
+
+       /* Found required data */
+
+       /*
+        * By doing switch() on the bit mask "wrap" we avoid having to
+        * check two variables for all permutations: --> faster!
+        */
+       switch (wrap) {
+       case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
+               if (d1 < d2)
+                       return rq1;
+               else if (d2 < d1)
+                       return rq2;
+
+               /* same distance: choose the request at the higher sector */
+               if (s1 >= s2)
+                       return rq1;
+               else
+                       return rq2;
+
+       case BFQ_RQ2_WRAP:
+               return rq1;
+       case BFQ_RQ1_WRAP:
+               return rq2;
+       case BFQ_RQ1_WRAP|BFQ_RQ2_WRAP: /* both rqs wrapped */
+       default:
+               /*
+                * Since both rqs are wrapped,
+                * start with the one that's further behind head
+                * (--> only *one* back seek required),
+                * since back seek takes more time than forward.
+                */
+               if (s1 <= s2)
+                       return rq1;
+               else
+                       return rq2;
+       }
+}
+
+/*
+ * Look up, in the tree rooted at @root, the queue whose next_rq
+ * starts exactly at @sector, and return it (NULL if there is none).
+ *
+ * On return, *@ret_parent is the last node visited (NULL for an empty
+ * tree) and, if @rb_link is not NULL, *@rb_link is the link slot at
+ * which the search stopped. If no queue has been found, these are the
+ * parent and the slot to pass to rb_link_node() to insert a node for
+ * @sector, as done by bfq_pos_tree_add_move().
+ */
+static struct bfq_queue *
+bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root,
+                    sector_t sector, struct rb_node **ret_parent,
+                    struct rb_node ***rb_link)
+{
+       struct rb_node **p, *parent;
+       struct bfq_queue *bfqq = NULL;
+
+       parent = NULL;
+       p = &root->rb_node;
+       while (*p) {
+               struct rb_node **n;
+
+               parent = *p;
+               bfqq = rb_entry(parent, struct bfq_queue, pos_node);
+
+               /*
+                * Sort strictly based on sector. Smallest to the left,
+                * largest to the right.
+                */
+               if (sector > blk_rq_pos(bfqq->next_rq))
+                       n = &(*p)->rb_right;
+               else if (sector < blk_rq_pos(bfqq->next_rq))
+                       n = &(*p)->rb_left;
+               else
+                       break; /* exact match: bfqq is the result */
+               p = n;
+               bfqq = NULL; /* no match so far */
+       }
+
+       *ret_parent = parent;
+       if (rb_link)
+               *rb_link = p;
+
+       bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d",
+               (unsigned long long)sector,
+               bfqq ? bfqq->pid : 0);
+
+       return bfqq;
+}
+
+/*
+ * (Re)position bfqq in the rq_pos_tree of its group, keyed by the
+ * sector of bfqq->next_rq. The queue is first removed from the tree it
+ * currently sits in, if any. It is then left out of any tree if it
+ * belongs to the idle class, if it has no next request, or if another
+ * queue is already stored at the very same sector.
+ */
+void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+       struct rb_node **link, *parent;
+
+       if (bfqq->pos_root) {
+               rb_erase(&bfqq->pos_node, bfqq->pos_root);
+               bfqq->pos_root = NULL;
+       }
+
+       if (bfq_class_idle(bfqq) || !bfqq->next_rq)
+               return;
+
+       bfqq->pos_root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree;
+
+       if (bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root,
+                                  blk_rq_pos(bfqq->next_rq),
+                                  &parent, &link)) {
+               /* sector already taken by another queue: stay out */
+               bfqq->pos_root = NULL;
+               return;
+       }
+
+       rb_link_node(&bfqq->pos_node, parent, link);
+       rb_insert_color(&bfqq->pos_node, bfqq->pos_root);
+}
+
+/*
+ * Tell whether there are active queues or groups with differentiated
+ * weights. The group weights tree is inspected only if
+ * CONFIG_BFQ_GROUP_IOSCHED is defined.
+ */
+static bool bfq_differentiated_weights(struct bfq_data *bfqd)
+{
+       struct rb_root *tree;
+
+       /*
+        * For weights to differ, at least one of the trees must contain
+        * at least two nodes, i.e., its root node must have a child.
+        */
+       tree = &bfqd->queue_weights_tree;
+       if (!RB_EMPTY_ROOT(tree) &&
+           (tree->rb_node->rb_left || tree->rb_node->rb_right))
+               return true;
+
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+       tree = &bfqd->group_weights_tree;
+       if (!RB_EMPTY_ROOT(tree) &&
+           (tree->rb_node->rb_left || tree->rb_node->rb_right))
+               return true;
+#endif
+
+       return false;
+}
+
+/*
+ * The following function returns true if every queue must receive the
+ * same share of the throughput (this condition is used when deciding
+ * whether idling may be disabled, see the comments in the function
+ * bfq_bfqq_may_idle()).
+ *
+ * Such a scenario occurs when:
+ * 1) all active queues have the same weight,
+ * 2) all active groups at the same level in the groups tree have the same
+ *    weight,
+ * 3) all active groups at the same level in the groups tree have the same
+ *    number of children.
+ *
+ * Unfortunately, keeping the necessary state for evaluating exactly the
+ * above symmetry conditions would be quite complex and time-consuming.
+ * Therefore this function evaluates, instead, the following stronger
+ * sub-conditions, for which it is much easier to maintain the needed
+ * state:
+ * 1) all active queues have the same weight,
+ * 2) all active groups have the same weight,
+ * 3) all active groups have at most one active child each.
+ * In particular, the last two conditions are always true if hierarchical
+ * support and the cgroups interface are not enabled, thus no state needs
+ * to be maintained in this case.
+ */
+static bool bfq_symmetric_scenario(struct bfq_data *bfqd)
+{
+       /* The scenario is deemed symmetric iff no weights differ. */
+       if (bfq_differentiated_weights(bfqd))
+               return false;
+
+       return true;
+}
+
+/*
+ * If the weight-counter tree passed as input contains no counter for
+ * the weight of the input entity, then add that counter; otherwise just
+ * increment the existing counter.
+ *
+ * Note that weight-counter trees contain few nodes in mostly symmetric
+ * scenarios. For example, if all queues have the same weight, then the
+ * weight-counter tree for the queues may contain at most one node.
+ * This holds even if low_latency is on, because weight-raised queues
+ * are not inserted in the tree.
+ * In most scenarios, the rate at which nodes are created/destroyed
+ * should be low too.
+ */
+void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity,
+                         struct rb_root *root)
+{
+       struct rb_node **link = &root->rb_node;
+       struct rb_node *parent = NULL;
+       struct bfq_weight_counter *counter;
+
+       /*
+        * Nothing to do if the entity is already associated with a
+        * counter. This happens if:
+        *   1) the entity is associated with a queue,
+        *   2) a request arrival has caused the queue to become both
+        *      non-weight-raised (and hence to change its weight) and
+        *      backlogged; each of these two events triggers an
+        *      invocation of this function,
+        *   3) this is the invocation caused by the second event.
+        * That second invocation is useless, so it is turned into a
+        * no-op here. More efficient or clearer solutions might
+        * possibly be adopted.
+        */
+       if (entity->weight_counter)
+               return;
+
+       /* Search for an existing counter for the weight of entity. */
+       while (*link) {
+               parent = *link;
+               counter = container_of(parent, struct bfq_weight_counter,
+                                      weights_node);
+
+               if (entity->weight == counter->weight) {
+                       /* found: share it and account for entity */
+                       entity->weight_counter = counter;
+                       counter->num_active++;
+                       return;
+               }
+
+               if (entity->weight < counter->weight)
+                       link = &parent->rb_left;
+               else
+                       link = &parent->rb_right;
+       }
+
+       counter = kzalloc(sizeof(*counter), GFP_ATOMIC);
+       entity->weight_counter = counter;
+
+       /*
+        * On allocation failure just give up. As a consequence, the
+        * weight of entity is not considered in
+        * bfq_differentiated_weights, so the scenario may be wrongly
+        * deemed symmetric if this weight would have been the only one
+        * making it asymmetric. On the bright side, no unbalance
+        * occurs when entity becomes inactive again (this function is
+        * invoked on an activation of entity), because
+        * bfq_weights_tree_remove does nothing if
+        * !entity->weight_counter.
+        */
+       if (unlikely(!counter))
+               return;
+
+       /* Insert the new counter at the position found by the search. */
+       counter->weight = entity->weight;
+       rb_link_node(&counter->weights_node, parent, link);
+       rb_insert_color(&counter->weights_node, root);
+
+       counter->num_active++;
+}
+
+/*
+ * Drop the association between the entity and its weight counter, if
+ * any: decrement the counter and, if no active entity is left on it,
+ * remove the counter from the tree and free it.
+ * See the comments to the function bfq_weights_tree_add() for considerations
+ * about overhead.
+ */
+void bfq_weights_tree_remove(struct bfq_data *bfqd, struct bfq_entity *entity,
+                            struct rb_root *root)
+{
+       struct bfq_weight_counter *counter = entity->weight_counter;
+
+       if (!counter)
+               return;
+
+       counter->num_active--;
+       if (counter->num_active > 0) {
+               /* counter still in use by other entities: keep it */
+               entity->weight_counter = NULL;
+               return;
+       }
+
+       rb_erase(&counter->weights_node, root);
+       kfree(counter);
+       entity->weight_counter = NULL;
+}
+
+/*
+ * Return the request at the head of the fifo of bfqq if its fifo time
+ * has elapsed and it is not @last; return NULL otherwise, in which case
+ * the caller just starts from scratch in the rbtree. The check is
+ * skipped (NULL is returned) if the fifo_expire flag of bfqq is
+ * already set; otherwise the flag is set before checking.
+ */
+static struct request *bfq_check_fifo(struct bfq_queue *bfqq,
+                                     struct request *last)
+{
+       struct request *oldest;
+
+       if (bfq_bfqq_fifo_expire(bfqq))
+               return NULL;
+
+       bfq_mark_bfqq_fifo_expire(bfqq);
+
+       oldest = rq_entry_fifo(bfqq->fifo.next);
+
+       if (oldest != last && ktime_get_ns() >= oldest->fifo_time) {
+               bfq_log_bfqq(bfqq->bfqd, bfqq, "check_fifo: returned %p",
+                            oldest);
+               return oldest;
+       }
+
+       return NULL;
+}
+
+/*
+ * Pick the request of bfqq to serve after @last. An expired fifo
+ * request, if any, takes precedence. Otherwise the choice is delegated
+ * to bfq_choose_req(), which is given the neighbours of @last in the
+ * sort tree; if @last has no successor, the first request in the tree
+ * is used in its place, unless that request is @last itself.
+ */
+static struct request *bfq_find_next_rq(struct bfq_data *bfqd,
+                                       struct bfq_queue *bfqq,
+                                       struct request *last)
+{
+       struct rb_node *after = rb_next(&last->rb_node);
+       struct rb_node *before = rb_prev(&last->rb_node);
+       struct request *next, *prev = NULL;
+
+       /* Follow expired path, else get first next available. */
+       next = bfq_check_fifo(bfqq, last);
+       if (next)
+               return next;
+
+       if (before)
+               prev = rb_entry_rq(before);
+
+       if (!after) {
+               /* wrap around to the start of the tree */
+               after = rb_first(&bfqq->sort_list);
+               if (after == &last->rb_node)
+                       after = NULL;
+       }
+
+       if (after)
+               next = rb_entry_rq(after);
+
+       return bfq_choose_req(bfqd, next, prev, blk_rq_pos(last));
+}
+
+/*
+ * Return the amount of service to charge for rq. See the definition of
+ * bfq_async_charge_factor for details.
+ */
+static unsigned long bfq_serv_to_charge(struct request *rq,
+                                       struct bfq_queue *bfqq)
+{
+       unsigned int sectors = blk_rq_sectors(rq);
+
+       /* Sync or weight-raised queues are charged the plain size. */
+       if (bfq_bfqq_sync(bfqq) || bfqq->wr_coeff > 1)
+               return sectors;
+
+       /*
+        * With no weight-raised queue around, amplify service by just
+        * the async charge factor.
+        */
+       if (bfqq->bfqd->wr_busy_queues == 0)
+               return sectors * bfq_async_charge_factor;
+
+       /*
+        * Otherwise amplify service by twice the async charge factor,
+        * to further reduce latency for weight-raised queues.
+        */
+       return sectors * 2 * bfq_async_charge_factor;
+}
+
+/**
+ * bfq_updated_next_req - update the queue after a new next_rq selection.
+ * @bfqd: the device data the queue belongs to.
+ * @bfqq: the queue to update.
+ *
+ * When the first request of a queue changes, make sure that the budget
+ * of the queue is enough to serve at least that request (in case the
+ * request has grown). Otherwise the queue would have to go through two
+ * dispatch rounds to actually get the request dispatched. The queue is
+ * requeued only if its budget actually changes.
+ */
+static void bfq_updated_next_req(struct bfq_data *bfqd,
+                                struct bfq_queue *bfqq)
+{
+       struct request *head = bfqq->next_rq;
+       unsigned long budget;
+
+       /*
+        * Nothing to do without a next request. And, in order not to
+        * break guarantees, budgets cannot be changed after an entity
+        * has been selected for service.
+        */
+       if (!head || bfqq == bfqd->in_service_queue)
+               return;
+
+       budget = max_t(unsigned long, bfqq->max_budget,
+                      bfq_serv_to_charge(head, bfqq));
+       if (bfqq->entity.budget == budget)
+               return;
+
+       bfqq->entity.budget = budget;
+       bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu",
+                                budget);
+       bfq_requeue_bfqq(bfqd, bfqq);
+}
+
+/*
+ * Restore into bfqq the state previously saved in bic: idle-window and
+ * IO-bound flags, think time, and weight-raising parameters. Weight
+ * raising is switched off right away if bfqq is in a large burst or if
+ * the restored weight-raising period has already elapsed.
+ */
+static void
+bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
+{
+       if (bic->saved_idle_window)
+               bfq_mark_bfqq_idle_window(bfqq);
+       else
+               bfq_clear_bfqq_idle_window(bfqq);
+
+       if (bic->saved_IO_bound)
+               bfq_mark_bfqq_IO_bound(bfqq);
+       else
+               bfq_clear_bfqq_IO_bound(bfqq);
+
+       bfqq->ttime = bic->saved_ttime;
+
+       /* weight-raising state */
+       bfqq->wr_coeff = bic->saved_wr_coeff;
+       bfqq->wr_cur_max_time = bic->saved_wr_cur_max_time;
+       bfqq->last_wr_start_finish = bic->saved_last_wr_start_finish;
+       bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt;
+
+       if (bfqq->wr_coeff > 1) {
+               bool wr_over = bfq_bfqq_in_large_burst(bfqq) ||
+                       time_is_before_jiffies(bfqq->last_wr_start_finish +
+                                              bfqq->wr_cur_max_time);
+
+               if (wr_over) {
+                       bfq_log_bfqq(bfqq->bfqd, bfqq,
+                           "resume state: switching off wr");
+
+                       bfqq->wr_coeff = 1;
+               }
+       }
+
+       /* make sure weight will be updated, however we got here */
+       bfqq->entity.prio_changed = 1;
+}
+
+/*
+ * Return the reference count of bfqq net of the allocated counter and
+ * of the entity's on_st field.
+ */
+static int bfqq_process_refs(struct bfq_queue *bfqq)
+{
+       return bfqq->ref - (bfqq->allocated + bfqq->entity.on_st);
+}
+
+/*
+ * Empty the burst list and restart it with just bfqq (see comments on
+ * bfq_handle_burst). The burst size becomes 1 and the parent entity of
+ * bfqq becomes the parent entity of the burst.
+ */
+static void bfq_reset_burst_list(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+       struct bfq_queue *old;
+       struct hlist_node *tmp;
+
+       hlist_for_each_entry_safe(old, tmp, &bfqd->burst_list,
+                                 burst_list_node) {
+               hlist_del_init(&old->burst_list_node);
+       }
+
+       hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list);
+       bfqd->burst_parent_entity = bfqq->entity.parent;
+       bfqd->burst_size = 1;
+}
+
+/*
+ * Account bfqq in the current burst (see bfq_handle_burst). If the
+ * burst thereby reaches the large-burst threshold, mark bfqq and all
+ * the queues in the burst list as belonging to a large burst and empty
+ * the list; otherwise just add bfqq to the list.
+ */
+static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+       struct bfq_queue *item;
+       struct hlist_node *tmp;
+
+       /* Increment burst size to take into account also bfqq */
+       bfqd->burst_size++;
+
+       if (bfqd->burst_size != bfqd->bfq_large_burst_thresh) {
+               /*
+                * Burst not yet large: add bfqq to the burst list. Do
+                * not increment the ref counter for bfqq, because bfqq
+                * is removed from the burst list before freeing bfqq
+                * in put_queue.
+                */
+               hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list);
+               return;
+       }
+
+       /*
+        * Enough queues have been activated shortly after each other
+        * to consider this burst as large.
+        */
+       bfqd->large_burst = true;
+
+       /*
+        * All the queues in the burst list, plus bfqq, can now be
+        * marked as belonging to a large burst.
+        */
+       hlist_for_each_entry(item, &bfqd->burst_list, burst_list_node)
+               bfq_mark_bfqq_in_large_burst(item);
+       bfq_mark_bfqq_in_large_burst(bfqq);
+
+       /*
+        * From now on, and until the current burst finishes, any new
+        * queue being activated shortly after the last queue was
+        * inserted in the burst can be immediately marked as belonging
+        * to a large burst. So the burst list is not needed any more.
+        * Remove it.
+        */
+       hlist_for_each_entry_safe(item, tmp, &bfqd->burst_list,
+                                 burst_list_node)
+               hlist_del_init(&item->burst_list_node);
+}
+
+/*
+ * If many queues belonging to the same group happen to be created
+ * shortly after each other, then the processes associated with these
+ * queues have typically a common goal. In particular, bursts of queue
+ * creations are usually caused by services or applications that spawn
+ * many parallel threads/processes. Examples are systemd during boot,
+ * or git grep. To help these processes get their job done as soon as
+ * possible, it is usually better to not grant either weight-raising
+ * or device idling to their queues.
+ *
+ * In this comment we describe, firstly, the reasons why this fact
+ * holds, and, secondly, the next function, which implements the main
+ * steps needed to properly mark these queues so that they can then be
+ * treated in a different way.
+ *
+ * The above services or applications benefit mostly from a high
+ * throughput: the quicker the requests of the activated queues are
+ * cumulatively served, the sooner the target job of these queues gets
+ * completed. As a consequence, weight-raising any of these queues,
+ * which also implies idling the device for it, is almost always
+ * counterproductive. In most cases it just lowers throughput.
+ *
+ * On the other hand, a burst of queue creations may be caused also by
+ * the start of an application that does not consist of a lot of
+ * parallel I/O-bound threads. In fact, with a complex application,
+ * several short processes may need to be executed to start-up the
+ * application. In this respect, to start an application as quickly as
+ * possible, the best thing to do is in any case to privilege the I/O
+ * related to the application with respect to all other
+ * I/O. Therefore, the best strategy to start as quickly as possible
+ * an application that causes a burst of queue creations is to
+ * weight-raise all the queues created during the burst. This is the
+ * exact opposite of the best strategy for the other type of bursts.
+ *
+ * In the end, to take the best action for each of the two cases, the
+ * two types of bursts need to be distinguished. Fortunately, this
+ * seems relatively easy, by looking at the sizes of the bursts. In
+ * particular, we found a threshold such that only bursts with a
+ * larger size than that threshold are apparently caused by
+ * services or commands such as systemd or git grep. For brevity,
+ * hereafter we call just 'large' these bursts. BFQ *does not*
+ * weight-raise queues whose creation occurs in a large burst. In
+ * addition, for each of these queues BFQ performs or does not perform
+ * idling depending on which choice boosts the throughput more. The
+ * exact choice depends on the device and request pattern at
+ * hand.
+ *
+ * Unfortunately, false positives may occur while an interactive task
+ * is starting (e.g., an application is being started). The
+ * consequence is that the queues associated with the task do not
+ * enjoy weight raising as expected. Fortunately these false positives
+ * are very rare. They typically occur if some service happens to
+ * start doing I/O exactly when the interactive task starts.
+ *
+ * Turning back to the next function, it implements all the steps
+ * needed to detect the occurrence of a large burst and to properly
+ * mark all the queues belonging to it (so that they can then be
+ * treated in a different way). This goal is achieved by maintaining a
+ * "burst list" that holds, temporarily, the queues that belong to the
+ * burst in progress. The list is then used to mark these queues as
+ * belonging to a large burst if the burst does become large. The main
+ * steps are the following.
+ *
+ * . when the very first queue is created, the queue is inserted into the
+ *   list (as it could be the first queue in a possible burst)
+ *
+ * . if the current burst has not yet become large, and a queue Q that does
+ *   not yet belong to the burst is activated shortly after the last time
+ *   at which a new queue entered the burst list, then the function appends
+ *   Q to the burst list
+ *
+ * . if, as a consequence of the previous step, the burst size reaches
+ *   the large-burst threshold, then
+ *
+ *     . all the queues in the burst list are marked as belonging to a
+ *       large burst
+ *
+ *     . the burst list is deleted; in fact, the burst list already served
+ *       its purpose (keeping temporarily track of the queues in a burst,
+ *       so as to be able to mark them as belonging to a large burst in the
+ *       previous sub-step), and now is not needed any more
+ *
+ *     . the device enters a large-burst mode
+ *
+ * . if a queue Q that does not belong to the burst is created while
+ *   the device is in large-burst mode and shortly after the last time
+ *   at which a queue either entered the burst list or was marked as
+ *   belonging to the current large burst, then Q is immediately marked
+ *   as belonging to a large burst.
+ *
+ * . if a queue Q that does not belong to the burst is created a while
+ *   later than (i.e., not shortly after) the last time at which a queue
+ *   either entered the burst list or was marked as belonging to the
+ *   current large burst, then the current burst is deemed as finished and:
+ *
+ *        . the large-burst mode is reset if set
+ *
+ *        . the burst list is emptied
+ *
+ *        . Q is inserted in the burst list, as Q may be the first queue
+ *          in a possible new burst (then the burst list contains just Q
+ *          after this step).
+ */
+static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+       bool burst_over;
+
+       /*
+        * Nothing to do if bfqq is already in the burst list, or is
+        * part of a large burst, or has just been split.
+        */
+       if (!hlist_unhashed(&bfqq->burst_list_node))
+               return;
+       if (bfq_bfqq_in_large_burst(bfqq))
+               return;
+       if (time_is_after_eq_jiffies(bfqq->split_time +
+                                    msecs_to_jiffies(10)))
+               return;
+
+       /*
+        * The current burst is finished if bfqq's creation happens
+        * late enough, or if bfqq belongs to a different group than
+        * the burst group.
+        *
+        * In this respect, consider the special case where bfqq is
+        * the very first queue created after BFQ is selected for this
+        * device. In this case, last_ins_in_burst and
+        * burst_parent_entity are not yet significant when we get
+        * here. But it is easy to verify that, whether or not the
+        * burst is deemed finished, bfqq ends up being inserted into
+        * the burst list, which then contains only bfqq. And this is
+        * exactly what has to happen, as bfqq may be the first queue
+        * of the first burst.
+        */
+       burst_over = time_is_before_jiffies(bfqd->last_ins_in_burst +
+                                           bfqd->bfq_burst_interval) ||
+                    bfqq->entity.parent != bfqd->burst_parent_entity;
+
+       if (burst_over) {
+               /* Reset burst state; bfqq may start a new burst. */
+               bfqd->large_burst = false;
+               bfq_reset_burst_list(bfqd, bfqq);
+       } else if (bfqd->large_burst) {
+               /*
+                * bfqq is being activated shortly after the last
+                * queue and the current burst is already large: mark
+                * bfqq as belonging to this large burst immediately.
+                */
+               bfq_mark_bfqq_in_large_burst(bfqq);
+       } else {
+               /*
+                * A large-burst state has not yet been reached, but
+                * bfqq is being activated shortly after the last
+                * queue: add bfqq to the burst.
+                */
+               bfq_add_to_burst(bfqd, bfqq);
+       }
+
+       /*
+        * At this point, bfqq either has been added to the current
+        * burst or has caused the current burst to terminate and a
+        * possible new burst to start. In particular, in the second
+        * case, bfqq has become the first queue in the possible new
+        * burst.  In both cases last_ins_in_burst needs to be moved
+        * forward.
+        */
+       bfqd->last_ins_in_burst = jiffies;
+}
+
+/* Return the part of the budget of bfqq not yet consumed by service. */
+static int bfq_bfqq_budget_left(struct bfq_queue *bfqq)
+{
+       return bfqq->entity.budget - bfqq->entity.service;
+}
+
+/*
+ * Return the max budget to use. Until at least bfq_stats_min_budgets
+ * budgets have been assigned, this is the default max budget;
+ * afterwards it is the current max budget stored in bfqd, which is
+ * dynamically updated according to the estimated disk peak rate.
+ */
+static int bfq_max_budget(struct bfq_data *bfqd)
+{
+       bool enough_samples = bfqd->budgets_assigned >= bfq_stats_min_budgets;
+
+       if (enough_samples)
+               return bfqd->bfq_max_budget;
+
+       return bfq_default_max_budget;
+}
+
+/*
+ * Return the min budget, which is a fraction (trying with 1/32) of the
+ * current max budget or, until at least bfq_stats_min_budgets budgets
+ * have been assigned, of the default max budget.
+ */
+static int bfq_min_budget(struct bfq_data *bfqd)
+{
+       bool enough_samples = bfqd->budgets_assigned >= bfq_stats_min_budgets;
+
+       if (enough_samples)
+               return bfqd->bfq_max_budget / 32;
+
+       return bfq_default_max_budget / 32;
+}
+
+/*
+ * The next function, invoked after the input queue bfqq switches from
+ * idle to busy, updates the budget of bfqq. The function also tells
+ * whether the in-service queue should be expired, by returning
+ * true. The purpose of expiring the in-service queue is to give bfqq
+ * the chance to possibly preempt the in-service queue, and the reason
+ * for preempting the in-service queue is to achieve one of the two
+ * goals below.
+ *
+ * 1. Guarantee to bfqq its reserved bandwidth even if bfqq has
+ * expired because it has remained idle. In particular, bfqq may have
+ * expired for one of the following two reasons:
+ *
+ * - BFQQE_NO_MORE_REQUESTS bfqq did not enjoy any device idling
+ *   and did not manage to issue a new request before its last
+ *   request was served;
+ *
+ * - BFQQE_TOO_IDLE bfqq did enjoy device idling, but did not issue
+ *   a new request before the expiration of the idling-time.
+ *
+ * Even if bfqq has expired for one of the above reasons, the process
+ * associated with the queue may be however issuing requests greedily,
+ * and thus be sensitive to the bandwidth it receives (bfqq may have
+ * remained idle for other reasons: CPU high load, bfqq not enjoying
+ * idling, I/O throttling somewhere in the path from the process to
+ * the I/O scheduler, ...). But if, after every expiration for one of
+ * the above two reasons, bfqq has to wait for the service of at least
+ * one full budget of another queue before being served again, then
+ * bfqq is likely to get a much lower bandwidth or resource time than
+ * its reserved ones. To address this issue, two countermeasures need
+ * to be taken.
+ *
+ * First, the budget and the timestamps of bfqq need to be updated in
+ * a special way on bfqq reactivation: they need to be updated as if
+ * bfqq did not remain idle and did not expire. In fact, if they are
+ * computed as if bfqq expired and remained idle until reactivation,
+ * then the process associated with bfqq is treated as if, instead of
+ * being greedy, it stopped issuing requests when bfqq remained idle,
+ * and restarts issuing requests only on this reactivation. In other
+ * words, the scheduler does not help the process recover the "service
+ * hole" between bfqq expiration and reactivation. As a consequence,
+ * the process receives a lower bandwidth than its reserved one. In
+ * contrast, to recover this hole, the budget must be updated as if
+ * bfqq was not expired at all before this reactivation, i.e., it must
+ * be set to the value of the remaining budget when bfqq was
+ * expired. Along the same line, timestamps need to be assigned the
+ * value they had the last time bfqq was selected for service, i.e.,
+ * before last expiration. Thus timestamps need to be back-shifted
+ * with respect to their normal computation (see [1] for more details
+ * on this tricky aspect).
+ *
+ * Secondly, to allow the process to recover the hole, the in-service
+ * queue must be expired too, to give bfqq the chance to preempt it
+ * immediately. In fact, if bfqq has to wait for a full budget of the
+ * in-service queue to be completed, then it may become impossible to
+ * let the process recover the hole, even if the back-shifted
+ * timestamps of bfqq are lower than those of the in-service queue. If
+ * this happens for most or all of the holes, then the process may not
+ * receive its reserved bandwidth. In this respect, it is worth noting
+ * that, being the service of outstanding requests unpreemptible, a
+ * little fraction of the holes may however be unrecoverable, thereby
+ * causing a little loss of bandwidth.
+ *
+ * The last important point is detecting whether bfqq does need this
+ * bandwidth recovery. In this respect, the next function deems the
+ * process associated with bfqq greedy, and thus allows it to recover
+ * the hole, if: 1) the process is waiting for the arrival of a new
+ * request (which implies that bfqq expired for one of the above two
+ * reasons), and 2) such a request has arrived soon. The first
+ * condition is controlled through the flag non_blocking_wait_rq,
+ * while the second through the flag arrived_in_time. If both
+ * conditions hold, then the function computes the budget in the
+ * above-described special way, and signals that the in-service queue
+ * should be expired. Timestamp back-shifting is done later in
+ * __bfq_activate_entity.
+ *
+ * 2. Reduce latency. Even if timestamps are not backshifted to let
+ * the process associated with bfqq recover a service hole, bfqq may
+ * however happen to have, after being (re)activated, a lower finish
+ * timestamp than the in-service queue. That is, the next budget of
+ * bfqq may have to be completed before the one of the in-service
+ * queue. If this is the case, then preempting the in-service queue
+ * allows this goal to be achieved, apart from the unpreemptible,
+ * outstanding requests mentioned above.
+ *
+ * Unfortunately, regardless of which of the above two goals one wants
+ * to achieve, service trees need first to be updated to know whether
+ * the in-service queue must be preempted. To have service trees
+ * correctly updated, the in-service queue must be expired and
+ * rescheduled, and bfqq must be scheduled too. This is one of the
+ * most costly operations (in future versions, the scheduling
+ * mechanism may be re-designed in such a way to make it possible to
+ * know whether preemption is needed without needing to update service
+ * trees). In addition, queue preemptions almost always cause random
+ * I/O, and thus loss of throughput. Because of these facts, the next
+ * function adopts the following simple scheme to avoid both costly
+ * operations and too frequent preemptions: it requests the expiration
+ * of the in-service queue (unconditionally) only for queues that need
+ * to recover a hole, or that either are weight-raised or deserve to
+ * be weight-raised.
+ */
+static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd,
+                                               struct bfq_queue *bfqq,
+                                               bool arrived_in_time,
+                                               bool wr_or_deserves_wr)
+{
+       struct bfq_entity *entity = &bfqq->entity;
+
+       if (!bfq_bfqq_non_blocking_wait_rq(bfqq) || !arrived_in_time) {
+               /*
+                * No service hole to recover: assign a regular budget,
+                * large enough to serve at least the next request, and
+                * ask for the expiration of the in-service queue only
+                * if bfqq is, or deserves to be, weight-raised.
+                */
+               entity->budget = max_t(unsigned long, bfqq->max_budget,
+                                      bfq_serv_to_charge(bfqq->next_rq,
+                                                         bfqq));
+               bfq_clear_bfqq_non_blocking_wait_rq(bfqq);
+               return wr_or_deserves_wr;
+       }
+
+       /*
+        * The flag non_blocking_wait_rq is not cleared here, as it is
+        * used in bfq_activate_bfqq to signal that timestamps need to
+        * be back-shifted (and is cleared right after).
+        */
+
+       /*
+        * The next assignment relies on the fact that neither
+        * entity->service nor entity->budget is updated on expiration
+        * if bfqq is empty (see __bfq_bfqq_recalc_budget). Thus both
+        * quantities remain unchanged after such an expiration, and
+        * the following statement therefore assigns to entity->budget
+        * the remaining budget on such an expiration. For clarity,
+        * entity->service is not updated on expiration in any case,
+        * and, in normal operation, is reset only when bfqq is
+        * selected for service (see bfq_get_next_queue).
+        */
+       entity->budget = min_t(unsigned long,
+                              bfq_bfqq_budget_left(bfqq),
+                              bfqq->max_budget);
+
+       return true;
+}
+
+/*
+ * Return the duration of a weight-raising period. A positive
+ * bfqd->bfq_wr_max_time is returned as is; otherwise the duration is
+ * RT_prod / peak_rate, forced into the range delimited by the jiffies
+ * equivalents of 3 and 13 seconds.
+ */
+static unsigned int bfq_wr_duration(struct bfq_data *bfqd)
+{
+       unsigned long shortest, longest;
+       u64 duration;
+
+       if (bfqd->bfq_wr_max_time > 0)
+               return bfqd->bfq_wr_max_time;
+
+       duration = bfqd->RT_prod;
+       do_div(duration, bfqd->peak_rate);
+
+       /*
+        * Keep the duration between 3 and 13 seconds. According to
+        * tests, going above 13 seconds often backfires: responsiveness
+        * gets worse, because applications that are neither
+        * interactive nor soft real-time keep their raised weight for
+        * too long. Going below 3 seconds, instead, leaves most
+        * interactive tasks too little time to finish their job
+        * before weight raising ends.
+        */
+       shortest = msecs_to_jiffies(3000);
+       longest = msecs_to_jiffies(13000);
+
+       if (duration > longest)
+               return longest;
+       if (duration < shortest)
+               return shortest;
+
+       return duration;
+}
+
+/*
+ * Update the weight-raising fields of bfqq. If bfqq was not
+ * weight-raised (old_wr_coeff == 1) and wr_or_deserves_wr is set, a
+ * weight-raising period is started and the budget is trimmed. If bfqq
+ * was already weight-raised (old_wr_coeff > 1), its coefficient and
+ * duration are refreshed according to interactive, in_burst and
+ * soft_rt, checked in this order. In every other case nothing changes.
+ */
+static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd,
+                                            struct bfq_queue *bfqq,
+                                            unsigned int old_wr_coeff,
+                                            bool wr_or_deserves_wr,
+                                            bool interactive,
+                                            bool in_burst,
+                                            bool soft_rt)
+{
+       struct bfq_entity *entity = &bfqq->entity;
+
+       if (old_wr_coeff == 1 && wr_or_deserves_wr) {
+               /* A new weight-raising period begins here. */
+               if (!interactive) {
+                       bfqq->wr_start_at_switch_to_srt = jiffies;
+                       bfqq->wr_coeff = bfqd->bfq_wr_coeff *
+                               BFQ_SOFTRT_WEIGHT_FACTOR;
+                       bfqq->wr_cur_max_time =
+                               bfqd->bfq_wr_rt_max_time;
+               } else {
+                       bfqq->wr_coeff = bfqd->bfq_wr_coeff;
+                       bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+               }
+
+               /*
+                * Shrink the budget, if needed, so that it stays
+                * close to the backlog of bfqq: a budget that is too
+                * large adds to the scheduling error. Only latency
+                * matters here, not throughput. The lower bound of
+                * twice the minimum budget prevents expirations from
+                * becoming so frequent that latency grows.
+                */
+               entity->budget = min_t(unsigned long,
+                                      entity->budget,
+                                      2 * bfq_min_budget(bfqd));
+               return;
+       }
+
+       /* From here on, only already weight-raised queues are handled. */
+       if (old_wr_coeff <= 1)
+               return;
+
+       if (interactive) {
+               /* refresh both coefficient and duration */
+               bfqq->wr_coeff = bfqd->bfq_wr_coeff;
+               bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+               return;
+       }
+
+       if (in_burst) {
+               bfqq->wr_coeff = 1;
+               return;
+       }
+
+       if (!soft_rt)
+               return;
+
+       /*
+        * The application meets, now or still, the requirements for
+        * being considered soft real-time. It is therefore correct
+        * and safe to (re)charge its weight-raising duration with
+        * the duration used for soft real-time applications.
+        *
+        * Recharging now, i.e., before the current weight-raising
+        * period is over, makes the following bad scenario less
+        * likely:
+        * 1) a soft real-time application gets its weight raised at
+        *    startup, like any newly created application;
+        * 2) being non interactive, at some point its weight raising
+        *    is stopped;
+        * 3) at that moment it still has pending requests, so it has
+        *    no chance of being deemed soft real-time until these
+        *    requests are completed (soft real-time detection is
+        *    described in the comments on
+        *    bfq_bfqq_softrt_next_start());
+        * 4) those pending requests suffer from a high latency,
+        *    because the application is not weight-raised while they
+        *    are pending.
+        */
+       if (bfqq->wr_cur_max_time != bfqd->bfq_wr_rt_max_time) {
+               bfqq->wr_start_at_switch_to_srt =
+                       bfqq->last_wr_start_finish;
+
+               bfqq->wr_cur_max_time = bfqd->bfq_wr_rt_max_time;
+               bfqq->wr_coeff = bfqd->bfq_wr_coeff *
+                       BFQ_SOFTRT_WEIGHT_FACTOR;
+       }
+       bfqq->last_wr_start_finish = jiffies;
+}
+
+/*
+ * Return true if bfqq has no dispatched request and more than
+ * bfqd->bfq_wr_min_idle_time has elapsed since bfqq->budget_timeout.
+ */
+static bool bfq_bfqq_idle_for_long_time(struct bfq_data *bfqd,
+                                       struct bfq_queue *bfqq)
+{
+       if (bfqq->dispatched != 0)
+               return false;
+
+       return time_is_before_jiffies(bfqq->budget_timeout +
+                                     bfqd->bfq_wr_min_idle_time);
+}
+
+/*
+ * Handle the switch of bfqq from idle to busy caused by the arrival
+ * of rq: classify bfqq (in burst, soft real-time, interactive), update
+ * its budget and weight-raising state, add it to the busy queues and,
+ * if the conditions at the end of the function hold, expire the
+ * in-service queue.
+ *
+ * @old_wr_coeff: value of bfqq->wr_coeff sampled by the caller before
+ *               the request was added.
+ * @interactive: output parameter; set to true if bfqq is not in a
+ *              large burst and has been idle for a long time.
+ */
+static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
+                                            struct bfq_queue *bfqq,
+                                            int old_wr_coeff,
+                                            struct request *rq,
+                                            bool *interactive)
+{
+       bool soft_rt, in_burst, wr_or_deserves_wr,
+               bfqq_wants_to_preempt,
+               idle_for_long_time = bfq_bfqq_idle_for_long_time(bfqd, bfqq),
+               /*
+                * See the comments on
+                * bfq_bfqq_update_budg_for_activation for
+                * details on the usage of the next variable.
+                */
+               arrived_in_time =  ktime_get_ns() <=
+                       bfqq->ttime.last_end_request +
+                       bfqd->bfq_slice_idle * 3;
+
+       bfqg_stats_update_io_add(bfqq_group(RQ_BFQQ(rq)), bfqq, rq->cmd_flags);
+
+       /*
+        * bfqq deserves to be weight-raised if:
+        * - it is sync,
+        * - it does not belong to a large burst,
+        * - it has been idle for enough time or is soft real-time,
+        * - is linked to a bfq_io_cq (it is not shared in any sense).
+        */
+       in_burst = bfq_bfqq_in_large_burst(bfqq);
+       soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
+               !in_burst &&
+               time_is_before_jiffies(bfqq->soft_rt_next_start);
+       *interactive = !in_burst && idle_for_long_time;
+       wr_or_deserves_wr = bfqd->low_latency &&
+               (bfqq->wr_coeff > 1 ||
+                (bfq_bfqq_sync(bfqq) &&
+                 bfqq->bic && (*interactive || soft_rt)));
+
+       /*
+        * Using the last flag, update budget and check whether bfqq
+        * may want to preempt the in-service queue.
+        */
+       bfqq_wants_to_preempt =
+               bfq_bfqq_update_budg_for_activation(bfqd, bfqq,
+                                                   arrived_in_time,
+                                                   wr_or_deserves_wr);
+
+       /*
+        * If bfqq happened to be activated in a burst, but has been
+        * idle for much more than an interactive queue, then we
+        * assume that, in the overall I/O initiated in the burst, the
+        * I/O associated with bfqq is finished. So bfqq does not need
+        * to be treated as a queue belonging to a burst
+        * anymore. Accordingly, we reset bfqq's in_large_burst flag
+        * if set, and remove bfqq from the burst list if it's
+        * there. We do not decrement burst_size, because the fact
+        * that bfqq does not need to belong to the burst list any
+        * more does not invalidate the fact that bfqq was created in
+        * a burst.
+        */
+       if (likely(!bfq_bfqq_just_created(bfqq)) &&
+           idle_for_long_time &&
+           time_is_before_jiffies(
+                   bfqq->budget_timeout +
+                   msecs_to_jiffies(10000))) {
+               hlist_del_init(&bfqq->burst_list_node);
+               bfq_clear_bfqq_in_large_burst(bfqq);
+       }
+
+       bfq_clear_bfqq_just_created(bfqq);
+
+
+       /*
+        * As long as bfqq is not marked I/O bound, count the requests
+        * that arrive in time; once the count reaches
+        * bfqd->bfq_requests_within_timer, mark bfqq as I/O bound. A
+        * request that does not arrive in time resets the count.
+        */
+       if (!bfq_bfqq_IO_bound(bfqq)) {
+               if (arrived_in_time) {
+                       bfqq->requests_within_timer++;
+                       if (bfqq->requests_within_timer >=
+                           bfqd->bfq_requests_within_timer)
+                               bfq_mark_bfqq_IO_bound(bfqq);
+               } else
+                       bfqq->requests_within_timer = 0;
+       }
+
+       /*
+        * Weight raising is updated only in low-latency mode, and
+        * only if more than bfq_wr_min_idle_time has elapsed since
+        * bfqq->split_time. A split_time lying in the future is
+        * treated as a wraparound and moved far enough into the past
+        * for the check below to succeed.
+        */
+       if (bfqd->low_latency) {
+               if (unlikely(time_is_after_jiffies(bfqq->split_time)))
+                       /* wraparound */
+                       bfqq->split_time =
+                               jiffies - bfqd->bfq_wr_min_idle_time - 1;
+
+               if (time_is_before_jiffies(bfqq->split_time +
+                                          bfqd->bfq_wr_min_idle_time)) {
+                       bfq_update_bfqq_wr_on_rq_arrival(bfqd, bfqq,
+                                                        old_wr_coeff,
+                                                        wr_or_deserves_wr,
+                                                        *interactive,
+                                                        in_burst,
+                                                        soft_rt);
+
+                       /* a changed coefficient requires a weight update */
+                       if (old_wr_coeff != bfqq->wr_coeff)
+                               bfqq->entity.prio_changed = 1;
+               }
+       }
+
+       bfqq->last_idle_bklogged = jiffies;
+       bfqq->service_from_backlogged = 0;
+       bfq_clear_bfqq_softrt_update(bfqq);
+
+       bfq_add_bfqq_busy(bfqd, bfqq);
+
+       /*
+        * Expire in-service queue only if preemption may be needed
+        * for guarantees. In this respect, the function
+        * next_queue_may_preempt just checks a simple, necessary
+        * condition, and not a sufficient condition based on
+        * timestamps. In fact, for the latter condition to be
+        * evaluated, timestamps would need first to be updated, and
+        * this operation is quite costly (see the comments on the
+        * function bfq_bfqq_update_budg_for_activation).
+        */
+       if (bfqd->in_service_queue && bfqq_wants_to_preempt &&
+           bfqd->in_service_queue->wr_coeff < bfqq->wr_coeff &&
+           next_queue_may_preempt(bfqd))
+               bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
+                               false, BFQQE_PREEMPTED);
+}
+
+/*
+ * Add rq to the bfq_queue it belongs to: update the queued counters,
+ * insert rq in the sort list, re-evaluate the next request to serve
+ * and, depending on whether the queue is already busy, either handle
+ * the idle-to-busy switch or possibly start weight raising for an
+ * already busy queue. Finally, refresh last_wr_start_finish where
+ * the comment at the end of the function says so.
+ */
+static void bfq_add_request(struct request *rq)
+{
+       struct bfq_queue *bfqq = RQ_BFQQ(rq);
+       struct bfq_data *bfqd = bfqq->bfqd;
+       struct request *next_rq, *prev;
+       /* sampled before any update below may change bfqq->wr_coeff */
+       unsigned int old_wr_coeff = bfqq->wr_coeff;
+       bool interactive = false;
+
+       bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq));
+       bfqq->queued[rq_is_sync(rq)]++;
+       bfqd->queued++;
+
+       elv_rb_add(&bfqq->sort_list, rq);
+
+       /*
+        * Check if this request is a better next-serve candidate.
+        */
+       prev = bfqq->next_rq;
+       next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position);
+       bfqq->next_rq = next_rq;
+
+       /*
+        * Adjust priority tree position, if next_rq changes.
+        */
+       if (prev != bfqq->next_rq)
+               bfq_pos_tree_add_move(bfqd, bfqq);
+
+       if (!bfq_bfqq_busy(bfqq)) /* switching to busy ... */
+               bfq_bfqq_handle_idle_busy_switch(bfqd, bfqq, old_wr_coeff,
+                                                rq, &interactive);
+       else {
+               /*
+                * bfqq is already busy. In low-latency mode, start
+                * weight raising if bfqq is not weight-raised, rq is
+                * async, and more than bfq_wr_min_inter_arr_async has
+                * elapsed since last_wr_start_finish.
+                */
+               if (bfqd->low_latency && old_wr_coeff == 1 && !rq_is_sync(rq) &&
+                   time_is_before_jiffies(
+                               bfqq->last_wr_start_finish +
+                               bfqd->bfq_wr_min_inter_arr_async)) {
+                       bfqq->wr_coeff = bfqd->bfq_wr_coeff;
+                       bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+
+                       bfqd->wr_busy_queues++;
+                       bfqq->entity.prio_changed = 1;
+               }
+               if (prev != bfqq->next_rq)
+                       bfq_updated_next_req(bfqd, bfqq);
+       }
+
+       /*
+        * Assign jiffies to last_wr_start_finish in the following
+        * cases:
+        *
+        * . if bfqq is not going to be weight-raised, because, for
+        *   non weight-raised queues, last_wr_start_finish stores the
+        *   arrival time of the last request; as of now, this piece
+        *   of information is used only for deciding whether to
+        *   weight-raise async queues
+        *
+        * . if bfqq is not weight-raised, because, if bfqq is now
+        *   switching to weight-raised, then last_wr_start_finish
+        *   stores the time when weight-raising starts
+        *
+        * . if bfqq is interactive, because, regardless of whether
+        *   bfqq is currently weight-raised, the weight-raising
+        *   period must start or restart (this case is considered
+        *   separately because it is not detected by the above
+        *   conditions, if bfqq is already weight-raised)
+        *
+        * last_wr_start_finish has to be updated also if bfqq is soft
+        * real-time, because the weight-raising period is constantly
+        * restarted on idle-to-busy transitions for these queues, but
+        * this is already done in bfq_bfqq_handle_idle_busy_switch if
+        * needed.
+        */
+       if (bfqd->low_latency &&
+               (old_wr_coeff == 1 || bfqq->wr_coeff == 1 || interactive))
+               bfqq->last_wr_start_finish = jiffies;
+}
+
+/*
+ * Look up, in the sort list of bfqd->bio_bfqq, the request located at
+ * the end sector of bio. Return NULL if bfqd->bio_bfqq is not set.
+ */
+static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd,
+                                         struct bio *bio,
+                                         struct request_queue *q)
+{
+       struct bfq_queue *bio_queue = bfqd->bio_bfqq;
+
+       if (!bio_queue)
+               return NULL;
+
+       return elv_rb_find(&bio_queue->sort_list, bio_end_sector(bio));
+}
+
+/*
+ * Return the absolute distance between the position of rq and
+ * last_pos, or 0 if last_pos is 0.
+ */
+static sector_t get_sdist(sector_t last_pos, struct request *rq)
+{
+       if (!last_pos)
+               return 0;
+
+       return abs(blk_rq_pos(rq) - last_pos);
+}
+
+#if 0 /* Still not clear if we can do without next two functions */
+/* Compiled out: increments the rq_in_driver counter of the scheduler data. */
+static void bfq_activate_request(struct request_queue *q, struct request *rq)
+{
+       struct bfq_data *bfqd = q->elevator->elevator_data;
+
+       bfqd->rq_in_driver++;
+}
+
+/* Compiled out: decrements the rq_in_driver counter of the scheduler data. */
+static void bfq_deactivate_request(struct request_queue *q, struct request *rq)
+{
+       struct bfq_data *bfqd = q->elevator->elevator_data;
+
+       bfqd->rq_in_driver--;
+}
+#endif
+
+/*
+ * Remove rq from its bfq_queue: pick a new next request if rq was the
+ * next one, unlink rq from its list, from the sort list and from the
+ * elevator hash, update the queued counters and, if the queue has
+ * become empty, perform the related cleanup.
+ */
+static void bfq_remove_request(struct request_queue *q,
+                              struct request *rq)
+{
+       struct bfq_queue *bfqq = RQ_BFQQ(rq);
+       struct bfq_data *bfqd = bfqq->bfqd;
+       const int sync = rq_is_sync(rq);
+
+       if (bfqq->next_rq == rq) {
+               bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq);
+               bfq_updated_next_req(bfqd, bfqq);
+       }
+
+       /* unlink rq only if its list node does not point back to itself */
+       if (rq->queuelist.prev != &rq->queuelist)
+               list_del_init(&rq->queuelist);
+       bfqq->queued[sync]--;
+       bfqd->queued--;
+       elv_rb_del(&bfqq->sort_list, rq);
+
+       elv_rqhash_del(q, rq);
+       /* do not leave q->last_merge pointing to the removed request */
+       if (q->last_merge == rq)
+               q->last_merge = NULL;
+
+       if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
+               bfqq->next_rq = NULL;
+
+               if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue) {
+                       bfq_del_bfqq_busy(bfqd, bfqq, false);
+                       /*
+                        * bfqq emptied. In normal operation, when
+                        * bfqq is empty, bfqq->entity.service and
+                        * bfqq->entity.budget must contain,
+                        * respectively, the service received and the
+                        * budget used last time bfqq emptied. These
+                        * facts do not hold in this case, as at least
+                        * this last removal occurred while bfqq is
+                        * not in service. To avoid inconsistencies,
+                        * reset both bfqq->entity.service and
+                        * bfqq->entity.budget, if bfqq has still a
+                        * process that may issue I/O requests to it.
+                        *
+                        * NOTE(review): the reset below is not guarded
+                        * by any check on attached processes; confirm
+                        * whether the last clause above still applies.
+                        */
+                       bfqq->entity.budget = bfqq->entity.service = 0;
+               }
+
+               /*
+                * Remove queue from request-position tree as it is empty.
+                */
+               if (bfqq->pos_root) {
+                       rb_erase(&bfqq->pos_node, bfqq->pos_root);
+                       bfqq->pos_root = NULL;
+               }
+       }
+
+       if (rq->cmd_flags & REQ_META)
+               bfqq->meta_pending--;
+
+       bfqg_stats_update_io_remove(bfqq_group(bfqq), rq->cmd_flags);
+}
+
+/*
+ * Try to merge bio into a request already queued in the scheduler.
+ * Before the attempt, the bfq_io_cq of the current task and the
+ * matching bfq_queue (sync or async, according to bio->bi_opf) are
+ * stored in bfqd->bio_bic and bfqd->bio_bfqq. Returns the value
+ * returned by blk_mq_sched_try_merge.
+ */
+static bool bfq_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio)
+{
+       struct request_queue *q = hctx->queue;
+       struct bfq_data *bfqd = q->elevator->elevator_data;
+       struct request *free = NULL;
+       /*
+        * bfq_bic_lookup grabs the queue_lock: invoke it now and
+        * store its return value for later use, to avoid nesting
+        * queue_lock inside the bfqd->lock. We assume that the bic
+        * returned by bfq_bic_lookup does not go away before
+        * bfqd->lock is taken.
+        */
+       struct bfq_io_cq *bic = bfq_bic_lookup(bfqd, current->io_context, q);
+       bool ret;
+
+       spin_lock_irq(&bfqd->lock);
+
+       if (bic)
+               bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf));
+       else
+               bfqd->bio_bfqq = NULL;
+       bfqd->bio_bic = bic;
+
+       ret = blk_mq_sched_try_merge(q, bio, &free);
+
+       /*
+        * NOTE(review): a request handed back through free is released
+        * while bfqd->lock is still held; confirm that the release
+        * path expects the lock to be held.
+        */
+       if (free)
+               blk_mq_free_request(free);
+       spin_unlock_irq(&bfqd->lock);
+
+       return ret;
+}
+
+/*
+ * Check whether bio can be front-merged with a queued request. On
+ * success, store that request in *req and return
+ * ELEVATOR_FRONT_MERGE; otherwise leave *req untouched and return
+ * ELEVATOR_NO_MERGE.
+ */
+static int bfq_request_merge(struct request_queue *q, struct request **req,
+                            struct bio *bio)
+{
+       struct bfq_data *bfqd = q->elevator->elevator_data;
+       struct request *candidate = bfq_find_rq_fmerge(bfqd, bio, q);
+
+       if (!candidate || !elv_bio_merge_ok(candidate, bio))
+               return ELEVATOR_NO_MERGE;
+
+       *req = candidate;
+       return ELEVATOR_FRONT_MERGE;
+}
+
+/*
+ * Invoked after a bio has been merged into req. Work is needed only
+ * for a front merge that leaves req with a position lower than that
+ * of its predecessor in the sort list: in that case req is
+ * re-inserted and the next request to serve is re-evaluated.
+ */
+static void bfq_request_merged(struct request_queue *q, struct request *req,
+                              enum elv_merge type)
+{
+       struct rb_node *pred;
+       struct request *old_next;
+       struct bfq_queue *bfqq;
+       struct bfq_data *bfqd;
+
+       if (type != ELEVATOR_FRONT_MERGE)
+               return;
+
+       pred = rb_prev(&req->rb_node);
+       if (!pred)
+               return;
+
+       /* req is still correctly ordered with respect to its predecessor */
+       if (blk_rq_pos(req) >=
+           blk_rq_pos(container_of(pred, struct request, rb_node)))
+               return;
+
+       bfqq = RQ_BFQQ(req);
+       bfqd = bfqq->bfqd;
+
+       /* Move req to its new place in the sort list */
+       elv_rb_del(&bfqq->sort_list, req);
+       elv_rb_add(&bfqq->sort_list, req);
+
+       /* Re-evaluate which request of bfqq is to be served next */
+       old_next = bfqq->next_rq;
+       bfqq->next_rq = bfq_choose_req(bfqd, old_next, req,
+                                      bfqd->last_position);
+
+       if (bfqq->next_rq == old_next)
+               return;
+
+       /*
+        * The next request has changed: adapt the budget of the queue
+        * to the new request, and move the queue to its new position
+        * in the rq_pos_tree.
+        */
+       bfq_updated_next_req(bfqd, bfqq);
+       bfq_pos_tree_add_move(bfqd, bfqq);
+}
+
+/*
+ * Invoked after request next has been merged into request rq. Unless
+ * the early exit below is taken, possibly let rq inherit the fifo
+ * position of next, make rq the next request of its queue if next
+ * was, and remove next from the scheduler. In all cases, update the
+ * merge statistics of the group of rq's queue.
+ */
+static void bfq_requests_merged(struct request_queue *q, struct request *rq,
+                               struct request *next)
+{
+       struct bfq_queue *bfqq = RQ_BFQQ(rq), *next_bfqq = RQ_BFQQ(next);
+
+       /*
+        * NOTE(review): when rq is linked in an rb tree, everything
+        * but the statistics update is skipped, including taking
+        * bfqd->lock; presumably the lock is already held by the
+        * caller on that path. Confirm against the call sites.
+        */
+       if (!RB_EMPTY_NODE(&rq->rb_node))
+               goto end;
+       spin_lock_irq(&bfqq->bfqd->lock);
+
+       /*
+        * If next and rq belong to the same bfq_queue and next is older
+        * than rq, then reposition rq in the fifo (by substituting next
+        * with rq). Otherwise, if next and rq belong to different
+        * bfq_queues, never reposition rq: in fact, we would have to
+        * reposition it with respect to next's position in its own fifo,
+        * which would most certainly be too expensive with respect to
+        * the benefits.
+        */
+       if (bfqq == next_bfqq &&
+           !list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
+           next->fifo_time < rq->fifo_time) {
+               list_del_init(&rq->queuelist);
+               list_replace_init(&next->queuelist, &rq->queuelist);
+               rq->fifo_time = next->fifo_time;
+       }
+
+       /* next is about to be removed: rq takes its place as next request */
+       if (bfqq->next_rq == next)
+               bfqq->next_rq = rq;
+
+       bfq_remove_request(q, next);
+
+       spin_unlock_irq(&bfqq->bfqd->lock);
+end:
+       bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags);
+}
+
+/*
+ * Reset the weight-raising state of bfqq. The caller must pass a
+ * non-NULL bfqq.
+ */
+static void bfq_bfqq_end_wr(struct bfq_queue *bfqq)
+{
+       struct bfq_entity *entity = &bfqq->entity;
+
+       /* a busy queue is no longer counted in wr_busy_queues */
+       if (bfq_bfqq_busy(bfqq))
+               bfqq->bfqd->wr_busy_queues--;
+
+       bfqq->last_wr_start_finish = jiffies;
+       bfqq->wr_cur_max_time = 0;
+       bfqq->wr_coeff = 1;
+
+       /*
+        * Make the next invocation of __bfq_entity_update_weight_prio
+        * recompute the weight.
+        */
+       entity->prio_changed = 1;
+}
+
+/*
+ * End weight raising for every async queue of bfqg that exists: the
+ * entries of the async_bfqq table and the async idle queue.
+ */
+void bfq_end_wr_async_queues(struct bfq_data *bfqd,
+                            struct bfq_group *bfqg)
+{
+       int row, col;
+
+       for (row = 0; row < 2; row++) {
+               for (col = 0; col < IOPRIO_BE_NR; col++) {
+                       struct bfq_queue *async_q = bfqg->async_bfqq[row][col];
+
+                       if (async_q)
+                               bfq_bfqq_end_wr(async_q);
+               }
+       }
+
+       if (bfqg->async_idle_bfqq)
+               bfq_bfqq_end_wr(bfqg->async_idle_bfqq);
+}
+
+/*
+ * End weight raising for all the queues linked in the active and idle
+ * lists of bfqd, then invoke bfq_end_wr_async; the whole sequence runs
+ * with bfqd->lock held.
+ */
+static void bfq_end_wr(struct bfq_data *bfqd)
+{
+       struct bfq_queue *bfqq;
+
+       spin_lock_irq(&bfqd->lock);
+
+       list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list)
+               bfq_bfqq_end_wr(bfqq);
+       list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list)
+               bfq_bfqq_end_wr(bfqq);
+       bfq_end_wr_async(bfqd);
+
+       spin_unlock_irq(&bfqd->lock);
+}
+
+/*
+ * Return the position of io_struct, which is handled as a request if
+ * the request argument is true and as a bio otherwise.
+ */
+static sector_t bfq_io_struct_pos(void *io_struct, bool request)
+{
+       struct bio *bio;
+
+       if (request)
+               return blk_rq_pos(io_struct);
+
+       bio = io_struct;
+       return bio->bi_iter.bi_sector;
+}
+
+/*
+ * Return non-zero if the distance between the position of io_struct
+ * (a request or a bio, according to the request argument) and sector
+ * does not exceed BFQQ_CLOSE_THR.
+ */
+static int bfq_rq_close_to_sector(void *io_struct, bool request,
+                                 sector_t sector)
+{
+       sector_t pos = bfq_io_struct_pos(io_struct, request);
+
+       return abs(pos - sector) <= BFQQ_CLOSE_THR;
+}
+
+/*
+ * Search the rq_pos_tree of the group of bfqq for a queue whose next
+ * request is located at, or close to, sector. At most two candidates
+ * are examined after the exact lookup: the parent node reported by
+ * the lookup and one neighbour of it. Return the queue found, or NULL.
+ */
+static struct bfq_queue *bfqq_find_close(struct bfq_data *bfqd,
+                                        struct bfq_queue *bfqq,
+                                        sector_t sector)
+{
+       struct rb_root *root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree;
+       struct rb_node *parent, *node;
+       struct bfq_queue *__bfqq;
+
+       if (RB_EMPTY_ROOT(root))
+               return NULL;
+
+       /*
+        * First, if we find a request starting at the end of the last
+        * request, choose it.
+        */
+       __bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL);
+       if (__bfqq)
+               return __bfqq;
+
+       /*
+        * If the exact sector wasn't found, the parent of the NULL leaf
+        * will contain the closest sector (rq_pos_tree sorted by
+        * next_request position).
+        *
+        * NOTE(review): parent is assumed to have been set by the
+        * failed lookup on the non-empty tree; confirm against
+        * bfq_rq_pos_tree_lookup.
+        */
+       __bfqq = rb_entry(parent, struct bfq_queue, pos_node);
+       if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
+               return __bfqq;
+
+       /*
+        * The parent is not close enough: try its neighbour lying on
+        * the side of sector.
+        */
+       if (blk_rq_pos(__bfqq->next_rq) < sector)
+               node = rb_next(&__bfqq->pos_node);
+       else
+               node = rb_prev(&__bfqq->pos_node);
+       if (!node)
+               return NULL;
+
+       __bfqq = rb_entry(node, struct bfq_queue, pos_node);
+       if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
+               return __bfqq;
+
+       return NULL;
+}
+
+/*
+ * Return a queue, different from cur_bfqq, whose next request is
+ * close to sector, or NULL if there is none.
+ */
+static struct bfq_queue *bfq_find_close_cooperator(struct bfq_data *bfqd,
+                                                  struct bfq_queue *cur_bfqq,
+                                                  sector_t sector)
+{
+       /*
+        * Queues that work closely on the same area of the device
+        * are cooperating, and are worth detecting: once they are
+        * grouped together, 1) no time is wasted idling, and 2) the
+        * union of their requests can be served in the best possible
+        * order for throughput.
+        */
+       struct bfq_queue *candidate = bfqq_find_close(bfqd, cur_bfqq, sector);
+
+       return candidate == cur_bfqq ? NULL : candidate;
+}
+
+/*
+ * Schedule the merge of bfqq into new_bfqq or, if new_bfqq is itself
+ * scheduled to be merged, into the last queue of its ->new_bfqq
+ * chain. Return the queue that bfqq has been redirected to, or NULL
+ * if no merge has been scheduled.
+ */
+static struct bfq_queue *
+bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+{
+       int process_refs, new_process_refs;
+       struct bfq_queue *__bfqq;
+
+       /*
+        * If there are no process references on the new_bfqq, then it is
+        * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain
+        * may have dropped their last reference (not just their last process
+        * reference).
+        */
+       if (!bfqq_process_refs(new_bfqq))
+               return NULL;
+
+       /* Avoid a circular list and skip interim queue merges. */
+       while ((__bfqq = new_bfqq->new_bfqq)) {
+               if (__bfqq == bfqq)
+                       return NULL;
+               new_bfqq = __bfqq;
+       }
+
+       /* from here on, new_bfqq is the last queue of the chain */
+       process_refs = bfqq_process_refs(bfqq);
+       new_process_refs = bfqq_process_refs(new_bfqq);
+       /*
+        * If the process for the bfqq has gone away, there is no
+        * sense in merging the queues.
+        */
+       if (process_refs == 0 || new_process_refs == 0)
+               return NULL;
+
+       bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
+               new_bfqq->pid);
+
+       /*
+        * Merging is just a redirection: the requests of the process
+        * owning one of the two queues are redirected to the other queue.
+        * The latter queue, in its turn, is set as shared if this is the
+        * first time that the requests of some process are redirected to
+        * it.
+        *
+        * We redirect bfqq to new_bfqq and not the opposite, because
+        * we are in the context of the process owning bfqq, thus we
+        * have the io_cq of this process. So we can immediately
+        * configure this io_cq to redirect the requests of the
+        * process to new_bfqq. In contrast, the io_cq of new_bfqq is
+        * not available any more (new_bfqq->bic == NULL).
+        *
+        * Anyway, even in case new_bfqq coincides with the in-service
+        * queue, redirecting requests to the in-service queue is the
+        * best option, as we feed the in-service queue with new
+        * requests close to the last request served and, by doing so,
+        * are likely to increase the throughput.
+        */
+       bfqq->new_bfqq = new_bfqq;
+       /* the target gains one reference per process reference of bfqq */
+       new_bfqq->ref += process_refs;
+       return new_bfqq;
+}
+
+/*
+ * Return true if bfqq and new_bfqq pass all the filters for being
+ * merged: neither is in the idle class, both have the same ioprio
+ * class, neither is seeky, and both are sync.
+ */
+static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
+                                       struct bfq_queue *new_bfqq)
+{
+       if (bfq_class_idle(bfqq) || bfq_class_idle(new_bfqq))
+               return false;
+
+       if (bfqq->ioprio_class != new_bfqq->ioprio_class)
+               return false;
+
+       /*
+        * A queue already detected as seeky is unlikely to produce
+        * sequential I/O once merged with the other queue.
+        */
+       if (BFQQ_SEEKY(bfqq) || BFQQ_SEEKY(new_bfqq))
+               return false;
+
+       /*
+        * As far as is known, (some) applications interleave their
+        * I/O only for reads; merging async queues is therefore
+        * pointless.
+        */
+       return bfq_bfqq_sync(bfqq) && bfq_bfqq_sync(new_bfqq);
+}
+
+/*
+ * Tell whether bfqq has been weight-raised for too long to be merged:
+ * a true return value means that bfqq must not be merged. The
+ * rationale is that genuine cooperation shows up very soon after
+ * processes start doing I/O, whereas a late cooperation is usually
+ * just an accidental false positive. For a weight-raised bfqq, such a
+ * false positive would evidently harm its latency guarantees.
+ */
+static bool wr_from_too_long(struct bfq_queue *bfqq)
+{
+       if (bfqq->wr_coeff <= 1)
+               return false;
+
+       return time_is_before_jiffies(bfqq->last_wr_start_finish +
+                                     msecs_to_jiffies(100));
+}
+
+/*
+ * Attempt to schedule a merge of bfqq with the currently in-service
+ * queue or with a close queue among the scheduled queues.  Return
+ * NULL if no merge was scheduled, a pointer to the shared bfq_queue
+ * structure otherwise.
+ *
+ * io_struct is the request or the bio whose position is used to look
+ * for a cooperator; the request flag tells which of the two it is.
+ *
+ * The OOM queue is not allowed to participate in cooperation: in fact, since
+ * the requests temporarily redirected to the OOM queue could be redirected
+ * again to dedicated queues at any time, the state needed to correctly
+ * handle merging with the OOM queue would be quite complex and expensive
+ * to maintain. Besides, in such a critical condition as an out of memory,
+ * the benefits of queue merging may be little relevant, or even negligible.
+ *
+ * Weight-raised queues can be merged only if their weight-raising
+ * period has just started. In fact cooperating processes are usually
+ * started together. Thus, with this filter we avoid false positives
+ * that would jeopardize low-latency guarantees.
+ *
+ * WARNING: queue merging may impair fairness among non-weight raised
+ * queues, for at least two reasons: 1) the original weight of a
+ * merged queue may change during the merged state, 2) even being the
+ * weight the same, a merged queue may be bloated with many more
+ * requests than the ones produced by its originally-associated
+ * process.
+ */
+static struct bfq_queue *
+bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+                    void *io_struct, bool request)
+{
+       struct bfq_queue *in_service_bfqq, *new_bfqq;
+
+       /* a merge has already been scheduled for bfqq: return its target */
+       if (bfqq->new_bfqq)
+               return bfqq->new_bfqq;
+
+       if (!io_struct ||
+           wr_from_too_long(bfqq) ||
+           unlikely(bfqq == &bfqd->oom_bfqq))
+               return NULL;
+
+       /* If there is only one backlogged queue, don't search. */
+       if (bfqd->busy_queues == 1)
+               return NULL;
+
+       in_service_bfqq = bfqd->in_service_queue;
+
+       /* skip the in-service queue if it is not an eligible candidate */
+       if (!in_service_bfqq || in_service_bfqq == bfqq
+           || wr_from_too_long(in_service_bfqq) ||
+           unlikely(in_service_bfqq == &bfqd->oom_bfqq))
+               goto check_scheduled;
+
+       /*
+        * First candidate: the in-service queue, provided that the
+        * new I/O is close to the last position served and that the
+        * two queues have the same parent entity.
+        */
+       if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) &&
+           bfqq->entity.parent == in_service_bfqq->entity.parent &&
+           bfq_may_be_close_cooperator(bfqq, in_service_bfqq)) {
+               new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq);
+               if (new_bfqq)
+                       return new_bfqq;
+       }
+       /*
+        * Check whether there is a cooperator among currently scheduled
+        * queues. The only thing we need is that the bio/request is not
+        * NULL, as we need it to establish whether a cooperator exists.
+        */
+check_scheduled:
+       new_bfqq = bfq_find_close_cooperator(bfqd, bfqq,
+                       bfq_io_struct_pos(io_struct, request));
+
+       if (new_bfqq && !wr_from_too_long(new_bfqq) &&
+           likely(new_bfqq != &bfqd->oom_bfqq) &&
+           bfq_may_be_close_cooperator(bfqq, new_bfqq))
+               return bfq_setup_merge(bfqq, new_bfqq);
+
+       return NULL;
+}
+
+/*
+ * Copy into the bic owning @bfqq the think-time, flag and
+ * weight-raising state of @bfqq. Nothing is done if @bfqq has no bic.
+ */
+static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
+{
+       struct bfq_io_cq *owner = bfqq->bic;
+
+       /*
+        * A NULL bfqq->bic means that the queue is already shared, or
+        * that its requests have already been redirected to a shared
+        * queue: idle window and weight-raising state were saved back
+        * then, so there is nothing left to save.
+        */
+       if (owner) {
+               /* think time and behavioural flags */
+               owner->saved_ttime = bfqq->ttime;
+               owner->saved_idle_window = bfq_bfqq_idle_window(bfqq);
+               owner->saved_IO_bound = bfq_bfqq_IO_bound(bfqq);
+               owner->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq);
+               owner->was_in_burst_list =
+                       !hlist_unhashed(&bfqq->burst_list_node);
+
+               /* weight-raising state */
+               owner->saved_wr_coeff = bfqq->wr_coeff;
+               owner->saved_wr_start_at_switch_to_srt =
+                       bfqq->wr_start_at_switch_to_srt;
+               owner->saved_last_wr_start_finish =
+                       bfqq->last_wr_start_finish;
+               owner->saved_wr_cur_max_time = bfqq->wr_cur_max_time;
+       }
+}
+
+/*
+ * Complete the merge of @bfqq into @new_bfqq: save the state of both
+ * queues, move the I/O-bound flag and, if applicable, the
+ * weight-raising state from @bfqq to @new_bfqq, redirect @bic to
+ * @new_bfqq, mark @new_bfqq as a cooperating (shared) queue and drop
+ * one reference to @bfqq.
+ */
+static void
+bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
+               struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+{
+       bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
+               (unsigned long)new_bfqq->pid);
+       /* Save weight raising and idle window of the merged queues */
+       bfq_bfqq_save_state(bfqq);
+       bfq_bfqq_save_state(new_bfqq);
+       /* The I/O-bound flag migrates to new_bfqq and is cleared on bfqq. */
+       if (bfq_bfqq_IO_bound(bfqq))
+               bfq_mark_bfqq_IO_bound(new_bfqq);
+       bfq_clear_bfqq_IO_bound(bfqq);
+
+       /*
+        * If bfqq is weight-raised, then let new_bfqq inherit
+        * weight-raising. To reduce false positives, neglect the case
+        * where bfqq has just been created, but has not yet made it
+        * to be weight-raised (which may happen because EQM may merge
+        * bfqq even before bfq_add_request is executed for the first
+        * time for bfqq). Handling this case would however be very
+        * easy, thanks to the flag just_created.
+        */
+       if (new_bfqq->wr_coeff == 1 && bfqq->wr_coeff > 1) {
+               new_bfqq->wr_coeff = bfqq->wr_coeff;
+               new_bfqq->wr_cur_max_time = bfqq->wr_cur_max_time;
+               new_bfqq->last_wr_start_finish = bfqq->last_wr_start_finish;
+               new_bfqq->wr_start_at_switch_to_srt =
+                       bfqq->wr_start_at_switch_to_srt;
+               /* A busy queue that becomes weight-raised must be counted. */
+               if (bfq_bfqq_busy(new_bfqq))
+                       bfqd->wr_busy_queues++;
+               new_bfqq->entity.prio_changed = 1;
+       }
+
+       if (bfqq->wr_coeff > 1) { /* bfqq has given its wr to new_bfqq */
+               bfqq->wr_coeff = 1;
+               bfqq->entity.prio_changed = 1;
+               if (bfq_bfqq_busy(bfqq))
+                       bfqd->wr_busy_queues--;
+       }
+
+       bfq_log_bfqq(bfqd, new_bfqq, "merge_bfqqs: wr_busy %d",
+                    bfqd->wr_busy_queues);
+
+       /*
+        * Merge queues (that is, let bic redirect its requests to new_bfqq)
+        */
+       bic_set_bfqq(bic, new_bfqq, 1);
+       bfq_mark_bfqq_coop(new_bfqq);
+       /*
+        * new_bfqq now belongs to at least two bics (it is a shared queue):
+        * set new_bfqq->bic to NULL. bfqq either:
+        * - does not belong to any bic any more, and hence bfqq->bic must
+        *   be set to NULL, or
+        * - is a queue whose owning bics have already been redirected to a
+        *   different queue, hence the queue is destined to not belong to
+        *   any bic soon and bfqq->bic is already NULL (therefore the next
+        *   assignment causes no harm).
+        */
+       new_bfqq->bic = NULL;
+       bfqq->bic = NULL;
+       /* release process reference to bfqq */
+       bfq_put_queue(bfqq);
+}
+
+/*
+ * Tell whether @bio may be merged into @rq, i.e., whether the
+ * bfq_queue @bio is destined to is the one @rq belongs to. As a side
+ * effect, this function may perform an early merge of the queue
+ * recorded in bfqd->bio_bfqq with a cooperating queue.
+ */
+static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
+                               struct bio *bio)
+{
+       struct bfq_data *bfqd = q->elevator->elevator_data;
+       struct bfq_queue *bio_queue = bfqd->bio_bfqq;
+       struct bfq_queue *coop_queue;
+
+       /* A sync bio must never be merged into an async request. */
+       if (op_is_sync(bio->bi_opf) && !rq_is_sync(rq))
+               return false;
+
+       /*
+        * bio_queue is the queue this bio will be queued with; if
+        * there is none, no merge can be allowed.
+        */
+       if (!bio_queue)
+               return false;
+
+       /*
+        * Take advantage of this invocation to look for a queue that
+        * cooperates with bio_queue and, if one exists, to merge the
+        * two queues early.
+        */
+       coop_queue = bfq_setup_cooperator(bfqd, bio_queue, bio, false);
+       if (!coop_queue)
+               return bio_queue == RQ_BFQQ(rq);
+
+       /*
+        * bfqd->bio_bic still points to bio_queue, i.e., it has not
+        * yet been redirected to any other bfq_queue: the merge
+        * between bio_queue and coop_queue can therefore be safely
+        * completed, by redirecting the bic to coop_queue and putting
+        * bio_queue.
+        */
+       bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bio_queue, coop_queue);
+
+       /*
+        * bfqd->bio_bic now points to coop_queue, so update
+        * bfqd->bio_bfqq too: this function may be invoked again, and
+        * would then read bfqd->bio_bfqq again.
+        */
+       bfqd->bio_bfqq = coop_queue;
+
+       /*
+        * bio will be queued into coop_queue, so that is the queue to
+        * compare against the queue of rq.
+        */
+       return coop_queue == RQ_BFQQ(rq);
+}
+
+/*
+ * Record the start of a new budget for @bfqq and set the deadline
+ * (in jiffies) by which @bfqq must have consumed it. Bounding the
+ * service time prevents seeky processes from lowering the
+ * throughput: in practice, seeky processes end up being served with
+ * a time-slice scheme.
+ */
+static void bfq_set_budget_timeout(struct bfq_data *bfqd,
+                                  struct bfq_queue *bfqq)
+{
+       /*
+        * The base timeout is used as is when the weight-raising
+        * duration of bfqq equals bfqd->bfq_wr_rt_max_time; otherwise
+        * it is scaled by the ratio between the current and the
+        * original weight of bfqq.
+        */
+       unsigned int coeff = 1;
+
+       if (bfqq->wr_cur_max_time != bfqd->bfq_wr_rt_max_time)
+               coeff = bfqq->entity.weight / bfqq->entity.orig_weight;
+
+       bfqd->last_budget_start = ktime_get();
+
+       bfqq->budget_timeout = jiffies + bfqd->bfq_timeout * coeff;
+}
+
+/*
+ * Record @bfqq (which may be NULL) as the in-service queue of @bfqd.
+ * For a non-NULL @bfqq, first update the group statistics, clear the
+ * fifo_expire flag, update bfqd->budgets_assigned, possibly shift the
+ * start of the weight-raising period, and set the budget timeout.
+ */
+static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
+                                      struct bfq_queue *bfqq)
+{
+       if (bfqq) {
+               bfqg_stats_update_avg_queue_size(bfqq_group(bfqq));
+               bfq_clear_bfqq_fifo_expire(bfqq);
+
+               /* New value: 7/8 of the old value, plus 256/8 = 32. */
+               bfqd->budgets_assigned = (bfqd->budgets_assigned * 7 + 256) / 8;
+
+               if (time_is_before_jiffies(bfqq->last_wr_start_finish) &&
+                   bfqq->wr_coeff > 1 &&
+                   bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time &&
+                   time_is_before_jiffies(bfqq->budget_timeout)) {
+                       /*
+                        * For soft real-time queues, move the start
+                        * of the weight-raising period forward by the
+                        * time the queue has not received any
+                        * service. Otherwise, a relatively long
+                        * service delay is likely to cause the
+                        * weight-raising period of the queue to end,
+                        * because of the short duration of the
+                        * weight-raising period of a soft real-time
+                        * queue.  It is worth noting that this move
+                        * is not so dangerous for the other queues,
+                        * because soft real-time queues are not
+                        * greedy.
+                        *
+                        * To not add a further variable, we use the
+                        * overloaded field budget_timeout to
+                        * determine for how long the queue has not
+                        * received service, i.e., how much time has
+                        * elapsed since the queue expired. However,
+                        * this is a little imprecise, because
+                        * budget_timeout is set to jiffies if bfqq
+                        * not only expires, but also remains with no
+                        * request.
+                        *
+                        * If budget_timeout does not come after
+                        * last_wr_start_finish, the period is simply
+                        * restarted from the current time.
+                        */
+                       if (time_after(bfqq->budget_timeout,
+                                      bfqq->last_wr_start_finish))
+                               bfqq->last_wr_start_finish +=
+                                       jiffies - bfqq->budget_timeout;
+                       else
+                               bfqq->last_wr_start_finish = jiffies;
+               }
+
+               bfq_set_budget_timeout(bfqd, bfqq);
+               bfq_log_bfqq(bfqd, bfqq,
+                            "set_in_service_queue, cur-budget = %d",
+                            bfqq->entity.budget);
+       }
+
+       bfqd->in_service_queue = bfqq;
+}
+
+/*
+ * Pick the next queue to serve through bfq_get_next_queue(), install
+ * it as the in-service queue of @bfqd and return it. The result is
+ * whatever bfq_get_next_queue() returned.
+ */
+static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd)
+{
+       struct bfq_queue *next_bfqq;
+
+       next_bfqq = bfq_get_next_queue(bfqd);
+       __bfq_set_in_service_queue(bfqd, next_bfqq);
+
+       return next_bfqq;
+}
+
+/*
+ * Start idling on the in-service queue of @bfqd: mark the queue as
+ * waiting for a request, choose the idling duration, record the time
+ * at which idling starts, arm bfqd->idle_slice_timer in relative mode
+ * and update the idle-time statistics of the group of the queue.
+ */
+static void bfq_arm_slice_timer(struct bfq_data *bfqd)
+{
+       struct bfq_queue *bfqq = bfqd->in_service_queue;
+       u32 sl;
+
+       bfq_mark_bfqq_wait_request(bfqq);
+
+       /*
+        * We don't want to idle for seeks, but we do want to allow
+        * fair distribution of slice time for a process doing back-to-back
+        * seeks. So allow a little bit of time for him to submit a new rq.
+        */
+       sl = bfqd->bfq_slice_idle;
+       /*
+        * Unless the queue is being weight-raised or the scenario is
+        * asymmetric, grant only minimum idle time if the queue
+        * is seeky. A long idling is preserved for a weight-raised
+        * queue, or, more in general, in an asymmetric scenario,
+        * because a long idling is needed for guaranteeing to a queue
+        * its reserved share of the throughput (in particular, it is
+        * needed if the queue has a higher weight than some other
+        * queue).
+        */
+       if (BFQQ_SEEKY(bfqq) && bfqq->wr_coeff == 1 &&
+           bfq_symmetric_scenario(bfqd))
+               sl = min_t(u64, sl, BFQ_MIN_TT);
+
+       bfqd->last_idling_start = ktime_get();
+       /* sl is handed to ns_to_ktime(), i.e., it is treated as nanoseconds. */
+       hrtimer_start(&bfqd->idle_slice_timer, ns_to_ktime(sl),
+                     HRTIMER_MODE_REL);
+       bfqg_stats_set_start_idle_time(bfqq_group(bfqq));
+}
+
+/*
+ * Compute the maximum budget used in autotuning mode: the amount of
+ * sectors that can be transferred in one budget timeout at the
+ * estimated peak rate. With such a budget, BFQ can use a full time
+ * slice even when the in-service queue is served at peak rate, which
+ * maximises throughput with sequential workloads.
+ */
+static unsigned long bfq_calc_max_budget(struct bfq_data *bfqd)
+{
+       /* peak_rate is scaled up by BFQ_RATE_SHIFT bits: undo that last. */
+       u64 scaled_sectors = (u64)bfqd->peak_rate * USEC_PER_MSEC;
+
+       scaled_sectors *= jiffies_to_msecs(bfqd->bfq_timeout);
+
+       return scaled_sectors >> BFQ_RATE_SHIFT;
+}
+
+/*
+ * Update parameters related to throughput and responsiveness, as a
+ * function of the estimated peak rate. See comments on
+ * bfq_calc_max_budget(), and on T_slow and T_fast arrays.
+ *
+ * Two things are updated:
+ * - bfqd->bfq_max_budget, recomputed from the peak rate, but only if
+ *   bfqd->bfq_user_max_budget is 0;
+ * - the device speed class (and, with it, bfqd->RT_prod), which is
+ *   switched when the peak rate crosses the threshold associated
+ *   with the device type.
+ */
+static void update_thr_responsiveness_params(struct bfq_data *bfqd)
+{
+       /* Index into the per-device-type tables: 0 is logged as "ROT". */
+       int dev_type = blk_queue_nonrot(bfqd->queue);
+
+       if (bfqd->bfq_user_max_budget == 0)
+               bfqd->bfq_max_budget =
+                       bfq_calc_max_budget(bfqd);
+
+       /*
+        * Change class only on a strict crossing of the threshold: a
+        * peak rate equal to the threshold leaves the class unchanged.
+        */
+       if (bfqd->device_speed == BFQ_BFQD_FAST &&
+           bfqd->peak_rate < device_speed_thresh[dev_type]) {
+               bfqd->device_speed = BFQ_BFQD_SLOW;
+               bfqd->RT_prod = R_slow[dev_type] *
+                       T_slow[dev_type];
+       } else if (bfqd->device_speed == BFQ_BFQD_SLOW &&
+                  bfqd->peak_rate > device_speed_thresh[dev_type]) {
+               bfqd->device_speed = BFQ_BFQD_FAST;
+               bfqd->RT_prod = R_fast[dev_type] *
+                       T_fast[dev_type];
+       }
+
+       /* Rates are logged in sectors/sec, after removing the fixed-point shift. */
+       bfq_log(bfqd,
+"dev_type %s dev_speed_class = %s (%llu sects/sec), thresh %llu sects/sec",
+               dev_type == 0 ? "ROT" : "NONROT",
+               bfqd->device_speed == BFQ_BFQD_FAST ? "FAST" : "SLOW",
+               bfqd->device_speed == BFQ_BFQD_FAST ?
+               (USEC_PER_SEC*(u64)R_fast[dev_type])>>BFQ_RATE_SHIFT :
+               (USEC_PER_SEC*(u64)R_slow[dev_type])>>BFQ_RATE_SHIFT,
+               (USEC_PER_SEC*(u64)device_speed_thresh[dev_type])>>
+               BFQ_RATE_SHIFT);
+}
+
+/*
+ * Restart the sampling used for peak-rate estimation. If @rq is
+ * non-NULL, it is taken as the first sample of the new observation
+ * interval; otherwise only the sample counter is zeroed.
+ */
+static void bfq_reset_rate_computation(struct bfq_data *bfqd,
+                                      struct request *rq)
+{
+       if (!rq) {
+               /*
+                * No new rq dispatched: just reset the number of
+                * samples, the full re-init happens on next dispatch.
+                */
+               bfqd->peak_rate_samples = 0;
+       } else {
+               /* New rq dispatched now: restart the interval from it. */
+               bfqd->first_dispatch = ktime_get_ns();
+               bfqd->last_dispatch = bfqd->first_dispatch;
+               bfqd->peak_rate_samples = 1;
+               bfqd->sequential_samples = 0;
+               bfqd->last_rq_max_size = blk_rq_sectors(rq);
+               bfqd->tot_sectors_dispatched = bfqd->last_rq_max_size;
+       }
+
+       bfq_log(bfqd,
+               "reset_rate_computation at end, sample %u/%u tot_sects %llu",
+               bfqd->peak_rate_samples, bfqd->sequential_samples,
+               bfqd->tot_sectors_dispatched);
+}
+
+/*
+ * Close the current observation interval: if enough samples and
+ * enough time have been collected, compute the rate measured over the
+ * interval and, unless the measure is discarded, blend it into
+ * bfqd->peak_rate and refresh the parameters that depend on it. In
+ * all cases, finish by resetting the rate computation, passing @rq
+ * (which may be NULL) to bfq_reset_rate_computation().
+ */
+static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq)
+{
+       u32 rate, weight, divisor;
+
+       /*
+        * For the convergence property to hold (see comments on
+        * bfq_update_peak_rate()) and for the assessment to be
+        * reliable, a minimum number of samples must be present, and
+        * a minimum amount of time must have elapsed. If not so, do
+        * not compute new rate. Just reset parameters, to get ready
+        * for a new evaluation attempt.
+        */
+       if (bfqd->peak_rate_samples < BFQ_RATE_MIN_SAMPLES ||
+           bfqd->delta_from_first < BFQ_RATE_MIN_INTERVAL)
+               goto reset_computation;
+
+       /*
+        * If a new request completion has occurred after last
+        * dispatch, then, to approximate the rate at which requests
+        * have been served by the device, it is more precise to
+        * extend the observation interval to the last completion.
+        */
+       bfqd->delta_from_first =
+               max_t(u64, bfqd->delta_from_first,
+                     bfqd->last_completion - bfqd->first_dispatch);
+
+       /*
+        * Rate computed in sects/usec, and not sects/nsec, for
+        * precision issues. The number of sectors is shifted left by
+        * BFQ_RATE_SHIFT bits before the division, so the result is a
+        * fixed-point value.
+        */
+       rate = div64_ul(bfqd->tot_sectors_dispatched<<BFQ_RATE_SHIFT,
+                       div_u64(bfqd->delta_from_first, NSEC_PER_USEC));
+
+       /*
+        * Peak rate not updated if:
+        * - the percentage of sequential dispatches is below 3/4 of the
+        *   total, and rate is below the current estimated peak rate
+        * - rate is unreasonably high (> 20M sectors/sec)
+        */
+       if ((bfqd->sequential_samples < (3 * bfqd->peak_rate_samples)>>2 &&
+            rate <= bfqd->peak_rate) ||
+               rate > 20<<BFQ_RATE_SHIFT)
+               goto reset_computation;
+
+       /*
+        * We have to update the peak rate, at last! To this purpose,
+        * we use a low-pass filter. We compute the smoothing constant
+        * of the filter as a function of the 'weight' of the new
+        * measured rate.
+        *
+        * As can be seen in next formulas, we define this weight as a
+        * quantity proportional to how sequential the workload is,
+        * and to how long the observation time interval is.
+        *
+        * The weight runs from 0 to 8. The maximum value of the
+        * weight, 8, yields the minimum value for the smoothing
+        * constant. At this minimum value for the smoothing constant,
+        * the measured rate contributes for half of the next value of
+        * the estimated peak rate.
+        *
+        * So, the first step is to compute the weight as a function
+        * of how sequential the workload is. Note that the weight
+        * cannot reach 9, because bfqd->sequential_samples cannot
+        * become equal to bfqd->peak_rate_samples, which, in its
+        * turn, holds true because bfqd->sequential_samples is not
+        * incremented for the first sample.
+        */
+       weight = (9 * bfqd->sequential_samples) / bfqd->peak_rate_samples;
+
+       /*
+        * Second step: further refine the weight as a function of the
+        * duration of the observation interval. The result is capped
+        * at 8.
+        */
+       weight = min_t(u32, 8,
+                      div_u64(weight * bfqd->delta_from_first,
+                              BFQ_RATE_REF_INTERVAL));
+
+       /*
+        * Divisor ranging from 10, for minimum weight, to 2, for
+        * maximum weight.
+        */
+       divisor = 10 - weight;
+
+       /*
+        * Finally, update peak rate:
+        *
+        * peak_rate = peak_rate * (divisor-1) / divisor  +  rate / divisor
+        */
+       bfqd->peak_rate *= divisor-1;
+       bfqd->peak_rate /= divisor;
+       rate /= divisor; /* smoothing constant alpha = 1/divisor */
+
+       bfqd->peak_rate += rate;
+       update_thr_responsiveness_params(bfqd);
+
+reset_computation:
+       bfq_reset_rate_computation(bfqd, rq);
+}
+
+/*
+ * Update the read/write peak rate (the main quantity used for
+ * auto-tuning, see update_thr_responsiveness_params()).
+ *
+ * It is not trivial to estimate the peak rate (correctly): because of
+ * the presence of sw and hw queues between the scheduler and the
+ * device components that finally serve I/O requests, it is hard to
+ * say exactly when a given dispatched request is served inside the
+ * device, and for how long. As a consequence, it is hard to know
+ * precisely at what rate a given set of requests is actually served
+ * by the device.
+ *
+ * On the opposite end, the dispatch time of any request is trivially
+ * available, and, from this piece of information, the "dispatch rate"
+ * of requests can be immediately computed. So, the idea in the next
+ * function is to use what is known, namely request dispatch times
+ * (plus, when useful, request completion times), to estimate what is
+ * unknown, namely in-device request service rate.
+ *
+ * The main issue is that, because of the above facts, the rate at
+ * which a certain set of requests is dispatched over a certain time
+ * interval can vary greatly with respect to the rate at which the
+ * same requests are then served. But, since the size of any
+ * intermediate queue is limited, and the service scheme is lossless
+ * (no request is silently dropped), the following obvious convergence
+ * property holds: the number of requests dispatched MUST become
+ * closer and closer to the number of requests completed as the
+ * observation interval grows. This is the key property used in
+ * the next function to estimate the peak service rate as a function
+ * of the observed dispatch rate. The function assumes to be invoked
+ * on every request dispatch.
+ *
+ * On every invocation, whatever path is taken, bfqd->last_position is
+ * set to the end of @rq (start position plus number of sectors) and
+ * bfqd->last_dispatch to the current time.
+ */
+static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq)
+{
+       u64 now_ns = ktime_get_ns();
+
+       if (bfqd->peak_rate_samples == 0) { /* first dispatch */
+               bfq_log(bfqd, "update_peak_rate: goto reset, samples %d",
+                       bfqd->peak_rate_samples);
+               bfq_reset_rate_computation(bfqd, rq);
+               goto update_last_values; /* will add one sample */
+       }
+
+       /*
+        * Device idle for very long: the observation interval lasting
+        * up to this dispatch cannot be a valid observation interval
+        * for computing a new peak rate (similarly to the late-
+        * completion event in bfq_completed_request()). Go to
+        * update_rate_and_reset to have the following three steps
+        * taken:
+        * - close the observation interval at the last (previous)
+        *   request dispatch or completion
+        * - compute rate, if possible, for that observation interval
+        * - start a new observation interval with this dispatch
+        */
+       if (now_ns - bfqd->last_dispatch > 100*NSEC_PER_MSEC &&
+           bfqd->rq_in_driver == 0)
+               goto update_rate_and_reset;
+
+       /* Update sampling information */
+       bfqd->peak_rate_samples++;
+
+       /*
+        * Count this dispatch as sequential if the device has not
+        * been left idle (requests in the driver, or last completion
+        * less than BFQ_MIN_TT ago) and rq is closer than
+        * BFQQ_SEEK_THR to the last position.
+        */
+       if ((bfqd->rq_in_driver > 0 ||
+               now_ns - bfqd->last_completion < BFQ_MIN_TT)
+            && get_sdist(bfqd->last_position, rq) < BFQQ_SEEK_THR)
+               bfqd->sequential_samples++;
+
+       bfqd->tot_sectors_dispatched += blk_rq_sectors(rq);
+
+       /* Reset max observed rq size every 32 dispatches */
+       if (likely(bfqd->peak_rate_samples % 32))
+               bfqd->last_rq_max_size =
+                       max_t(u32, blk_rq_sectors(rq), bfqd->last_rq_max_size);
+       else
+               bfqd->last_rq_max_size = blk_rq_sectors(rq);
+
+       bfqd->delta_from_first = now_ns - bfqd->first_dispatch;
+
+       /* Target observation interval not yet reached, go on sampling */
+       if (bfqd->delta_from_first < BFQ_RATE_REF_INTERVAL)
+               goto update_last_values;
+
+update_rate_and_reset:
+       bfq_update_rate_reset(bfqd, rq);
+update_last_values:
+       bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
+       bfqd->last_dispatch = now_ns;
+}
+
+/*
+ * Account for the dispatch of @rq (dispatched counter of its queue
+ * and peak-rate estimation), then remove @rq from the internal lists.
+ */
+static void bfq_dispatch_remove(struct request_queue *q, struct request *rq)
+{
+       struct bfq_data *bfqd = q->elevator->elevator_data;
+       struct bfq_queue *rq_bfqq = RQ_BFQQ(rq);
+
+       /*
+        * Strictly speaking, the counter of dispatched requests
+        * should be incremented only after the request has been
+        * removed from the queue and dispatched. For efficiency, the
+        * increment is instead anticipated to before
+        * bfq_remove_request(), at the cost of a temporary
+        * inconsistency. In fact, should this dispatch occur for a
+        * queue that is not in service, the anticipated increment
+        * prevents two counters related to the number of dispatched
+        * requests from risking to be first uselessly decremented,
+        * and then incremented again when the new value of the
+        * counter happens to be taken into account.
+        */
+       rq_bfqq->dispatched++;
+       bfq_update_peak_rate(bfqd, rq);
+
+       bfq_remove_request(q, rq);
+}
+
+/*
+ * Expire @bfqq: flag it for splitting if it is a shared queue that
+ * has become seeky, then either remove it from the busy queues (no
+ * request left in its sort list) or requeue it, and finally reset the
+ * in-service state of @bfqd.
+ */
+static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+       /*
+        * If this bfqq is shared between multiple processes, check
+        * to make sure that those processes are still issuing I/Os
+        * within the mean seek distance. If not, it may be time to
+        * break the queues apart again.
+        */
+       if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq))
+               bfq_mark_bfqq_split_coop(bfqq);
+
+       if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
+               if (bfqq->dispatched == 0)
+                       /*
+                        * Overloading budget_timeout field to store
+                        * the time at which the queue remains with no
+                        * backlog and no outstanding request; used by
+                        * the weight-raising mechanism.
+                        */
+                       bfqq->budget_timeout = jiffies;
+
+               bfq_del_bfqq_busy(bfqd, bfqq, true);
+       } else {
+               bfq_requeue_bfqq(bfqd, bfqq);
+               /*
+                * Resort priority tree of potential close cooperators.
+                */
+               bfq_pos_tree_add_move(bfqd, bfqq);
+       }
+
+       /*
+        * All in-service entities must have been properly deactivated
+        * or requeued before executing the next function, which
+        * resets all in-service entities as no more in service.
+        */
+       __bfq_bfqd_reset_in_service(bfqd);
+}
+
+/**
+ * __bfq_bfqq_recalc_budget - try to adapt the budget to the @bfqq behavior.
+ * @bfqd: device data.
+ * @bfqq: queue to update.
+ * @reason: reason for expiration.
+ *
+ * Handle the feedback on @bfqq budget at queue expiration.
+ * See the body for detailed comments.
+ *
+ * The new value is stored in @bfqq->max_budget and, if @bfqq still
+ * has a next request, @bfqq->entity.budget is updated as well. For a
+ * sync, non-weight-raised queue expired for a @reason not handled in
+ * the switch below, the function returns without changing anything.
+ */
+static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
+                                    struct bfq_queue *bfqq,
+                                    enum bfqq_expiration reason)
+{
+       struct request *next_rq;
+       int budget, min_budget;
+
+       min_budget = bfq_min_budget(bfqd);
+
+       /* Starting point for the feedback applied below. */
+       if (bfqq->wr_coeff == 1)
+               budget = bfqq->max_budget;
+       else /*
+             * Use a constant, low budget for weight-raised queues,
+             * to help achieve a low latency. Keep it slightly higher
+             * than the minimum possible budget, to cause a little
+             * bit fewer expirations.
+             */
+               budget = 2 * min_budget;
+
+       bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %d, budg left %d",
+               bfqq->entity.budget, bfq_bfqq_budget_left(bfqq));
+       bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %d, min budg %d",
+               budget, bfq_min_budget(bfqd));
+       bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d",
+               bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue));
+
+       if (bfq_bfqq_sync(bfqq) && bfqq->wr_coeff == 1) {
+               switch (reason) {
+               /*
+                * Caveat: in all the following cases we trade latency
+                * for throughput.
+                */
+               case BFQQE_TOO_IDLE:
+                       /*
+                        * This is the only case where we may reduce
+                        * the budget: if there is no request of the
+                        * process still waiting for completion, then
+                        * we assume (tentatively) that the timer has
+                        * expired because the batch of requests of
+                        * the process could have been served with a
+                        * smaller budget.  Hence, betting that
+                        * process will behave in the same way when it
+                        * becomes backlogged again, we reduce its
+                        * next budget.  As long as we guess right,
+                        * this budget cut reduces the latency
+                        * experienced by the process.
+                        *
+                        * However, if there are still outstanding
+                        * requests, then the process may have not yet
+                        * issued its next request just because it is
+                        * still waiting for the completion of some of
+                        * the still outstanding ones.  So in this
+                        * subcase we do not reduce its budget, on the
+                        * contrary we increase it to possibly boost
+                        * the throughput, as discussed in the
+                        * comments to the BUDGET_TIMEOUT case.
+                        */
+                       if (bfqq->dispatched > 0) /* still outstanding reqs */
+                               budget = min(budget * 2, bfqd->bfq_max_budget);
+                       else {
+                               /* Cut by 4 * min_budget, never below min_budget. */
+                               if (budget > 5 * min_budget)
+                                       budget -= 4 * min_budget;
+                               else
+                                       budget = min_budget;
+                       }
+                       break;
+               case BFQQE_BUDGET_TIMEOUT:
+                       /*
+                        * We double the budget here because it gives
+                        * the chance to boost the throughput if this
+                        * is not a seeky process (and has bumped into
+                        * this timeout because of, e.g., ZBR).
+                        */
+                       budget = min(budget * 2, bfqd->bfq_max_budget);
+                       break;
+               case BFQQE_BUDGET_EXHAUSTED:
+                       /*
+                        * The process still has backlog, and did not
+                        * let either the budget timeout or the disk
+                        * idling timeout expire. Hence it is not
+                        * seeky, has a short thinktime and may be
+                        * happy with a higher budget too. So
+                        * definitely increase the budget of this good
+                        * candidate to boost the disk throughput.
+                        */
+                       budget = min(budget * 4, bfqd->bfq_max_budget);
+                       break;
+               case BFQQE_NO_MORE_REQUESTS:
+                       /*
+                        * For queues that expire for this reason, it
+                        * is particularly important to keep the
+                        * budget close to the actual service they
+                        * need. Doing so reduces the timestamp
+                        * misalignment problem described in the
+                        * comments in the body of
+                        * __bfq_activate_entity. In fact, suppose
+                        * that a queue systematically expires for
+                        * BFQQE_NO_MORE_REQUESTS and presents a
+                        * new request in time to enjoy timestamp
+                        * back-shifting. The larger the budget of the
+                        * queue is with respect to the service the
+                        * queue actually requests in each service
+                        * slot, the more times the queue can be
+                        * reactivated with the same virtual finish
+                        * time. It follows that, even if this finish
+                        * time is pushed to the system virtual time
+                        * to reduce the consequent timestamp
+                        * misalignment, the queue unjustly enjoys for
+                        * many re-activations a lower finish time
+                        * than all newly activated queues.
+                        *
+                        * The service needed by bfqq is measured
+                        * quite precisely by bfqq->entity.service.
+                        * Since bfqq does not enjoy device idling,
+                        * bfqq->entity.service is equal to the number
+                        * of sectors that the process associated with
+                        * bfqq requested to read/write before waiting
+                        * for request completions, or blocking for
+                        * other reasons.
+                        */
+                       budget = max_t(int, bfqq->entity.service, min_budget);
+                       break;
+               default:
+                       /*
+                        * Any other reason: leave both max_budget and
+                        * the entity budget untouched.
+                        */
+                       return;
+               }
+       } else if (!bfq_bfqq_sync(bfqq)) {
+               /*
+                * Async queues get always the maximum possible
+                * budget, as for them we do not care about latency
+                * (in addition, their ability to dispatch is limited
+                * by the charging factor).
+                */
+               budget = bfqd->bfq_max_budget;
+       }
+
+       bfqq->max_budget = budget;
+
+       /*
+        * Cap max_budget to the device-wide maximum, but only once
+        * budgets_assigned has reached bfq_stats_min_budgets and only
+        * if bfq_user_max_budget is zero.
+        */
+       if (bfqd->budgets_assigned >= bfq_stats_min_budgets &&
+           !bfqd->bfq_user_max_budget)
+               bfqq->max_budget = min(bfqq->max_budget, bfqd->bfq_max_budget);
+
+       /*
+        * If there is still backlog, then assign a new budget, making
+        * sure that it is large enough for the next request.  Since
+        * the finish time of bfqq must be kept in sync with the
+        * budget, be sure to call __bfq_bfqq_expire() *after* this
+        * update.
+        *
+        * If there is no backlog, then no need to update the budget;
+        * it will be updated on the arrival of a new request.
+        */
+       next_rq = bfqq->next_rq;
+       if (next_rq)
+               bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget,
+                                           bfq_serv_to_charge(next_rq, bfqq));
+
+       bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %d",
+                       next_rq ? blk_rq_sectors(next_rq) : 0,
+                       bfqq->entity.budget);
+}
+
+/*
+ * Return true if the process associated with bfqq is "slow". The slow
+ * flag is used, in addition to the budget timeout, to reduce the
+ * amount of service provided to seeky processes, and thus reduce
+ * their chances to lower the throughput. More details in the comments
+ * on the function bfq_bfqq_expire().
+ *
+ * An important observation is in order: as discussed in the comments
+ * on the function bfq_update_peak_rate(), with devices with internal
+ * queues, it is hard, if at all possible, to know when and for how long
+ * an I/O request is processed by the device (apart from the trivial
+ * I/O pattern where a new request is dispatched only after the
+ * previous one has been completed). This makes it hard to evaluate
+ * the real rate at which the I/O requests of each bfq_queue are
+ * served.  In fact, for an I/O scheduler like BFQ, serving a
+ * bfq_queue means just dispatching its requests during its service
+ * slot (i.e., until the budget of the queue is exhausted, or the
+ * queue remains idle, or, finally, a timeout fires). But, during the
+ * service slot of a bfq_queue, around 100 ms at most, the device may
+ * even still be processing requests of bfq_queues served in previous
+ * service slots. On the opposite end, the requests of the in-service
+ * bfq_queue may be completed after the service slot of the queue
+ * finishes.
+ *
+ * Anyway, unless more sophisticated solutions are used
+ * (where possible), the sum of the sizes of the requests dispatched
+ * during the service slot of a bfq_queue is probably the only
+ * approximation available for the service received by the bfq_queue
+ * during its service slot. And this sum is the quantity used in this
+ * function to evaluate the I/O speed of a process.
+ */
+static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+                                bool compensate, enum bfqq_expiration reason,
+                                unsigned long *delta_ms)
+{
+       ktime_t delta_ktime;
+       u32 delta_usecs;
+       bool slow = BFQQ_SEEKY(bfqq); /* if delta too short, use seekiness */
+
+       if (!bfq_bfqq_sync(bfqq))
+               return false;
+
+       if (compensate)
+               delta_ktime = bfqd->last_idling_start;
+       else
+               delta_ktime = ktime_get();
+       delta_ktime = ktime_sub(delta_ktime, bfqd->last_budget_start);
+       delta_usecs = ktime_to_us(delta_ktime);
+
+       /* under 1 ms: too short to measure, report a nominal *delta_ms */
+       if (delta_usecs < 1000) {
+               if (blk_queue_nonrot(bfqd->queue))
+                        /*
+                         * give same worst-case guarantees as idling
+                         * for seeky
+                         */
+                       *delta_ms = BFQ_MIN_TT / NSEC_PER_MSEC;
+               else /* charge at least one seek */
+                       *delta_ms = bfq_slice_idle / NSEC_PER_MSEC;
+
+               return slow;
+       }
+
+       *delta_ms = delta_usecs / USEC_PER_MSEC;
+
+       /*
+        * Use only long (> 20ms) intervals to filter out excessive
+        * spikes in service rate estimation.
+        */
+       if (delta_usecs > 20000) {
+               /*
+                * Caveat for rotational devices: processes doing I/O
+                * in the slower disk zones tend to be slow(er) even
+                * if not seeky. In this respect, the estimated peak
+                * rate is likely to be an average over the disk
+                * surface. Accordingly, to not be too harsh with
+                * unlucky processes, a process is deemed slow only if
+                * its rate has been lower than half of the estimated
+                * peak rate.
+                */
+               slow = bfqq->entity.service < bfqd->bfq_max_budget / 2;
+       }
+
+       bfq_log_bfqq(bfqd, bfqq, "bfq_bfqq_is_slow: slow %d", slow);
+
+       return slow;
+}
+
+/*
+ * To be deemed as soft real-time, an application must meet two
+ * requirements. First, the application must not require an average
+ * bandwidth higher than the approximate bandwidth required to playback or
+ * record a compressed high-definition video.
+ * The next function is invoked on the completion of the last request of a
+ * batch, to compute the next-start time instant, soft_rt_next_start, such
+ * that, if the next request of the application does not arrive before
+ * soft_rt_next_start, then the above requirement on the bandwidth is met.
+ *
+ * The second requirement is that the request pattern of the application is
+ * isochronous, i.e., that, after issuing a request or a batch of requests,
+ * the application stops issuing new requests until all its pending requests
+ * have been completed. After that, the application may issue a new batch,
+ * and so on.
+ * For this reason the next function is invoked to compute
+ * soft_rt_next_start only for applications that meet this requirement,
+ * whereas soft_rt_next_start is set to infinity for applications that do
+ * not.
+ *
+ * Unfortunately, even a greedy application may happen to behave in an
+ * isochronous way if the CPU load is high. In fact, the application may
+ * stop issuing requests while the CPUs are busy serving other processes,
+ * then restart, then stop again for a while, and so on. In addition, if
+ * the disk achieves a low enough throughput with the request pattern
+ * issued by the application (e.g., because the request pattern is random
+ * and/or the device is slow), then the application may meet the above
+ * bandwidth requirement too. To prevent such a greedy application from
+ * being deemed soft real-time, a further rule is used in the computation of
+ * soft_rt_next_start: soft_rt_next_start must be higher than the current
+ * time plus the maximum time for which the arrival of a request is waited
+ * for when a sync queue becomes idle, namely bfqd->bfq_slice_idle.
+ * This filters out greedy applications, as the latter issue instead their
+ * next request as soon as possible after the last one has been completed
+ * (in contrast, when a batch of requests is completed, a soft real-time
+ * application spends some time processing data).
+ *
+ * Unfortunately, the last filter may easily generate false positives if
+ * only bfqd->bfq_slice_idle is used as a reference time interval and one
+ * or both the following cases occur:
+ * 1) HZ is so low that the duration of a jiffy is comparable to or higher
+ *    than bfqd->bfq_slice_idle. This happens, e.g., on slow devices with
+ *    HZ=100.
+ * 2) jiffies, instead of increasing at a constant rate, may stop increasing
+ *    for a while, then suddenly 'jump' by several units to recover the lost
+ *    increments. This seems to happen, e.g., inside virtual machines.
+ * To address this issue, we do not use as a reference time interval just
+ * bfqd->bfq_slice_idle, but bfqd->bfq_slice_idle plus a few jiffies. In
+ * particular we add the minimum number of jiffies (4, see below) for which
+ * the filter seems to be quite precise also in embedded systems and
+ * KVM/QEMU virtual machines.
+ */
+static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
+                                               struct bfq_queue *bfqq)
+{
+       return max(bfqq->last_idle_bklogged +
+                  HZ * bfqq->service_from_backlogged /
+                  bfqd->bfq_wr_max_softrt_rate,
+                  jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4);
+}
+
+/*
+ * Return the farthest future time instant according to jiffies
+ * macros, computed as jiffies + MAX_JIFFY_OFFSET.
+ */
+static unsigned long bfq_greatest_from_now(void)
+{
+       return jiffies + MAX_JIFFY_OFFSET;
+}
+
+/*
+ * Return the farthest past time instant according to jiffies
+ * macros, computed as jiffies - MAX_JIFFY_OFFSET.
+ */
+static unsigned long bfq_smallest_from_now(void)
+{
+       return jiffies - MAX_JIFFY_OFFSET;
+}
+
+/**
+ * bfq_bfqq_expire - expire a queue.
+ * @bfqd: device owning the queue.
+ * @bfqq: the queue to expire.
+ * @compensate: if true, compensate for the time spent idling.
+ * @reason: the reason causing the expiration.
+ *
+ * If the process associated with bfqq does slow I/O (e.g., because it
+ * issues random requests), we charge bfqq with the time it has been
+ * in service instead of the service it has received (see
+ * bfq_bfqq_charge_time for details on how this goal is achieved). As
+ * a consequence, bfqq will typically get higher timestamps upon
+ * reactivation, and hence it will be rescheduled as if it had
+ * received more service than what it has actually received. In the
+ * end, bfqq receives less service in proportion to how slowly its
+ * associated process consumes its budgets (and hence how seriously it
+ * tends to lower the throughput). In addition, this time-charging
+ * strategy guarantees time fairness among slow processes. In
+ * contrast, if the process associated with bfqq is not slow, we
+ * charge bfqq exactly with the service it has received.
+ *
+ * Charging time to the first type of queues and the exact service to
+ * the other has the effect of using the WF2Q+ policy to schedule the
+ * former on a timeslice basis, without violating service domain
+ * guarantees among the latter.
+ */
+void bfq_bfqq_expire(struct bfq_data *bfqd,
+                    struct bfq_queue *bfqq,
+                    bool compensate,
+                    enum bfqq_expiration reason)
+{
+       bool slow;
+       unsigned long delta = 0;
+       struct bfq_entity *entity = &bfqq->entity;
+       int ref;
+
+       /*
+        * Check whether the process is slow (see bfq_bfqq_is_slow).
+        */
+       slow = bfq_bfqq_is_slow(bfqd, bfqq, compensate, reason, &delta);
+
+       /*
+        * Increase service_from_backlogged before next statement,
+        * because the possible next invocation of
+        * bfq_bfqq_charge_time would likely inflate
+        * entity->service. In contrast, service_from_backlogged must
+        * contain real service, to enable the soft real-time
+        * heuristic to correctly compute the bandwidth consumed by
+        * bfqq.
+        */
+       bfqq->service_from_backlogged += entity->service;
+
+       /*
+        * As explained above, charge slow (typically seeky) and
+        * timed-out queues with the time and not the service
+        * received, to favor sequential workloads.
+        *
+        * Processes doing I/O in the slower disk zones will tend to
+        * be slow(er) even if not seeky. Therefore, since the
+        * estimated peak rate is actually an average over the disk
+        * surface, these processes may timeout just for bad luck. To
+        * avoid punishing them, do not charge time to processes that
+        * succeeded in consuming at least 2/3 of their budget. This
+        * allows BFQ to preserve enough elasticity to still perform
+        * bandwidth, and not time, distribution with slightly unlucky
+        * or quasi-sequential processes.
+        */
+       if (bfqq->wr_coeff == 1 &&
+           (slow ||
+            (reason == BFQQE_BUDGET_TIMEOUT &&
+             bfq_bfqq_budget_left(bfqq) >=  entity->budget / 3)))
+               bfq_bfqq_charge_time(bfqd, bfqq, delta);
+
+       if (reason == BFQQE_TOO_IDLE &&
+           entity->service <= 2 * entity->budget / 10)
+               bfq_clear_bfqq_IO_bound(bfqq);
+
+       if (bfqd->low_latency && bfqq->wr_coeff == 1)
+               bfqq->last_wr_start_finish = jiffies;
+
+       if (bfqd->low_latency && bfqd->bfq_wr_max_softrt_rate > 0 &&
+           RB_EMPTY_ROOT(&bfqq->sort_list)) {
+               /*
+                * If we get here, and there are no outstanding
+                * requests, then the request pattern is isochronous
+                * (see the comments on the function
+                * bfq_bfqq_softrt_next_start()). Thus we can compute
+                * soft_rt_next_start. If, instead, the queue still
+                * has outstanding requests, then we have to wait for
+                * the completion of all the outstanding requests to
+                * discover whether the request pattern is actually
+                * isochronous.
+                */
+               if (bfqq->dispatched == 0)
+                       bfqq->soft_rt_next_start =
+                               bfq_bfqq_softrt_next_start(bfqd, bfqq);
+               else {
+                       /*
+                        * The application is still waiting for the
+                        * completion of one or more requests:
+                        * prevent it from possibly being incorrectly
+                        * deemed as soft real-time by setting its
+                        * soft_rt_next_start to infinity. In fact,
+                        * without this assignment, the application
+                        * would be incorrectly deemed as soft
+                        * real-time if:
+                        * 1) it issued a new request before the
+                        *    completion of all its in-flight
+                        *    requests, and
+                        * 2) at that time, its soft_rt_next_start
+                        *    happened to be in the past.
+                        */
+                       bfqq->soft_rt_next_start =
+                               bfq_greatest_from_now();
+                       /*
+                        * Schedule an update of soft_rt_next_start to when
+                        * the task may be discovered to be isochronous.
+                        */
+                       bfq_mark_bfqq_softrt_update(bfqq);
+               }
+       }
+
+       bfq_log_bfqq(bfqd, bfqq,
+               "expire (%d, slow %d, num_disp %d, idle_win %d)", reason,
+               slow, bfqq->dispatched, bfq_bfqq_idle_window(bfqq));
+
+       /*
+        * Increase, decrease or leave budget unchanged according to
+        * reason.
+        */
+       __bfq_bfqq_recalc_budget(bfqd, bfqq, reason);
+       ref = bfqq->ref;
+       __bfq_bfqq_expire(bfqd, bfqq);
+
+       /* mark bfqq as waiting a request only if a bic still points to it */
+       if (ref > 1 && !bfq_bfqq_busy(bfqq) &&
+           reason != BFQQE_BUDGET_TIMEOUT &&
+           reason != BFQQE_BUDGET_EXHAUSTED)
+               bfq_mark_bfqq_non_blocking_wait_rq(bfqq);
+}
+
+/*
+ * Return true once bfqq->budget_timeout has been reached. There is no
+ * dedicated timer: the timeout is just checked on request arrivals and
+ * completions, as well as on idle timer expirations.
+ */
+static bool bfq_bfqq_budget_timeout(struct bfq_queue *bfqq)
+{
+       return time_is_before_eq_jiffies(bfqq->budget_timeout);
+}
+
+/*
+ * If we expire a queue that is actively waiting (i.e., with the device
+ * idled) for the arrival of a new request, then we may incur the
+ * timestamp misalignment problem described in the body of the function
+ * __bfq_activate_entity. Hence, on a budget timeout, we return true only
+ * if this condition does not hold, or if the queue is slow enough to
+ * deserve only to be kicked off for preserving a high throughput.
+ */
+static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq)
+{
+       bfq_log_bfqq(bfqq->bfqd, bfqq,
+               "may_budget_timeout: wait_request %d left %d timeout %d",
+               bfq_bfqq_wait_request(bfqq),
+                       bfq_bfqq_budget_left(bfqq) >=  bfqq->entity.budget / 3,
+               bfq_bfqq_budget_timeout(bfqq));
+
+       return (!bfq_bfqq_wait_request(bfqq) ||
+               bfq_bfqq_budget_left(bfqq) >=  bfqq->entity.budget / 3)
+               &&
+               bfq_bfqq_budget_timeout(bfqq);
+}
+
+/*
+ * For a queue that becomes empty, device idling is allowed only if
+ * this function returns true for the queue. As a consequence, since
+ * device idling plays a critical role in both throughput boosting and
+ * service guarantees, the return value of this function plays a
+ * critical role in both these aspects as well.
+ *
+ * In a nutshell, this function returns true only if idling is
+ * beneficial for throughput or, even if detrimental for throughput,
+ * idling is however necessary to preserve service guarantees (low
+ * latency, desired throughput distribution, ...). In particular, on
+ * NCQ-capable devices, this function tries to return false, so as to
+ * help keep the drives' internal queues full, whenever this helps the
+ * device boost the throughput without causing any service-guarantee
+ * issue.
+ *
+ * In more detail, the return value of this function is obtained by,
+ * first, computing a number of boolean variables that take into
+ * account throughput and service-guarantee issues, and, then,
+ * combining these variables in a logical expression. Most of the
+ * issues taken into account are not trivial. We discuss these issues
+ * individually while introducing the variables.
+ */
+static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+{
+       struct bfq_data *bfqd = bfqq->bfqd;
+       bool idling_boosts_thr, idling_boosts_thr_without_issues,
+               idling_needed_for_service_guarantees,
+               asymmetric_scenario;
+
+       if (bfqd->strict_guarantees)
+               return true;
+
+       /*
+        * The next variable takes into account the cases where idling
+        * boosts the throughput.
+        *
+        * The value of the variable is computed considering, first, that
+        * idling is virtually always beneficial for the throughput if:
+        * (a) the device is not NCQ-capable, or
+        * (b) regardless of the presence of NCQ, the device is rotational
+        *     and the request pattern for bfqq is I/O-bound and sequential.
+        *
+        * Secondly, and in contrast to the above item (b), idling an
+        * NCQ-capable flash-based device would not boost the
+        * throughput even with sequential I/O; rather it would lower
+        * the throughput in proportion to how fast the device
+        * is. Accordingly, the next variable is true if any of the
+        * above conditions (a) and (b) is true, and, in particular,
+        * happens to be false if bfqd is an NCQ-capable flash-based
+        * device.
+        */
+       idling_boosts_thr = !bfqd->hw_tag ||
+               (!blk_queue_nonrot(bfqd->queue) && bfq_bfqq_IO_bound(bfqq) &&
+                bfq_bfqq_idle_window(bfqq));
+
+       /*
+        * The value of the next variable,
+        * idling_boosts_thr_without_issues, is equal to that of
+        * idling_boosts_thr, unless a special case holds. In this
+        * special case, described below, idling may cause problems to
+        * weight-raised queues.
+        *
+        * When the request pool is saturated (e.g., in the presence
+        * of write hogs), if the processes associated with
+        * non-weight-raised queues ask for requests at a lower rate,
+        * then processes associated with weight-raised queues have a
+        * higher probability to get a request from the pool
+        * immediately (or at least soon) when they need one. Thus
+        * they have a higher probability to actually get a fraction
+        * of the device throughput proportional to their high
+        * weight. This is especially true with NCQ-capable drives,
+        * which enqueue several requests in advance, and further
+        * reorder internally-queued requests.
+        *
+        * For this reason, we force to false the value of
+        * idling_boosts_thr_without_issues if there are weight-raised
+        * busy queues. In this case, and if bfqq is not weight-raised,
+        * this guarantees that the device is not idled for bfqq (if,
+        * instead, bfqq is weight-raised, then idling will be
+        * guaranteed by another variable, see below). Combined with
+        * the timestamping rules of BFQ (see [1] for details), this
+        * behavior causes bfqq, and hence any sync non-weight-raised
+        * queue, to get a lower number of requests served, and thus
+        * to ask for a lower number of requests from the request
+        * pool, before the busy weight-raised queues get served
+        * again. This often mitigates starvation problems in the
+        * presence of heavy write workloads and NCQ, thereby
+        * guaranteeing a higher application and system responsiveness
+        * in these hostile scenarios.
+        */
+       idling_boosts_thr_without_issues = idling_boosts_thr &&
+               bfqd->wr_busy_queues == 0;
+
+       /*
+        * There is then a case where idling must be performed not
+        * for throughput concerns, but to preserve service
+        * guarantees.
+        *
+        * To introduce this case, we can note that allowing the drive
+        * to enqueue more than one request at a time, and hence
+        * delegating de facto final scheduling decisions to the
+        * drive's internal scheduler, entails loss of control on the
+        * actual request service order. In particular, the critical
+        * situation is when requests from different processes happen
+        * to be present, at the same time, in the internal queue(s)
+        * of the drive. In such a situation, the drive, by deciding
+        * the service order of the internally-queued requests, does
+        * determine also the actual throughput distribution among
+        * these processes. But the drive typically has no notion or
+        * concern about per-process throughput distribution, and
+        * makes its decisions only on a per-request basis. Therefore,
+        * the service distribution enforced by the drive's internal
+        * scheduler is likely to coincide with the desired
+        * device-throughput distribution only in a completely
+        * symmetric scenario where:
+        * (i)  each of these processes must get the same throughput as
+        *      the others;
+        * (ii) all these processes have the same I/O pattern
+        *      (either sequential or random).
+        * In fact, in such a scenario, the drive will tend to treat
+        * the requests of each of these processes in about the same
+        * way as the requests of the others, and thus to provide
+        * each of these processes with about the same throughput
+        * (which is exactly the desired throughput distribution). In
+        * contrast, in any asymmetric scenario, device idling is
+        * certainly needed to guarantee that bfqq receives its
+        * assigned fraction of the device throughput (see [1] for
+        * details).
+        *
+        * We address this issue by controlling, actually, only the
+        * symmetry sub-condition (i), i.e., provided that
+        * sub-condition (i) holds, idling is not performed,
+        * regardless of whether sub-condition (ii) holds. In other
+        * words, only if sub-condition (i) does not hold, then idling
+        * is allowed, and the device tends to be prevented from queueing
+        * many requests, possibly of several processes. The reason
+        * for not controlling also sub-condition (ii) is that we
+        * exploit preemption to preserve guarantees in case of
+        * symmetric scenarios, even if (ii) does not hold, as
+        * explained in the next two paragraphs.
+        *
+        * Even if a queue, say Q, is expired when it remains idle, Q
+        * can still preempt the new in-service queue if the next
+        * request of Q arrives soon (see the comments on
+        * bfq_bfqq_update_budg_for_activation). If all queues and
+        * groups have the same weight, this form of preemption,
+        * combined with the hole-recovery heuristic described in the
+        * comments on function bfq_bfqq_update_budg_for_activation,
+        * are enough to preserve a correct bandwidth distribution in
+        * the mid term, even without idling. In fact, even if not
+        * idling allows the internal queues of the device to contain
+        * many requests, and thus to reorder requests, we can rather
+        * safely assume that the internal scheduler still preserves a
+        * minimum of mid-term fairness. The motivation for using
+        * preemption instead of idling is that, by not idling,
+        * service guarantees are preserved without even minimally
+        * sacrificing throughput. In other words, both a high
+        * throughput and its desired distribution are obtained.
+        *
+        * More precisely, this preemption-based, idleless approach
+        * provides fairness in terms of IOPS, and not sectors per
+        * second. This can be seen with a simple example. Suppose
+        * that there are two queues with the same weight, but that
+        * the first queue receives requests of 8 sectors, while the
+        * second queue receives requests of 1024 sectors. In
+        * addition, suppose that each of the two queues contains at
+        * most one request at a time, which implies that each queue
+        * always remains idle after it is served. Finally, after
+        * remaining idle, each queue receives very quickly a new
+        * request. It follows that the two queues are served
+        * alternately, preempting each other if needed. This
+        * implies that, although both queues have the same weight,
+        * the queue with large requests receives a service that is
+        * 1024/8 times as high as the service received by the other
+        * queue.
+        *
+        * On the other hand, device idling is performed, and thus
+        * pure sector-domain guarantees are provided, for the
+        * following queues, which are likely to need stronger
+        * throughput guarantees: weight-raised queues, and queues
+        * with a higher weight than other queues. When such queues
+        * are active, sub-condition (i) is false, which triggers
+        * device idling.
+        *
+        * According to the above considerations, the next variable is
+        * true (only) if sub-condition (i) does not hold. To compute
+        * the value of this variable, we not only use the return value of
+        * the function bfq_symmetric_scenario(), but also check
+        * whether bfqq is being weight-raised, because
+        * bfq_symmetric_scenario() does not take into account also
+        * weight-raised queues (see comments on
+        * bfq_weights_tree_add()).
+        *
+        * As a side note, it is worth considering that the above
+        * device-idling countermeasures may however fail in the
+        * following unlucky scenario: if idling is (correctly)
+        * disabled in a time period during which all symmetry
+        * sub-conditions hold, and hence the device is allowed to
+        * enqueue many requests, but at some later point in time some
+        * sub-condition ceases to hold, then it may become impossible
+        * to let requests be served in the desired order until all
+        * the requests already queued in the device have been served.
+        */
+       asymmetric_scenario = bfqq->wr_coeff > 1 ||
+               !bfq_symmetric_scenario(bfqd);
+
+       /*
+        * Finally, there is a case where maximizing throughput is the
+        * best choice even if it may cause unfairness toward
+        * bfqq. Such a case is when bfqq became active in a burst of
+        * queue activations. Queues that became active during a large
+        * burst benefit only from throughput, as discussed in the
+        * comments on bfq_handle_burst. Thus, if bfqq became active
+        * in a burst and not idling the device maximizes throughput,
+        * then the device must not be idled, because not idling the
+        * device provides bfqq and all other queues in the burst with
+        * maximum benefit. Combining this and the above case, we can
+        * now establish when idling is actually needed to preserve
+        * service guarantees.
+        */
+       idling_needed_for_service_guarantees =
+               asymmetric_scenario && !bfq_bfqq_in_large_burst(bfqq);
+
+       /*
+        * We have now all the components we need to compute the return
+        * value of the function, which is true only if both the following
+        * conditions hold:
+        * 1) bfqq is sync, because idling makes sense only for sync queues;
+        * 2) idling either boosts the throughput (without issues), or
+        *    is necessary to preserve service guarantees.
+        */
+       return bfq_bfqq_sync(bfqq) &&
+               (idling_boosts_thr_without_issues ||
+                idling_needed_for_service_guarantees);
+}
+
+/*
+ * If the in-service queue is empty, idling is enabled (bfq_slice_idle
+ * is non-zero) and the function bfq_bfqq_may_idle returns true, then:
+ * 1) the queue must remain in service and cannot be expired, and
+ * 2) the device must be idled to wait for the possible arrival of a new
+ *    request for the queue.
+ * See the comments on the function bfq_bfqq_may_idle for the reasons
+ * why performing device idling is the best choice to boost the throughput
+ * and preserve service guarantees when bfq_bfqq_may_idle itself
+ * returns true.
+ */
+static bool bfq_bfqq_must_idle(struct bfq_queue *bfqq)
+{
+       struct bfq_data *bfqd = bfqq->bfqd;
+
+       return RB_EMPTY_ROOT(&bfqq->sort_list) && bfqd->bfq_slice_idle != 0 &&
+              bfq_bfqq_may_idle(bfqq);
+}
+
+/*
+ * Select a queue for service.  If we have a current queue in service,
+ * check whether to continue servicing it, or retrieve and set a new one.
+ *
+ * Returns the queue to dispatch from, or NULL if nothing can be
+ * dispatched right now: either no queue could be set in service, or the
+ * in-service queue has no request queued but is kept in service anyway
+ * (see the "No requests pending" case below).
+ */
+static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
+{
+       struct bfq_queue *bfqq;
+       struct request *next_rq;
+       /* reason used by the first jump to expire; overwritten further down */
+       enum bfqq_expiration reason = BFQQE_BUDGET_TIMEOUT;
+
+       bfqq = bfqd->in_service_queue;
+       if (!bfqq)
+               goto new_queue;
+
+       bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue");
+
+       /*
+        * Expire for budget timeout only if the queue is neither flagged
+        * as waiting for a request nor required to idle.
+        */
+       if (bfq_may_expire_for_budg_timeout(bfqq) &&
+           !bfq_bfqq_wait_request(bfqq) &&
+           !bfq_bfqq_must_idle(bfqq))
+               goto expire;
+
+check_queue:
+       /*
+        * This loop is rarely executed more than once. Even when it
+        * happens, it is much more convenient to re-execute this loop
+        * than to return NULL and trigger a new dispatch to get a
+        * request served.
+        */
+       next_rq = bfqq->next_rq;
+       /*
+        * If bfqq has requests queued and it has enough budget left to
+        * serve them, keep the queue, otherwise expire it.
+        */
+       if (next_rq) {
+               if (bfq_serv_to_charge(next_rq, bfqq) >
+                       bfq_bfqq_budget_left(bfqq)) {
+                       /*
+                        * Expire the queue for budget exhaustion,
+                        * which makes sure that the next budget is
+                        * enough to serve the next request, even if
+                        * it comes from the fifo expired path.
+                        */
+                       reason = BFQQE_BUDGET_EXHAUSTED;
+                       goto expire;
+               } else {
+                       /*
+                        * The idle timer may be pending because we may
+                        * not disable disk idling even when a new request
+                        * arrives.
+                        */
+                       if (bfq_bfqq_wait_request(bfqq)) {
+                               /*
+                                * If we get here: 1) at least a new request
+                                * has arrived but we have not disabled the
+                                * timer because the request was too small,
+                                * 2) then the block layer has unplugged
+                                * the device, causing the dispatch to be
+                                * invoked.
+                                *
+                                * Since the device is unplugged, now the
+                                * requests are probably large enough to
+                                * provide a reasonable throughput.
+                                * So we disable idling.
+                                */
+                               bfq_clear_bfqq_wait_request(bfqq);
+                               hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
+                               bfqg_stats_update_idle_time(bfqq_group(bfqq));
+                       }
+                       goto keep_queue;
+               }
+       }
+
+       /*
+        * No requests pending. However, if the in-service queue is idling
+        * for a new request, or has requests waiting for a completion and
+        * may idle after their completion, then keep it anyway.
+        */
+       if (bfq_bfqq_wait_request(bfqq) ||
+           (bfqq->dispatched != 0 && bfq_bfqq_may_idle(bfqq))) {
+               /*
+                * Only the local pointer is cleared: bfqd->in_service_queue
+                * is left untouched, while the caller is told that there
+                * is nothing to dispatch.
+                */
+               bfqq = NULL;
+               goto keep_queue;
+       }
+
+       reason = BFQQE_NO_MORE_REQUESTS;
+expire:
+       bfq_bfqq_expire(bfqd, bfqq, false, reason);
+new_queue:
+       /* pick the next queue and run the checks above on it as well */
+       bfqq = bfq_set_in_service_queue(bfqd);
+       if (bfqq) {
+               bfq_log_bfqq(bfqd, bfqq, "select_queue: checking new queue");
+               goto check_queue;
+       }
+keep_queue:
+       if (bfqq)
+               bfq_log_bfqq(bfqd, bfqq, "select_queue: returned this queue");
+       else
+               bfq_log(bfqd, "select_queue: no queue returned");
+
+       return bfqq;
+}
+
+/*
+ * Update the weight-raising state of bfqq. If bfqq is being weight-raised
+ * (wr_coeff > 1), end weight raising, or switch back to interactive
+ * weight raising, when the conditions checked below hold. Then, if the
+ * current weight of the entity no longer matches the weight-raising
+ * state, update the weight of the entity right away.
+ */
+static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+       struct bfq_entity *entity = &bfqq->entity;
+
+       if (bfqq->wr_coeff > 1) { /* queue is being weight-raised */
+               bfq_log_bfqq(bfqd, bfqq,
+                       "raising period dur %u/%u msec, old coeff %u, w %d(%d)",
+                       jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish),
+                       jiffies_to_msecs(bfqq->wr_cur_max_time),
+                       bfqq->wr_coeff,
+                       bfqq->entity.weight, bfqq->entity.orig_weight);
+
+               if (entity->prio_changed)
+                       bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change");
+
+               /*
+                * If the queue was activated in a burst, or too much
+                * time has elapsed from the beginning of this
+                * weight-raising period, then end weight raising.
+                */
+               if (bfq_bfqq_in_large_burst(bfqq))
+                       bfq_bfqq_end_wr(bfqq);
+               else if (time_is_before_jiffies(bfqq->last_wr_start_finish +
+                                               bfqq->wr_cur_max_time)) {
+                       /*
+                        * The current period is over. End weight raising
+                        * unless the period had the duration
+                        * bfq_wr_rt_max_time (presumably a soft real-time
+                        * period) and less than bfq_wr_duration() has
+                        * elapsed since wr_start_at_switch_to_srt.
+                        */
+                       if (bfqq->wr_cur_max_time != bfqd->bfq_wr_rt_max_time ||
+                       time_is_before_jiffies(bfqq->wr_start_at_switch_to_srt +
+                                              bfq_wr_duration(bfqd)))
+                               bfq_bfqq_end_wr(bfqq);
+                       else {
+                               /* switch back to interactive wr */
+                               bfqq->wr_coeff = bfqd->bfq_wr_coeff;
+                               bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+                               bfqq->last_wr_start_finish =
+                                       bfqq->wr_start_at_switch_to_srt;
+                               bfqq->entity.prio_changed = 1;
+                       }
+               }
+       }
+       /* Update weight both if it must be raised and if it must be lowered */
+       if ((entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1))
+               __bfq_entity_update_weight_prio(
+                       bfq_entity_service_tree(entity),
+                       entity);
+}
+
+/*
+ * Take the next request of bfqq, charge bfqq for the service it
+ * requires and return the request, ready to be dispatched.
+ */
+static struct request *bfq_dispatch_rq_from_bfqq(struct bfq_data *bfqd,
+                                                struct bfq_queue *bfqq)
+{
+       struct request *rq = bfqq->next_rq;
+       unsigned long charge = bfq_serv_to_charge(rq, bfqq);
+
+       bfq_bfqq_served(bfqq, charge);
+       bfq_dispatch_remove(bfqd->queue, rq);
+
+       /*
+        * Should weight raising be over for bfqq, the call below updates
+        * the weight of bfqq at once, instead of at the next
+        * activation. On expiration, bfqq is then timestamped as if it
+        * had never been weight-raised during this service slot, even
+        * though part, or even most, of the service was received as a
+        * weight-raised queue. The resulting inflation of the
+        * timestamps is beneficial: bfqq becomes more willing to leave
+        * the device immediately to possible other weight-raised
+        * queues.
+        */
+       bfq_update_wr_data(bfqd, bfqq);
+
+       /*
+        * A CLASS_IDLE queue must not keep the device while other
+        * queues are waiting for service: expire it, pretending that
+        * its budget is exhausted.
+        */
+       if (bfqd->busy_queues > 1 && bfq_class_idle(bfqq))
+               bfq_bfqq_expire(bfqd, bfqq, false, BFQQE_BUDGET_EXHAUSTED);
+
+       return rq;
+}
+
+static bool bfq_has_work(struct blk_mq_hw_ctx *hctx)
+{
+       struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
+
+       /*
+        * No lock is taken here: at worst, a race on bfqd->busy_queues
+        * results in one dispatch invoked for nothing.
+        */
+       if (!list_empty_careful(&bfqd->dispatch))
+               return true;
+
+       return bfqd->busy_queues > 0;
+}
+
+/*
+ * Pick the next request to dispatch. Requests sitting in bfqd->dispatch
+ * are served first; otherwise a request is taken from the queue chosen
+ * by bfq_select_queue(). Returns NULL if there is nothing to dispatch.
+ *
+ * bfq_dispatch_request() invokes this function with bfqd->lock held.
+ */
+static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
+{
+       struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
+       struct request *rq = NULL;
+       struct bfq_queue *bfqq = NULL;
+
+       if (!list_empty(&bfqd->dispatch)) {
+               rq = list_first_entry(&bfqd->dispatch, struct request,
+                                     queuelist);
+               list_del_init(&rq->queuelist);
+
+               bfqq = RQ_BFQQ(rq);
+
+               if (bfqq) {
+                       /*
+                        * Increment counters here, because this
+                        * dispatch does not follow the standard
+                        * dispatch flow (where counters are
+                        * incremented)
+                        */
+                       bfqq->dispatched++;
+
+                       goto inc_in_driver_start_rq;
+               }
+
+               /*
+                * We exploit the put_rq_private hook to decrement
+                * rq_in_driver, but put_rq_private will not be
+                * invoked on this request. So, to avoid unbalance,
+                * just start this request, without incrementing
+                * rq_in_driver. As a negative consequence,
+                * rq_in_driver is deceptively lower than it should be
+                * while this request is in service. This may cause
+                * bfq_schedule_dispatch to be invoked uselessly.
+                *
+                * As for implementing an exact solution, the
+                * put_request hook, if defined, is probably invoked
+                * also on this request. So, by exploiting this hook,
+                * we could 1) increment rq_in_driver here, and 2)
+                * decrement it in put_request. Such a solution would
+                * let the value of the counter be always accurate,
+                * but it would entail using an extra interface
+                * function. This cost seems higher than the benefit,
+                * being the frequency of non-elevator-private
+                * requests very low.
+                */
+               goto start_rq;
+       }
+
+       bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues);
+
+       if (bfqd->busy_queues == 0)
+               goto exit;
+
+       /*
+        * Force device to serve one request at a time if
+        * strict_guarantees is true. Forcing this service scheme is
+        * currently the ONLY way to guarantee that the request
+        * service order enforced by the scheduler is respected by a
+        * queueing device. Otherwise the device is free even to make
+        * some unlucky request wait for as long as the device
+        * wishes.
+        *
+        * Of course, serving one request at a time may cause loss of
+        * throughput.
+        */
+       if (bfqd->strict_guarantees && bfqd->rq_in_driver > 0)
+               goto exit;
+
+       bfqq = bfq_select_queue(bfqd);
+       if (!bfqq)
+               goto exit;
+
+       rq = bfq_dispatch_rq_from_bfqq(bfqd, bfqq);
+
+       /*
+        * The two labels below are also entered from the bfqd->dispatch
+        * path above, where rq is known to be non-NULL.
+        */
+       if (rq) {
+inc_in_driver_start_rq:
+               bfqd->rq_in_driver++;
+start_rq:
+               rq->rq_flags |= RQF_STARTED;
+       }
+exit:
+       return rq;
+}
+
+/*
+ * Locked wrapper around __bfq_dispatch_request(): returns the request
+ * to dispatch for hctx, or NULL if there is none.
+ */
+static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
+{
+       struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
+       struct request *picked;
+
+       /* the whole selection runs under the scheduler lock */
+       spin_lock_irq(&bfqd->lock);
+       picked = __bfq_dispatch_request(hctx);
+       spin_unlock_irq(&bfqd->lock);
+
+       return picked;
+}
+
+/*
+ * Drop one reference to bfqq, and free bfqq if that was the last one.
+ *
+ * References are held by the task owning the queue (dropped when the
+ * task exits) and by each request in flight on the queue (dropped when
+ * the request is freed).
+ *
+ * The scheduler lock must be held by the caller. bfqq must not be used
+ * any longer after this function has been invoked on it.
+ */
+void bfq_put_queue(struct bfq_queue *bfqq)
+{
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+       /* looked up in advance: bfqq may be freed before bfqg is put */
+       struct bfq_group *bfqg = bfqq_group(bfqq);
+#endif
+
+       if (bfqq->bfqd)
+               bfq_log_bfqq(bfqq->bfqd, bfqq, "put_queue: %p %d",
+                            bfqq, bfqq->ref);
+
+       if (--bfqq->ref != 0)
+               return;
+
+       if (bfq_bfqq_sync(bfqq)) {
+               /*
+                * Destroying this queue does not change the fact that
+                * the queue may have been activated during the current
+                * burst. So the queue, which is about to disappear, has
+                * to be removed from the burst list if it is there, but
+                * the burst size must not be decremented.
+                */
+               hlist_del_init(&bfqq->burst_list_node);
+       }
+
+       kmem_cache_free(bfq_pool, bfqq);
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+       bfqg_put(bfqg);
+#endif
+}
+
+/*
+ * Drop the references that bfqq holds on the queues it was scheduled to
+ * merge with.
+ */
+static void bfq_put_cooperator(struct bfq_queue *bfqq)
+{
+       struct bfq_queue *cur, *following;
+
+       /*
+        * Walk the merge chain starting at bfqq->new_bfqq and put every
+        * queue found on it (see bfq_setup_merge and bfq_merge_bfqqs for
+        * where these references are taken). The walk stops at the end
+        * of the chain, or as soon as the chain leads back to bfqq.
+        */
+       for (cur = bfqq->new_bfqq; cur && cur != bfqq; cur = following) {
+               /* read the link first: cur may be freed by the put */
+               following = cur->new_bfqq;
+               bfq_put_queue(cur);
+       }
+}
+
+/*
+ * Release bfqq on behalf of an exiting process: drop the references
+ * held on the queues of its merge chain and the process reference on
+ * bfqq itself. bfq_exit_icq_bfqq() calls this with bfqd->lock held.
+ */
+static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+       /* a queue being released must not remain the in-service queue */
+       if (bfqq == bfqd->in_service_queue) {
+               __bfq_bfqq_expire(bfqd, bfqq);
+               bfq_schedule_dispatch(bfqd);
+       }
+
+       /* logged before the puts below, which may free bfqq */
+       bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, bfqq->ref);
+
+       bfq_put_cooperator(bfqq);
+
+       bfq_put_queue(bfqq); /* release process reference */
+}
+
+/*
+ * Release the sync or async queue (according to is_sync) attached to
+ * bic, if there is one, and detach it from bic.
+ */
+static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync)
+{
+       struct bfq_queue *bfqq = bic_to_bfqq(bic, is_sync);
+       struct bfq_data *bfqd;
+       unsigned long flags;
+
+       if (!bfqq)
+               return;
+
+       bfqd = bfqq->bfqd; /* NULL if scheduler already exited */
+       if (!bfqd)
+               return;
+
+       spin_lock_irqsave(&bfqd->lock, flags);
+       bfq_exit_bfqq(bfqd, bfqq);
+       bic_set_bfqq(bic, NULL, is_sync);
+       spin_unlock_irqrestore(&bfqd->lock, flags);
+}
+
+/* Release both queues of the bic embedding icq: first sync, then async. */
+static void bfq_exit_icq(struct io_cq *icq)
+{
+       struct bfq_io_cq *exiting_bic = icq_to_bic(icq);
+
+       bfq_exit_icq_bfqq(exiting_bic, true);   /* sync queue */
+       bfq_exit_icq_bfqq(exiting_bic, false);  /* async queue */
+}
+
+/*
+ * Update the entity prio values; note that the new values will not
+ * be used until the next (re)activation.
+ *
+ * The new priority class and level of bfqq are derived from bic->ioprio
+ * (or from the CPU scheduling settings of the current task if no valid
+ * class is set), the new weight of the entity is computed from the new
+ * level, and the entity is flagged as having a pending priority change.
+ * Nothing is done if bfqq is not attached to a bfq_data any longer.
+ */
+static void
+bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
+{
+       struct task_struct *tsk = current;
+       int ioprio_class;
+       struct bfq_data *bfqd = bfqq->bfqd;
+
+       if (!bfqd)
+               return;
+
+       ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
+       switch (ioprio_class) {
+       default:
+               dev_err(bfqd->queue->backing_dev_info->dev,
+                       "bfq: bad prio class %d\n", ioprio_class);
+               /* fall through: handle a bad class as if no class was set */
+       case IOPRIO_CLASS_NONE:
+               /*
+                * No prio set, inherit CPU scheduling settings.
+                */
+               bfqq->new_ioprio = task_nice_ioprio(tsk);
+               bfqq->new_ioprio_class = task_nice_ioclass(tsk);
+               break;
+       case IOPRIO_CLASS_RT:
+               bfqq->new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
+               bfqq->new_ioprio_class = IOPRIO_CLASS_RT;
+               break;
+       case IOPRIO_CLASS_BE:
+               bfqq->new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
+               bfqq->new_ioprio_class = IOPRIO_CLASS_BE;
+               break;
+       case IOPRIO_CLASS_IDLE:
+               bfqq->new_ioprio_class = IOPRIO_CLASS_IDLE;
+               bfqq->new_ioprio = 7;
+               bfq_clear_bfqq_idle_window(bfqq);
+               break;
+       }
+
+       if (bfqq->new_ioprio >= IOPRIO_BE_NR) {
+               pr_crit("bfq_set_next_ioprio_data: new_ioprio %d\n",
+                       bfqq->new_ioprio);
+               /*
+                * IOPRIO_BE_NR itself is out of range according to the
+                * check above, so clamp to the last valid level.
+                */
+               bfqq->new_ioprio = IOPRIO_BE_NR - 1;
+       }
+
+       bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->new_ioprio);
+       bfqq->entity.prio_changed = 1;
+}
+
+static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
+                                      struct bio *bio, bool is_sync,
+                                      struct bfq_io_cq *bic);
+
+/*
+ * Bring the queues attached to bic in line with the current I/O
+ * priority of the io context: the async queue is replaced, while the
+ * sync queue gets its next priority data recomputed.
+ */
+static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio)
+{
+       struct bfq_data *bfqd = bic_to_bfqd(bic);
+       struct bfq_queue *async_q, *sync_q;
+       int ioprio = bic->icq.ioc->ioprio;
+
+       /*
+        * Nothing to do if the bic has no bfq_data behind it, or if the
+        * priority cached in the bic is already the current one. No
+        * lock is taken in this function, so the early returns have
+        * nothing to release.
+        */
+       if (unlikely(!bfqd))
+               return;
+       if (likely(bic->ioprio == ioprio))
+               return;
+
+       bic->ioprio = ioprio;
+
+       async_q = bic_to_bfqq(bic, false);
+       if (async_q) {
+               /* drop the process reference on the old async queue ... */
+               bfq_put_queue(async_q);
+               /* ... and attach the async queue for the new priority */
+               async_q = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic);
+               bic_set_bfqq(bic, async_q, false);
+       }
+
+       sync_q = bic_to_bfqq(bic, true);
+       if (sync_q)
+               bfq_set_next_ioprio_data(sync_q, bic);
+}
+
+/*
+ * Initialize the fields of bfqq, a queue of bfqd created for the process
+ * with the given pid. bic, if non-NULL, supplies the I/O priority data;
+ * is_sync tells whether the queue is a sync or an async one.
+ *
+ * The reference counter is set to zero: taking references is up to the
+ * caller (see bfq_get_queue).
+ */
+static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+                         struct bfq_io_cq *bic, pid_t pid, int is_sync)
+{
+       RB_CLEAR_NODE(&bfqq->entity.rb_node);
+       INIT_LIST_HEAD(&bfqq->fifo);
+       INIT_HLIST_NODE(&bfqq->burst_list_node);
+
+       bfqq->ref = 0;
+       /*
+        * Must be set before the next call: bfq_set_next_ioprio_data
+        * does nothing if bfqq->bfqd is NULL.
+        */
+       bfqq->bfqd = bfqd;
+
+       if (bic)
+               bfq_set_next_ioprio_data(bfqq, bic);
+
+       if (is_sync) {
+               /* no idle window for queues in the idle class */
+               if (!bfq_class_idle(bfqq))
+                       bfq_mark_bfqq_idle_window(bfqq);
+               bfq_mark_bfqq_sync(bfqq);
+               bfq_mark_bfqq_just_created(bfqq);
+       } else
+               bfq_clear_bfqq_sync(bfqq);
+
+       /*
+        * set end request to minus infinity from now
+        * NOTE(review): the value actually stored is the current time
+        * plus one ns; confirm how this maps to "minus infinity".
+        */
+       bfqq->ttime.last_end_request = ktime_get_ns() + 1;
+
+       bfq_mark_bfqq_IO_bound(bfqq);
+
+       bfqq->pid = pid;
+
+       /* Tentative initial value to trade off between thr and lat */
+       bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3;
+       bfqq->budget_timeout = bfq_smallest_from_now();
+
+       /* wr_coeff == 1: the queue starts as not weight-raised */
+       bfqq->wr_coeff = 1;
+       bfqq->last_wr_start_finish = jiffies;
+       bfqq->wr_start_at_switch_to_srt = bfq_smallest_from_now();
+       bfqq->split_time = bfq_smallest_from_now();
+
+       /*
+        * Set to the value for which bfqq will not be deemed as
+        * soft rt when it becomes backlogged.
+        */
+       bfqq->soft_rt_next_start = bfq_greatest_from_now();
+
+       /* first request is almost certainly seeky */
+       bfqq->seek_history = 1;
+}
+
+/*
+ * Return the address of the slot of bfqg that holds the shared async
+ * queue for the given priority class and level, or NULL if the class is
+ * not a known one.
+ */
+static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd,
+                                              struct bfq_group *bfqg,
+                                              int ioprio_class, int ioprio)
+{
+       if (ioprio_class == IOPRIO_CLASS_IDLE)
+               return &bfqg->async_idle_bfqq;
+
+       if (ioprio_class == IOPRIO_CLASS_RT)
+               return &bfqg->async_bfqq[0][ioprio];
+
+       /* no class set: use the best-effort queue at the default level */
+       if (ioprio_class == IOPRIO_CLASS_NONE)
+               return &bfqg->async_bfqq[1][IOPRIO_NORM];
+
+       if (ioprio_class == IOPRIO_CLASS_BE)
+               return &bfqg->async_bfqq[1][ioprio];
+
+       return NULL;
+}
+
+/*
+ * Return a queue for the process owning bic, with one more (process)
+ * reference taken on it.
+ *
+ * Async queues are shared: the queue stored in the group for the
+ * priority class and level of bic is reused if it exists. In all the
+ * other cases a new queue is allocated; a newly allocated async queue
+ * is stored in the group, with an extra reference.
+ *
+ * bfqd->oom_bfqq is returned if no group can be found or if the
+ * allocation fails, so the return value is never NULL.
+ */
+static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
+                                      struct bio *bio, bool is_sync,
+                                      struct bfq_io_cq *bic)
+{
+       const int ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
+       const int ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
+       struct bfq_queue **async_bfqq = NULL;
+       struct bfq_queue *bfqq;
+       struct bfq_group *bfqg;
+
+       rcu_read_lock();
+
+       bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio));
+       if (!bfqg) {
+               bfqq = &bfqd->oom_bfqq;
+               goto out;
+       }
+
+       if (!is_sync) {
+               /*
+                * NOTE(review): bfq_async_queue_prio returns NULL for an
+                * unknown priority class, and the result is dereferenced
+                * unchecked; confirm that bic->ioprio always carries a
+                * valid class here.
+                */
+               async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class,
+                                                 ioprio);
+               bfqq = *async_bfqq;
+               if (bfqq)
+                       goto out;
+       }
+
+       /* GFP_NOWAIT: the allocation may fail, see the fallback below */
+       bfqq = kmem_cache_alloc_node(bfq_pool,
+                                    GFP_NOWAIT | __GFP_ZERO | __GFP_NOWARN,
+                                    bfqd->queue->node);
+
+       if (bfqq) {
+               bfq_init_bfqq(bfqd, bfqq, bic, current->pid,
+                             is_sync);
+               bfq_init_entity(&bfqq->entity, bfqg);
+               bfq_log_bfqq(bfqd, bfqq, "allocated");
+       } else {
+               bfqq = &bfqd->oom_bfqq;
+               bfq_log_bfqq(bfqd, bfqq, "using oom bfqq");
+               goto out;
+       }
+
+       /*
+        * Pin the queue now that it's allocated, scheduler exit will
+        * prune it.
+        */
+       if (async_bfqq) {
+               bfqq->ref++; /*
+                             * Extra group reference, w.r.t. sync
+                             * queue. This extra reference is removed
+                             * only if bfqq->bfqg disappears, to
+                             * guarantee that this queue is not freed
+                             * until its group goes away.
+                             */
+               bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d",
+                            bfqq, bfqq->ref);
+               *async_bfqq = bfqq;
+       }
+
+out:
+       bfqq->ref++; /* get a process reference to this queue */
+       bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, bfqq->ref);
+       rcu_read_unlock();
+       return bfqq;
+}
+
+/*
+ * Fold a new think-time sample, i.e., the time elapsed since the last
+ * request completion recorded for bfqq, into the think-time statistics
+ * of bfqq.
+ */
+static void bfq_update_io_thinktime(struct bfq_data *bfqd,
+                                   struct bfq_queue *bfqq)
+{
+       struct bfq_ttime *tt = &bfqq->ttime;
+       u64 delta = ktime_get_ns() - tt->last_end_request;
+
+       /* a single sample counts for at most twice the idle slice */
+       delta = min_t(u64, delta, 2ULL * bfqd->bfq_slice_idle);
+
+       /* history weighs 7/8, the new sample enters scaled by 256 */
+       tt->ttime_samples = (7*tt->ttime_samples + 256) / 8;
+       tt->ttime_total = div_u64(7*tt->ttime_total + 256*delta,  8);
+       tt->ttime_mean = div64_ul(tt->ttime_total + 128, tt->ttime_samples);
+}
+
+/*
+ * Push one more sample into the seek history of bfqq: the new
+ * low-order bit tells whether rq is deemed seeky with respect to the
+ * position of the last request of bfqq.
+ */
+static void
+bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+                      struct request *rq)
+{
+       bool seeky;
+
+       /* make room for the new sample in the low-order bit */
+       bfqq->seek_history <<= 1;
+
+       seeky = get_sdist(bfqq->last_request_pos, rq) > BFQQ_SEEK_THR;
+       /* on a non-rotational device, only small requests count as seeky */
+       if (seeky && blk_queue_nonrot(bfqd->queue))
+               seeky = blk_rq_sectors(rq) < BFQQ_SECT_THR_NONROT;
+
+       bfqq->seek_history |= seeky;
+}
+
+/*
+ * Disable idle window if the process thinks too long or seeks so much that
+ * it doesn't matter.
+ *
+ * The idle_window flag of bfqq is recomputed and then set or cleared
+ * accordingly. Async queues, queues in the idle class and queues whose
+ * split_time is too recent are left untouched.
+ */
+static void bfq_update_idle_window(struct bfq_data *bfqd,
+                                  struct bfq_queue *bfqq,
+                                  struct bfq_io_cq *bic)
+{
+       int enable_idle;
+
+       /* Don't idle for async or idle io prio class. */
+       if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq))
+               return;
+
+       /* Idle window just restored, statistics are meaningless. */
+       if (time_is_after_eq_jiffies(bfqq->split_time +
+                                    bfqd->bfq_wr_min_idle_time))
+               return;
+
+       /* start from the current setting, kept if no rule below applies */
+       enable_idle = bfq_bfqq_idle_window(bfqq);
+
+       /*
+        * Never idle if the io context has no active reference left, if
+        * idling is disabled (bfq_slice_idle == 0), or if a seeky,
+        * non-weight-raised queue is served while hw_tag is set.
+        */
+       if (atomic_read(&bic->icq.ioc->active_ref) == 0 ||
+           bfqd->bfq_slice_idle == 0 ||
+               (bfqd->hw_tag && BFQQ_SEEKY(bfqq) &&
+                       bfqq->wr_coeff == 1))
+               enable_idle = 0;
+       else if (bfq_sample_valid(bfqq->ttime.ttime_samples)) {
+               /*
+                * Valid think-time statistics: idle unless the mean think
+                * time exceeds the idle slice and the queue is not
+                * weight-raised.
+                */
+               if (bfqq->ttime.ttime_mean > bfqd->bfq_slice_idle &&
+                       bfqq->wr_coeff == 1)
+                       enable_idle = 0;
+               else
+                       enable_idle = 1;
+       }
+       bfq_log_bfqq(bfqd, bfqq, "update_idle_window: enable_idle %d",
+               enable_idle);
+
+       if (enable_idle)
+               bfq_mark_bfqq_idle_window(bfqq);
+       else
+               bfq_clear_bfqq_idle_window(bfqq);
+}
+
+/*
+ * Called when a new fs request (rq) is added to bfqq.  Check if there's
+ * something we should do about it.
+ *
+ * The think-time and seek statistics of bfqq are updated, as well as,
+ * possibly, its idle window. If the device is being idled to wait for
+ * a request of bfqq, idling may also be stopped, and bfqq may be expired
+ * in case of budget timeout.
+ */
+static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+                           struct request *rq)
+{
+       struct bfq_io_cq *bic = RQ_BIC(rq);
+
+       if (rq->cmd_flags & REQ_META)
+               bfqq->meta_pending++;
+
+       bfq_update_io_thinktime(bfqd, bfqq);
+       bfq_update_io_seektime(bfqd, bfqq, rq);
+       /*
+        * Re-evaluate the idle window only if bfqq has received more than
+        * one eighth of the maximum budget in service, or is not seeky.
+        */
+       if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 ||
+           !BFQQ_SEEKY(bfqq))
+               bfq_update_idle_window(bfqd, bfqq, bic);
+
+       bfq_log_bfqq(bfqd, bfqq,
+                    "rq_enqueued: idle_window=%d (seeky %d)",
+                    bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq));
+
+       /* remember where rq ends, for the next seek-distance sample */
+       bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
+
+       if (bfqq == bfqd->in_service_queue && bfq_bfqq_wait_request(bfqq)) {
+               bool small_req = bfqq->queued[rq_is_sync(rq)] == 1 &&
+                                blk_rq_sectors(rq) < 32;
+               bool budget_timeout = bfq_bfqq_budget_timeout(bfqq);
+
+               /*
+                * There is just this request queued: if the request
+                * is small and the queue is not to be expired, then
+                * just exit.
+                *
+                * In this way, if the device is being idled to wait
+                * for a new request from the in-service queue, we
+                * avoid unplugging the device and committing the
+                * device to serve just a small request. On the
+                * contrary, we wait for the block layer to decide
+                * when to unplug the device: hopefully, new requests
+                * will be merged to this one quickly, then the device
+                * will be unplugged and larger requests will be
+                * dispatched.
+                */
+               if (small_req && !budget_timeout)
+                       return;
+
+               /*
+                * A large enough request arrived, or the queue is to
+                * be expired: in both cases disk idling is to be
+                * stopped, so clear wait_request flag and reset
+                * timer.
+                */
+               bfq_clear_bfqq_wait_request(bfqq);
+               hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
+               bfqg_stats_update_idle_time(bfqq_group(bfqq));
+
+               /*
+                * The queue is not empty, because a new request just
+                * arrived. Hence we can safely expire the queue, in
+                * case of budget timeout, without risking that the
+                * timestamps of the queue are not updated correctly.
+                * See [1] for more details.
+                */
+               if (budget_timeout)
+                       bfq_bfqq_expire(bfqd, bfqq, false,
+                                       BFQQE_BUDGET_TIMEOUT);
+       }
+}
+
+/*
+ * Insert rq into its bfq_queue. If bfq_setup_cooperator returns a queue
+ * to merge with, rq is first redirected to that queue, moving the
+ * reference held by rq from the old queue to the new one.
+ *
+ * bfq_insert_request() calls this function with bfqd->lock held.
+ */
+static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
+{
+       struct bfq_queue *bfqq = RQ_BFQQ(rq),
+               *new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true);
+
+       if (new_bfqq) {
+               /*
+                * If the bic does not point to bfqq any longer, use the
+                * sync queue the bic points to now as the destination.
+                */
+               if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq)
+                       new_bfqq = bic_to_bfqq(RQ_BIC(rq), 1);
+               /*
+                * Release the request's reference to the old bfqq
+                * and make sure one is taken to the shared queue.
+                */
+               new_bfqq->allocated++;
+               bfqq->allocated--;
+               new_bfqq->ref++;
+               bfq_clear_bfqq_just_created(bfqq);
+               /*
+                * If the bic associated with the process
+                * issuing this request still points to bfqq
+                * (and thus has not been already redirected
+                * to new_bfqq or even some other bfq_queue),
+                * then complete the merge and redirect it to
+                * new_bfqq.
+                */
+               if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq)
+                       bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
+                                       bfqq, new_bfqq);
+               /*
+                * rq is about to be enqueued into new_bfqq,
+                * release rq reference on bfqq
+                */
+               bfq_put_queue(bfqq);
+               /* from now on rq belongs to new_bfqq */
+               rq->elv.priv[1] = new_bfqq;
+               bfqq = new_bfqq;
+       }
+
+       bfq_add_request(rq);
+
+       /* time after which rq is deemed expired in the fifo of bfqq */
+       rq->fifo_time = ktime_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
+       list_add_tail(&rq->queuelist, &bfqq->fifo);
+
+       bfq_rq_enqueued(bfqd, bfqq, rq);
+}
+
+/*
+ * Insert rq into the scheduler, unless it can be merged with a request
+ * already queued. Requests inserted at head and passthrough requests go
+ * straight to the bfqd->dispatch list; all the others are queued in
+ * their bfq_queue.
+ */
+static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
+                              bool at_head)
+{
+       struct request_queue *q = hctx->queue;
+       struct bfq_data *bfqd = q->elevator->elevator_data;
+       bool merged;
+
+       spin_lock_irq(&bfqd->lock);
+       merged = blk_mq_sched_try_insert_merge(q, rq);
+       spin_unlock_irq(&bfqd->lock);
+       if (merged)
+               return;
+
+       /* invoked with the scheduler lock released */
+       blk_mq_sched_request_inserted(rq);
+
+       spin_lock_irq(&bfqd->lock);
+       if (at_head) {
+               list_add(&rq->queuelist, &bfqd->dispatch);
+       } else if (blk_rq_is_passthrough(rq)) {
+               list_add_tail(&rq->queuelist, &bfqd->dispatch);
+       } else {
+               __bfq_insert_request(bfqd, rq);
+
+               /* make rq available as a candidate for future merges */
+               if (rq_mergeable(rq)) {
+                       elv_rqhash_add(q, rq);
+                       if (!q->last_merge)
+                               q->last_merge = rq;
+               }
+       }
+       spin_unlock_irq(&bfqd->lock);
+}
+
+/*
+ * Insert, one at a time and in list order, all the requests in list,
+ * which is empty on return.
+ */
+static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx,
+                               struct list_head *list, bool at_head)
+{
+       struct request *head_rq;
+
+       for (;;) {
+               if (list_empty(list))
+                       break;
+
+               /* unlink the first request before handing it over */
+               head_rq = list_first_entry(list, struct request, queuelist);
+               list_del_init(&head_rq->queuelist);
+               bfq_insert_request(hctx, head_rq, at_head);
+       }
+}
+
+/*
+ * Update the estimate, stored in bfqd->hw_tag, of whether the device
+ * queues requests internally, using the number of requests in the
+ * driver as input.
+ */
+static void bfq_update_hw_tag(struct bfq_data *bfqd)
+{
+       /* keep track of the highest number of requests in the driver */
+       bfqd->max_rq_in_driver = max_t(int, bfqd->max_rq_in_driver,
+                                      bfqd->rq_in_driver);
+
+       /*
+        * Nothing more to do if hw_tag is already set. Otherwise the
+        * sample is valid only if the number of outstanding requests
+        * is large enough to allow a queueing behavior. Note that the
+        * sum is not exact, as it does not take deactivated requests
+        * into account.
+        */
+       if (bfqd->hw_tag == 1 ||
+           bfqd->rq_in_driver + bfqd->queued < BFQ_HW_QUEUE_THRESHOLD)
+               return;
+
+       /* decide only after enough valid samples have been collected */
+       if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES)
+               return;
+
+       bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD;
+       bfqd->max_rq_in_driver = 0;
+       bfqd->hw_tag_samples = 0;
+}
+
+/*
+ * Account for the completion of a request of bfqq. Invoked by
+ * bfq_put_rq_private() with bfqd->lock held.
+ *
+ * Updates the in-flight counters and the completion timestamps, has
+ * the peak-rate observation interval reset if the request was served
+ * at a very low rate, and finally decides whether the in-service queue
+ * has to start idling or has to be expired.
+ */
+static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
+{
+       u64 now_ns;
+       u32 delta_us;
+
+       bfq_update_hw_tag(bfqd);
+
+       bfqd->rq_in_driver--;
+       bfqq->dispatched--;
+
+       if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) {
+               /*
+                * Set budget_timeout (which we overload to store the
+                * time at which the queue remains with no backlog and
+                * no outstanding request; used by the weight-raising
+                * mechanism).
+                */
+               bfqq->budget_timeout = jiffies;
+
+               bfq_weights_tree_remove(bfqd, &bfqq->entity,
+                                       &bfqd->queue_weights_tree);
+       }
+
+       now_ns = ktime_get_ns();
+
+       bfqq->ttime.last_end_request = now_ns;
+
+       /*
+        * Using us instead of ns, to get a reasonable precision in
+        * computing rate in next check.
+        */
+       delta_us = div_u64(now_ns - bfqd->last_completion, NSEC_PER_USEC);
+
+       /*
+        * If the request took rather long to complete, and, according
+        * to the maximum request size recorded, this completion latency
+        * implies that the request was certainly served at a very low
+        * rate (less than 1M sectors/sec), then the whole observation
+        * interval that lasts up to this time instant cannot be a
+        * valid time interval for computing a new peak rate.  Invoke
+        * bfq_update_rate_reset to have the following three steps
+        * taken:
+        * - close the observation interval at the last (previous)
+        *   request dispatch or completion
+        * - compute rate, if possible, for that observation interval
+        * - reset to zero samples, which will trigger a proper
+        *   re-initialization of the observation interval on next
+        *   dispatch
+        *
+        * The first condition below also guarantees that delta_us is
+        * not zero when it is used as a divisor.
+        */
+       if (delta_us > BFQ_MIN_TT/NSEC_PER_USEC &&
+          (bfqd->last_rq_max_size<<BFQ_RATE_SHIFT)/delta_us <
+                       1UL<<(BFQ_RATE_SHIFT - 10))
+               bfq_update_rate_reset(bfqd, NULL);
+       bfqd->last_completion = now_ns;
+
+       /*
+        * If we are waiting to discover whether the request pattern
+        * of the task associated with the queue is actually
+        * isochronous, and both requisites for this condition to hold
+        * are now satisfied, then compute soft_rt_next_start (see the
+        * comments on the function bfq_bfqq_softrt_next_start()). We
+        * schedule this delayed check when bfqq expires, if it still
+        * has in-flight requests.
+        */
+       if (bfq_bfqq_softrt_update(bfqq) && bfqq->dispatched == 0 &&
+           RB_EMPTY_ROOT(&bfqq->sort_list))
+               bfqq->soft_rt_next_start =
+                       bfq_bfqq_softrt_next_start(bfqd, bfqq);
+
+       /*
+        * If this is the in-service queue, check if it needs to be expired,
+        * or if we want to idle in case it has no pending requests.
+        */
+       if (bfqd->in_service_queue == bfqq) {
+               if (bfqq->dispatched == 0 && bfq_bfqq_must_idle(bfqq)) {
+                       bfq_arm_slice_timer(bfqd);
+                       return;
+               } else if (bfq_may_expire_for_budg_timeout(bfqq))
+                       bfq_bfqq_expire(bfqd, bfqq, false,
+                                       BFQQE_BUDGET_TIMEOUT);
+               else if (RB_EMPTY_ROOT(&bfqq->sort_list) &&
+                        (bfqq->dispatched == 0 ||
+                         !bfq_bfqq_may_idle(bfqq)))
+                       bfq_bfqq_expire(bfqd, bfqq, false,
+                                       BFQQE_NO_MORE_REQUESTS);
+       }
+}
+
+/*
+ * Common tail of bfq_put_rq_private(): decrement the counter of the
+ * requests allocated for bfqq and hand the queue to bfq_put_queue().
+ */
+static void bfq_put_rq_priv_body(struct bfq_queue *bfqq)
+{
+       --bfqq->allocated;
+       bfq_put_queue(bfqq);
+}
+
+/*
+ * Release the scheduler-private data attached to rq.
+ *
+ * For a request that was started, group statistics are updated and
+ * the completion is accounted for under bfqd->lock. A request that
+ * was never started is instead removed from the scheduler, if still
+ * queued there. In both cases the queue is released through
+ * bfq_put_rq_priv_body() and the private pointers of rq are cleared.
+ */
+static void bfq_put_rq_private(struct request_queue *q, struct request *rq)
+{
+       struct bfq_queue *bfqq = RQ_BFQQ(rq);
+       struct bfq_data *bfqd = bfqq->bfqd;
+       const bool started = !!(rq->rq_flags & RQF_STARTED);
+
+       if (started)
+               bfqg_stats_update_completion(bfqq_group(bfqq),
+                                            rq_start_time_ns(rq),
+                                            rq_io_start_time_ns(rq),
+                                            rq->cmd_flags);
+
+       if (unlikely(!started)) {
+               /*
+                * Request rq may be still/already in the scheduler,
+                * in which case we need to remove it. And we cannot
+                * defer such a check and removal, to avoid
+                * inconsistencies in the time interval from the end
+                * of this function to the start of the deferred work.
+                * This situation seems to occur only in process
+                * context, as a consequence of a merge. In the
+                * current version of the code, this implies that the
+                * lock is held.
+                */
+               if (!RB_EMPTY_NODE(&rq->rb_node))
+                       bfq_remove_request(q, rq);
+               bfq_put_rq_priv_body(bfqq);
+       } else {
+               unsigned long flags;
+
+               spin_lock_irqsave(&bfqd->lock, flags);
+
+               bfq_completed_request(bfqq, bfqd);
+               bfq_put_rq_priv_body(bfqq);
+
+               spin_unlock_irqrestore(&bfqd->lock, flags);
+       }
+
+       rq->elv.priv[0] = NULL;
+       rq->elv.priv[1] = NULL;
+}
+
+/*
+ * Detach the process associated with bic from the shared queue bfqq.
+ *
+ * Returns bfqq itself if this was the last process referring to it
+ * (the queue is then just handed over to the current process), or
+ * NULL if other processes still refer to bfqq and therefore a new
+ * bfqq should be allocated.
+ */
+static struct bfq_queue *
+bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
+{
+       bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue");
+
+       if (bfqq_process_refs(bfqq) != 1) {
+               /* Unlink the sync queue from bic, then let go of bfqq. */
+               bic_set_bfqq(bic, NULL, 1);
+               bfq_put_cooperator(bfqq);
+               bfq_put_queue(bfqq);
+
+               return NULL;
+       }
+
+       /* Single process left: recycle bfqq as a non-shared queue. */
+       bfqq->pid = current->pid;
+       bfq_clear_bfqq_coop(bfqq);
+       bfq_clear_bfqq_split_coop(bfqq);
+
+       return bfqq;
+}
+
+/*
+ * Return the queue that bic uses for sync or async I/O (depending on
+ * is_sync), getting a new one through bfq_get_queue() if bic has no
+ * queue yet or is still bound to the fallback oom_bfqq.
+ *
+ * If a new queue is obtained, *new_queue (when new_queue is not NULL)
+ * is set to true. If, in addition, the new queue is a sync queue that
+ * results from a split, its burst state is restored from bic and its
+ * split time is recorded.
+ */
+static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
+                                                  struct bfq_io_cq *bic,
+                                                  struct bio *bio,
+                                                  bool split, bool is_sync,
+                                                  bool *new_queue)
+{
+       struct bfq_queue *bfqq = bic_to_bfqq(bic, is_sync);
+
+       /* Common case: bic already owns a regular queue. */
+       if (likely(bfqq && bfqq != &bfqd->oom_bfqq))
+               return bfqq;
+
+       if (new_queue)
+               *new_queue = true;
+
+       /* A non-NULL bfqq can only be the oom_bfqq at this point. */
+       if (bfqq)
+               bfq_put_queue(bfqq);
+
+       bfqq = bfq_get_queue(bfqd, bio, is_sync, bic);
+       bic_set_bfqq(bic, bfqq, is_sync);
+
+       if (!split || !is_sync)
+               return bfqq;
+
+       if ((bic->was_in_burst_list && bfqd->large_burst) ||
+           bic->saved_in_large_burst) {
+               bfq_mark_bfqq_in_large_burst(bfqq);
+       } else {
+               bfq_clear_bfqq_in_large_burst(bfqq);
+               if (bic->was_in_burst_list)
+                       hlist_add_head(&bfqq->burst_list_node,
+                                      &bfqd->burst_list);
+       }
+       bfqq->split_time = jiffies;
+
+       return bfqq;
+}
+
+/*
+ * Allocate bfq data structures associated with this request.
+ *
+ * Returns 0 on success, or 1 if rq has no bfq_io_cq attached. On
+ * success, rq->elv.priv[0] and rq->elv.priv[1] point to the bic and
+ * to the bfq_queue serving rq, respectively, and that queue has
+ * gained one reference and one allocated request. bfqd->lock is
+ * taken and released inside this function.
+ */
+static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
+                             struct bio *bio)
+{
+       struct bfq_data *bfqd = q->elevator->elevator_data;
+       struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq);
+       const int is_sync = rq_is_sync(rq);
+       struct bfq_queue *bfqq;
+       bool new_queue = false;
+       bool split = false;
+
+       spin_lock_irq(&bfqd->lock);
+
+       if (!bic)
+               goto queue_fail;
+
+       bfq_check_ioprio_change(bic, bio);
+
+       bfq_bic_update_cgroup(bic, bio);
+
+       bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio, false, is_sync,
+                                        &new_queue);
+
+       if (likely(!new_queue)) {
+               /* If the queue was seeky for too long, break it apart. */
+               if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) {
+                       bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq");
+
+                       /* Update bic before losing reference to bfqq */
+                       if (bfq_bfqq_in_large_burst(bfqq))
+                               bic->saved_in_large_burst = true;
+
+                       bfqq = bfq_split_bfqq(bic, bfqq);
+                       split = true;
+
+                       /* NULL means that a new queue is needed for bic */
+                       if (!bfqq)
+                               bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio,
+                                                                true, is_sync,
+                                                                NULL);
+               }
+       }
+
+       bfqq->allocated++;
+       bfqq->ref++;
+       bfq_log_bfqq(bfqd, bfqq, "get_request %p: bfqq %p, %d",
+                    rq, bfqq, bfqq->ref);
+
+       rq->elv.priv[0] = bic;
+       rq->elv.priv[1] = bfqq;
+
+       /*
+        * If a bfq_queue has only one process reference, it is owned
+        * by only this bic: we can then set bfqq->bic = bic. In
+        * addition, if the queue has also just been split, we have to
+        * resume its state.
+        */
+       if (likely(bfqq != &bfqd->oom_bfqq) && bfqq_process_refs(bfqq) == 1) {
+               bfqq->bic = bic;
+               if (split) {
+                       /*
+                        * The queue has just been split from a shared
+                        * queue: restore the idle window and the
+                        * possible weight raising period.
+                        */
+                       bfq_bfqq_resume_state(bfqq, bic);
+               }
+       }
+
+       if (unlikely(bfq_bfqq_just_created(bfqq)))
+               bfq_handle_burst(bfqd, bfqq);
+
+       spin_unlock_irq(&bfqd->lock);
+
+       return 0;
+
+queue_fail:
+       spin_unlock_irq(&bfqd->lock);
+
+       return 1;
+}
+
+/*
+ * Body of the idle-slice timer handler. Under bfqd->lock: clear the
+ * wait_request flag of bfqq and, if bfqq is still the in-service
+ * queue, expire it when its budget timed out or when it has no queued
+ * request. A dispatch is then scheduled, unless bfqq turned out not
+ * to be the in-service queue any longer.
+ */
+static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq)
+{
+       struct bfq_data *bfqd = bfqq->bfqd;
+       unsigned long flags;
+
+       spin_lock_irqsave(&bfqd->lock, flags);
+       bfq_clear_bfqq_wait_request(bfqq);
+
+       if (bfqq != bfqd->in_service_queue) {
+               spin_unlock_irqrestore(&bfqd->lock, flags);
+               return;
+       }
+
+       if (bfq_bfqq_budget_timeout(bfqq)) {
+               /*
+                * Also here the queue can be safely expired
+                * for budget timeout without wasting
+                * guarantees
+                */
+               bfq_bfqq_expire(bfqd, bfqq, true, BFQQE_BUDGET_TIMEOUT);
+       } else if (bfqq->queued[0] == 0 && bfqq->queued[1] == 0) {
+               /*
+                * The queue may not be empty upon timer expiration,
+                * because we may not disable the timer when the
+                * first request of the in-service queue arrives
+                * during disk idling.
+                */
+               bfq_bfqq_expire(bfqd, bfqq, true, BFQQE_TOO_IDLE);
+       }
+
+       spin_unlock_irqrestore(&bfqd->lock, flags);
+       bfq_schedule_dispatch(bfqd);
+}
+
+/*
+ * Handler of the expiration of the timer running if the in-service queue
+ * is idling inside its time slice. The timer is never restarted from
+ * here.
+ */
+static enum hrtimer_restart bfq_idle_slice_timer(struct hrtimer *timer)
+{
+       struct bfq_data *bfqd;
+       struct bfq_queue *in_serv_bfqq;
+
+       bfqd = container_of(timer, struct bfq_data, idle_slice_timer);
+       in_serv_bfqq = bfqd->in_service_queue;
+
+       /*
+        * Theoretical race here: the in-service queue can be NULL or
+        * different from the queue that was idling if a new request
+        * arrives for the current queue and there is a full dispatch
+        * cycle that changes the in-service queue.  This can hardly
+        * happen, but in the worst case we just expire a queue too
+        * early.
+        */
+       if (!in_serv_bfqq)
+               return HRTIMER_NORESTART;
+
+       bfq_idle_slice_timer_body(in_serv_bfqq);
+
+       return HRTIMER_NORESTART;
+}
+
+/*
+ * Release the async queue stored in *bfqq_ptr, if any: move it to the
+ * root group, put it and reset the slot to NULL.
+ */
+static void __bfq_put_async_bfqq(struct bfq_data *bfqd,
+                                struct bfq_queue **bfqq_ptr)
+{
+       struct bfq_queue *async_bfqq = *bfqq_ptr;
+
+       bfq_log(bfqd, "put_async_bfqq: %p", async_bfqq);
+       if (!async_bfqq)
+               return;
+
+       bfq_bfqq_move(bfqd, async_bfqq, bfqd->root_group);
+
+       bfq_log_bfqq(bfqd, async_bfqq, "put_async_bfqq: putting %p, %d",
+                    async_bfqq, async_bfqq->ref);
+       bfq_put_queue(async_bfqq);
+       *bfqq_ptr = NULL;
+}
+
+/*
+ * Release all the bfqg references to its async queues.  If we are
+ * deallocating the group these queues may still contain requests, so
+ * we reparent them to the root cgroup (i.e., the only one that will
+ * exist for sure until all the requests on a device are gone).
+ */
+void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
+{
+       int row, col;
+
+       for (row = 0; row < 2; row++) {
+               struct bfq_queue **queues = bfqg->async_bfqq[row];
+
+               for (col = 0; col < IOPRIO_BE_NR; col++)
+                       __bfq_put_async_bfqq(bfqd, &queues[col]);
+       }
+
+       /* The idle-class async queue lives in its own slot. */
+       __bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq);
+}
+
+/*
+ * Tear down the scheduler data attached to the elevator queue e:
+ * cancel the idle-slice timer, deactivate every queue still in the
+ * idle list, release the group data (by deactivating the blkcg policy
+ * if group scheduling is compiled in, by putting the async queues and
+ * freeing the root group otherwise) and finally free the bfq_data.
+ */
+static void bfq_exit_queue(struct elevator_queue *e)
+{
+       struct bfq_data *bfqd = e->elevator_data;
+       struct bfq_queue *bfqq, *n;
+
+       hrtimer_cancel(&bfqd->idle_slice_timer);
+
+       spin_lock_irq(&bfqd->lock);
+       list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list)
+               bfq_deactivate_bfqq(bfqd, bfqq, false, false);
+       spin_unlock_irq(&bfqd->lock);
+
+       /*
+        * NOTE(review): the timer is cancelled a second time here,
+        * presumably in case it was re-armed in the meantime - confirm.
+        */
+       hrtimer_cancel(&bfqd->idle_slice_timer);
+
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+       blkcg_deactivate_policy(bfqd->queue, &blkcg_policy_bfq);
+#else
+       spin_lock_irq(&bfqd->lock);
+       bfq_put_async_queues(bfqd, bfqd->root_group);
+       kfree(bfqd->root_group);
+       spin_unlock_irq(&bfqd->lock);
+#endif
+
+       kfree(bfqd);
+}
+
+/*
+ * Initialize the fields of root_group: the position tree, one service
+ * tree per ioprio class, the time of the last service of the idle
+ * class and, with group scheduling compiled in, the links of the
+ * group to its (missing) parent and to bfqd.
+ */
+static void bfq_init_root_group(struct bfq_group *root_group,
+                               struct bfq_data *bfqd)
+{
+       int ioprio_class;
+
+       root_group->rq_pos_tree = RB_ROOT;
+       root_group->sched_data.bfq_class_idle_last_service = jiffies;
+
+       for (ioprio_class = 0; ioprio_class < BFQ_IOPRIO_CLASSES;
+            ioprio_class++) {
+               root_group->sched_data.service_tree[ioprio_class] =
+                       BFQ_SERVICE_TREE_INIT;
+       }
+
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+       /* The root group has no parent and no entity of its own. */
+       root_group->entity.parent = NULL;
+       root_group->my_entity = NULL;
+       root_group->bfqd = bfqd;
+#endif
+}
+
+/*
+ * Allocate and initialize the bfq_data for the request queue q, and
+ * install the newly allocated elevator queue as q->elevator.
+ *
+ * Returns 0 on success, -ENOMEM if an allocation or the creation of
+ * the group hierarchy fails. In the latter case both the bfq_data and
+ * the elevator queue are released before returning.
+ */
+static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+{
+       struct bfq_data *bfqd;
+       struct elevator_queue *eq;
+
+       eq = elevator_alloc(q, e);
+       if (!eq)
+               return -ENOMEM;
+
+       bfqd = kzalloc_node(sizeof(*bfqd), GFP_KERNEL, q->node);
+       if (!bfqd) {
+               kobject_put(&eq->kobj);
+               return -ENOMEM;
+       }
+       eq->elevator_data = bfqd;
+
+       spin_lock_irq(q->queue_lock);
+       q->elevator = eq;
+       spin_unlock_irq(q->queue_lock);
+
+       /*
+        * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues.
+        * Grab a permanent reference to it, so that the normal code flow
+        * will not attempt to free it.
+        */
+       bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, NULL, 1, 0);
+       bfqd->oom_bfqq.ref++;
+       bfqd->oom_bfqq.new_ioprio = BFQ_DEFAULT_QUEUE_IOPRIO;
+       bfqd->oom_bfqq.new_ioprio_class = IOPRIO_CLASS_BE;
+       bfqd->oom_bfqq.entity.new_weight =
+               bfq_ioprio_to_weight(bfqd->oom_bfqq.new_ioprio);
+
+       /* oom_bfqq does not take part in bursts */
+       bfq_clear_bfqq_just_created(&bfqd->oom_bfqq);
+
+       /*
+        * Trigger weight initialization, according to ioprio, at the
+        * oom_bfqq's first activation. The oom_bfqq's ioprio and ioprio
+        * class won't be changed any more.
+        */
+       bfqd->oom_bfqq.entity.prio_changed = 1;
+
+       bfqd->queue = q;
+
+       INIT_LIST_HEAD(&bfqd->dispatch);
+
+       hrtimer_init(&bfqd->idle_slice_timer, CLOCK_MONOTONIC,
+                    HRTIMER_MODE_REL);
+       bfqd->idle_slice_timer.function = bfq_idle_slice_timer;
+
+       bfqd->queue_weights_tree = RB_ROOT;
+       bfqd->group_weights_tree = RB_ROOT;
+
+       INIT_LIST_HEAD(&bfqd->active_list);
+       INIT_LIST_HEAD(&bfqd->idle_list);
+       INIT_HLIST_HEAD(&bfqd->burst_list);
+
+       bfqd->hw_tag = -1;
+
+       /* Tunables start from the module-wide default values. */
+       bfqd->bfq_max_budget = bfq_default_max_budget;
+
+       bfqd->bfq_fifo_expire[0] = bfq_fifo_expire[0];
+       bfqd->bfq_fifo_expire[1] = bfq_fifo_expire[1];
+       bfqd->bfq_back_max = bfq_back_max;
+       bfqd->bfq_back_penalty = bfq_back_penalty;
+       bfqd->bfq_slice_idle = bfq_slice_idle;
+       bfqd->bfq_timeout = bfq_timeout;
+
+       bfqd->bfq_requests_within_timer = 120;
+
+       bfqd->bfq_large_burst_thresh = 8;
+       bfqd->bfq_burst_interval = msecs_to_jiffies(180);
+
+       bfqd->low_latency = true;
+
+       /*
+        * Trade-off between responsiveness and fairness.
+        */
+       bfqd->bfq_wr_coeff = 30;
+       bfqd->bfq_wr_rt_max_time = msecs_to_jiffies(300);
+       bfqd->bfq_wr_max_time = 0;
+       bfqd->bfq_wr_min_idle_time = msecs_to_jiffies(2000);
+       bfqd->bfq_wr_min_inter_arr_async = msecs_to_jiffies(500);
+       bfqd->bfq_wr_max_softrt_rate = 7000; /*
+                                             * Approximate rate required
+                                             * to playback or record a
+                                             * high-definition compressed
+                                             * video.
+                                             */
+       bfqd->wr_busy_queues = 0;
+
+       /*
+        * Begin by assuming, optimistically, that the device is a
+        * high-speed one, and that its peak rate is equal to 2/3 of
+        * the highest reference rate.
+        */
+       bfqd->RT_prod = R_fast[blk_queue_nonrot(bfqd->queue)] *
+                       T_fast[blk_queue_nonrot(bfqd->queue)];
+       bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)] * 2 / 3;
+       bfqd->device_speed = BFQ_BFQD_FAST;
+
+       spin_lock_init(&bfqd->lock);
+
+       /*
+        * The invocation of the next bfq_create_group_hierarchy
+        * function is the head of a chain of function calls
+        * (bfq_create_group_hierarchy->blkcg_activate_policy->
+        * blk_mq_freeze_queue) that may lead to the invocation of the
+        * has_work hook function. For this reason,
+        * bfq_create_group_hierarchy is invoked only after all
+        * scheduler data has been initialized, apart from the fields
+        * that can be initialized only after invoking
+        * bfq_create_group_hierarchy. This, in particular, enables
+        * has_work to correctly return false. Of course, to avoid
+        * other inconsistencies, the blk-mq stack must then refrain
+        * from invoking further scheduler hooks before this init
+        * function is finished.
+        */
+       bfqd->root_group = bfq_create_group_hierarchy(bfqd, q->node);
+       if (!bfqd->root_group)
+               goto out_free;
+       bfq_init_root_group(bfqd->root_group, bfqd);
+       bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group);
+
+
+       return 0;
+
+out_free:
+       /*
+        * NOTE(review): q->elevator still points to eq here; presumably
+        * the caller resets it when init fails - confirm.
+        */
+       kfree(bfqd);
+       kobject_put(&eq->kobj);
+       return -ENOMEM;
+}
+
+/* Destroy the slab cache created by bfq_slab_setup(). */
+static void bfq_slab_kill(void)
+{
+       kmem_cache_destroy(bfq_pool);
+}
+
+/*
+ * Create the slab cache from which bfq_queue objects are allocated.
+ * Returns 0 on success, -ENOMEM if the cache cannot be created.
+ */
+static int __init bfq_slab_setup(void)
+{
+       bfq_pool = KMEM_CACHE(bfq_queue, 0);
+
+       return bfq_pool ? 0 : -ENOMEM;
+}
+
+/*
+ * Print var, as an unsigned decimal number followed by a newline, to
+ * the sysfs buffer page. Returns the number of characters written.
+ */
+static ssize_t bfq_var_show(unsigned int var, char *page)
+{
+       const int len = sprintf(page, "%u\n", var);
+
+       return len;
+}
+
+/*
+ * Parse the decimal number held in page and store it in *var.
+ *
+ * Returns count on success, so that the value can be returned as is
+ * by a sysfs store handler, or the negative error code reported by
+ * kstrtoul() if page does not contain a valid unsigned long. On
+ * failure *var is left untouched: callers must not use it unless the
+ * return value is non-negative.
+ */
+static ssize_t bfq_var_store(unsigned long *var, const char *page,
+                            size_t count)
+{
+       unsigned long new_val;
+       int ret = kstrtoul(page, 10, &new_val);
+
+       /* Report bad input instead of silently claiming success. */
+       if (ret)
+               return ret;
+
+       *var = new_val;
+
+       return count;
+}
+
+/*
+ * Generate the sysfs show handler __FUNC for the tunable __VAR.
+ * __CONV selects the conversion applied to the value before printing:
+ * 0 prints it as is, 1 converts it from jiffies to ms, 2 converts it
+ * from ns to ms. The result is printed through bfq_var_show(), i.e.,
+ * as an unsigned int.
+ */
+#define SHOW_FUNCTION(__FUNC, __VAR, __CONV)                           \
+static ssize_t __FUNC(struct elevator_queue *e, char *page)            \
+{                                                                      \
+       struct bfq_data *bfqd = e->elevator_data;                       \
+       u64 __data = __VAR;                                             \
+       if (__CONV == 1)                                                \
+               __data = jiffies_to_msecs(__data);                      \
+       else if (__CONV == 2)                                           \
+               __data = div_u64(__data, NSEC_PER_MSEC);                \
+       return bfq_var_show(__data, (page));                            \
+}
+SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 2);
+SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 2);
+SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0);
+SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0);
+SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 2);
+SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0);
+SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout, 1);
+SHOW_FUNCTION(bfq_strict_guarantees_show, bfqd->strict_guarantees, 0);
+SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0);
+#undef SHOW_FUNCTION
+
+/*
+ * Generate the sysfs show handler __FUNC for the tunable __VAR, whose
+ * value is divided by NSEC_PER_USEC (ns -> us) before being printed.
+ */
+#define USEC_SHOW_FUNCTION(__FUNC, __VAR)                              \
+static ssize_t __FUNC(struct elevator_queue *e, char *page)            \
+{                                                                      \
+       struct bfq_data *bfqd = e->elevator_data;                       \
+       u64 __data = __VAR;                                             \
+       __data = div_u64(__data, NSEC_PER_USEC);                        \
+       return bfq_var_show(__data, (page));                            \
+}
+USEC_SHOW_FUNCTION(bfq_slice_idle_us_show, bfqd->bfq_slice_idle);
+#undef USEC_SHOW_FUNCTION
+
+/*
+ * Generate the sysfs store handler __FUNC for the tunable pointed to
+ * by __PTR. The decimal number written by the user is clamped to
+ * [MIN, MAX] and then converted according to __CONV: 0 stores it as
+ * is, 1 converts it from ms to jiffies, 2 converts it from ms to ns.
+ *
+ * The handler returns count on success. If the input is not a valid
+ * unsigned long, the tunable is left unchanged and the error code of
+ * kstrtoul() is returned, instead of storing an uninitialized value.
+ */
+#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)                        \
+static ssize_t                                                         \
+__FUNC(struct elevator_queue *e, const char *page, size_t count)       \
+{                                                                      \
+       struct bfq_data *bfqd = e->elevator_data;                       \
+       unsigned long __data;                                           \
+       int ret = kstrtoul((page), 10, &__data);                        \
+       if (ret)                                                        \
+               return ret;                                             \
+       if (__data < (MIN))                                             \
+               __data = (MIN);                                         \
+       else if (__data > (MAX))                                        \
+               __data = (MAX);                                         \
+       if (__CONV == 1)                                                \
+               *(__PTR) = msecs_to_jiffies(__data);                    \
+       else if (__CONV == 2)                                           \
+               *(__PTR) = (u64)__data * NSEC_PER_MSEC;                 \
+       else                                                            \
+               *(__PTR) = __data;                                      \
+       return count;                                                   \
+}
+STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1,
+               INT_MAX, 2);
+STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1,
+               INT_MAX, 2);
+STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0);
+STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1,
+               INT_MAX, 0);
+STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 2);
+#undef STORE_FUNCTION
+
+/*
+ * Generate the sysfs store handler __FUNC for the tunable pointed to
+ * by __PTR. The decimal number written by the user is clamped to
+ * [MIN, MAX] and multiplied by NSEC_PER_USEC (us -> ns) before being
+ * stored.
+ *
+ * The handler returns count on success. If the input is not a valid
+ * unsigned long, the tunable is left unchanged and the error code of
+ * kstrtoul() is returned, instead of storing an uninitialized value.
+ */
+#define USEC_STORE_FUNCTION(__FUNC, __PTR, MIN, MAX)                   \
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)\
+{                                                                      \
+       struct bfq_data *bfqd = e->elevator_data;                       \
+       unsigned long __data;                                           \
+       int ret = kstrtoul((page), 10, &__data);                        \
+       if (ret)                                                        \
+               return ret;                                             \
+       if (__data < (MIN))                                             \
+               __data = (MIN);                                         \
+       else if (__data > (MAX))                                        \
+               __data = (MAX);                                         \
+       *(__PTR) = (u64)__data * NSEC_PER_USEC;                         \
+       return count;                                                   \
+}
+USEC_STORE_FUNCTION(bfq_slice_idle_us_store, &bfqd->bfq_slice_idle, 0,
+                   UINT_MAX);
+#undef USEC_STORE_FUNCTION
+
+/*
+ * sysfs store handler for max_budget. Writing 0 makes the maximum
+ * budget be computed by bfq_calc_max_budget(); any other value, capped
+ * to INT_MAX, is used as is. The value written is also recorded in
+ * bfq_user_max_budget.
+ *
+ * Returns count on success, or the error code of kstrtoul() if page
+ * does not hold a valid unsigned long; in the latter case no field is
+ * modified.
+ */
+static ssize_t bfq_max_budget_store(struct elevator_queue *e,
+                                   const char *page, size_t count)
+{
+       struct bfq_data *bfqd = e->elevator_data;
+       unsigned long __data;
+       int ret = kstrtoul(page, 10, &__data);
+
+       /* Never act on __data if the input could not be parsed. */
+       if (ret)
+               return ret;
+
+       if (__data == 0)
+               bfqd->bfq_max_budget = bfq_calc_max_budget(bfqd);
+       else {
+               if (__data > INT_MAX)
+                       __data = INT_MAX;
+               bfqd->bfq_max_budget = __data;
+       }
+
+       bfqd->bfq_user_max_budget = __data;
+
+       return count;
+}
+
+/*
+ * Leaving this name to preserve name compatibility with cfq
+ * parameters, but this timeout is used for both sync and async.
+ *
+ * The value written, in ms, is clamped to [1, INT_MAX] and stored in
+ * jiffies. If the user has not set a maximum budget, the latter is
+ * recomputed to reflect the new timeout.
+ *
+ * Returns count on success, or the error code of kstrtoul() if page
+ * does not hold a valid unsigned long; in the latter case no field is
+ * modified.
+ */
+static ssize_t bfq_timeout_sync_store(struct elevator_queue *e,
+                                     const char *page, size_t count)
+{
+       struct bfq_data *bfqd = e->elevator_data;
+       unsigned long __data;
+       int ret = kstrtoul(page, 10, &__data);
+
+       /* Never act on __data if the input could not be parsed. */
+       if (ret)
+               return ret;
+
+       if (__data < 1)
+               __data = 1;
+       else if (__data > INT_MAX)
+               __data = INT_MAX;
+
+       bfqd->bfq_timeout = msecs_to_jiffies(__data);
+       if (bfqd->bfq_user_max_budget == 0)
+               bfqd->bfq_max_budget = bfq_calc_max_budget(bfqd);
+
+       return count;
+}
+
+/*
+ * sysfs store handler for strict_guarantees. Any value greater than 1
+ * is treated as 1. When the flag switches from off to on, slice_idle
+ * is raised to 8 ms if it is currently lower.
+ *
+ * Returns count on success, or the error code of kstrtoul() if page
+ * does not hold a valid unsigned long; in the latter case no field is
+ * modified.
+ */
+static ssize_t bfq_strict_guarantees_store(struct elevator_queue *e,
+                                    const char *page, size_t count)
+{
+       struct bfq_data *bfqd = e->elevator_data;
+       unsigned long __data;
+       int ret = kstrtoul(page, 10, &__data);
+
+       /* Never act on __data if the input could not be parsed. */
+       if (ret)
+               return ret;
+
+       if (__data > 1)
+               __data = 1;
+       if (!bfqd->strict_guarantees && __data == 1
+           && bfqd->bfq_slice_idle < 8 * NSEC_PER_MSEC)
+               bfqd->bfq_slice_idle = 8 * NSEC_PER_MSEC;
+
+       bfqd->strict_guarantees = __data;
+
+       return count;
+}
+
+/*
+ * sysfs store handler for low_latency. Any value greater than 1 is
+ * treated as 1. When the flag switches from on to off, bfq_end_wr()
+ * is invoked before the new value is stored.
+ *
+ * Returns count on success, or the error code of kstrtoul() if page
+ * does not hold a valid unsigned long; in the latter case no field is
+ * modified.
+ */
+static ssize_t bfq_low_latency_store(struct elevator_queue *e,
+                                    const char *page, size_t count)
+{
+       struct bfq_data *bfqd = e->elevator_data;
+       unsigned long __data;
+       int ret = kstrtoul(page, 10, &__data);
+
+       /* Never act on __data if the input could not be parsed. */
+       if (ret)
+               return ret;
+
+       if (__data > 1)
+               __data = 1;
+       if (__data == 0 && bfqd->low_latency != 0)
+               bfq_end_wr(bfqd);
+       bfqd->low_latency = __data;
+
+       return count;
+}
+
+/*
+ * Build the sysfs attribute 'name', with mode 0644, served by the
+ * bfq_<name>_show and bfq_<name>_store handlers.
+ */
+#define BFQ_ATTR(name) \
+       __ATTR(name, 0644, bfq_##name##_show, bfq_##name##_store)
+
+/* NULL-terminated table of the tunables exported through sysfs. */
+static struct elv_fs_entry bfq_attrs[] = {
+       BFQ_ATTR(fifo_expire_sync),
+       BFQ_ATTR(fifo_expire_async),
+       BFQ_ATTR(back_seek_max),
+       BFQ_ATTR(back_seek_penalty),
+       BFQ_ATTR(slice_idle),
+       BFQ_ATTR(slice_idle_us),
+       BFQ_ATTR(max_budget),
+       BFQ_ATTR(timeout_sync),
+       BFQ_ATTR(strict_guarantees),
+       BFQ_ATTR(low_latency),
+       __ATTR_NULL
+};
+
+/*
+ * Descriptor of the "bfq" elevator, registered by bfq_init(): blk-mq
+ * scheduler hooks, size and alignment of the per-icq data, sysfs
+ * attributes, name and owning module.
+ */
+static struct elevator_type iosched_bfq_mq = {
+       .ops.mq = {
+               .get_rq_priv            = bfq_get_rq_private,
+               .put_rq_priv            = bfq_put_rq_private,
+               .exit_icq               = bfq_exit_icq,
+               .insert_requests        = bfq_insert_requests,
+               .dispatch_request       = bfq_dispatch_request,
+               .next_request           = elv_rb_latter_request,
+               .former_request         = elv_rb_former_request,
+               .allow_merge            = bfq_allow_bio_merge,
+               .bio_merge              = bfq_bio_merge,
+               .request_merge          = bfq_request_merge,
+               .requests_merged        = bfq_requests_merged,
+               .request_merged         = bfq_request_merged,
+               .has_work               = bfq_has_work,
+               .init_sched             = bfq_init_queue,
+               .exit_sched             = bfq_exit_queue,
+       },
+
+       .uses_mq =              true,
+       .icq_size =             sizeof(struct bfq_io_cq),
+       .icq_align =            __alignof__(struct bfq_io_cq),
+       .elevator_attrs =       bfq_attrs,
+       .elevator_name =        "bfq",
+       .elevator_owner =       THIS_MODULE,
+};
+
+/*
+ * Module initialization: register the blkcg policy (if group
+ * scheduling is compiled in), create the slab cache for bfq_queue
+ * objects, compute the reference parameters that depend on HZ and
+ * register the elevator.
+ *
+ * Returns 0 on success or a negative error code on failure, in which
+ * case every step already performed is undone.
+ */
+static int __init bfq_init(void)
+{
+       int ret;
+
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+       ret = blkcg_policy_register(&blkcg_policy_bfq);
+       if (ret)
+               return ret;
+#endif
+
+       ret = -ENOMEM;
+       if (bfq_slab_setup())
+               goto err_pol_unreg;
+
+       /*
+        * Times to load large popular applications for the typical
+        * systems installed on the reference devices (see the
+        * comments before the definitions of the next two
+        * arrays). Actually, we use slightly slower values, as the
+        * estimated peak rate tends to be smaller than the actual
+        * peak rate.  The reason for this last fact is that estimates
+        * are computed over much shorter time intervals than the long
+        * intervals typically used for benchmarking. Why? First, to
+        * adapt more quickly to variations. Second, because an I/O
+        * scheduler cannot rely on a peak-rate-evaluation workload to
+        * be run for a long time.
+        */
+       T_slow[0] = msecs_to_jiffies(3500); /* actually 4 sec */
+       T_slow[1] = msecs_to_jiffies(6000); /* actually 6.5 sec */
+       T_fast[0] = msecs_to_jiffies(7000); /* actually 8 sec */
+       T_fast[1] = msecs_to_jiffies(2500); /* actually 3 sec */
+
+       /*
+        * Thresholds that determine the switch between speed classes
+        * (see the comments before the definition of the array
+        * device_speed_thresh). These thresholds are biased towards
+        * transitions to the fast class. This is safer than the
+        * opposite bias. In fact, a wrong transition to the slow
+        * class results in short weight-raising periods, because the
+        * speed of the device then tends to be higher than the
+        * reference peak rate. On the opposite end, a wrong
+        * transition to the fast class tends to increase
+        * weight-raising periods, because of the opposite reason.
+        */
+       device_speed_thresh[0] = (4 * R_slow[0]) / 3;
+       device_speed_thresh[1] = (4 * R_slow[1]) / 3;
+
+       ret = elv_register(&iosched_bfq_mq);
+       if (ret)
+               goto slab_kill;
+
+       return 0;
+
+slab_kill:
+       /* The slab cache exists at this point: do not leak it. */
+       bfq_slab_kill();
+err_pol_unreg:
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+       blkcg_policy_unregister(&blkcg_policy_bfq);
+#endif
+       return ret;
+}
+
+/*
+ * Module cleanup: unregister the elevator, then the blkcg policy (if
+ * group scheduling is compiled in), and finally destroy the slab
+ * cache.
+ */
+static void __exit bfq_exit(void)
+{
+       elv_unregister(&iosched_bfq_mq);
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+       blkcg_policy_unregister(&blkcg_policy_bfq);
+#endif
+       bfq_slab_kill();
+}
+
+/* Module entry and exit points, followed by the module metadata. */
+module_init(bfq_init);
+module_exit(bfq_exit);
+
+MODULE_AUTHOR("Paolo Valente");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MQ Budget Fair Queueing I/O Scheduler");
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h

new file mode 100644 (file)

index 0000000..ae783c0
--- /dev/null
+++ b/block/bfq-iosched.h
@@ -0,0 +1,941 @@
+/*
+ * Header file for the BFQ I/O scheduler: data structures and
+ * prototypes of interface functions among BFQ components.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License as
+ *  published by the Free Software Foundation; either version 2 of the
+ *  License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ */
+#ifndef _BFQ_H
+#define _BFQ_H
+
+#include <linux/blktrace_api.h>
+#include <linux/hrtimer.h>
+#include <linux/blk-cgroup.h>
+
+#define BFQ_IOPRIO_CLASSES     3
+#define BFQ_CL_IDLE_TIMEOUT    (HZ/5)
+
+#define BFQ_MIN_WEIGHT                 1
+#define BFQ_MAX_WEIGHT                 1000
+#define BFQ_WEIGHT_CONVERSION_COEFF    10
+
+#define BFQ_DEFAULT_QUEUE_IOPRIO       4
+
+#define BFQ_WEIGHT_LEGACY_DFL  100
+#define BFQ_DEFAULT_GRP_IOPRIO 0
+#define BFQ_DEFAULT_GRP_CLASS  IOPRIO_CLASS_BE
+
+/*
+ * Soft real-time applications are extremely more latency sensitive
+ * than interactive ones. Over-raise the weight of the former to
+ * privilege them against the latter.
+ */
+#define BFQ_SOFTRT_WEIGHT_FACTOR       100
+
+struct bfq_entity;
+
+/**
+ * struct bfq_service_tree - per ioprio_class service tree.
+ *
+ * Each service tree represents a B-WF2Q+ scheduler on its own.  Each
+ * ioprio_class has its own independent scheduler, and so its own
+ * bfq_service_tree.  All the fields are protected by the queue lock
+ * of the containing bfqd.
+ */
+struct bfq_service_tree {
+       /* tree for active entities (i.e., those backlogged) */
+       struct rb_root active;
+       /* tree for idle entities (i.e., not backlogged, with V <= F_i) */
+       struct rb_root idle;
+
+       /* idle entity with minimum F_i (F_i being the finish timestamp) */
+       struct bfq_entity *first_idle;
+       /* idle entity with maximum F_i */
+       struct bfq_entity *last_idle;
+
+       /* scheduler virtual time */
+       u64 vtime;
+       /* scheduler weight sum; active and idle entities contribute to it */
+       unsigned long wsum;
+};
+
+/**
+ * struct bfq_sched_data - multi-class scheduler.
+ *
+ * bfq_sched_data is the basic scheduler queue.  It supports three
+ * ioprio_classes, and can be used either as a toplevel queue or as an
+ * intermediate queue on a hierarchical setup.  @next_in_service
+ * points to the active entity of the sched_data service trees that
+ * will be scheduled next. It is used to reduce the number of steps
+ * needed for each hierarchical-schedule update.
+ *
+ * The supported ioprio_classes are the same as in CFQ, in descending
+ * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE.
+ * Requests from higher priority queues are served before all the
+ * requests from lower priority queues; among requests of the same
+ * queue requests are served according to B-WF2Q+.
+ * All the fields are protected by the queue lock of the containing bfqd.
+ */
+struct bfq_sched_data {
+       /* entity in service */
+       struct bfq_entity *in_service_entity;
+       /* head-of-line entity: the active entity that will be scheduled next */
+       struct bfq_entity *next_in_service;
+       /* array of service trees, one per ioprio_class */
+       struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES];
+       /* last time CLASS_IDLE was served */
+       unsigned long bfq_class_idle_last_service;
+
+};
+
+/**
+ * struct bfq_weight_counter - counter of the number of all active entities
+ *                             with a given weight.
+ */
+struct bfq_weight_counter {
+       unsigned int weight; /* weight of the entities this counter refers to */
+       unsigned int num_active; /* number of active entities with this weight */
+       /*
+        * Weights tree member (see bfq_data's @queue_weights_tree and
+        * @group_weights_tree)
+        */
+       struct rb_node weights_node;
+};
+
+/**
+ * struct bfq_entity - schedulable entity.
+ *
+ * A bfq_entity is used to represent either a bfq_queue (leaf node in the
+ * cgroup hierarchy) or a bfq_group into the upper level scheduler.  Each
+ * entity belongs to the sched_data of the parent group in the cgroup
+ * hierarchy.  Non-leaf entities have also their own sched_data, stored
+ * in @my_sched_data.
+ *
+ * Each entity stores independently its priority values; this would
+ * allow different weights on different devices, but this
+ * functionality is not exported to userspace by now.  Priorities and
+ * weights are updated lazily, first storing the new values into the
+ * new_* fields, then setting the @prio_changed flag.  As soon as
+ * there is a transition in the entity state that allows the priority
+ * update to take place the effective and the requested priority
+ * values are synchronized.
+ *
+ * Unless cgroups are used, the weight value is calculated from the
+ * ioprio to export the same interface as CFQ.  When dealing with
+ * ``well-behaved'' queues (i.e., queues that do not spend too much
+ * time to consume their budget and have true sequential behavior, and
+ * when there are no external factors breaking anticipation) the
+ * relative weights at each level of the cgroups hierarchy should be
+ * guaranteed.  All the fields are protected by the queue lock of the
+ * containing bfqd.
+ */
+struct bfq_entity {
+       /* service_tree member */
+       struct rb_node rb_node;
+       /* pointer to the weight counter associated with this entity */
+       struct bfq_weight_counter *weight_counter;
+
+       /*
+        * Flag, true if the entity is on a tree (either the active or
+        * the idle one of its service_tree) or is in service.
+        */
+       bool on_st;
+
+       /* B-WF2Q+ start and finish timestamps [sectors/weight] */
+       u64 start, finish;
+
+       /* tree the entity is enqueued into; %NULL if not on a tree */
+       struct rb_root *tree;
+
+       /*
+        * minimum start time of the (active) subtree rooted at this
+        * entity; used for O(log N) lookups into active trees
+        */
+       u64 min_start;
+
+       /* amount of service received during the last service slot */
+       int service;
+
+       /* budget, used also to calculate F_i: F_i = S_i + @budget / @weight */
+       int budget;
+
+       /* weight of the entity (which may be a bfq_queue or a bfq_group) */
+       int weight;
+       /* next weight if a change is in progress */
+       int new_weight;
+
+       /* original weight, used to implement weight boosting */
+       int orig_weight;
+
+       /* parent entity, for hierarchical scheduling */
+       struct bfq_entity *parent;
+
+       /*
+        * For non-leaf nodes in the hierarchy, the associated
+        * scheduler queue, %NULL on leaf nodes.
+        */
+       struct bfq_sched_data *my_sched_data;
+       /* the scheduler queue this entity belongs to */
+       struct bfq_sched_data *sched_data;
+
+       /* flag, set to request a weight, ioprio or ioprio_class change */
+       int prio_changed;
+};
+
+struct bfq_group;
+
+/**
+ * struct bfq_ttime - per-process thinktime stats (see bfq_queue's @ttime).
+ */
+struct bfq_ttime {
+       /* completion time of the last request */
+       u64 last_end_request;
+
+       /* total process thinktime */
+       u64 ttime_total;
+       /* number of thinktime samples */
+       unsigned long ttime_samples;
+       /* average process thinktime */
+       u64 ttime_mean;
+};
+
+/**
+ * struct bfq_queue - leaf schedulable entity.
+ *
+ * A bfq_queue is a leaf request queue; it can be associated with an
+ * io_context or more, if it is async or shared between cooperating
+ * processes. @cgroup holds a reference to the cgroup, to be sure that it
+ * does not disappear while a bfqq still references it (mostly to avoid
+ * races between request issuing and task migration followed by cgroup
+ * destruction). NOTE(review): no @cgroup field is declared below.
+ * All the fields are protected by the queue lock of the containing bfqd.
+ */
+struct bfq_queue {
+       /* reference counter */
+       int ref;
+       /* parent bfq_data */
+       struct bfq_data *bfqd;
+
+       /* current ioprio and ioprio class */
+       unsigned short ioprio, ioprio_class;
+       /* next ioprio and ioprio class if a change is in progress */
+       unsigned short new_ioprio, new_ioprio_class;
+
+       /*
+        * Shared bfq_queue if queue is cooperating with one or more
+        * other queues.
+        */
+       struct bfq_queue *new_bfqq;
+       /* request-position tree member (see bfq_group's @rq_pos_tree) */
+       struct rb_node pos_node;
+       /* request-position tree root (see bfq_group's @rq_pos_tree) */
+       struct rb_root *pos_root;
+
+       /* sorted list of pending requests */
+       struct rb_root sort_list;
+       /* if fifo isn't expired, next request to serve */
+       struct request *next_rq;
+       /* number of sync and async requests queued */
+       int queued[2];
+       /* number of requests currently allocated */
+       int allocated;
+       /* number of pending metadata requests */
+       int meta_pending;
+       /* fifo list of requests in sort_list */
+       struct list_head fifo;
+
+       /* entity representing this queue in the scheduler */
+       struct bfq_entity entity;
+
+       /* maximum budget allowed from the feedback mechanism */
+       int max_budget;
+       /* budget expiration (in jiffies) */
+       unsigned long budget_timeout;
+
+       /* number of requests on the dispatch list or inside driver */
+       int dispatched;
+
+       /* status flags */
+       unsigned long flags;
+
+       /* node for active/idle bfqq list inside parent bfqd */
+       struct list_head bfqq_list;
+
+       /* associated @bfq_ttime struct */
+       struct bfq_ttime ttime;
+
+       /* bit vector: a 1 for each seeky request in history */
+       u32 seek_history;
+
+       /* node for the device's burst list */
+       struct hlist_node burst_list_node;
+
+       /* position of the last request enqueued */
+       sector_t last_request_pos;
+
+       /* Number of consecutive pairs of request completion and
+        * arrival, such that the queue becomes idle after the
+        * completion, but the next request arrives within an idle
+        * time slice; used only if the queue's IO_bound flag has been
+        * cleared.
+        */
+       unsigned int requests_within_timer;
+
+       /* pid of the process owning the queue, used for logging purposes */
+       pid_t pid;
+
+       /*
+        * Pointer to the bfq_io_cq owning the bfq_queue, set to %NULL
+        * if the queue is shared.
+        */
+       struct bfq_io_cq *bic;
+
+       /* current maximum weight-raising time for this queue */
+       unsigned long wr_cur_max_time;
+       /*
+        * Minimum time instant such that, only if a new request is
+        * enqueued after this time instant in an idle @bfq_queue with
+        * no outstanding requests, then the task associated with the
+        * queue is deemed as soft real-time (see the comments on
+        * the function bfq_bfqq_softrt_next_start())
+        */
+       unsigned long soft_rt_next_start;
+       /*
+        * Start time of the current weight-raising period if
+        * the @bfq_queue is being weight-raised, otherwise
+        * finish time of the last weight-raising period.
+        */
+       unsigned long last_wr_start_finish;
+       /* factor by which the weight of this queue is multiplied */
+       unsigned int wr_coeff;
+       /*
+        * Time of the last transition of the @bfq_queue from idle to
+        * backlogged.
+        */
+       unsigned long last_idle_bklogged;
+       /*
+        * Cumulative service received from the @bfq_queue since the
+        * last transition from idle to backlogged.
+        */
+       unsigned long service_from_backlogged;
+
+       /*
+        * Value of wr start time when switching to soft rt
+        */
+       unsigned long wr_start_at_switch_to_srt;
+
+       unsigned long split_time; /* time of last split */
+};
+
+/**
+ * struct bfq_io_cq - per (request_queue, io_context) structure.
+ */
+struct bfq_io_cq {
+       /* associated io_cq structure */
+       struct io_cq icq; /* must be the first member */
+       /* array of two process queues, the sync and the async */
+       struct bfq_queue *bfqq[2];
+       /* per (request_queue, blkcg) ioprio */
+       int ioprio;
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+       uint64_t blkcg_serial_nr; /* the current blkcg serial */
+#endif
+       /*
+        * Snapshot of the idle window before merging; taken to
+        * remember this value while the queue is merged, so as to be
+        * able to restore it in case of split.
+        */
+       bool saved_idle_window;
+       /*
+        * Same purpose as the previous field, for the I/O bound
+        * classification of a queue.
+        */
+       bool saved_IO_bound;
+
+       /*
+        * Same purpose as the previous fields, for the flag recording
+        * whether the queue belongs to a large burst.
+        */
+       bool saved_in_large_burst;
+       /*
+        * True if the queue belonged to a burst list before its merge
+        * with another cooperating queue.
+        */
+       bool was_in_burst_list;
+
+       /*
+        * Similar to previous fields: save wr (weight-raising) information.
+        */
+       unsigned long saved_wr_coeff;
+       unsigned long saved_last_wr_start_finish;
+       unsigned long saved_wr_start_at_switch_to_srt;
+       unsigned long saved_wr_cur_max_time; /* same type as @wr_cur_max_time */
+       struct bfq_ttime saved_ttime;
+};
+
+enum bfq_device_speed { /* speed classes for the low-latency heuristic */
+       BFQ_BFQD_FAST,
+       BFQ_BFQD_SLOW,
+};
+
+/**
+ * struct bfq_data - per-device data structure.
+ *
+ * All the fields are protected by @lock.
+ */
+struct bfq_data {
+       /* device request queue */
+       struct request_queue *queue;
+       /* dispatch queue */
+       struct list_head dispatch;
+
+       /* root bfq_group for the device */
+       struct bfq_group *root_group;
+
+       /*
+        * rbtree of weight counters of @bfq_queues, sorted by
+        * weight. Used to keep track of whether all @bfq_queues have
+        * the same weight. The tree contains one counter for each
+        * distinct weight associated to some active and not
+        * weight-raised @bfq_queue (see the comments to the functions
+        * bfq_weights_tree_[add|remove] for further details).
+        */
+       struct rb_root queue_weights_tree;
+       /*
+        * rbtree of non-queue @bfq_entity weight counters, sorted by
+        * weight. Used to keep track of whether all @bfq_groups have
+        * the same weight. The tree contains one counter for each
+        * distinct weight associated to some active @bfq_group (see
+        * the comments to the functions bfq_weights_tree_[add|remove]
+        * for further details).
+        */
+       struct rb_root group_weights_tree;
+
+       /*
+        * Number of bfq_queues containing requests (including the
+        * queue in service, even if it is idling).
+        */
+       int busy_queues;
+       /* number of weight-raised busy @bfq_queues */
+       int wr_busy_queues;
+       /* number of queued requests */
+       int queued;
+       /* number of requests dispatched and waiting for completion */
+       int rq_in_driver;
+
+       /*
+        * Maximum number of requests in driver in the last
+        * @hw_tag_samples completed requests.
+        */
+       int max_rq_in_driver;
+       /* number of samples used to calculate hw_tag */
+       int hw_tag_samples;
+       /* flag set to one if the driver is showing a queueing behavior */
+       int hw_tag;
+
+       /* number of budgets assigned */
+       int budgets_assigned;
+
+       /*
+        * Timer set when idling (waiting) for the next request from
+        * the queue in service.
+        */
+       struct hrtimer idle_slice_timer;
+
+       /* bfq_queue in service */
+       struct bfq_queue *in_service_queue;
+
+       /* on-disk position of the last served request */
+       sector_t last_position;
+
+       /* time of last request completion (ns) */
+       u64 last_completion;
+
+       /* time of first rq dispatch in current observation interval (ns) */
+       u64 first_dispatch;
+       /* time of last rq dispatch in current observation interval (ns) */
+       u64 last_dispatch;
+
+       /* beginning of the last budget */
+       ktime_t last_budget_start;
+       /* beginning of the last idle slice */
+       ktime_t last_idling_start;
+
+       /* number of samples in current observation interval */
+       int peak_rate_samples;
+       /* num of samples of seq dispatches in current observation interval */
+       u32 sequential_samples;
+       /* total num of sectors transferred in current observation interval */
+       u64 tot_sectors_dispatched;
+       /* max rq size seen during current observation interval (sectors) */
+       u32 last_rq_max_size;
+       /* time elapsed from first dispatch in current observ. interval (us) */
+       u64 delta_from_first;
+       /*
+        * Current estimate of the device peak rate, measured in
+        * sectors/usec and left-shifted by BFQ_RATE_SHIFT. The
+        * left-shift is performed to increase precision in
+        * fixed-point calculations.
+        */
+       u32 peak_rate;
+
+       /* maximum budget allotted to a bfq_queue before rescheduling */
+       int bfq_max_budget;
+
+       /* list of all the bfq_queues active on the device */
+       struct list_head active_list;
+       /* list of all the bfq_queues idle on the device */
+       struct list_head idle_list;
+
+       /*
+        * Timeout for async/sync requests; when it fires, requests
+        * are served in fifo order.
+        */
+       u64 bfq_fifo_expire[2];
+       /* weight of backward seeks wrt forward ones */
+       unsigned int bfq_back_penalty;
+       /* maximum allowed backward seek */
+       unsigned int bfq_back_max;
+       /* maximum idling time */
+       u32 bfq_slice_idle;
+
+       /* user-configured max budget value (0 for auto-tuning) */
+       int bfq_user_max_budget;
+       /*
+        * Timeout for bfq_queues to consume their budget; used to
+        * prevent seeky queues from imposing long latencies to
+        * sequential or quasi-sequential ones (this also implies that
+        * seeky queues cannot receive guarantees in the service
+        * domain; after a timeout they are charged for the time they
+        * have been in service, to preserve fairness among them, but
+        * without service-domain guarantees).
+        */
+       unsigned int bfq_timeout;
+
+       /*
+        * Number of consecutive requests that must be issued within
+        * the idle time slice to set again idling to a queue which
+        * was marked as non-I/O-bound (see the definition of the
+        * IO_bound flag for further details).
+        */
+       unsigned int bfq_requests_within_timer;
+
+       /*
+        * Force device idling whenever needed to provide accurate
+        * service guarantees, without caring about throughput
+        * issues. CAVEAT: this may even increase latencies, in case
+        * of useless idling for processes that did stop doing I/O.
+        */
+       bool strict_guarantees;
+
+       /*
+        * Last time at which a queue entered the current burst of
+        * queues being activated shortly after each other; for more
+        * details about this and the following parameters related to
+        * a burst of activations, see the comments on the function
+        * bfq_handle_burst.
+        */
+       unsigned long last_ins_in_burst;
+       /*
+        * Reference time interval used to decide whether a queue has
+        * been activated shortly after @last_ins_in_burst.
+        */
+       unsigned long bfq_burst_interval;
+       /* number of queues in the current burst of queue activations */
+       int burst_size;
+
+       /* common parent entity for the queues in the burst */
+       struct bfq_entity *burst_parent_entity;
+       /* Maximum burst size above which the current queue-activation
+        * burst is deemed as 'large'.
+        */
+       unsigned long bfq_large_burst_thresh;
+       /* true if a large queue-activation burst is in progress */
+       bool large_burst;
+       /*
+        * Head of the burst list (as for the above fields, more
+        * details in the comments on the function bfq_handle_burst).
+        */
+       struct hlist_head burst_list;
+
+       /* if set to true, low-latency heuristics are enabled */
+       bool low_latency;
+       /*
+        * Maximum factor by which the weight of a weight-raised queue
+        * is multiplied.
+        */
+       unsigned int bfq_wr_coeff;
+       /* maximum duration of a weight-raising period (jiffies) */
+       unsigned int bfq_wr_max_time;
+
+       /* Maximum weight-raising duration for soft real-time processes */
+       unsigned int bfq_wr_rt_max_time;
+       /*
+        * Minimum idle period after which weight-raising may be
+        * reactivated for a queue (in jiffies).
+        */
+       unsigned int bfq_wr_min_idle_time;
+       /*
+        * Minimum period between request arrivals after which
+        * weight-raising may be reactivated for an already busy async
+        * queue (in jiffies).
+        */
+       unsigned long bfq_wr_min_inter_arr_async;
+
+       /* Max service-rate for a soft real-time queue, in sectors/sec */
+       unsigned int bfq_wr_max_softrt_rate;
+       /*
+        * Cached value of the product R*T, used for computing the
+        * maximum duration of weight raising automatically.
+        */
+       u64 RT_prod;
+       /* device-speed class for the low-latency heuristic */
+       enum bfq_device_speed device_speed;
+
+       /* fallback dummy bfqq for extreme OOM conditions */
+       struct bfq_queue oom_bfqq;
+
+       spinlock_t lock; /* protects all the fields of this structure */
+
+       /*
+        * bic associated with the task issuing current bio for
+        * merging. This and the next field are used as a support to
+        * be able to perform the bic lookup, needed by bio-merge
+        * functions, before the scheduler lock is taken, and thus
+        * avoid taking the request-queue lock while the scheduler
+        * lock is being held.
+        */
+       struct bfq_io_cq *bio_bic;
+       /* bfqq associated with the task issuing current bio for merging */
+       struct bfq_queue *bio_bfqq;
+};
+
+enum bfqq_state_flags {
+       BFQQF_just_created = 0, /* queue just allocated */
+       BFQQF_busy,             /* has requests or is in service */
+       BFQQF_wait_request,     /* waiting for a request */
+       BFQQF_non_blocking_wait_rq, /*
+                                    * waiting for a request
+                                    * without idling the device
+                                    */
+       BFQQF_fifo_expire,      /* FIFO checked in this slice */
+       BFQQF_idle_window,      /* slice idling enabled */
+       BFQQF_sync,             /* synchronous queue */
+       BFQQF_IO_bound,         /*
+                                * bfqq has timed-out at least once
+                                * having consumed at most 2/10 of
+                                * its budget
+                                */
+       BFQQF_in_large_burst,   /*
+                                * bfqq activated in a large burst,
+                                * see comments to bfq_handle_burst.
+                                */
+       BFQQF_softrt_update,    /*
+                                * may need softrt-next-start
+                                * update
+                                */
+       BFQQF_coop,             /* bfqq is shared by cooperating queues */
+       BFQQF_split_coop        /* shared bfqq will be split */
+};
+
+#define BFQ_BFQQ_FNS(name)                                             \
+void bfq_mark_bfqq_##name(struct bfq_queue *bfqq);                     \
+void bfq_clear_bfqq_##name(struct bfq_queue *bfqq);                    \
+int bfq_bfqq_##name(const struct bfq_queue *bfqq);
+
+BFQ_BFQQ_FNS(just_created);
+BFQ_BFQQ_FNS(busy);
+BFQ_BFQQ_FNS(wait_request);
+BFQ_BFQQ_FNS(non_blocking_wait_rq);
+BFQ_BFQQ_FNS(fifo_expire);
+BFQ_BFQQ_FNS(idle_window);
+BFQ_BFQQ_FNS(sync);
+BFQ_BFQQ_FNS(IO_bound);
+BFQ_BFQQ_FNS(in_large_burst);
+BFQ_BFQQ_FNS(coop);
+BFQ_BFQQ_FNS(split_coop);
+BFQ_BFQQ_FNS(softrt_update);
+#undef BFQ_BFQQ_FNS /* helper only for the mark/clear/test prototypes above */
+
+/* Expiration reasons, passed as @reason to bfq_bfqq_expire(). */
+enum bfqq_expiration {
+       BFQQE_TOO_IDLE = 0,             /*
+                                        * queue has been idling for
+                                        * too long
+                                        */
+       BFQQE_BUDGET_TIMEOUT,   /* budget took too long to be used */
+       BFQQE_BUDGET_EXHAUSTED, /* budget consumed */
+       BFQQE_NO_MORE_REQUESTS, /* the queue has no more requests */
+       BFQQE_PREEMPTED         /* preemption in progress */
+};
+
+struct bfqg_stats {
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+       /* number of ios merged */
+       struct blkg_rwstat              merged;
+       /* total time spent on device in ns, may not be accurate w/ queueing */
+       struct blkg_rwstat              service_time;
+       /* total time spent waiting in scheduler queue in ns */
+       struct blkg_rwstat              wait_time;
+       /* number of IOs queued up */
+       struct blkg_rwstat              queued;
+       /* total disk time of this group (NOTE(review): no sectors field here) */
+       struct blkg_stat                time;
+       /* sum of number of ios queued across all samples */
+       struct blkg_stat                avg_queue_size_sum;
+       /* count of samples taken for average */
+       struct blkg_stat                avg_queue_size_samples;
+       /* how many times this group has been removed from service tree */
+       struct blkg_stat                dequeue;
+       /* total time spent waiting for it to be assigned a timeslice. */
+       struct blkg_stat                group_wait_time;
+       /* time spent idling for this blkcg_gq */
+       struct blkg_stat                idle_time;
+       /* total time with empty current active q with other requests queued */
+       struct blkg_stat                empty_time;
+       /* fields after this shouldn't be cleared on stat reset */
+       uint64_t                        start_group_wait_time;
+       uint64_t                        start_idle_time;
+       uint64_t                        start_empty_time;
+       uint16_t                        flags;
+#endif /* CONFIG_BFQ_GROUP_IOSCHED */
+};
+
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+
+/*
+ * struct bfq_group_data - per-blkcg storage for the blkio subsystem.
+ *
+ * @pd: @blkcg_policy_data that this structure inherits
+ * @weight: weight of the bfq_group
+ */
+struct bfq_group_data {
+       /* must be the first member */
+       struct blkcg_policy_data pd;
+
+       unsigned int weight;
+};
+
+/**
+ * struct bfq_group - per (device, cgroup) data structure.
+ * @pd: blkg policy data; must be the first member.
+ * @entity: schedulable entity to insert into the parent group sched_data.
+ * @sched_data: own sched_data, to contain child entities (they may be
+ *              both bfq_queues and bfq_groups).
+ * @bfqd: the bfq_data for the device this group acts upon.
+ * @async_bfqq: array of async queues for all the tasks belonging to
+ *              the group, one queue per ioprio value per ioprio_class,
+ *              except for the idle class that has only one queue.
+ * @async_idle_bfqq: async queue for the idle class (ioprio is ignored).
+ * @my_entity: pointer to @entity, %NULL for the toplevel group; used to
+ *             avoid too many special cases during group creation/migration.
+ * @stats: stats for this bfqg.
+ * @active_entities: number of active entities belonging to the group;
+ *                   unused for the root group. Used to know whether there
+ *                   are groups with more than one active @bfq_entity
+ *                   (see the comments to the function
+ *                   bfq_bfqq_may_idle()).
+ * @rq_pos_tree: rbtree sorted by next_request position, used when
+ *               determining if two or more queues have interleaving
+ *               requests (see bfq_find_close_cooperator()).
+ *
+ * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup
+ * there is a set of bfq_groups, each one collecting the lower-level
+ * entities belonging to the group that are acting on the same device.
+ *
+ * Locking works as follows:
+ *    o @bfqd is protected by the queue lock, RCU is used to access it
+ *      from the readers.
+ *    o All the other fields are protected by the @bfqd queue lock.
+ */
+struct bfq_group {
+       /* must be the first member */
+       struct blkg_policy_data pd;
+
+       struct bfq_entity entity;
+       struct bfq_sched_data sched_data;
+
+       void *bfqd;
+
+       struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
+       struct bfq_queue *async_idle_bfqq;
+
+       struct bfq_entity *my_entity;
+
+       int active_entities;
+
+       struct rb_root rq_pos_tree;
+
+       struct bfqg_stats stats;
+};
+
+#else /* CONFIG_BFQ_GROUP_IOSCHED */
+struct bfq_group {
+       struct bfq_sched_data sched_data;
+
+       struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
+       struct bfq_queue *async_idle_bfqq;
+
+       struct rb_root rq_pos_tree;
+};
+#endif /* CONFIG_BFQ_GROUP_IOSCHED */
+
+struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity);
+
+/* --------------- main algorithm interface ----------------- */
+
+#define BFQ_SERVICE_TREE_INIT  ((struct bfq_service_tree)              \
+                               { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 })
+
+extern const int bfq_timeout;
+
+struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync);
+void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync);
+struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic);
+void bfq_requeue_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
+void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq);
+void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity,
+                         struct rb_root *root);
+void bfq_weights_tree_remove(struct bfq_data *bfqd, struct bfq_entity *entity,
+                            struct rb_root *root);
+void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+                    bool compensate, enum bfqq_expiration reason);
+void bfq_put_queue(struct bfq_queue *bfqq);
+void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
+void bfq_schedule_dispatch(struct bfq_data *bfqd);
+void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
+
+/* ------------ end of main algorithm interface -------------- */
+
+/* ---------------- cgroups-support interface ---------------- */
+
+void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
+                             unsigned int op);
+void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op);
+void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op);
+void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
+                                 uint64_t io_start_time, unsigned int op);
+void bfqg_stats_update_dequeue(struct bfq_group *bfqg);
+void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg);
+void bfqg_stats_update_idle_time(struct bfq_group *bfqg);
+void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg);
+void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg);
+void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+                  struct bfq_group *bfqg);
+
+void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg);
+void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio);
+void bfq_end_wr_async(struct bfq_data *bfqd);
+struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
+                                    struct blkcg *blkcg);
+struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
+struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
+struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node);
+void bfqg_put(struct bfq_group *bfqg);
+
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+extern struct cftype bfq_blkcg_legacy_files[];
+extern struct cftype bfq_blkg_files[];
+extern struct blkcg_policy blkcg_policy_bfq;
+#endif
+
+/* ------------- end of cgroups-support interface ------------- */
+
+/* - interface of the internal hierarchical B-WF2Q+ scheduler - */
+
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+/* both next loops stop at one of the child entities of the root group */
+#define for_each_entity(entity)        \
+       for (; entity ; entity = entity->parent)
+
+/*
+ * For each iteration, compute parent in advance, so as to be safe if
+ * entity is deallocated during the iteration. Such a deallocation may
+ * happen as a consequence of a bfq_put_queue that frees the bfq_queue
+ * containing entity.
+ */
+#define for_each_entity_safe(entity, parent) \
+       for (; entity && ({ parent = entity->parent; 1; }); entity = parent)
+
+#else /* CONFIG_BFQ_GROUP_IOSCHED */
+/*
+ * Next two macros are fake loops when cgroups support is not
+ * enabled. In fact, in such a case, there is only one level to go up
+ * (to reach the root group).
+ */
+#define for_each_entity(entity)        \
+       for (; entity ; entity = NULL)
+
+#define for_each_entity_safe(entity, parent) \
+       for (parent = NULL; entity ; entity = parent)
+#endif /* CONFIG_BFQ_GROUP_IOSCHED */
+
+struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq);
+struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity);
+struct bfq_service_tree *bfq_entity_service_tree(struct bfq_entity *entity);
+struct bfq_entity *bfq_entity_of(struct rb_node *node);
+unsigned short bfq_ioprio_to_weight(int ioprio);
+void bfq_put_idle_entity(struct bfq_service_tree *st,
+                        struct bfq_entity *entity);
+struct bfq_service_tree *
+__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
+                               struct bfq_entity *entity);
+void bfq_bfqq_served(struct bfq_queue *bfqq, int served);
+void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+                         unsigned long time_ms);
+bool __bfq_deactivate_entity(struct bfq_entity *entity,
+                            bool ins_into_idle_tree);
+bool next_queue_may_preempt(struct bfq_data *bfqd);
+struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd);
+void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd);
+void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+                        bool ins_into_idle_tree, bool expiration);
+void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
+void bfq_requeue_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
+void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+                      bool expiration);
+void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq);
+
+/* --------------- end of interface of B-WF2Q+ ---------------- */
+
+/* Logging facilities. */
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
+
+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do {                    \
+       char __pbuf[128];                                               \
+                                                                       \
+       blkg_path(bfqg_to_blkg(bfqq_group(bfqq)), __pbuf, sizeof(__pbuf)); \
+       blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s " fmt, (bfqq)->pid, \
+                       bfq_bfqq_sync((bfqq)) ? 'S' : 'A',              \
+                         __pbuf, ##args);                              \
+} while (0)
+
+#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {                    \
+       char __pbuf[128];                                               \
+                                                                       \
+       blkg_path(bfqg_to_blkg(bfqg), __pbuf, sizeof(__pbuf));          \
+       blk_add_trace_msg((bfqd)->queue, "%s " fmt, __pbuf, ##args);    \
+} while (0)
+
+#else /* CONFIG_BFQ_GROUP_IOSCHED */
+
+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \
+       blk_add_trace_msg((bfqd)->queue, "bfq%d%c " fmt, (bfqq)->pid,   \
+                       bfq_bfqq_sync((bfqq)) ? 'S' : 'A',              \
+                               ##args)
+#define bfq_log_bfqg(bfqd, bfqg, fmt, args...)         do {} while (0)
+
+#endif /* CONFIG_BFQ_GROUP_IOSCHED */
+
+#define bfq_log(bfqd, fmt, args...) \
+       blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args)
+
+#endif /* _BFQ_H */
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c

new file mode 100644 (file)

index 0000000..b4fc3e4
--- /dev/null
+++ b/block/bfq-wf2q.c
@@ -0,0 +1,1616 @@
+/*
+ * Hierarchical Budget Worst-case Fair Weighted Fair Queueing
+ * (B-WF2Q+): hierarchical scheduling algorithm by which the BFQ I/O
+ * scheduler schedules generic entities. The latter can represent
+ * either single bfq queues (associated with processes) or groups of
+ * bfq queues (associated with cgroups).
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License as
+ *  published by the Free Software Foundation; either version 2 of the
+ *  License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ */
+#include "bfq-iosched.h"
+
+/**
+ * bfq_gt - compare two timestamps.
+ * @a: first ts.
+ * @b: second ts.
+ *
+ * Return @a > @b, dealing with wrapping correctly.
+ */
+static int bfq_gt(u64 a, u64 b)
+{
+       return (s64)(a - b) > 0;
+}
+
+static struct bfq_entity *bfq_root_active_entity(struct rb_root *tree)
+{
+       struct rb_node *node = tree->rb_node;
+
+       return rb_entry(node, struct bfq_entity, rb_node);
+}
+
+static unsigned int bfq_class_idx(struct bfq_entity *entity)
+{
+       struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+       return bfqq ? bfqq->ioprio_class - 1 :
+               BFQ_DEFAULT_GRP_CLASS - 1;
+}
+
+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd);
+
+static bool bfq_update_parent_budget(struct bfq_entity *next_in_service);
+
+/**
+ * bfq_update_next_in_service - update sd->next_in_service
+ * @sd: sched_data for which to perform the update.
+ * @new_entity: if not NULL, pointer to the entity whose activation,
+ *             requeueing or repositioning triggered the invocation of
+ *             this function.
+ *
+ * This function is called to update sd->next_in_service, which, in
+ * its turn, may change as a consequence of the insertion or
+ * extraction of an entity into/from one of the active trees of
+ * sd. These insertions/extractions occur as a consequence of
+ * activations/deactivations of entities, with some activations being
+ * 'true' activations, and other activations being requeueings (i.e.,
+ * implementing the second, requeueing phase of the mechanism used to
+ * reposition an entity in its active tree; see comments on
+ * __bfq_activate_entity and __bfq_requeue_entity for details). In
+ * both the last two activation sub-cases, new_entity points to the
+ * just activated or requeued entity.
+ *
+ * Returns true if sd->next_in_service changes in such a way that
+ * entity->parent may become the next_in_service for its parent
+ * entity.
+ */
+static bool bfq_update_next_in_service(struct bfq_sched_data *sd,
+                                      struct bfq_entity *new_entity)
+{
+       struct bfq_entity *next_in_service = sd->next_in_service;
+       bool parent_sched_may_change = false;
+
+       /*
+        * If this update is triggered by the activation, requeueing
+        * or repositioning of an entity that does not coincide with
+        * sd->next_in_service, then a full lookup in the active tree
+        * can be avoided. In fact, it is enough to check whether the
+        * just-modified entity has a higher priority than
+        * sd->next_in_service, or, even if it has the same priority
+        * as sd->next_in_service, is eligible and has a lower virtual
+        * finish time than sd->next_in_service. If this compound
+        * condition holds, then the new entity becomes the new
+        * next_in_service. Otherwise no change is needed.
+        */
+       if (new_entity && new_entity != sd->next_in_service) {
+               /*
+                * Flag used to decide whether to replace
+                * sd->next_in_service with new_entity. Tentatively
+                * set to true, and left as true if
+                * sd->next_in_service is NULL.
+                */
+               bool replace_next = true;
+
+               /*
+                * If there is already a next_in_service candidate
+                * entity, then compare class priorities or timestamps
+                * to decide whether to replace sd->next_in_service with
+                * new_entity.
+                */
+               if (next_in_service) {
+                       unsigned int new_entity_class_idx =
+                               bfq_class_idx(new_entity);
+                       struct bfq_service_tree *st =
+                               sd->service_tree + new_entity_class_idx;
+
+                       /*
+                        * For efficiency, evaluate the most likely
+                        * sub-condition first.
+                        */
+                       replace_next =
+                               (new_entity_class_idx ==
+                                bfq_class_idx(next_in_service)
+                                &&
+                                !bfq_gt(new_entity->start, st->vtime)
+                                &&
+                                bfq_gt(next_in_service->finish,
+                                       new_entity->finish))
+                               ||
+                               new_entity_class_idx <
+                               bfq_class_idx(next_in_service);
+               }
+
+               if (replace_next)
+                       next_in_service = new_entity;
+       } else /* invoked because of a deactivation: lookup needed */
+               next_in_service = bfq_lookup_next_entity(sd);
+
+       if (next_in_service) {
+               parent_sched_may_change = !sd->next_in_service ||
+                       bfq_update_parent_budget(next_in_service);
+       }
+
+       sd->next_in_service = next_in_service;
+
+       /*
+        * If no candidate was found, the flag is still false here.
+        */
+       return parent_sched_may_change;
+}
+
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+
+struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
+{
+       struct bfq_entity *group_entity = bfqq->entity.parent;
+
+       if (!group_entity)
+               group_entity = &bfqq->bfqd->root_group->entity;
+
+       return container_of(group_entity, struct bfq_group, entity);
+}
+
+/*
+ * Returns true if this budget change may let next_in_service->parent
+ * become the next_in_service entity for its parent entity.
+ */
+static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
+{
+       struct bfq_entity *bfqg_entity;
+       struct bfq_group *bfqg;
+       struct bfq_sched_data *group_sd;
+       bool ret = false;
+
+       group_sd = next_in_service->sched_data;
+
+       bfqg = container_of(group_sd, struct bfq_group, sched_data);
+       /*
+        * bfq_group's my_entity field is not NULL only if the group
+        * is not the root group. We must not touch the root entity
+        * as it must never become an in-service entity.
+        */
+       bfqg_entity = bfqg->my_entity;
+       if (bfqg_entity) {
+               if (bfqg_entity->budget > next_in_service->budget)
+                       ret = true;
+               bfqg_entity->budget = next_in_service->budget;
+       }
+
+       return ret;
+}
+
+/*
+ * This function tells whether entity stops being a candidate for next
+ * service, according to the following logic.
+ *
+ * This function is invoked for an entity that is about to be set in
+ * service. If such an entity is a queue, then the entity is no longer
+ * a candidate for next service (i.e, a candidate entity to serve
+ * after the in-service entity is expired). The function then returns
+ * true.
+ *
+ * In contrast, the entity could still be a candidate for next service
+ * if it is not a queue, and has more than one child. In fact, even if
+ * one of its children is about to be set in service, other children
+ * may still be the next to serve. As a consequence, a non-queue
+ * entity is not a candidate for next-service only if it has only one
+ * child. And only if this condition holds, then the function returns
+ * true for a non-queue entity.
+ */
+static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
+{
+       struct bfq_group *bfqg;
+
+       if (bfq_entity_to_bfqq(entity))
+               return true;
+
+       bfqg = container_of(entity, struct bfq_group, entity);
+
+       if (bfqg->active_entities == 1)
+               return true;
+
+       return false;
+}
+
+#else /* CONFIG_BFQ_GROUP_IOSCHED */
+
+struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
+{
+       return bfqq->bfqd->root_group;
+}
+
+static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
+{
+       return false;
+}
+
+static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
+{
+       return true;
+}
+
+#endif /* CONFIG_BFQ_GROUP_IOSCHED */
+
+/*
+ * Shift for timestamp calculations.  This actually limits the maximum
+ * service allowed in one timestamp delta (small shift values increase it),
+ * the maximum total weight that can be used for the queues in the system
+ * (big shift values increase it), and the period of virtual time
+ * wraparounds.
+ */
+#define WFQ_SERVICE_SHIFT      22
+
+struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity)
+{
+       struct bfq_queue *bfqq = NULL;
+
+       if (!entity->my_sched_data)
+               bfqq = container_of(entity, struct bfq_queue, entity);
+
+       return bfqq;
+}
+
+
+/**
+ * bfq_delta - map service into the virtual time domain.
+ * @service: amount of service, scaled up by WFQ_SERVICE_SHIFT bits.
+ * @weight: non-zero divisor (weight of an entity or weight sum).
+ */
+static u64 bfq_delta(unsigned long service, unsigned long weight)
+{
+       u64 d = (u64)service << WFQ_SERVICE_SHIFT;
+
+       do_div(d, weight);
+       return d;
+}
+
+/**
+ * bfq_calc_finish - assign the finish time to an entity.
+ * @entity: the entity to act upon.
+ * @service: the service to be charged to the entity.
+ */
+static void bfq_calc_finish(struct bfq_entity *entity, unsigned long service)
+{
+       struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+       entity->finish = entity->start +
+               bfq_delta(service, entity->weight);
+
+       if (bfqq) {
+               bfq_log_bfqq(bfqq->bfqd, bfqq,
+                       "calc_finish: serv %lu, w %d",
+                       service, entity->weight);
+               bfq_log_bfqq(bfqq->bfqd, bfqq,
+                       "calc_finish: start %llu, finish %llu, delta %llu",
+                       entity->start, entity->finish,
+                       bfq_delta(service, entity->weight));
+       }
+}
+
+/**
+ * bfq_entity_of - get an entity from a node.
+ * @node: the node field of the entity.
+ *
+ * Convert a node pointer to the corresponding entity.  This is used only
+ * to simplify the logic of some functions and not as the generic
+ * conversion mechanism because, e.g., in the tree walking functions,
+ * the check for a %NULL value would be redundant.
+ */
+struct bfq_entity *bfq_entity_of(struct rb_node *node)
+{
+       struct bfq_entity *entity = NULL;
+
+       if (node)
+               entity = rb_entry(node, struct bfq_entity, rb_node);
+
+       return entity;
+}
+
+/**
+ * bfq_extract - remove an entity from a tree.
+ * @root: the tree root.
+ * @entity: the entity to remove.
+ */
+static void bfq_extract(struct rb_root *root, struct bfq_entity *entity)
+{
+       entity->tree = NULL;
+       rb_erase(&entity->rb_node, root);
+}
+
+/**
+ * bfq_idle_extract - extract an entity from the idle tree.
+ * @st: the service tree of the owning @entity.
+ * @entity: the entity being removed.
+ */
+static void bfq_idle_extract(struct bfq_service_tree *st,
+                            struct bfq_entity *entity)
+{
+       struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+       struct rb_node *next;
+
+       if (entity == st->first_idle) {
+               next = rb_next(&entity->rb_node);
+               st->first_idle = bfq_entity_of(next);
+       }
+
+       if (entity == st->last_idle) {
+               next = rb_prev(&entity->rb_node);
+               st->last_idle = bfq_entity_of(next);
+       }
+
+       bfq_extract(&st->idle, entity);
+
+       if (bfqq)
+               list_del(&bfqq->bfqq_list);
+}
+
+/**
+ * bfq_insert - generic tree insertion.
+ * @root: tree root.
+ * @entity: entity to insert.
+ *
+ * This is used for the idle and the active tree, since they are both
+ * ordered by finish time.
+ */
+static void bfq_insert(struct rb_root *root, struct bfq_entity *entity)
+{
+       struct bfq_entity *entry;
+       struct rb_node **node = &root->rb_node;
+       struct rb_node *parent = NULL;
+
+       while (*node) {
+               parent = *node;
+               entry = rb_entry(parent, struct bfq_entity, rb_node);
+
+               if (bfq_gt(entry->finish, entity->finish))
+                       node = &parent->rb_left;
+               else
+                       node = &parent->rb_right;
+       }
+
+       rb_link_node(&entity->rb_node, parent, node);
+       rb_insert_color(&entity->rb_node, root);
+
+       entity->tree = root;
+}
+
+/**
+ * bfq_update_min - update the min_start field of an entity.
+ * @entity: the entity to update.
+ * @node: one of its children.
+ *
+ * This function is called when @entity may store an invalid value for
+ * min_start due to updates to the active tree.  The function assumes
+ * that the subtree rooted at @node (which may be its left or its right
+ * child) has a valid min_start value.
+ */
+static void bfq_update_min(struct bfq_entity *entity, struct rb_node *node)
+{
+       struct bfq_entity *child;
+
+       if (node) {
+               child = rb_entry(node, struct bfq_entity, rb_node);
+               if (bfq_gt(entity->min_start, child->min_start))
+                       entity->min_start = child->min_start;
+       }
+}
+
+/**
+ * bfq_update_active_node - recalculate min_start.
+ * @node: the node to update.
+ *
+ * @node may have changed position or one of its children may have moved,
+ * this function updates its min_start value.  The left and right subtrees
+ * are assumed to hold a correct min_start value.
+ */
+static void bfq_update_active_node(struct rb_node *node)
+{
+       struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node);
+
+       entity->min_start = entity->start;
+       bfq_update_min(entity, node->rb_right);
+       bfq_update_min(entity, node->rb_left);
+}
+
+/**
+ * bfq_update_active_tree - update min_start for the whole active tree.
+ * @node: the starting node.
+ *
+ * @node must be the deepest modified node after an update.  This function
+ * updates its min_start using the values held by its children, assuming
+ * that they did not change, and then updates all the nodes that may have
+ * changed in the path to the root.  The only nodes that may have changed
+ * are the ones in the path or their siblings.
+ */
+static void bfq_update_active_tree(struct rb_node *node)
+{
+       struct rb_node *parent;
+
+up:
+       bfq_update_active_node(node);
+
+       parent = rb_parent(node);
+       if (!parent)
+               return;
+       /* Refresh node's sibling; a lone left child is just refreshed again. */
+       if (node == parent->rb_left && parent->rb_right)
+               bfq_update_active_node(parent->rb_right);
+       else if (parent->rb_left)
+               bfq_update_active_node(parent->rb_left);
+
+       node = parent;
+       goto up;
+}
+
+/**
+ * bfq_active_insert - insert an entity in the active tree of its
+ *                     group/device.
+ * @st: the service tree of the entity.
+ * @entity: the entity being inserted.
+ *
+ * The active tree is ordered by finish time, but an extra key is kept
+ * per each node, containing the minimum value for the start times of
+ * its children (and the node itself), so it's possible to search for
+ * the eligible node with the lowest finish time in logarithmic time.
+ */
+static void bfq_active_insert(struct bfq_service_tree *st,
+                             struct bfq_entity *entity)
+{
+       struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+       struct rb_node *node = &entity->rb_node;
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+       struct bfq_sched_data *sd = NULL;
+       struct bfq_group *bfqg = NULL;
+       struct bfq_data *bfqd = NULL;
+#endif
+
+       bfq_insert(&st->active, entity);
+
+       if (node->rb_left)
+               node = node->rb_left;
+       else if (node->rb_right)
+               node = node->rb_right;
+       /* Refresh min_start from here up to the root of the active tree. */
+       bfq_update_active_tree(node);
+
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+       sd = entity->sched_data;
+       bfqg = container_of(sd, struct bfq_group, sched_data);
+       bfqd = (struct bfq_data *)bfqg->bfqd;
+#endif
+       if (bfqq)
+               list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list);
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+       else /* bfq_group */
+               bfq_weights_tree_add(bfqd, entity, &bfqd->group_weights_tree);
+
+       if (bfqg != bfqd->root_group)
+               bfqg->active_entities++;
+#endif
+}
+
+/**
+ * bfq_ioprio_to_weight - calc a weight from an ioprio.
+ * @ioprio: the ioprio value to convert.
+ */
+unsigned short bfq_ioprio_to_weight(int ioprio)
+{
+       return (IOPRIO_BE_NR - ioprio) * BFQ_WEIGHT_CONVERSION_COEFF;
+}
+
+/**
+ * bfq_weight_to_ioprio - calc an ioprio from a weight.
+ * @weight: the weight value to convert.
+ *
+ * To preserve as much as possible the old only-ioprio user interface,
+ * 0 is used as an escape ioprio value for weights (numerically) equal or
+ * larger than IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF.
+ */
+static unsigned short bfq_weight_to_ioprio(int weight)
+{
+       return max_t(int, 0,
+                    IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - weight);
+}
+
+static void bfq_get_entity(struct bfq_entity *entity)
+{
+       struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+       if (bfqq) {
+               bfqq->ref++;
+               bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d",
+                            bfqq, bfqq->ref);
+       }
+}
+
+/**
+ * bfq_find_deepest - find the deepest node that an extraction can modify.
+ * @node: the node being removed.
+ *
+ * Do the first step of an extraction in an rb tree, looking for the
+ * node that will replace @node, and returning the deepest node that
+ * the following modifications to the tree can touch.  If @node is the
+ * last node in the tree return %NULL.
+ */
+static struct rb_node *bfq_find_deepest(struct rb_node *node)
+{
+       struct rb_node *deepest;
+
+       if (!node->rb_right && !node->rb_left)
+               deepest = rb_parent(node);
+       else if (!node->rb_right)
+               deepest = node->rb_left;
+       else if (!node->rb_left)
+               deepest = node->rb_right;
+       else {
+               deepest = rb_next(node);
+               if (deepest->rb_right)
+                       deepest = deepest->rb_right;
+               else if (rb_parent(deepest) != node)
+                       deepest = rb_parent(deepest);
+       }
+
+       return deepest;
+}
+
+/**
+ * bfq_active_extract - remove an entity from the active tree.
+ * @st: the service_tree containing the tree.
+ * @entity: the entity being removed.
+ */
+static void bfq_active_extract(struct bfq_service_tree *st,
+                              struct bfq_entity *entity)
+{
+       struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+       struct rb_node *node;
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+       struct bfq_sched_data *sd = NULL;
+       struct bfq_group *bfqg = NULL;
+       struct bfq_data *bfqd = NULL;
+#endif
+
+       node = bfq_find_deepest(&entity->rb_node);
+       bfq_extract(&st->active, entity);
+
+       if (node)
+               bfq_update_active_tree(node);
+
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+       sd = entity->sched_data;
+       bfqg = container_of(sd, struct bfq_group, sched_data);
+       bfqd = (struct bfq_data *)bfqg->bfqd;
+#endif
+       if (bfqq)
+               list_del(&bfqq->bfqq_list);
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+       else /* bfq_group */
+               bfq_weights_tree_remove(bfqd, entity,
+                                       &bfqd->group_weights_tree);
+
+       if (bfqg != bfqd->root_group)
+               bfqg->active_entities--;
+#endif
+}
+
+/**
+ * bfq_idle_insert - insert an entity into the idle tree.
+ * @st: the service tree containing the tree.
+ * @entity: the entity to insert.
+ */
+static void bfq_idle_insert(struct bfq_service_tree *st,
+                           struct bfq_entity *entity)
+{
+       struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+       struct bfq_entity *first_idle = st->first_idle;
+       struct bfq_entity *last_idle = st->last_idle;
+
+       if (!first_idle || bfq_gt(first_idle->finish, entity->finish))
+               st->first_idle = entity;
+       if (!last_idle || bfq_gt(entity->finish, last_idle->finish))
+               st->last_idle = entity;
+
+       bfq_insert(&st->idle, entity);
+
+       if (bfqq)
+               list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list);
+}
+
+/**
+ * bfq_forget_entity - do not consider entity any longer for scheduling
+ * @st: the service tree.
+ * @entity: the entity being removed.
+ * @is_in_service: true if entity is currently the in-service entity.
+ *
+ * Forget everything about @entity. In addition, if entity represents
+ * a queue, and the latter is not in service, then release the service
+ * reference to the queue (the one taken through bfq_get_entity). In
+ * fact, in this case, there is really no more service reference to
+ * the queue, as the latter is also outside any service tree. If,
+ * instead, the queue is in service, then __bfq_bfqd_reset_in_service
+ * will take care of putting the reference when the queue finally
+ * stops being served.
+ */
+static void bfq_forget_entity(struct bfq_service_tree *st,
+                             struct bfq_entity *entity,
+                             bool is_in_service)
+{
+       struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+       entity->on_st = false;
+       st->wsum -= entity->weight;
+       if (bfqq && !is_in_service)
+               bfq_put_queue(bfqq);
+}
+
+/**
+ * bfq_put_idle_entity - release the idle tree ref of an entity.
+ * @st: service tree for the entity.
+ * @entity: the entity being released.
+ */
+void bfq_put_idle_entity(struct bfq_service_tree *st, struct bfq_entity *entity)
+{
+       bfq_idle_extract(st, entity);
+       bfq_forget_entity(st, entity,
+                         entity == entity->sched_data->in_service_entity);
+}
+
+/**
+ * bfq_forget_idle - update the idle tree if necessary.
+ * @st: the service tree to act upon.
+ *
+ * To preserve the global O(log N) complexity we only remove one entry here;
+ * as the idle tree will not grow indefinitely this can be done safely.
+ */
+static void bfq_forget_idle(struct bfq_service_tree *st)
+{
+       struct bfq_entity *first_idle = st->first_idle;
+       struct bfq_entity *last_idle = st->last_idle;
+
+       if (RB_EMPTY_ROOT(&st->active) && last_idle &&
+           !bfq_gt(last_idle->finish, st->vtime)) {
+               /*
+                * Forget the whole idle tree, increasing the vtime past
+                * the last finish time of idle entities.
+                */
+               st->vtime = last_idle->finish;
+       }
+
+       if (first_idle && !bfq_gt(first_idle->finish, st->vtime))
+               bfq_put_idle_entity(st, first_idle);
+}
+
+struct bfq_service_tree *bfq_entity_service_tree(struct bfq_entity *entity)
+{
+       struct bfq_sched_data *sched_data = entity->sched_data;
+       unsigned int idx = bfq_class_idx(entity);
+
+       return sched_data->service_tree + idx;
+}
+
+
+struct bfq_service_tree *
+__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
+                               struct bfq_entity *entity)
+{
+       struct bfq_service_tree *new_st = old_st;
+
+       if (entity->prio_changed) {
+               struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+               unsigned int prev_weight, new_weight;
+               struct bfq_data *bfqd = NULL;
+               struct rb_root *root;
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+               struct bfq_sched_data *sd;
+               struct bfq_group *bfqg;
+#endif
+
+               if (bfqq)
+                       bfqd = bfqq->bfqd;
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+               else {
+                       sd = entity->my_sched_data;
+                       bfqg = container_of(sd, struct bfq_group, sched_data);
+                       bfqd = (struct bfq_data *)bfqg->bfqd;
+               }
+#endif
+               /* Remove the old weight; the new one is re-added below. */
+               old_st->wsum -= entity->weight;
+
+               if (entity->new_weight != entity->orig_weight) {
+                       if (entity->new_weight < BFQ_MIN_WEIGHT ||
+                           entity->new_weight > BFQ_MAX_WEIGHT) {
+                               pr_crit("update_weight_prio: new_weight %d\n",
+                                       entity->new_weight);
+                               if (entity->new_weight < BFQ_MIN_WEIGHT)
+                                       entity->new_weight = BFQ_MIN_WEIGHT;
+                               else
+                                       entity->new_weight = BFQ_MAX_WEIGHT;
+                       }
+                       entity->orig_weight = entity->new_weight;
+                       if (bfqq)
+                               bfqq->ioprio =
+                                 bfq_weight_to_ioprio(entity->orig_weight);
+               }
+
+               if (bfqq)
+                       bfqq->ioprio_class = bfqq->new_ioprio_class;
+               entity->prio_changed = 0;
+
+               /*
+                * NOTE: here we may be changing the weight too early,
+                * this will cause unfairness.  The correct approach
+                * would have required additional complexity to defer
+                * weight changes to the proper time instants (i.e.,
+                * when entity->finish <= old_st->vtime).
+                */
+               new_st = bfq_entity_service_tree(entity);
+
+               prev_weight = entity->weight;
+               new_weight = entity->orig_weight *
+                            (bfqq ? bfqq->wr_coeff : 1);
+               /*
+                * If the weight of the entity changes, remove the entity
+                * from its old weight counter (if there is a counter
+                * associated with the entity), and add it to the counter
+                * associated with its new weight.
+                */
+               if (prev_weight != new_weight) {
+                       root = bfqq ? &bfqd->queue_weights_tree :
+                                     &bfqd->group_weights_tree;
+                       bfq_weights_tree_remove(bfqd, entity, root);
+               }
+               entity->weight = new_weight;
+               /*
+                * Add the entity to its weights tree only if it is
+                * not associated with a weight-raised queue.
+                */
+               if (prev_weight != new_weight &&
+                   (bfqq ? bfqq->wr_coeff == 1 : 1))
+                       /* If we get here, root has been initialized. */
+                       bfq_weights_tree_add(bfqd, entity, root);
+
+               new_st->wsum += entity->weight;
+               /* On a tree change, restart from the new tree's vtime. */
+               if (new_st != old_st)
+                       entity->start = new_st->vtime;
+       }
+
+       return new_st;
+}
+
+/**
+ * bfq_bfqq_served - update the scheduler status after selection for
+ *                   service.
+ * @bfqq: the queue being served.
+ * @served: bytes to transfer.
+ *
+ * NOTE: this can be optimized, as the timestamps of upper level entities
+ * are synchronized every time a new bfqq is selected for service.  For now,
+ * we keep it to better check consistency.
+ */
+void bfq_bfqq_served(struct bfq_queue *bfqq, int served)
+{
+       struct bfq_entity *entity = &bfqq->entity;
+       struct bfq_service_tree *st;
+
+       for_each_entity(entity) {
+               st = bfq_entity_service_tree(entity);
+
+               entity->service += served;
+
+               st->vtime += bfq_delta(served, st->wsum);
+               bfq_forget_idle(st);
+       }
+       bfqg_stats_set_start_empty_time(bfqq_group(bfqq));
+       bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %d secs", served);
+}
+
+/**
+ * bfq_bfqq_charge_time - charge an amount of service equivalent to the length
+ *                       of the time interval during which bfqq has been in
+ *                       service.
+ * @bfqd: the device
+ * @bfqq: the queue that needs a service update.
+ * @time_ms: the amount of time during which the queue has received service
+ *
+ * If a queue does not consume its budget fast enough, then providing
+ * the queue with service fairness may impair throughput, more or less
+ * severely. For this reason, queues that consume their budget slowly
+ * are provided with time fairness instead of service fairness. This
+ * goal is achieved through the BFQ scheduling engine, even if such an
+ * engine works in the service, and not in the time domain. The trick
+ * is charging these queues with an inflated amount of service, equal
+ * to the amount of service that they would have received during their
+ * service slot if they had been fast, i.e., if their requests had
+ * been dispatched at a rate equal to the estimated peak rate.
+ *
+ * It is worth noting that time fairness can cause important
+ * distortions in terms of bandwidth distribution, on devices with
+ * internal queueing. The reason is that I/O requests dispatched
+ * during the service slot of a queue may be served after that service
+ * slot is finished, and may have a total processing time loosely
+ * correlated with the duration of the service slot. This is
+ * especially true for short service slots.
+ */
+void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+                         unsigned long time_ms)
+{
+       struct bfq_entity *entity = &bfqq->entity;
+       int tot_serv_to_charge = entity->service;
+       unsigned int timeout_ms = jiffies_to_msecs(bfq_timeout);
+
+       if (time_ms > 0 && time_ms < timeout_ms)
+               tot_serv_to_charge =
+                       (bfqd->bfq_max_budget * time_ms) / timeout_ms;
+
+       if (tot_serv_to_charge < entity->service)
+               tot_serv_to_charge = entity->service;
+
+       /* Increase budget to avoid inconsistencies */
+       if (tot_serv_to_charge > entity->budget)
+               entity->budget = tot_serv_to_charge;
+
+       bfq_bfqq_served(bfqq,
+                       max_t(int, 0, tot_serv_to_charge - entity->service));
+}
+
+static void bfq_update_fin_time_enqueue(struct bfq_entity *entity,
+                                       struct bfq_service_tree *st,
+                                       bool backshifted)
+{
+       struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+       st = __bfq_entity_update_weight_prio(st, entity);
+       bfq_calc_finish(entity, entity->budget);
+
+       /*
+        * If some queues enjoy backshifting for a while, then their
+        * (virtual) finish timestamps may happen to become lower and
+        * lower than the system virtual time.  In particular, if
+        * these queues often happen to be idle for short time
+        * periods, and during such time periods other queues with
+        * higher timestamps happen to be busy, then the backshifted
+        * timestamps of the former queues can become much lower than
+        * the system virtual time. In fact, to serve the queues with
+        * higher timestamps while the ones with lower timestamps are
+        * idle, the system virtual time may be pushed-up to much
+        * higher values than the finish timestamps of the idle
+        * queues. As a consequence, the finish timestamps of all new
+        * or newly activated queues may end up being much larger than
+        * those of lucky queues with backshifted timestamps. The
+        * latter queues may then monopolize the device for a lot of
+        * time. This would simply break service guarantees.
+        *
+        * To reduce this problem, push up a little bit the
+        * backshifted timestamps of the queue associated with this
+        * entity (only a queue can happen to have the backshifted
+        * flag set): just enough to let the finish timestamp of the
+        * queue be equal to the current value of the system virtual
+        * time. This may introduce a little unfairness among queues
+        * with backshifted timestamps, but it does not break
+        * worst-case fairness guarantees.
+        *
+        * As a special case, if bfqq is weight-raised, push up
+        * timestamps much less, to keep very low the probability that
+        * this push up causes the backshifted finish timestamps of
+        * weight-raised queues to become higher than the backshifted
+        * finish timestamps of non weight-raised queues.
+        */
+       if (backshifted && bfq_gt(st->vtime, entity->finish)) {
+               unsigned long delta = st->vtime - entity->finish;
+
+               if (bfqq)
+                       delta /= bfqq->wr_coeff;
+
+               entity->start += delta;
+               entity->finish += delta;
+       }
+
+       bfq_active_insert(st, entity);
+}
+
+/**
+ * __bfq_activate_entity - handle activation of entity.
+ * @entity: the entity being activated.
+ * @non_blocking_wait_rq: true if entity was waiting for a request
+ *
+ * Called for a 'true' activation, i.e., if entity is not active and
+ * one of its children receives a new request.
+ *
+ * Basically, this function updates the timestamps of entity and
+ * inserts entity into its active tree, after possibly extracting it
+ * from its idle tree.
+ */
+static void __bfq_activate_entity(struct bfq_entity *entity,
+                                 bool non_blocking_wait_rq)
+{
+       struct bfq_service_tree *st = bfq_entity_service_tree(entity);
+       bool backshifted = false;
+       unsigned long long min_vstart;
+
+       /*
+        * See comments on bfq_bfqq_update_budg_for_activation.
+        *
+        * If entity was waiting for a request and its finish time is
+        * behind the virtual time of st, then flag it as backshifted
+        * and use its old finish time, instead of the current virtual
+        * time, as the minimum value for its new start time.
+        */
+       if (non_blocking_wait_rq && bfq_gt(st->vtime, entity->finish)) {
+               backshifted = true;
+               min_vstart = entity->finish;
+       } else
+               min_vstart = st->vtime;
+
+       if (entity->tree == &st->idle) {
+               /*
+                * Must be on the idle tree, bfq_idle_extract() will
+                * check for that. After the extraction, entity
+                * starts from min_vstart if that is beyond its old
+                * finish time, and from its old finish time
+                * otherwise.
+                */
+               bfq_idle_extract(st, entity);
+               entity->start = bfq_gt(min_vstart, entity->finish) ?
+                       min_vstart : entity->finish;
+       } else {
+               /*
+                * The finish time of the entity may be invalid, and
+                * it is in the past for sure, otherwise the queue
+                * would have been on the idle tree.
+                */
+               entity->start = min_vstart;
+               st->wsum += entity->weight;
+               /*
+                * entity is about to be inserted into a service tree,
+                * and then set in service: get a reference to make
+                * sure entity does not disappear until it is no
+                * longer in service or scheduled for service.
+                */
+               bfq_get_entity(entity);
+
+               entity->on_st = true;
+       }
+
+       bfq_update_fin_time_enqueue(entity, st, backshifted);
+}
+
+/**
+ * __bfq_requeue_entity - handle requeueing or repositioning of an entity.
+ * @entity: the entity being requeued or repositioned.
+ *
+ * Requeueing is needed if this entity stops being served, which
+ * happens if a leaf descendant entity has expired. On the other hand,
+ * repositioning is needed if the next_inservice_entity for the child
+ * entity has changed. See the comments inside the function for
+ * details.
+ *
+ * Basically, this function: 1) removes entity from its active tree if
+ * present there, 2) updates the timestamps of entity and 3) inserts
+ * entity back into its active tree (in the new, right position for
+ * the new values of the timestamps).
+ */
+static void __bfq_requeue_entity(struct bfq_entity *entity)
+{
+       struct bfq_sched_data *sd = entity->sched_data;
+       struct bfq_service_tree *st = bfq_entity_service_tree(entity);
+
+       if (entity == sd->in_service_entity) {
+               /*
+                * We are requeueing the current in-service entity,
+                * which may have to be done for one of the following
+                * reasons:
+                * - entity represents the in-service queue, and the
+                *   in-service queue is being requeued after an
+                *   expiration;
+                * - entity represents a group, and its budget has
+                *   changed because one of its child entities has
+                *   just been either activated or requeued for some
+                *   reason; the timestamps of the entity need then to
+                *   be updated, and the entity needs to be enqueued
+                *   or repositioned accordingly.
+                *
+                * In particular, before requeueing, the start time of
+                * the entity must be moved forward to account for the
+                * service that the entity has received while in
+                * service. This is done by the next instructions. The
+                * finish time will then be updated according to this
+                * new value of the start time, and to the budget of
+                * the entity.
+                */
+               bfq_calc_finish(entity, entity->service);
+               entity->start = entity->finish;
+               /*
+                * In addition, if the entity had more than one child
+                * when set in service, then was not extracted from
+                * the active tree. This implies that the position of
+                * the entity in the active tree may need to be
+                * changed now, because we have just updated the start
+                * time of the entity, and we will update its finish
+                * time in a moment (the requeueing is then, more
+                * precisely, a repositioning in this case). To
+                * implement this repositioning, we: 1) dequeue the
+                * entity here, 2) update the finish time and
+                * requeue the entity according to the new
+                * timestamps below.
+                */
+               if (entity->tree)
+                       bfq_active_extract(st, entity);
+       } else { /* The entity is already active, and not in service */
+               /*
+                * In this case, this function gets called only if the
+                * next_in_service entity below this entity has
+                * changed, and this change has caused the budget of
+                * this entity to change, which, finally implies that
+                * the finish time of this entity must be
+                * updated. Such an update may cause the scheduling,
+                * i.e., the position in the active tree, of this
+                * entity to change. We handle this change by: 1)
+                * dequeueing the entity here, 2) updating the finish
+                * time and requeueing the entity according to the new
+                * timestamps below. This is the same approach as the
+                * non-extracted-entity sub-case above.
+                */
+               bfq_active_extract(st, entity);
+       }
+
+       bfq_update_fin_time_enqueue(entity, st, false);
+}
+
+static void __bfq_activate_requeue_entity(struct bfq_entity *entity,
+                                         struct bfq_sched_data *sd,
+                                         bool non_blocking_wait_rq)
+{
+       struct bfq_service_tree *st = bfq_entity_service_tree(entity);
+
+       if (sd->in_service_entity == entity || entity->tree == &st->active)
+                /*
+                 * In service or already queued on the active tree:
+                 * requeue or reposition the entity.
+                 */
+               __bfq_requeue_entity(entity);
+       else
+               /*
+                * Not in service and not queued on its active tree:
+                * the entity is idle and this is a true activation.
+                */
+               __bfq_activate_entity(entity, non_blocking_wait_rq);
+}
+
+
+/**
+ * bfq_activate_requeue_entity - activate or requeue an entity representing a
+ *                      bfq_queue, and activate, requeue or reposition all
+ *                      ancestors for which such an update becomes necessary.
+ * @entity: the entity to activate.
+ * @non_blocking_wait_rq: true if this entity was waiting for a request
+ * @requeue: true if this is a requeue, which implies that bfqq is
+ *          being expired; thus ALL its ancestors stop being served and must
+ *          therefore be requeued
+ */
+static void bfq_activate_requeue_entity(struct bfq_entity *entity,
+                                       bool non_blocking_wait_rq,
+                                       bool requeue)
+{
+       struct bfq_sched_data *sd;
+
+       for_each_entity(entity) {
+               sd = entity->sched_data;
+               __bfq_activate_requeue_entity(entity, sd, non_blocking_wait_rq);
+
+               if (!bfq_update_next_in_service(sd, entity) && !requeue)
+                       break;
+       }
+}
+
+/**
+ * __bfq_deactivate_entity - deactivate an entity from its service tree.
+ * @entity: the entity to deactivate.
+ * @ins_into_idle_tree: if false, the entity will not be put into the
+ *                     idle tree.
+ *
+ * Deactivates an entity, independently from its previous state.  Must
+ * be invoked only if entity is on a service tree. Extracts the entity
+ * from that tree, and if necessary and allowed, puts it on the idle
+ * tree.
+ */
+bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
+{
+       struct bfq_sched_data *sd = entity->sched_data;
+       struct bfq_service_tree *st = bfq_entity_service_tree(entity);
+       int is_in_service = entity == sd->in_service_entity;
+
+       if (!entity->on_st) /* entity never activated, or already inactive */
+               return false;
+
+       if (is_in_service)
+               bfq_calc_finish(entity, entity->service);
+
+       if (entity->tree == &st->active)
+               bfq_active_extract(st, entity);
+       else if (!is_in_service && entity->tree == &st->idle)
+               bfq_idle_extract(st, entity);
+
+       if (!ins_into_idle_tree || !bfq_gt(entity->finish, st->vtime))
+               bfq_forget_entity(st, entity, is_in_service);
+       else
+               bfq_idle_insert(st, entity);
+
+       return true;
+}
+
+/**
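+ * bfq_deactivate_entity - deactivate an entity representing a bfq_queue.
+ * @entity: the entity to deactivate.
+ * @ins_into_idle_tree: true if the entity can be put on the idle tree
+ * @expiration: true if this deactivation is part of an expiration; if so,
+ *              every remaining ancestor is requeued, even where
+ *              next_in_service does not change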
+ */
+static void bfq_deactivate_entity(struct bfq_entity *entity,
+                                 bool ins_into_idle_tree,
+                                 bool expiration)
+{
+       struct bfq_sched_data *sd;
+       struct bfq_entity *parent = NULL;
+
+       for_each_entity_safe(entity, parent) {
+               sd = entity->sched_data;
+
+               if (!__bfq_deactivate_entity(entity, ins_into_idle_tree)) {
+                       /*
+                        * entity is not in any tree any more, so
+                        * this deactivation is a no-op, and there is
+                        * nothing to change for upper-level entities
+                        * (in case of expiration, this can never
+                        * happen).
+                        */
+                       return;
+               }
+
+               if (sd->next_in_service == entity)
+                       /*
+                        * entity was the next_in_service entity,
+                        * then, since entity has just been
+                        * deactivated, a new one must be found.
+                        */
+                       bfq_update_next_in_service(sd, NULL);
+
+               if (sd->next_in_service)
+                       /*
+                        * The parent entity is still backlogged,
+                        * because next_in_service is not NULL. So, no
+                        * further upwards deactivation must be
+                        * performed.  Yet, next_in_service has
+                        * changed.  Then the schedule does need to be
+                        * updated upwards.
+                        */
+                       break;
+
+               /*
+                * If we get here, then the parent is no more
+                * backlogged and we need to propagate the
+                * deactivation upwards. Thus let the loop go on.
+                */
+
+               /*
+                * Also let parent be queued into the idle tree on
+                * deactivation, to preserve service guarantees, and
+                * assuming that who invoked this function does not
+                * need parent entities too to be removed completely.
+                */
+               ins_into_idle_tree = true;
+       }
+
+       /*
+        * If the deactivation loop is fully executed, then there are
+        * no more entities to touch and next loop is not executed at
+        * all. Otherwise, requeue remaining entities if they are
+        * about to stop receiving service, or reposition them if this
+        * is not the case.
+        */
+       entity = parent;
+       for_each_entity(entity) {
+               /*
+                * Invoke __bfq_requeue_entity on entity, even if
+                * already active, to requeue/reposition it in the
+                * active tree (because sd->next_in_service has
+                * changed)
+                */
+               __bfq_requeue_entity(entity);
+
+               sd = entity->sched_data;
+               if (!bfq_update_next_in_service(sd, entity) &&
+                   !expiration)
+                       /*
+                        * next_in_service unchanged or not causing
+                        * any change in entity->parent->sd, and no
+                        * requeueing needed for expiration: stop
+                        * here.
+                        */
+                       break;
+       }
+}
+
+/**
+ * bfq_calc_vtime_jump - compute the value to which the vtime should jump,
+ *                       if needed, to have at least one entity eligible.
+ * @st: the service tree to act upon.
+ *
+ * Assumes that st is not empty.
+ */
+static u64 bfq_calc_vtime_jump(struct bfq_service_tree *st)
+{
+       struct bfq_entity *root_entity = bfq_root_active_entity(&st->active);
+
+       if (bfq_gt(root_entity->min_start, st->vtime))
+               return root_entity->min_start;
+
+       return st->vtime;
+}
+
+static void bfq_update_vtime(struct bfq_service_tree *st, u64 new_value)
+{
+       if (new_value > st->vtime) {
+               st->vtime = new_value;
+               bfq_forget_idle(st);
+       }
+}
+
+/**
+ * bfq_first_active_entity - find the eligible entity with
+ *                           the smallest finish time
+ * @st: the service tree to select from.
+ * @vtime: the system virtual time to use as a reference for eligibility
+ *
+ * This function searches the first schedulable entity, starting from the
+ * root of the tree and going on the left every time on this side there is
+ * a subtree with at least one eligible (start <= vtime) entity. The path on
+ * the right is followed only if a) the left subtree contains no eligible
+ * entities and b) no eligible entity has been found yet.
+ */
+static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st,
+                                                 u64 vtime)
+{
+       struct bfq_entity *entry, *first = NULL;
+       struct rb_node *node = st->active.rb_node;
+
+       while (node) {
+               entry = rb_entry(node, struct bfq_entity, rb_node);
+left:
+               if (!bfq_gt(entry->start, vtime))
+                       first = entry;
+
+               if (node->rb_left) {
+                       entry = rb_entry(node->rb_left,
+                                        struct bfq_entity, rb_node);
+                       if (!bfq_gt(entry->min_start, vtime)) {
+                               node = node->rb_left;
+                               goto left;
+                       }
+               }
+               if (first)
+                       break;
+               node = node->rb_right;
+       }
+
+       return first;
+}
+
+/**
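+ * __bfq_lookup_next_entity - return the first eligible entity in @st.
+ * @st: the service tree.
+ * @in_service: true if the sched_data @st belongs to has an in-service
+ *              entity; in that case the virtual time of @st is not updated.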
+ *
+ * If there is no in-service entity for the sched_data st belongs to,
+ * then return the entity that will be set in service if:
+ * 1) the parent entity this st belongs to is set in service;
+ * 2) no entity belonging to such parent entity undergoes a state change
+ * that would influence the timestamps of the entity (e.g., becomes idle,
+ * becomes backlogged, changes its budget, ...).
+ *
+ * In this first case, update the virtual time in @st too (see the
+ * comments on this update inside the function).
+ *
+ * In contrast, if there is an in-service entity, then return the
+ * entity that would be set in service if not only the above
+ * conditions, but also the next one held true: the currently
+ * in-service entity, on expiration,
+ * 1) gets a finish time equal to the current one, or
+ * 2) is not eligible any more, or
+ * 3) is idle.
+ */
+static struct bfq_entity *
+__bfq_lookup_next_entity(struct bfq_service_tree *st, bool in_service)
+{
+       struct bfq_entity *entity;
+       u64 new_vtime;
+
+       if (RB_EMPTY_ROOT(&st->active))
+               return NULL;
+
+       /*
+        * Get the value of the system virtual time for which at
+        * least one entity is eligible.
+        */
+       new_vtime = bfq_calc_vtime_jump(st);
+
+       /*
+        * If there is no in-service entity for the sched_data this
+        * active tree belongs to, then push the system virtual time
+        * up to the value that guarantees that at least one entity is
+        * eligible. If, instead, there is an in-service entity, then
+        * do not make any such update, because there is already an
+        * eligible entity, namely the in-service one (even if the
+        * entity is not on st, because it was extracted when set in
+        * service).
+        */
+       if (!in_service)
+               bfq_update_vtime(st, new_vtime);
+
+       entity = bfq_first_active_entity(st, new_vtime);
+
+       return entity;
+}
+
+/**
+ * bfq_lookup_next_entity - return the first eligible entity in @sd.
+ * @sd: the sched_data.
+ *
+ * This function is invoked when there has been a change in the trees
+ * for sd, and we need to know what the new next entity is after this
+ * change.
+ */
+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd)
+{
+       struct bfq_service_tree *st = sd->service_tree;
+       struct bfq_service_tree *idle_class_st = st + (BFQ_IOPRIO_CLASSES - 1);
+       struct bfq_entity *entity = NULL;
+       int class_idx = 0;
+
+       /*
+        * Choose from idle class, if needed to guarantee a minimum
+        * bandwidth to this class (and if there is some active entity
+        * in idle class). This should also mitigate
+        * priority-inversion problems in case a low priority task is
+        * holding file system resources.
+        */
+       if (time_is_before_jiffies(sd->bfq_class_idle_last_service +
+                                  BFQ_CL_IDLE_TIMEOUT)) {
+               if (!RB_EMPTY_ROOT(&idle_class_st->active))
+                       class_idx = BFQ_IOPRIO_CLASSES - 1;
+               /* About to be served if backlogged, or not yet backlogged */
+               sd->bfq_class_idle_last_service = jiffies;
+       }
+
+       /*
+        * Find the next entity to serve for the highest-priority
+        * class, unless the idle class needs to be served.
+        */
+       for (; class_idx < BFQ_IOPRIO_CLASSES; class_idx++) {
+               entity = __bfq_lookup_next_entity(st + class_idx,
+                                                 sd->in_service_entity);
+
+               if (entity)
+                       break;
+       }
+
+       if (!entity)
+               return NULL;
+
+       return entity;
+}
+
+bool next_queue_may_preempt(struct bfq_data *bfqd)
+{
+       struct bfq_sched_data *sd = &bfqd->root_group->sched_data;
+
+       return sd->next_in_service != sd->in_service_entity;
+}
+
+/*
+ * Get next queue for service.
+ */
+struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
+{
+       struct bfq_entity *entity = NULL;
+       struct bfq_sched_data *sd;
+       struct bfq_queue *bfqq;
+
+       if (bfqd->busy_queues == 0)
+               return NULL;
+
+       /*
+        * Traverse the path from the root to the leaf entity to
+        * serve. Set in service all the entities visited along the
+        * way.
+        */
+       sd = &bfqd->root_group->sched_data;
+       for (; sd ; sd = entity->my_sched_data) {
+               /*
+                * WARNING. We are about to set the in-service entity
+                * to sd->next_in_service, i.e., to the (cached) value
+                * returned by bfq_lookup_next_entity(sd) the last
+                * time it was invoked, i.e., the last time when the
+                * service order in sd changed as a consequence of the
+                * activation or deactivation of an entity. In this
+                * respect, if we execute bfq_lookup_next_entity(sd)
+                * in this very moment, it may, although with low
+                * probability, yield a different entity than that
+                * pointed to by sd->next_in_service. This rare event
+                * happens in case there was no CLASS_IDLE entity to
+                * serve for sd when bfq_lookup_next_entity(sd) was
+                * invoked for the last time, while there is now one
+                * such entity.
+                *
+                * If the above event happens, then the scheduling of
+                * such entity in CLASS_IDLE is postponed until the
+                * service of the sd->next_in_service entity
+                * finishes. In fact, when the latter is expired,
+                * bfq_lookup_next_entity(sd) gets called again,
+                * exactly to update sd->next_in_service.
+                */
+
+               /* Make next_in_service entity become in_service_entity */
+               entity = sd->next_in_service;
+               sd->in_service_entity = entity;
+
+               /*
+                * Reset the accumulator of the amount of service that
+                * the entity is about to receive.
+                */
+               entity->service = 0;
+
+               /*
+                * If entity is no longer a candidate for next
+                * service, then we extract it from its active tree,
+                * for the following reason. To further boost the
+                * throughput in some special case, BFQ needs to know
+                * which is the next candidate entity to serve, while
+                * there is already an entity in service. In this
+                * respect, to make it easy to compute/update the next
+                * candidate entity to serve after the current
+                * candidate has been set in service, there is a case
+                * where it is necessary to extract the current
+                * candidate from its service tree. Such a case is
+                * when the entity just set in service cannot be also
+                * a candidate for next service. Details about when
+                * this condition holds are reported in the comments
+                * on the function bfq_no_longer_next_in_service()
+                * invoked below.
+                */
+               if (bfq_no_longer_next_in_service(entity))
+                       bfq_active_extract(bfq_entity_service_tree(entity),
+                                          entity);
+
+               /*
+                * For the same reason why we may have just extracted
+                * entity from its active tree, we may need to update
+                * next_in_service for the sched_data of entity too,
+                * regardless of whether entity has been extracted.
+                * In fact, even if entity has not been extracted, a
+                * descendant entity may get extracted. Such an event
+                * would cause a change in next_in_service for the
+                * level of the descendant entity, and thus possibly
+                * back to upper levels.
+                *
+                * We cannot perform the resulting needed update
+                * before the end of this loop, because, to know which
+                * is the correct next-to-serve candidate entity for
+                * each level, we need first to find the leaf entity
+                * to set in service. In fact, only after we know
+                * which is the next-to-serve leaf entity, we can
+                * discover whether the parent entity of the leaf
+                * entity becomes the next-to-serve, and so on.
+                */
+
+       }
+
+       bfqq = bfq_entity_to_bfqq(entity);
+
+       /*
+        * We can finally update all next-to-serve entities along the
+        * path from the leaf entity just set in service to the root.
+        */
+       for_each_entity(entity) {
+               struct bfq_sched_data *sd = entity->sched_data;
+
+               if (!bfq_update_next_in_service(sd, NULL))
+                       break;
+       }
+
+       return bfqq;
+}
+
+void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
+{
+       struct bfq_queue *in_serv_bfqq = bfqd->in_service_queue;
+       struct bfq_entity *in_serv_entity = &in_serv_bfqq->entity;
+       struct bfq_entity *entity = in_serv_entity;
+
+       bfq_clear_bfqq_wait_request(in_serv_bfqq);
+       hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
+       bfqd->in_service_queue = NULL;
+
+       /*
+        * When this function is called, all in-service entities have
+        * been properly deactivated or requeued, so we can safely
+        * execute the final step: reset in_service_entity along the
+        * path from entity to the root.
+        */
+       for_each_entity(entity)
+               entity->sched_data->in_service_entity = NULL;
+
+       /*
+        * in_serv_entity is no longer in service, so, if it is in no
+        * service tree either, then release the service reference to
+        * the queue it represents (taken with bfq_get_entity).
+        */
+       if (!in_serv_entity->on_st)
+               bfq_put_queue(in_serv_bfqq);
+}
+
+void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+                        bool ins_into_idle_tree, bool expiration)
+{
+       struct bfq_entity *entity = &bfqq->entity;
+
+       bfq_deactivate_entity(entity, ins_into_idle_tree, expiration);
+}
+
+void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+       struct bfq_entity *entity = &bfqq->entity;
+
+       bfq_activate_requeue_entity(entity, bfq_bfqq_non_blocking_wait_rq(bfqq),
+                                   false);
+       bfq_clear_bfqq_non_blocking_wait_rq(bfqq);
+}
+
+void bfq_requeue_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+       struct bfq_entity *entity = &bfqq->entity;
+
+       bfq_activate_requeue_entity(entity, false,
+                                   bfqq == bfqd->in_service_queue);
+}
+
+/*
+ * Called when the bfqq no longer has requests pending, remove it from
+ * the service tree. As a special case, it can be invoked during an
+ * expiration.
+ */
+void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+                      bool expiration)
+{
+       bfq_log_bfqq(bfqd, bfqq, "del from busy");
+
+       bfq_clear_bfqq_busy(bfqq);
+
+       bfqd->busy_queues--;
+
+       if (!bfqq->dispatched)
+               bfq_weights_tree_remove(bfqd, &bfqq->entity,
+                                       &bfqd->queue_weights_tree);
+
+       if (bfqq->wr_coeff > 1)
+               bfqd->wr_busy_queues--;
+
+       bfqg_stats_update_dequeue(bfqq_group(bfqq));
+
+       bfq_deactivate_bfqq(bfqd, bfqq, true, expiration);
+}
+
+/*
+ * Called when an inactive queue receives a new request.
+ */
+void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+       bfq_log_bfqq(bfqd, bfqq, "add to busy");
+
+       bfq_activate_bfqq(bfqd, bfqq);
+
+       bfq_mark_bfqq_busy(bfqq);
+       bfqd->busy_queues++;
+
+       if (!bfqq->dispatched)
+               if (bfqq->wr_coeff == 1)
+                       bfq_weights_tree_add(bfqd, &bfqq->entity,
+                                            &bfqd->queue_weights_tree);
+
+       if (bfqq->wr_coeff > 1)
+               bfqd->wr_busy_queues++;
+}
diff --git a/block/bio.c b/block/bio.c

index 5eec5e08417f6ff1989e3e2a07b31c62901953d5..f4d2071802663f0264711edad40b9126fa1f1799 100644 (file)
--- a/block/bio.c
+++ b/block/bio.c
@@ -30,6 +30,7 @@
  #include <linux/cgroup.h>
  
  #include <trace/events/block.h>
+#include "blk.h"
  
  /*
   * Test patch to inline a certain number of bi_io_vec's inside the bio
@@ -376,10 +377,14 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
         bio_list_init(&punt);
         bio_list_init(&nopunt);
  
-       while ((bio = bio_list_pop(current->bio_list)))
+       while ((bio = bio_list_pop(&current->bio_list[0])))
                 bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
+       current->bio_list[0] = nopunt;
  
-       *current->bio_list = nopunt;
+       bio_list_init(&nopunt);
+       while ((bio = bio_list_pop(&current->bio_list[1])))
+               bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
+       current->bio_list[1] = nopunt;
  
         spin_lock(&bs->rescue_lock);
         bio_list_merge(&bs->rescue_list, &punt);
@@ -423,7 +428,8 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
   *   RETURNS:
   *   Pointer to new bio on success, NULL on failure.
   */
-struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
+struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
+                            struct bio_set *bs)
  {
         gfp_t saved_gfp = gfp_mask;
         unsigned front_pad;
@@ -466,7 +472,9 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
                  * we retry with the original gfp_flags.
                  */
  
-               if (current->bio_list && !bio_list_empty(current->bio_list))
+               if (current->bio_list &&
+                   (!bio_list_empty(&current->bio_list[0]) ||
+                    !bio_list_empty(&current->bio_list[1])))
                         gfp_mask &= ~__GFP_DIRECT_RECLAIM;
  
                 p = mempool_alloc(bs->bio_pool, gfp_mask);
@@ -1818,6 +1826,11 @@ static inline bool bio_remaining_done(struct bio *bio)
   *   bio_endio() will end I/O on the whole bio. bio_endio() is the preferred
   *   way to end I/O on a bio. No one should call bi_end_io() directly on a
   *   bio unless they own it and thus know that it has an end_io function.
+ *
+ *   bio_endio() can be called several times on a bio that has been chained
+ *   using bio_chain().  The ->bi_end_io() function will only be called the
+ *   last time.  At this point the BLK_TA_COMPLETE tracing event will be
+ *   generated if BIO_TRACE_COMPLETION is set.
   **/
  void bio_endio(struct bio *bio)
  {
@@ -1838,6 +1851,13 @@ again:
                 goto again;
         }
  
+       if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
+               trace_block_bio_complete(bdev_get_queue(bio->bi_bdev),
+                                        bio, bio->bi_error);
+               bio_clear_flag(bio, BIO_TRACE_COMPLETION);
+       }
+
+       blk_throtl_bio_endio(bio);
         if (bio->bi_end_io)
                 bio->bi_end_io(bio);
  }
@@ -1876,6 +1896,9 @@ struct bio *bio_split(struct bio *bio, int sectors,
  
         bio_advance(bio, split->bi_iter.bi_size);
  
+       if (bio_flagged(bio, BIO_TRACE_COMPLETION))
+               bio_set_flag(split, BIO_TRACE_COMPLETION);
+
         return split;
  }
  EXPORT_SYMBOL(bio_split);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c

index bbe7ee00bd3d70cf5a3a55b9c94eaf63af368729..7c2947128f5813a677a0361eddcd277b5946d03e 100644 (file)
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -772,6 +772,27 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg,
  }
  EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum);
  
+/* Checks that @pol is enabled and @q is not bypassing, then looks up blkg. */
+static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg,
+                                         const struct blkcg_policy *pol,
+                                         struct request_queue *q)
+{
+       WARN_ON_ONCE(!rcu_read_lock_held());
+       lockdep_assert_held(q->queue_lock);
+
+       if (!blkcg_policy_enabled(q, pol))
+               return ERR_PTR(-EOPNOTSUPP);
+
+       /*
+        * This may be the first entry point into blkcg; let nothing through
+        * for a bypassing queue (-ENODEV if it is dying, -EBUSY otherwise).
+        */
+       if (unlikely(blk_queue_bypass(q)))
+               return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY);
+
+       return __blkg_lookup(blkcg, q, true /* update_hint */);
+}
+
  /**
   * blkg_conf_prep - parse and prepare for per-blkg config update
   * @blkcg: target block cgroup
@@ -789,6 +810,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
         __acquires(rcu) __acquires(disk->queue->queue_lock)
  {
         struct gendisk *disk;
+       struct request_queue *q;
         struct blkcg_gq *blkg;
         struct module *owner;
         unsigned int major, minor;
@@ -807,44 +829,95 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
         if (!disk)
                 return -ENODEV;
         if (part) {
-               owner = disk->fops->owner;
-               put_disk(disk);
-               module_put(owner);
-               return -ENODEV;
+               ret = -ENODEV;
+               goto fail;
         }
  
-       rcu_read_lock();
-       spin_lock_irq(disk->queue->queue_lock);
+       q = disk->queue;
  
-       if (blkcg_policy_enabled(disk->queue, pol))
-               blkg = blkg_lookup_create(blkcg, disk->queue);
-       else
-               blkg = ERR_PTR(-EOPNOTSUPP);
+       rcu_read_lock();
+       spin_lock_irq(q->queue_lock);
  
+       blkg = blkg_lookup_check(blkcg, pol, q);
         if (IS_ERR(blkg)) {
                 ret = PTR_ERR(blkg);
+               goto fail_unlock;
+       }
+
+       if (blkg)
+               goto success;
+
+       /*
+        * Create blkgs walking down from blkcg_root to @blkcg, so that all
+        * non-root blkgs have access to their parents.
+        */
+       while (true) {
+               struct blkcg *pos = blkcg;
+               struct blkcg *parent;
+               struct blkcg_gq *new_blkg;
+
+               parent = blkcg_parent(blkcg);
+               while (parent && !__blkg_lookup(parent, q, false)) {
+                       pos = parent;
+                       parent = blkcg_parent(parent);
+               }
+
+               /* Drop locks to do new blkg allocation with GFP_KERNEL. */
+               spin_unlock_irq(q->queue_lock);
                 rcu_read_unlock();
-               spin_unlock_irq(disk->queue->queue_lock);
-               owner = disk->fops->owner;
-               put_disk(disk);
-               module_put(owner);
-               /*
-                * If queue was bypassing, we should retry.  Do so after a
-                * short msleep().  It isn't strictly necessary but queue
-                * can be bypassing for some time and it's always nice to
-                * avoid busy looping.
-                */
-               if (ret == -EBUSY) {
-                       msleep(10);
-                       ret = restart_syscall();
+
+               new_blkg = blkg_alloc(pos, q, GFP_KERNEL);
+               if (unlikely(!new_blkg)) {
+                       ret = -ENOMEM;
+                       goto fail;
                 }
-               return ret;
-       }
  
+               rcu_read_lock();
+               spin_lock_irq(q->queue_lock);
+
+               /*
+                * A non-NULL result is either an already existing blkg or an
+                * ERR_PTR.  In both cases @new_blkg goes unused and must be
+                * freed here; otherwise it is handed to blkg_create().
+                */
+               blkg = blkg_lookup_check(pos, pol, q);
+               if (blkg)
+                       blkg_free(new_blkg);
+               else
+                       blkg = blkg_create(pos, q, new_blkg);
+
+               if (unlikely(IS_ERR(blkg))) {
+                       ret = PTR_ERR(blkg);
+                       goto fail_unlock;
+               }
+
+               if (pos == blkcg)
+                       goto success;
+       }
+success:
         ctx->disk = disk;
         ctx->blkg = blkg;
         ctx->body = body;
         return 0;
+
+fail_unlock:
+       spin_unlock_irq(q->queue_lock);
+       rcu_read_unlock();
+fail:
+       owner = disk->fops->owner;
+       put_disk(disk);
+       module_put(owner);
+       /*
+        * If queue was bypassing, we should retry.  Do so after a
+        * short msleep().  It isn't strictly necessary but queue
+        * can be bypassing for some time and it's always nice to
+        * avoid busy looping.
+        */
+       if (ret == -EBUSY) {
+               msleep(10);
+               ret = restart_syscall();
+       }
+       return ret;
  }
  EXPORT_SYMBOL_GPL(blkg_conf_prep);
  
diff --git a/block/blk-core.c b/block/blk-core.c

index 1086dac8724c995b85bf3c843a4aae080ab938a1..24886b69690f08ffd832a694c74accf139ac8906 100644 (file)
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -268,10 +268,8 @@ void blk_sync_queue(struct request_queue *q)
                 struct blk_mq_hw_ctx *hctx;
                 int i;
  
-               queue_for_each_hw_ctx(q, hctx, i) {
-                       cancel_work_sync(&hctx->run_work);
-                       cancel_delayed_work_sync(&hctx->delay_work);
-               }
+               queue_for_each_hw_ctx(q, hctx, i)
+                       cancel_delayed_work_sync(&hctx->run_work);
         } else {
                 cancel_delayed_work_sync(&q->delay_work);
         }
@@ -500,6 +498,13 @@ void blk_set_queue_dying(struct request_queue *q)
         queue_flag_set(QUEUE_FLAG_DYING, q);
         spin_unlock_irq(q->queue_lock);
  
+       /*
+        * Once the queue DYING flag is set, new requests must be kept
+        * from entering the queue, so call blk_freeze_queue_start() to
+        * prevent I/O from crossing blk_queue_enter().
+        */
+       blk_freeze_queue_start(q);
+
         if (q->mq_ops)
                 blk_mq_wake_waiters(q);
         else {
@@ -556,9 +561,13 @@ void blk_cleanup_queue(struct request_queue *q)
          * prevent that q->request_fn() gets invoked after draining finished.
          */
         blk_freeze_queue(q);
-       spin_lock_irq(lock);
-       if (!q->mq_ops)
+       if (!q->mq_ops) {
+               spin_lock_irq(lock);
                 __blk_drain_queue(q, true);
+       } else {
+               blk_mq_debugfs_unregister_mq(q);
+               spin_lock_irq(lock);
+       }
         queue_flag_set(QUEUE_FLAG_DEAD, q);
         spin_unlock_irq(lock);
  
@@ -578,8 +587,6 @@ void blk_cleanup_queue(struct request_queue *q)
                 q->queue_lock = &q->__queue_lock;
         spin_unlock_irq(lock);
  
-       put_disk_devt(q->disk_devt);
-
         /* @q is and will stay empty, shutdown and put */
         blk_put_queue(q);
  }
@@ -671,6 +678,15 @@ int blk_queue_enter(struct request_queue *q, bool nowait)
                 if (nowait)
                         return -EBUSY;
  
+               /*
+                * Read-side pair of the barrier in blk_freeze_queue_start():
+                * we need to order reading the __PERCPU_REF_DEAD flag of
+                * .q_usage_counter and reading .mq_freeze_depth or the
+                * queue dying flag, otherwise the following wait may
+                * never return if the two reads are reordered.
+                */
+               smp_rmb();
+
                 ret = wait_event_interruptible(q->mq_freeze_wq,
                                 !atomic_read(&q->mq_freeze_depth) ||
                                 blk_queue_dying(q));
@@ -722,6 +738,10 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
         if (!q->backing_dev_info)
                 goto fail_split;
  
+       q->stats = blk_alloc_queue_stats();
+       if (!q->stats)
+               goto fail_stats;
+
         q->backing_dev_info->ra_pages =
                         (VM_MAX_READAHEAD * 1024) / PAGE_SIZE;
         q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK;
@@ -778,6 +798,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
  fail_ref:
         percpu_ref_exit(&q->q_usage_counter);
  fail_bdi:
+       blk_free_queue_stats(q->stats);
+fail_stats:
         bdi_put(q->backing_dev_info);
  fail_split:
         bioset_free(q->bio_split);
@@ -891,7 +913,6 @@ out_exit_flush_rq:
                 q->exit_rq_fn(q, q->fq->flush_rq);
  out_free_flush_queue:
         blk_free_flush_queue(q->fq);
-       wbt_exit(q);
         return -ENOMEM;
  }
  EXPORT_SYMBOL(blk_init_allocated_queue);
@@ -1130,7 +1151,6 @@ static struct request *__get_request(struct request_list *rl, unsigned int op,
  
         blk_rq_init(q, rq);
         blk_rq_set_rl(rq, rl);
-       blk_rq_set_prio(rq, ioc);
         rq->cmd_flags = op;
         rq->rq_flags = rq_flags;
  
@@ -1610,17 +1630,23 @@ out:
         return ret;
  }
  
-void init_request_from_bio(struct request *req, struct bio *bio)
+void blk_init_request_from_bio(struct request *req, struct bio *bio)
  {
+       struct io_context *ioc = rq_ioc(bio);
+
         if (bio->bi_opf & REQ_RAHEAD)
                 req->cmd_flags |= REQ_FAILFAST_MASK;
  
-       req->errors = 0;
         req->__sector = bio->bi_iter.bi_sector;
         if (ioprio_valid(bio_prio(bio)))
                 req->ioprio = bio_prio(bio);
+       else if (ioc)
+               req->ioprio = ioc->ioprio;
+       else
+               req->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
         blk_rq_bio_prep(req->q, req, bio);
  }
+EXPORT_SYMBOL_GPL(blk_init_request_from_bio);
  
  static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
  {
@@ -1711,7 +1737,7 @@ get_rq:
          * We don't worry about that case for efficiency. It won't happen
          * often, and the elevators are able to handle it.
          */
-       init_request_from_bio(req, bio);
+       blk_init_request_from_bio(req, bio);
  
         if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags))
                 req->cpu = raw_smp_processor_id();
@@ -1938,7 +1964,13 @@ generic_make_request_checks(struct bio *bio)
         if (!blkcg_bio_issue_check(q, bio))
                 return false;
  
-       trace_block_bio_queue(q, bio);
+       if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
+               trace_block_bio_queue(q, bio);
+               /* Now that enqueuing has been traced, we need to trace
+                * completion as well.
+                */
+               bio_set_flag(bio, BIO_TRACE_COMPLETION);
+       }
         return true;
  
  not_supported:
@@ -1975,7 +2007,14 @@ end_io:
   */
  blk_qc_t generic_make_request(struct bio *bio)
  {
-       struct bio_list bio_list_on_stack;
+       /*
+        * bio_list_on_stack[0] contains bios submitted by the current
+        * make_request_fn.
+        * bio_list_on_stack[1] contains bios that were submitted before
+        * the current make_request_fn, but that haven't been processed
+        * yet.
+        */
+       struct bio_list bio_list_on_stack[2];
         blk_qc_t ret = BLK_QC_T_NONE;
  
         if (!generic_make_request_checks(bio))
@@ -1992,7 +2031,7 @@ blk_qc_t generic_make_request(struct bio *bio)
          * should be added at the tail
          */
         if (current->bio_list) {
-               bio_list_add(current->bio_list, bio);
+               bio_list_add(&current->bio_list[0], bio);
                 goto out;
         }
  
@@ -2011,23 +2050,39 @@ blk_qc_t generic_make_request(struct bio *bio)
          * bio_list, and call into ->make_request() again.
          */
         BUG_ON(bio->bi_next);
-       bio_list_init(&bio_list_on_stack);
-       current->bio_list = &bio_list_on_stack;
+       bio_list_init(&bio_list_on_stack[0]);
+       current->bio_list = bio_list_on_stack;
         do {
                 struct request_queue *q = bdev_get_queue(bio->bi_bdev);
  
                 if (likely(blk_queue_enter(q, false) == 0)) {
+                       struct bio_list lower, same;
+
+                       /* Create a fresh bio_list for all subordinate requests */
+                       bio_list_on_stack[1] = bio_list_on_stack[0];
+                       bio_list_init(&bio_list_on_stack[0]);
                         ret = q->make_request_fn(q, bio);
  
                         blk_queue_exit(q);
  
-                       bio = bio_list_pop(current->bio_list);
+                       /* sort new bios into those for a lower level
+                        * and those for the same level
+                        */
+                       bio_list_init(&lower);
+                       bio_list_init(&same);
+                       while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
+                               if (q == bdev_get_queue(bio->bi_bdev))
+                                       bio_list_add(&same, bio);
+                               else
+                                       bio_list_add(&lower, bio);
+                       /* now assemble so we handle the lowest level first */
+                       bio_list_merge(&bio_list_on_stack[0], &lower);
+                       bio_list_merge(&bio_list_on_stack[0], &same);
+                       bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
                 } else {
-                       struct bio *bio_next = bio_list_pop(current->bio_list);
-
                         bio_io_error(bio);
-                       bio = bio_next;
                 }
+               bio = bio_list_pop(&bio_list_on_stack[0]);
         } while (bio);
         current->bio_list = NULL; /* deactivate */
  
@@ -2457,7 +2512,7 @@ void blk_start_request(struct request *req)
         blk_dequeue_request(req);
  
         if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
-               blk_stat_set_issue_time(&req->issue_stat);
+               blk_stat_set_issue(&req->issue_stat, blk_rq_sectors(req));
                 req->rq_flags |= RQF_STATS;
                 wbt_issue(req->q->rq_wb, &req->issue_stat);
         }
@@ -2519,22 +2574,11 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
  {
         int total_bytes;
  
-       trace_block_rq_complete(req->q, req, nr_bytes);
+       trace_block_rq_complete(req, error, nr_bytes);
  
         if (!req->bio)
                 return false;
  
-       /*
-        * For fs requests, rq is just carrier of independent bio's
-        * and each partial completion should be handled separately.
-        * Reset per-request error on each partial completion.
-        *
-        * TODO: tj: This is too subtle.  It would be better to let
-        * low level drivers do what they see fit.
-        */
-       if (!blk_rq_is_passthrough(req))
-               req->errors = 0;
-
         if (error && !blk_rq_is_passthrough(req) &&
             !(req->rq_flags & RQF_QUIET)) {
                 char *error_type;
@@ -2580,6 +2624,8 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
                 if (bio_bytes == bio->bi_iter.bi_size)
                         req->bio = bio->bi_next;
  
+               /* Completion has already been traced */
+               bio_clear_flag(bio, BIO_TRACE_COMPLETION);
                 req_bio_endio(req, bio, bio_bytes, error);
  
                 total_bytes += bio_bytes;
@@ -2678,7 +2724,7 @@ void blk_finish_request(struct request *req, int error)
         struct request_queue *q = req->q;
  
         if (req->rq_flags & RQF_STATS)
-               blk_stat_add(&q->rq_stats[rq_data_dir(req)], req);
+               blk_stat_add(req);
  
         if (req->rq_flags & RQF_QUEUED)
                 blk_queue_end_tag(q, req);
@@ -2755,7 +2801,7 @@ static bool blk_end_bidi_request(struct request *rq, int error,
   *     %false - we are done with this request
   *     %true  - still buffers pending for this request
   **/
-bool __blk_end_bidi_request(struct request *rq, int error,
+static bool __blk_end_bidi_request(struct request *rq, int error,
                                    unsigned int nr_bytes, unsigned int bidi_bytes)
  {
         if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
@@ -2807,43 +2853,6 @@ void blk_end_request_all(struct request *rq, int error)
  }
  EXPORT_SYMBOL(blk_end_request_all);
  
-/**
- * blk_end_request_cur - Helper function to finish the current request chunk.
- * @rq: the request to finish the current chunk for
- * @error: %0 for success, < %0 for error
- *
- * Description:
- *     Complete the current consecutively mapped chunk from @rq.
- *
- * Return:
- *     %false - we are done with this request
- *     %true  - still buffers pending for this request
- */
-bool blk_end_request_cur(struct request *rq, int error)
-{
-       return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
-}
-EXPORT_SYMBOL(blk_end_request_cur);
-
-/**
- * blk_end_request_err - Finish a request till the next failure boundary.
- * @rq: the request to finish till the next failure boundary for
- * @error: must be negative errno
- *
- * Description:
- *     Complete @rq till the next failure boundary.
- *
- * Return:
- *     %false - we are done with this request
- *     %true  - still buffers pending for this request
- */
-bool blk_end_request_err(struct request *rq, int error)
-{
-       WARN_ON(error >= 0);
-       return blk_end_request(rq, error, blk_rq_err_bytes(rq));
-}
-EXPORT_SYMBOL_GPL(blk_end_request_err);
-
  /**
   * __blk_end_request - Helper function for drivers to complete the request.
   * @rq:       the request being processed
@@ -2903,26 +2912,6 @@ bool __blk_end_request_cur(struct request *rq, int error)
  }
  EXPORT_SYMBOL(__blk_end_request_cur);
  
-/**
- * __blk_end_request_err - Finish a request till the next failure boundary.
- * @rq: the request to finish till the next failure boundary for
- * @error: must be negative errno
- *
- * Description:
- *     Complete @rq till the next failure boundary.  Must be called
- *     with queue lock held.
- *
- * Return:
- *     %false - we are done with this request
- *     %true  - still buffers pending for this request
- */
-bool __blk_end_request_err(struct request *rq, int error)
-{
-       WARN_ON(error >= 0);
-       return __blk_end_request(rq, error, blk_rq_err_bytes(rq));
-}
-EXPORT_SYMBOL_GPL(__blk_end_request_err);
-
  void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
                      struct bio *bio)
  {
@@ -3085,6 +3074,13 @@ int kblockd_schedule_work_on(int cpu, struct work_struct *work)
  }
  EXPORT_SYMBOL(kblockd_schedule_work_on);
  
+int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork,
+                               unsigned long delay)
+{
+       return mod_delayed_work_on(cpu, kblockd_workqueue, dwork, delay);
+}
+EXPORT_SYMBOL(kblockd_mod_delayed_work_on);
+
  int kblockd_schedule_delayed_work(struct delayed_work *dwork,
                                   unsigned long delay)
  {
diff --git a/block/blk-exec.c b/block/blk-exec.c

index 8cd0e9bc8dc89b7bde4e4ab56aa7752677618f72..a9451e3b858715cb209a0b4e949ec591ca3d2d88 100644 (file)
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -69,8 +69,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
  
         if (unlikely(blk_queue_dying(q))) {
                 rq->rq_flags |= RQF_QUIET;
-               rq->errors = -ENXIO;
-               __blk_end_request_all(rq, rq->errors);
+               __blk_end_request_all(rq, -ENXIO);
                 spin_unlock_irq(q->queue_lock);
                 return;
         }
@@ -92,11 +91,10 @@ EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
   *    Insert a fully prepared request at the back of the I/O scheduler queue
   *    for execution and wait for completion.
   */
-int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
+void blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
                    struct request *rq, int at_head)
  {
         DECLARE_COMPLETION_ONSTACK(wait);
-       int err = 0;
         unsigned long hang_check;
  
         rq->end_io_data = &wait;
@@ -108,10 +106,5 @@ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
                 while (!wait_for_completion_io_timeout(&wait, hang_check * (HZ/2)));
         else
                 wait_for_completion_io(&wait);
-
-       if (rq->errors)
-               err = -EIO;
-
-       return err;
  }
  EXPORT_SYMBOL(blk_execute_rq);
diff --git a/block/blk-flush.c b/block/blk-flush.c

index 0d5a9c1da1fc71db5706926e69e2a22b7491eea9..c4e0880b54bbf4fee779c8e724c1175221ef1830 100644 (file)
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -447,7 +447,7 @@ void blk_insert_flush(struct request *rq)
                 if (q->mq_ops)
                         blk_mq_end_request(rq, 0);
                 else
-                       __blk_end_bidi_request(rq, 0, 0, 0);
+                       __blk_end_request(rq, 0, 0);
                 return;
         }
  
@@ -497,8 +497,7 @@ void blk_insert_flush(struct request *rq)
   * Description:
   *    Issue a flush for the block device in question. Caller can supply
   *    room for storing the error offset in case of a flush error, if they
- *    wish to. If WAIT flag is not passed then caller may check only what
- *    request was pushed in some internal queue for later handling.
+ *    wish to.
   */
  int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
                 sector_t *error_sector)
diff --git a/block/blk-integrity.c b/block/blk-integrity.c

index 9f0ff5ba4f84d606ff49e96711d4cc78c3922c0d..0f891a9aff4d67b67f005af2b43d9860891f206a 100644 (file)
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -389,7 +389,7 @@ static int blk_integrity_nop_fn(struct blk_integrity_iter *iter)
         return 0;
  }
  
-static struct blk_integrity_profile nop_profile = {
+static const struct blk_integrity_profile nop_profile = {
         .name = "nop",
         .generate_fn = blk_integrity_nop_fn,
         .verify_fn = blk_integrity_nop_fn,
@@ -412,12 +412,13 @@ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template
  
         bi->flags = BLK_INTEGRITY_VERIFY | BLK_INTEGRITY_GENERATE |
                 template->flags;
-       bi->interval_exp = ilog2(queue_logical_block_size(disk->queue));
+       bi->interval_exp = template->interval_exp ? :
+               ilog2(queue_logical_block_size(disk->queue));
         bi->profile = template->profile ? template->profile : &nop_profile;
         bi->tuple_size = template->tuple_size;
         bi->tag_size = template->tag_size;
  
-       blk_integrity_revalidate(disk);
+       disk->queue->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES;
  }
  EXPORT_SYMBOL(blk_integrity_register);
  
@@ -430,26 +431,11 @@ EXPORT_SYMBOL(blk_integrity_register);
   */
  void blk_integrity_unregister(struct gendisk *disk)
  {
-       blk_integrity_revalidate(disk);
+       disk->queue->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES;
         memset(&disk->queue->integrity, 0, sizeof(struct blk_integrity));
  }
  EXPORT_SYMBOL(blk_integrity_unregister);
  
-void blk_integrity_revalidate(struct gendisk *disk)
-{
-       struct blk_integrity *bi = &disk->queue->integrity;
-
-       if (!(disk->flags & GENHD_FL_UP))
-               return;
-
-       if (bi->profile)
-               disk->queue->backing_dev_info->capabilities |=
-                       BDI_CAP_STABLE_WRITES;
-       else
-               disk->queue->backing_dev_info->capabilities &=
-                       ~BDI_CAP_STABLE_WRITES;
-}
-
  void blk_integrity_add(struct gendisk *disk)
  {
         if (kobject_init_and_add(&disk->integrity_kobj, &integrity_ktype,
diff --git a/block/blk-lib.c b/block/blk-lib.c

index ed1e78e24db0053e993555e14bf730c1cfb868c2..e8caecd71688e00a4974b5d45b95bc53e4eaeac5 100644 (file)
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -37,17 +37,12 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
                 return -ENXIO;
  
         if (flags & BLKDEV_DISCARD_SECURE) {
-               if (flags & BLKDEV_DISCARD_ZERO)
-                       return -EOPNOTSUPP;
                 if (!blk_queue_secure_erase(q))
                         return -EOPNOTSUPP;
                 op = REQ_OP_SECURE_ERASE;
         } else {
                 if (!blk_queue_discard(q))
                         return -EOPNOTSUPP;
-               if ((flags & BLKDEV_DISCARD_ZERO) &&
-                   !q->limits.discard_zeroes_data)
-                       return -EOPNOTSUPP;
                 op = REQ_OP_DISCARD;
         }
  
@@ -109,7 +104,7 @@ EXPORT_SYMBOL(__blkdev_issue_discard);
   * @sector:    start sector
   * @nr_sects:  number of sectors to discard
   * @gfp_mask:  memory allocation flags (for bio_alloc)
- * @flags:     BLKDEV_IFL_* flags to control behaviour
+ * @flags:     BLKDEV_DISCARD_* flags to control behaviour
   *
   * Description:
   *    Issue a discard request for the sectors in question.
@@ -126,7 +121,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
                         &bio);
         if (!ret && bio) {
                 ret = submit_bio_wait(bio);
-               if (ret == -EOPNOTSUPP && !(flags & BLKDEV_DISCARD_ZERO))
+               if (ret == -EOPNOTSUPP)
                         ret = 0;
                 bio_put(bio);
         }
@@ -226,20 +221,9 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
  }
  EXPORT_SYMBOL(blkdev_issue_write_same);
  
-/**
- * __blkdev_issue_write_zeroes - generate number of bios with WRITE ZEROES
- * @bdev:      blockdev to issue
- * @sector:    start sector
- * @nr_sects:  number of sectors to write
- * @gfp_mask:  memory allocation flags (for bio_alloc)
- * @biop:      pointer to anchor bio
- *
- * Description:
- *  Generate and issue number of bios(REQ_OP_WRITE_ZEROES) with zerofiled pages.
- */
  static int __blkdev_issue_write_zeroes(struct block_device *bdev,
                 sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
-               struct bio **biop)
+               struct bio **biop, unsigned flags)
  {
         struct bio *bio = *biop;
         unsigned int max_write_zeroes_sectors;
@@ -258,7 +242,9 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
                 bio = next_bio(bio, 0, gfp_mask);
                 bio->bi_iter.bi_sector = sector;
                 bio->bi_bdev = bdev;
-               bio_set_op_attrs(bio, REQ_OP_WRITE_ZEROES, 0);
+               bio->bi_opf = REQ_OP_WRITE_ZEROES;
+               if (flags & BLKDEV_ZERO_NOUNMAP)
+                       bio->bi_opf |= REQ_NOUNMAP;
  
                 if (nr_sects > max_write_zeroes_sectors) {
                         bio->bi_iter.bi_size = max_write_zeroes_sectors << 9;
@@ -282,14 +268,27 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
   * @nr_sects:  number of sectors to write
   * @gfp_mask:  memory allocation flags (for bio_alloc)
   * @biop:      pointer to anchor bio
- * @discard:   discard flag
+ * @flags:     controls detailed behavior
   *
   * Description:
- *  Generate and issue number of bios with zerofiled pages.
+ *  Zero-fill a block range, either using hardware offload or by explicitly
+ *  writing zeroes to the device.
+ *
+ *  Note that this function may fail with -EOPNOTSUPP if the driver signals
+ *  zeroing offload support, but the device fails to process the command (for
+ *  some devices there is no non-destructive way to verify whether this
+ *  operation is actually supported).  In this case the caller should retry
+ *  the call to blkdev_issue_zeroout() and the fallback path will be used.
+ *
+ *  If a device is using logical block provisioning, the underlying space will
+ *  not be released if %flags contains BLKDEV_ZERO_NOUNMAP.
+ *
+ *  If %flags contains BLKDEV_ZERO_NOFALLBACK, the function will return
+ *  -EOPNOTSUPP if no explicit hardware offload for zeroing is provided.
   */
  int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
                 sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
-               bool discard)
+               unsigned flags)
  {
         int ret;
         int bi_size = 0;
@@ -302,8 +301,8 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
                 return -EINVAL;
  
         ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
-                       biop);
-       if (ret == 0 || (ret && ret != -EOPNOTSUPP))
+                       biop, flags);
+       if (ret != -EOPNOTSUPP || (flags & BLKDEV_ZERO_NOFALLBACK))
                 goto out;
  
         ret = 0;
@@ -337,40 +336,23 @@ EXPORT_SYMBOL(__blkdev_issue_zeroout);
   * @sector:    start sector
   * @nr_sects:  number of sectors to write
   * @gfp_mask:  memory allocation flags (for bio_alloc)
- * @discard:   whether to discard the block range
+ * @flags:     controls detailed behavior
   *
   * Description:
- *  Zero-fill a block range.  If the discard flag is set and the block
- *  device guarantees that subsequent READ operations to the block range
- *  in question will return zeroes, the blocks will be discarded. Should
- *  the discard request fail, if the discard flag is not set, or if
- *  discard_zeroes_data is not supported, this function will resort to
- *  zeroing the blocks manually, thus provisioning (allocating,
- *  anchoring) them. If the block device supports WRITE ZEROES or WRITE SAME
- *  command(s), blkdev_issue_zeroout() will use it to optimize the process of
- *  clearing the block range. Otherwise the zeroing will be performed
- *  using regular WRITE calls.
+ *  Zero-fill a block range, either using hardware offload or by explicitly
+ *  writing zeroes to the device.  See __blkdev_issue_zeroout() for the
+ *  valid values for %flags.
   */
  int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
-                        sector_t nr_sects, gfp_t gfp_mask, bool discard)
+               sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
  {
         int ret;
         struct bio *bio = NULL;
         struct blk_plug plug;
  
-       if (discard) {
-               if (!blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask,
-                               BLKDEV_DISCARD_ZERO))
-                       return 0;
-       }
-
-       if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
-                       ZERO_PAGE(0)))
-               return 0;
-
         blk_start_plug(&plug);
         ret = __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask,
-                       &bio, discard);
+                       &bio, flags);
         if (ret == 0 && bio) {
                 ret = submit_bio_wait(bio);
                 bio_put(bio);
diff --git a/block/blk-merge.c b/block/blk-merge.c

index 2afa262425d102e5c1500912f3b6d77525754566..3990ae40634123b4e16b0398ae36d1f1b74dc205 100644 (file)
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -54,6 +54,20 @@ static struct bio *blk_bio_discard_split(struct request_queue *q,
         return bio_split(bio, split_sectors, GFP_NOIO, bs);
  }
  
+/*
+ * Split helper for REQ_OP_WRITE_ZEROES bios.  Always reports one segment
+ * through @nsegs.  Returns NULL (nothing to split) when the queue limit
+ * max_write_zeroes_sectors is zero or when @bio already fits within it;
+ * otherwise returns the result of bio_split() at that limit, using @bs and
+ * GFP_NOIO.
+ */
+static struct bio *blk_bio_write_zeroes_split(struct request_queue *q,
+               struct bio *bio, struct bio_set *bs, unsigned *nsegs)
+{
+       *nsegs = 1;
+
+       if (!q->limits.max_write_zeroes_sectors)
+               return NULL;
+
+       if (bio_sectors(bio) <= q->limits.max_write_zeroes_sectors)
+               return NULL;
+
+       return bio_split(bio, q->limits.max_write_zeroes_sectors, GFP_NOIO, bs);
+}
+
  static struct bio *blk_bio_write_same_split(struct request_queue *q,
                                             struct bio *bio,
                                             struct bio_set *bs,
@@ -200,8 +214,7 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
                 split = blk_bio_discard_split(q, *bio, bs, &nsegs);
                 break;
         case REQ_OP_WRITE_ZEROES:
-               split = NULL;
-               nsegs = (*bio)->bi_phys_segments;
+               split = blk_bio_write_zeroes_split(q, *bio, bs, &nsegs);
                 break;
         case REQ_OP_WRITE_SAME:
                 split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c

index f6d917977b3318689f1a1ef387bae47abb03ab0b..bcd2a7d4a3a52fc23f26ee2e119a110579dadde6 100644 (file)
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -43,11 +43,157 @@ static int blk_mq_debugfs_seq_open(struct inode *inode, struct file *file,
         return ret;
  }
  
+/*
+ * Print the set bits of @flags to @m as a space-separated list.  A set bit
+ * whose position has a non-NULL entry in @flag_name (an array of
+ * @flag_name_count strings indexed by bit position) is printed by name; any
+ * other set bit is printed as its decimal bit number.  No trailing newline is
+ * emitted.  Always returns 0.
+ */
+static int blk_flags_show(struct seq_file *m, const unsigned long flags,
+                         const char *const *flag_name, int flag_name_count)
+{
+       bool sep = false;
+       int i;
+
+       for (i = 0; i < sizeof(flags) * BITS_PER_BYTE; i++) {
+               if (!(flags & BIT(i)))
+                       continue;
+               /* separator goes before every entry except the first */
+               if (sep)
+                       seq_puts(m, " ");
+               sep = true;
+               if (i < flag_name_count && flag_name[i])
+                       seq_puts(m, flag_name[i]);
+               else
+                       seq_printf(m, "%d", i);
+       }
+       return 0;
+}
+
+/*
+ * Printable names for the bits of q->queue_flags, indexed by the
+ * QUEUE_FLAG_* value.  Passed to blk_flags_show(), which looks names up by
+ * bit position.
+ */
+static const char *const blk_queue_flag_name[] = {
+       [QUEUE_FLAG_QUEUED]      = "QUEUED",
+       [QUEUE_FLAG_STOPPED]     = "STOPPED",
+       [QUEUE_FLAG_SYNCFULL]    = "SYNCFULL",
+       [QUEUE_FLAG_ASYNCFULL]   = "ASYNCFULL",
+       [QUEUE_FLAG_DYING]       = "DYING",
+       [QUEUE_FLAG_BYPASS]      = "BYPASS",
+       [QUEUE_FLAG_BIDI]        = "BIDI",
+       [QUEUE_FLAG_NOMERGES]    = "NOMERGES",
+       [QUEUE_FLAG_SAME_COMP]   = "SAME_COMP",
+       [QUEUE_FLAG_FAIL_IO]     = "FAIL_IO",
+       [QUEUE_FLAG_STACKABLE]   = "STACKABLE",
+       [QUEUE_FLAG_NONROT]      = "NONROT",
+       [QUEUE_FLAG_IO_STAT]     = "IO_STAT",
+       [QUEUE_FLAG_DISCARD]     = "DISCARD",
+       [QUEUE_FLAG_NOXMERGES]   = "NOXMERGES",
+       [QUEUE_FLAG_ADD_RANDOM]  = "ADD_RANDOM",
+       [QUEUE_FLAG_SECERASE]    = "SECERASE",
+       [QUEUE_FLAG_SAME_FORCE]  = "SAME_FORCE",
+       [QUEUE_FLAG_DEAD]        = "DEAD",
+       [QUEUE_FLAG_INIT_DONE]   = "INIT_DONE",
+       [QUEUE_FLAG_NO_SG_MERGE] = "NO_SG_MERGE",
+       [QUEUE_FLAG_POLL]        = "POLL",
+       [QUEUE_FLAG_WC]          = "WC",
+       [QUEUE_FLAG_FUA]         = "FUA",
+       [QUEUE_FLAG_FLUSH_NQ]    = "FLUSH_NQ",
+       [QUEUE_FLAG_DAX]         = "DAX",
+       [QUEUE_FLAG_STATS]       = "STATS",
+       [QUEUE_FLAG_POLL_STATS]  = "POLL_STATS",
+       [QUEUE_FLAG_REGISTERED]  = "REGISTERED",
+};
+
+/*
+ * seq_file show callback: print the request queue's queue_flags by name,
+ * followed by a newline.  The request queue is taken from m->private.
+ */
+static int blk_queue_flags_show(struct seq_file *m, void *v)
+{
+       struct request_queue *q = m->private;
+
+       blk_flags_show(m, q->queue_flags, blk_queue_flag_name,
+                      ARRAY_SIZE(blk_queue_flag_name));
+       seq_puts(m, "\n");
+       return 0;
+}
+
+/*
+ * Write handler for the queue "state" debugfs file.  Accepts one command
+ * word, optionally followed by whitespace:
+ *   "run"   - calls blk_mq_run_hw_queues(q, true)
+ *   "start" - calls blk_mq_start_stopped_hw_queues(q, true)
+ *
+ * Returns the number of bytes written on success, -EFAULT if the user
+ * buffer cannot be copied, and -EINVAL for an unknown or over-long command.
+ */
+static ssize_t blk_queue_flags_store(struct file *file, const char __user *ubuf,
+                                    size_t len, loff_t *offp)
+{
+       struct request_queue *q = file_inode(file)->i_private;
+       char op[16] = { }, *s;
+
+       /*
+        * Reject input that does not fit instead of truncating it: returning
+        * a short count makes user space write the remainder again, and that
+        * remainder would then be parsed as a second, bogus command.
+        */
+       if (len >= sizeof(op)) {
+               pr_err("%s: operation too long\n", __func__);
+               return -EINVAL;
+       }
+       if (copy_from_user(op, ubuf, len))
+               return -EFAULT;
+       s = op;
+       strsep(&s, " \t\n"); /* strip trailing whitespace */
+       if (strcmp(op, "run") == 0) {
+               blk_mq_run_hw_queues(q, true);
+       } else if (strcmp(op, "start") == 0) {
+               blk_mq_start_stopped_hw_queues(q, true);
+       } else {
+               pr_err("%s: unsupported operation %s. Use either 'run' or 'start'\n",
+                      __func__, op);
+               return -EINVAL;
+       }
+       return len;
+}
+
+/* Open handler: single_open() with blk_queue_flags_show and inode->i_private. */
+static int blk_queue_flags_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, blk_queue_flags_show, inode->i_private);
+}
+
+/* Readable via seq_file (flag names), writable via blk_queue_flags_store(). */
+static const struct file_operations blk_queue_flags_fops = {
+       .open           = blk_queue_flags_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+       .write          = blk_queue_flags_store,
+};
+
+/*
+ * Print one blk_rq_stat to @m without a trailing newline: the sample count
+ * plus mean/min/max when there is at least one sample, otherwise just
+ * "samples=0".
+ */
+static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
+{
+       if (stat->nr_samples) {
+               seq_printf(m, "samples=%d, mean=%lld, min=%llu, max=%llu",
+                          stat->nr_samples, stat->mean, stat->min, stat->max);
+       } else {
+               seq_puts(m, "samples=0");
+       }
+}
+
+/*
+ * seq_file show callback for the queue's poll statistics.  q->poll_stat is
+ * walked in pairs: entry 2*bucket is printed as the read statistic and entry
+ * 2*bucket+1 as the write statistic, each labelled with a size of
+ * 1 << (9 + bucket) bytes (512 for bucket 0, doubling per bucket).
+ */
+static int queue_poll_stat_show(struct seq_file *m, void *v)
+{
+       struct request_queue *q = m->private;
+       int bucket;
+
+       for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS/2; bucket++) {
+               seq_printf(m, "read  (%d Bytes): ", 1 << (9+bucket));
+               print_stat(m, &q->poll_stat[2*bucket]);
+               seq_puts(m, "\n");
+
+               seq_printf(m, "write (%d Bytes): ",  1 << (9+bucket));
+               print_stat(m, &q->poll_stat[2*bucket+1]);
+               seq_puts(m, "\n");
+       }
+       return 0;
+}
+
+/* Open handler: single_open() with queue_poll_stat_show and inode->i_private. */
+static int queue_poll_stat_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, queue_poll_stat_show, inode->i_private);
+}
+
+/* Read-only seq_file operations; no .write handler is provided. */
+static const struct file_operations queue_poll_stat_fops = {
+       .open           = queue_poll_stat_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+/*
+ * Printable names for the bits of hctx->state, indexed by the BLK_MQ_S_*
+ * bit number.
+ */
+static const char *const hctx_state_name[] = {
+       [BLK_MQ_S_STOPPED]       = "STOPPED",
+       [BLK_MQ_S_TAG_ACTIVE]    = "TAG_ACTIVE",
+       [BLK_MQ_S_SCHED_RESTART] = "SCHED_RESTART",
+       [BLK_MQ_S_TAG_WAITING]   = "TAG_WAITING",
+
+};
  static int hctx_state_show(struct seq_file *m, void *v)
  {
         struct blk_mq_hw_ctx *hctx = m->private;
  
-       seq_printf(m, "0x%lx\n", hctx->state);
+       blk_flags_show(m, hctx->state, hctx_state_name,
+                      ARRAY_SIZE(hctx_state_name));
+       seq_puts(m, "\n");
         return 0;
  }
  
@@ -63,11 +209,35 @@ static const struct file_operations hctx_state_fops = {
         .release        = single_release,
  };
  
+/*
+ * Printable names for the tag allocation policy, indexed by the
+ * BLK_TAG_ALLOC_* value.
+ */
+static const char *const alloc_policy_name[] = {
+       [BLK_TAG_ALLOC_FIFO]    = "fifo",
+       [BLK_TAG_ALLOC_RR]      = "rr",
+};
+
+/*
+ * Printable names for the bits of hctx->flags.  The BLK_MQ_F_* constants are
+ * used as masks elsewhere, so ilog2() converts each one into the bit
+ * position that serves as the array index.
+ */
+static const char *const hctx_flag_name[] = {
+       [ilog2(BLK_MQ_F_SHOULD_MERGE)]  = "SHOULD_MERGE",
+       [ilog2(BLK_MQ_F_TAG_SHARED)]    = "TAG_SHARED",
+       [ilog2(BLK_MQ_F_SG_MERGE)]      = "SG_MERGE",
+       [ilog2(BLK_MQ_F_BLOCKING)]      = "BLOCKING",
+       [ilog2(BLK_MQ_F_NO_SCHED)]      = "NO_SCHED",
+};
+
  static int hctx_flags_show(struct seq_file *m, void *v)
  {
         struct blk_mq_hw_ctx *hctx = m->private;
-
-       seq_printf(m, "0x%lx\n", hctx->flags);
+       const int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(hctx->flags);
+
+       seq_puts(m, "alloc_policy=");
+       if (alloc_policy < ARRAY_SIZE(alloc_policy_name) &&
+           alloc_policy_name[alloc_policy])
+               seq_puts(m, alloc_policy_name[alloc_policy]);
+       else
+               seq_printf(m, "%d", alloc_policy);
+       seq_puts(m, " ");
+       blk_flags_show(m,
+                      hctx->flags ^ BLK_ALLOC_POLICY_TO_MQ_FLAG(alloc_policy),
+                      hctx_flag_name, ARRAY_SIZE(hctx_flag_name));
+       seq_puts(m, "\n");
         return 0;
  }
  
@@ -83,13 +253,83 @@ static const struct file_operations hctx_flags_fops = {
         .release        = single_release,
  };
  
+/*
+ * Printable names for request operations, indexed by the REQ_OP_* value
+ * (the part of cmd_flags selected by REQ_OP_MASK).
+ */
+static const char *const op_name[] = {
+       [REQ_OP_READ]           = "READ",
+       [REQ_OP_WRITE]          = "WRITE",
+       [REQ_OP_FLUSH]          = "FLUSH",
+       [REQ_OP_DISCARD]        = "DISCARD",
+       [REQ_OP_ZONE_REPORT]    = "ZONE_REPORT",
+       [REQ_OP_SECURE_ERASE]   = "SECURE_ERASE",
+       [REQ_OP_ZONE_RESET]     = "ZONE_RESET",
+       [REQ_OP_WRITE_SAME]     = "WRITE_SAME",
+       [REQ_OP_WRITE_ZEROES]   = "WRITE_ZEROES",
+       [REQ_OP_SCSI_IN]        = "SCSI_IN",
+       [REQ_OP_SCSI_OUT]       = "SCSI_OUT",
+       [REQ_OP_DRV_IN]         = "DRV_IN",
+       [REQ_OP_DRV_OUT]        = "DRV_OUT",
+};
+
+/*
+ * Printable names for the flag bits of cmd_flags (the bits outside
+ * REQ_OP_MASK), indexed by the __REQ_* bit number.  Only real flags are
+ * listed: the __REQ_NR_BITS sentinel is a count, not a flag, so it has no
+ * entry.  NOUNMAP is included because write-zeroes bios may carry
+ * REQ_NOUNMAP.
+ */
+static const char *const cmd_flag_name[] = {
+       [__REQ_FAILFAST_DEV]            = "FAILFAST_DEV",
+       [__REQ_FAILFAST_TRANSPORT]      = "FAILFAST_TRANSPORT",
+       [__REQ_FAILFAST_DRIVER]         = "FAILFAST_DRIVER",
+       [__REQ_SYNC]                    = "SYNC",
+       [__REQ_META]                    = "META",
+       [__REQ_PRIO]                    = "PRIO",
+       [__REQ_NOMERGE]                 = "NOMERGE",
+       [__REQ_IDLE]                    = "IDLE",
+       [__REQ_INTEGRITY]               = "INTEGRITY",
+       [__REQ_FUA]                     = "FUA",
+       [__REQ_PREFLUSH]                = "PREFLUSH",
+       [__REQ_RAHEAD]                  = "RAHEAD",
+       [__REQ_BACKGROUND]              = "BACKGROUND",
+       [__REQ_NOUNMAP]                 = "NOUNMAP",
+};
+
+/*
+ * Printable names for the bits of rq->rq_flags.  ilog2() converts each
+ * RQF_* constant (cast to u32 with __force) into the bit position that
+ * serves as the array index.
+ */
+static const char *const rqf_name[] = {
+       [ilog2((__force u32)RQF_SORTED)]                = "SORTED",
+       [ilog2((__force u32)RQF_STARTED)]               = "STARTED",
+       [ilog2((__force u32)RQF_QUEUED)]                = "QUEUED",
+       [ilog2((__force u32)RQF_SOFTBARRIER)]           = "SOFTBARRIER",
+       [ilog2((__force u32)RQF_FLUSH_SEQ)]             = "FLUSH_SEQ",
+       [ilog2((__force u32)RQF_MIXED_MERGE)]           = "MIXED_MERGE",
+       [ilog2((__force u32)RQF_MQ_INFLIGHT)]           = "MQ_INFLIGHT",
+       [ilog2((__force u32)RQF_DONTPREP)]              = "DONTPREP",
+       [ilog2((__force u32)RQF_PREEMPT)]               = "PREEMPT",
+       [ilog2((__force u32)RQF_COPY_USER)]             = "COPY_USER",
+       [ilog2((__force u32)RQF_FAILED)]                = "FAILED",
+       [ilog2((__force u32)RQF_QUIET)]                 = "QUIET",
+       [ilog2((__force u32)RQF_ELVPRIV)]               = "ELVPRIV",
+       [ilog2((__force u32)RQF_IO_STAT)]               = "IO_STAT",
+       [ilog2((__force u32)RQF_ALLOCED)]               = "ALLOCED",
+       [ilog2((__force u32)RQF_PM)]                    = "PM",
+       [ilog2((__force u32)RQF_HASHED)]                = "HASHED",
+       [ilog2((__force u32)RQF_STATS)]                 = "STATS",
+       [ilog2((__force u32)RQF_SPECIAL_PAYLOAD)]       = "SPECIAL_PAYLOAD",
+};
+
  static int blk_mq_debugfs_rq_show(struct seq_file *m, void *v)
  {
         struct request *rq = list_entry_rq(v);
-
-       seq_printf(m, "%p {.cmd_flags=0x%x, .rq_flags=0x%x, .tag=%d, .internal_tag=%d}\n",
-                  rq, rq->cmd_flags, (__force unsigned int)rq->rq_flags,
-                  rq->tag, rq->internal_tag);
+       const struct blk_mq_ops *const mq_ops = rq->q->mq_ops;
+       const unsigned int op = rq->cmd_flags & REQ_OP_MASK;
+
+       seq_printf(m, "%p {.op=", rq);
+       if (op < ARRAY_SIZE(op_name) && op_name[op])
+               seq_printf(m, "%s", op_name[op]);
+       else
+               seq_printf(m, "%d", op);
+       seq_puts(m, ", .cmd_flags=");
+       blk_flags_show(m, rq->cmd_flags & ~REQ_OP_MASK, cmd_flag_name,
+                      ARRAY_SIZE(cmd_flag_name));
+       seq_puts(m, ", .rq_flags=");
+       blk_flags_show(m, (__force unsigned int)rq->rq_flags, rqf_name,
+                      ARRAY_SIZE(rqf_name));
+       seq_printf(m, ", .tag=%d, .internal_tag=%d", rq->tag,
+                  rq->internal_tag);
+       if (mq_ops->show_rq)
+               mq_ops->show_rq(m, rq);
+       seq_puts(m, "}\n");
         return 0;
  }
  
@@ -322,60 +562,6 @@ static const struct file_operations hctx_io_poll_fops = {
         .release        = single_release,
  };
  
-static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
-{
-       seq_printf(m, "samples=%d, mean=%lld, min=%llu, max=%llu",
-                  stat->nr_samples, stat->mean, stat->min, stat->max);
-}
-
-static int hctx_stats_show(struct seq_file *m, void *v)
-{
-       struct blk_mq_hw_ctx *hctx = m->private;
-       struct blk_rq_stat stat[2];
-
-       blk_stat_init(&stat[BLK_STAT_READ]);
-       blk_stat_init(&stat[BLK_STAT_WRITE]);
-
-       blk_hctx_stat_get(hctx, stat);
-
-       seq_puts(m, "read: ");
-       print_stat(m, &stat[BLK_STAT_READ]);
-       seq_puts(m, "\n");
-
-       seq_puts(m, "write: ");
-       print_stat(m, &stat[BLK_STAT_WRITE]);
-       seq_puts(m, "\n");
-       return 0;
-}
-
-static int hctx_stats_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, hctx_stats_show, inode->i_private);
-}
-
-static ssize_t hctx_stats_write(struct file *file, const char __user *buf,
-                               size_t count, loff_t *ppos)
-{
-       struct seq_file *m = file->private_data;
-       struct blk_mq_hw_ctx *hctx = m->private;
-       struct blk_mq_ctx *ctx;
-       int i;
-
-       hctx_for_each_ctx(hctx, ctx, i) {
-               blk_stat_init(&ctx->stat[BLK_STAT_READ]);
-               blk_stat_init(&ctx->stat[BLK_STAT_WRITE]);
-       }
-       return count;
-}
-
-static const struct file_operations hctx_stats_fops = {
-       .open           = hctx_stats_open,
-       .read           = seq_read,
-       .write          = hctx_stats_write,
-       .llseek         = seq_lseek,
-       .release        = single_release,
-};
-
  static int hctx_dispatched_show(struct seq_file *m, void *v)
  {
         struct blk_mq_hw_ctx *hctx = m->private;
@@ -636,6 +822,12 @@ static const struct file_operations ctx_completed_fops = {
         .release        = single_release,
  };
  
+/*
+ * Per-queue debugfs files as {name, mode, fops}; the empty entry terminates
+ * the list.
+ */
+static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
+       {"poll_stat", 0400, &queue_poll_stat_fops},
+       {"state", 0600, &blk_queue_flags_fops},
+       {},
+};
+
  static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
         {"state", 0400, &hctx_state_fops},
         {"flags", 0400, &hctx_flags_fops},
@@ -646,7 +838,6 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
         {"sched_tags", 0400, &hctx_sched_tags_fops},
         {"sched_tags_bitmap", 0400, &hctx_sched_tags_bitmap_fops},
         {"io_poll", 0600, &hctx_io_poll_fops},
-       {"stats", 0600, &hctx_stats_fops},
         {"dispatched", 0600, &hctx_dispatched_fops},
         {"queued", 0600, &hctx_queued_fops},
         {"run", 0600, &hctx_run_fops},
@@ -662,16 +853,17 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = {
         {},
  };
  
-int blk_mq_debugfs_register(struct request_queue *q, const char *name)
+int blk_mq_debugfs_register(struct request_queue *q)
  {
         if (!blk_debugfs_root)
                 return -ENOENT;
  
-       q->debugfs_dir = debugfs_create_dir(name, blk_debugfs_root);
+       q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent),
+                                           blk_debugfs_root);
         if (!q->debugfs_dir)
                 goto err;
  
-       if (blk_mq_debugfs_register_hctxs(q))
+       if (blk_mq_debugfs_register_mq(q))
                 goto err;
  
         return 0;
@@ -741,7 +933,7 @@ static int blk_mq_debugfs_register_hctx(struct request_queue *q,
         return 0;
  }
  
-int blk_mq_debugfs_register_hctxs(struct request_queue *q)
+int blk_mq_debugfs_register_mq(struct request_queue *q)
  {
         struct blk_mq_hw_ctx *hctx;
         int i;
@@ -753,6 +945,9 @@ int blk_mq_debugfs_register_hctxs(struct request_queue *q)
         if (!q->mq_debugfs_dir)
                 goto err;
  
+       if (!debugfs_create_files(q->mq_debugfs_dir, q, blk_mq_debugfs_queue_attrs))
+               goto err;
+
         queue_for_each_hw_ctx(q, hctx, i) {
                 if (blk_mq_debugfs_register_hctx(q, hctx))
                         goto err;
@@ -761,11 +956,11 @@ int blk_mq_debugfs_register_hctxs(struct request_queue *q)
         return 0;
  
  err:
-       blk_mq_debugfs_unregister_hctxs(q);
+       blk_mq_debugfs_unregister_mq(q);
         return -ENOMEM;
  }
  
-void blk_mq_debugfs_unregister_hctxs(struct request_queue *q)
+void blk_mq_debugfs_unregister_mq(struct request_queue *q)
  {
         debugfs_remove_recursive(q->mq_debugfs_dir);
         q->mq_debugfs_dir = NULL;
diff --git a/block/blk-mq-pci.c b/block/blk-mq-pci.c

index 966c2169762eb4d951011531e101fb1b3809d7e8..0c3354cf3552877d7542cdb32f0ce4102a79fcce 100644 (file)
--- a/block/blk-mq-pci.c
+++ b/block/blk-mq-pci.c
@@ -23,7 +23,7 @@
   * @pdev:      PCI device associated with @set.
   *
   * This function assumes the PCI device @pdev has at least as many available
- * interrupt vetors as @set has queues.  It will then queuery the vector
+ * interrupt vectors as @set has queues.  It will then query the vector
   * corresponding to each queue for it's affinity mask and built queue mapping
   * that maps a queue to the CPUs that have irq affinity for the corresponding
   * vector.
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c

index 09af8ff18719a42c662cbb9267812975b71666b0..8b361e192e8a910f2cbc8c374c8b960628b0006a 100644 (file)
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -30,43 +30,6 @@ void blk_mq_sched_free_hctx_data(struct request_queue *q,
  }
  EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
  
-int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size,
-                               int (*init)(struct blk_mq_hw_ctx *),
-                               void (*exit)(struct blk_mq_hw_ctx *))
-{
-       struct blk_mq_hw_ctx *hctx;
-       int ret;
-       int i;
-
-       queue_for_each_hw_ctx(q, hctx, i) {
-               hctx->sched_data = kmalloc_node(size, GFP_KERNEL, hctx->numa_node);
-               if (!hctx->sched_data) {
-                       ret = -ENOMEM;
-                       goto error;
-               }
-
-               if (init) {
-                       ret = init(hctx);
-                       if (ret) {
-                               /*
-                                * We don't want to give exit() a partially
-                                * initialized sched_data. init() must clean up
-                                * if it fails.
-                                */
-                               kfree(hctx->sched_data);
-                               hctx->sched_data = NULL;
-                               goto error;
-                       }
-               }
-       }
-
-       return 0;
-error:
-       blk_mq_sched_free_hctx_data(q, exit);
-       return ret;
-}
-EXPORT_SYMBOL_GPL(blk_mq_sched_init_hctx_data);
-
  static void __blk_mq_sched_assign_ioc(struct request_queue *q,
                                       struct request *rq,
                                       struct bio *bio,
@@ -119,7 +82,11 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
         if (likely(!data->hctx))
                 data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
  
-       if (e) {
+       /*
+        * For a reserved tag, allocate a normal request since we might
+        * have driver dependencies on the value of the internal tag.
+        */
+       if (e && !(data->flags & BLK_MQ_REQ_RESERVED)) {
                 data->flags |= BLK_MQ_REQ_INTERNAL;
  
                 /*
@@ -171,7 +138,8 @@ void blk_mq_sched_put_request(struct request *rq)
  
  void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
  {
-       struct elevator_queue *e = hctx->queue->elevator;
+       struct request_queue *q = hctx->queue;
+       struct elevator_queue *e = q->elevator;
         const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request;
         bool did_work = false;
         LIST_HEAD(rq_list);
@@ -203,10 +171,10 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
          */
         if (!list_empty(&rq_list)) {
                 blk_mq_sched_mark_restart_hctx(hctx);
-               did_work = blk_mq_dispatch_rq_list(hctx, &rq_list);
+               did_work = blk_mq_dispatch_rq_list(q, &rq_list);
         } else if (!has_sched_dispatch) {
                 blk_mq_flush_busy_ctxs(hctx, &rq_list);
-               blk_mq_dispatch_rq_list(hctx, &rq_list);
+               blk_mq_dispatch_rq_list(q, &rq_list);
         }
  
         /*
@@ -222,26 +190,10 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
                         if (!rq)
                                 break;
                         list_add(&rq->queuelist, &rq_list);
-               } while (blk_mq_dispatch_rq_list(hctx, &rq_list));
+               } while (blk_mq_dispatch_rq_list(q, &rq_list));
         }
  }
  
-void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
-                                  struct list_head *rq_list,
-                                  struct request *(*get_rq)(struct blk_mq_hw_ctx *))
-{
-       do {
-               struct request *rq;
-
-               rq = get_rq(hctx);
-               if (!rq)
-                       break;
-
-               list_add_tail(&rq->queuelist, rq_list);
-       } while (1);
-}
-EXPORT_SYMBOL_GPL(blk_mq_sched_move_to_dispatch);
-
  bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
                             struct request **merged_request)
  {
@@ -317,25 +269,68 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
         return true;
  }
  
-static void blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
+static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
  {
         if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
                 clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-               if (blk_mq_hctx_has_pending(hctx))
+               if (blk_mq_hctx_has_pending(hctx)) {
                         blk_mq_run_hw_queue(hctx, true);
+                       return true;
+               }
         }
+       return false;
  }
  
-void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx)
-{
-       struct request_queue *q = hctx->queue;
-       unsigned int i;
+/**
+ * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
+ * @pos:    loop cursor.
+ * @skip:   the list element that will not be examined. Iteration starts at
+ *          @skip->next.
+ * @head:   head of the list to examine. This list must have at least one
+ *          element, namely @skip.
+ * @member: name of the list_head structure within typeof(*pos).
+ *
+ * When the cursor's successor is @head, the list head itself is stepped over
+ * and iteration continues with the element after it.  The loop ends as soon
+ * as the cursor is back at @skip, so the body never runs for @skip.
+ */
+#define list_for_each_entry_rcu_rr(pos, skip, head, member)            \
+       for ((pos) = (skip);                                            \
+            (pos = (pos)->member.next != (head) ? list_entry_rcu(      \
+                       (pos)->member.next, typeof(*pos), member) :     \
+             list_entry_rcu((pos)->member.next->next, typeof(*pos), member)), \
+            (pos) != (skip); )
  
-       if (test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) {
-               if (test_and_clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) {
-                       queue_for_each_hw_ctx(q, hctx, i)
-                               blk_mq_sched_restart_hctx(hctx);
+/*
+ * Called after a driver tag has been freed to check whether a hctx needs to
+ * be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware
+ * queues in a round-robin fashion if the tag set of @hctx is shared with other
+ * hardware queues.
+ */
+void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
+{
+       struct blk_mq_tags *const tags = hctx->tags;
+       struct blk_mq_tag_set *const set = hctx->queue->tag_set;
+       struct request_queue *const queue = hctx->queue, *q;
+       struct blk_mq_hw_ctx *hctx2;
+       unsigned int i, j;
+
+       if (set->flags & BLK_MQ_F_TAG_SHARED) {
+               rcu_read_lock();
+               list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
+                                          tag_set_list) {
+                       queue_for_each_hw_ctx(q, hctx2, i)
+                               if (hctx2->tags == tags &&
+                                   blk_mq_sched_restart_hctx(hctx2))
+                                       goto done;
                 }
+               j = hctx->queue_num + 1;
+               for (i = 0; i < queue->nr_hw_queues; i++, j++) {
+                       if (j == queue->nr_hw_queues)
+                               j = 0;
+                       hctx2 = queue->queue_hw_ctx[j];
+                       if (hctx2->tags == tags &&
+                           blk_mq_sched_restart_hctx(hctx2))
+                               break;
+               }
+done:
+               rcu_read_unlock();
         } else {
                 blk_mq_sched_restart_hctx(hctx);
         }
@@ -431,11 +426,86 @@ static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
         }
  }
  
-int blk_mq_sched_setup(struct request_queue *q)
+static int blk_mq_sched_alloc_tags(struct request_queue *q,
+                                  struct blk_mq_hw_ctx *hctx,
+                                  unsigned int hctx_idx)
  {
         struct blk_mq_tag_set *set = q->tag_set;
+       int ret;
+
+       hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
+                                              set->reserved_tags);
+       if (!hctx->sched_tags)
+               return -ENOMEM;
+
+       ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
+       if (ret)
+               blk_mq_sched_free_tags(set, hctx, hctx_idx);
+
+       return ret;
+}
+
+/*
+ * Call blk_mq_sched_free_tags() for every hardware queue of @q, using the
+ * queue's tag set and the hardware queue's index.
+ */
+static void blk_mq_sched_tags_teardown(struct request_queue *q)
+{
+       struct blk_mq_tag_set *set = q->tag_set;
+       struct blk_mq_hw_ctx *hctx;
+       int i;
+
+       queue_for_each_hw_ctx(q, hctx, i)
+               blk_mq_sched_free_tags(set, hctx, i);
+}
+
+/*
+ * Set up scheduler state for a single hardware queue.  Does nothing and
+ * returns 0 when @q has no elevator.  Otherwise allocates the scheduler tags
+ * for @hctx and then calls the elevator's init_hctx hook if it has one.
+ *
+ * Returns 0 on success or the error from tag allocation / init_hctx.  If
+ * init_hctx fails, the scheduler tags allocated here are freed again.
+ */
+int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
+                          unsigned int hctx_idx)
+{
+       struct elevator_queue *e = q->elevator;
+       int ret;
+
+       if (!e)
+               return 0;
+
+       ret = blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
+       if (ret)
+               return ret;
+
+       if (e->type->ops.mq.init_hctx) {
+               ret = e->type->ops.mq.init_hctx(hctx, hctx_idx);
+               if (ret) {
+                       blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Tear down scheduler state for a single hardware queue.  Does nothing when
+ * @q has no elevator.  The elevator's exit_hctx hook is called only if it
+ * exists and hctx->sched_data is set, after which sched_data is cleared.
+ * The scheduler tags of @hctx are freed in either case.
+ */
+void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
+                           unsigned int hctx_idx)
+{
+       struct elevator_queue *e = q->elevator;
+
+       if (!e)
+               return;
+
+       if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
+               e->type->ops.mq.exit_hctx(hctx, hctx_idx);
+               hctx->sched_data = NULL;
+       }
+
+       blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
+}
+
+int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
+{
         struct blk_mq_hw_ctx *hctx;
-       int ret, i;
+       struct elevator_queue *eq;
+       unsigned int i;
+       int ret;
+
+       if (!e) {
+               q->elevator = NULL;
+               return 0;
+       }
  
         /*
          * Default to 256, since we don't split into sync/async like the
@@ -443,49 +513,53 @@ int blk_mq_sched_setup(struct request_queue *q)
          */
         q->nr_requests = 2 * BLKDEV_MAX_RQ;
  
-       /*
-        * We're switching to using an IO scheduler, so setup the hctx
-        * scheduler tags and switch the request map from the regular
-        * tags to scheduler tags. First allocate what we need, so we
-        * can safely fail and fallback, if needed.
-        */
-       ret = 0;
         queue_for_each_hw_ctx(q, hctx, i) {
-               hctx->sched_tags = blk_mq_alloc_rq_map(set, i,
-                               q->nr_requests, set->reserved_tags);
-               if (!hctx->sched_tags) {
-                       ret = -ENOMEM;
-                       break;
-               }
-               ret = blk_mq_alloc_rqs(set, hctx->sched_tags, i, q->nr_requests);
+               ret = blk_mq_sched_alloc_tags(q, hctx, i);
                 if (ret)
-                       break;
+                       goto err;
         }
  
-       /*
-        * If we failed, free what we did allocate
-        */
-       if (ret) {
+       ret = e->ops.mq.init_sched(q, e);
+       if (ret)
+               goto err;
+
+       if (e->ops.mq.init_hctx) {
                 queue_for_each_hw_ctx(q, hctx, i) {
-                       if (!hctx->sched_tags)
-                               continue;
-                       blk_mq_sched_free_tags(set, hctx, i);
+                       ret = e->ops.mq.init_hctx(hctx, i);
+                       if (ret) {
+                               eq = q->elevator;
+                               blk_mq_exit_sched(q, eq);
+                               kobject_put(&eq->kobj);
+                               return ret;
+                       }
                 }
-
-               return ret;
         }
  
         return 0;
+
+err:
+       blk_mq_sched_tags_teardown(q);
+       q->elevator = NULL;
+       return ret;
  }
  
-void blk_mq_sched_teardown(struct request_queue *q)
+void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
  {
-       struct blk_mq_tag_set *set = q->tag_set;
         struct blk_mq_hw_ctx *hctx;
-       int i;
+       unsigned int i;
  
-       queue_for_each_hw_ctx(q, hctx, i)
-               blk_mq_sched_free_tags(set, hctx, i);
+       if (e->type->ops.mq.exit_hctx) {
+               queue_for_each_hw_ctx(q, hctx, i) {
+                       if (hctx->sched_data) {
+                               e->type->ops.mq.exit_hctx(hctx, i);
+                               hctx->sched_data = NULL;
+                       }
+               }
+       }
+       if (e->type->ops.mq.exit_sched)
+               e->type->ops.mq.exit_sched(e);
+       blk_mq_sched_tags_teardown(q);
+       q->elevator = NULL;
  }
  
  int blk_mq_sched_init(struct request_queue *q)
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h

index a75b16b123f7aadac672651a7eef5c79f5553e16..edafb5383b7bbdedfd5365ed38f9a5c373ec96ab 100644 (file)
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -4,10 +4,6 @@
  #include "blk-mq.h"
  #include "blk-mq-tag.h"
  
-int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size,
-                               int (*init)(struct blk_mq_hw_ctx *),
-                               void (*exit)(struct blk_mq_hw_ctx *));
-
  void blk_mq_sched_free_hctx_data(struct request_queue *q,
                                  void (*exit)(struct blk_mq_hw_ctx *));
  
@@ -19,7 +15,7 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
                                 struct request **merged_request);
  bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio);
  bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq);
-void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx);
+void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx);
  
  void blk_mq_sched_insert_request(struct request *rq, bool at_head,
                                  bool run_queue, bool async, bool can_block);
@@ -28,12 +24,14 @@ void blk_mq_sched_insert_requests(struct request_queue *q,
                                   struct list_head *list, bool run_queue_async);
  
  void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
-void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
-                       struct list_head *rq_list,
-                       struct request *(*get_rq)(struct blk_mq_hw_ctx *));
  
-int blk_mq_sched_setup(struct request_queue *q);
-void blk_mq_sched_teardown(struct request_queue *q);
+int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
+void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e);
+
+int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
+                          unsigned int hctx_idx);
+void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
+                           unsigned int hctx_idx);
  
  int blk_mq_sched_init(struct request_queue *q);
  
@@ -81,17 +79,12 @@ blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
         return true;
  }
  
-static inline void
-blk_mq_sched_completed_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
+static inline void blk_mq_sched_completed_request(struct request *rq)
  {
-       struct elevator_queue *e = hctx->queue->elevator;
+       struct elevator_queue *e = rq->q->elevator;
  
         if (e && e->type->ops.mq.completed_request)
-               e->type->ops.mq.completed_request(hctx, rq);
-
-       BUG_ON(rq->internal_tag == -1);
-
-       blk_mq_put_tag(hctx, hctx->sched_tags, rq->mq_ctx, rq->internal_tag);
+               e->type->ops.mq.completed_request(rq);
  }
  
  static inline void blk_mq_sched_started_request(struct request *rq)
@@ -131,20 +124,6 @@ static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
                 set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
  }
  
-/*
- * Mark a hardware queue and the request queue it belongs to as needing a
- * restart.
- */
-static inline void blk_mq_sched_mark_restart_queue(struct blk_mq_hw_ctx *hctx)
-{
-       struct request_queue *q = hctx->queue;
-
-       if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-               set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-       if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
-               set_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
-}
-
  static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
  {
         return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c

index 295e69670c39343d058cbf7f67fc076f7249e94e..ec0afdf765e3939b520dec51fe02c9ec1b8a2d0c 100644 (file)
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -17,6 +17,15 @@ static void blk_mq_sysfs_release(struct kobject *kobj)
  {
  }
  
+static void blk_mq_hw_sysfs_release(struct kobject *kobj)
+{
+       struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx,
+                                                 kobj);
+       free_cpumask_var(hctx->cpumask);
+       kfree(hctx->ctxs);
+       kfree(hctx);
+}
+
  struct blk_mq_ctx_sysfs_entry {
         struct attribute attr;
         ssize_t (*show)(struct blk_mq_ctx *, char *);
@@ -200,7 +209,7 @@ static struct kobj_type blk_mq_ctx_ktype = {
  static struct kobj_type blk_mq_hw_ktype = {
         .sysfs_ops      = &blk_mq_hw_sysfs_ops,
         .default_attrs  = default_hw_ctx_attrs,
-       .release        = blk_mq_sysfs_release,
+       .release        = blk_mq_hw_sysfs_release,
  };
  
  static void blk_mq_unregister_hctx(struct blk_mq_hw_ctx *hctx)
@@ -242,24 +251,17 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
  static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
  {
         struct blk_mq_hw_ctx *hctx;
-       struct blk_mq_ctx *ctx;
-       int i, j;
-
-       queue_for_each_hw_ctx(q, hctx, i) {
-               blk_mq_unregister_hctx(hctx);
+       int i;
  
-               hctx_for_each_ctx(hctx, ctx, j)
-                       kobject_put(&ctx->kobj);
+       lockdep_assert_held(&q->sysfs_lock);
  
-               kobject_put(&hctx->kobj);
-       }
+       queue_for_each_hw_ctx(q, hctx, i)
+               blk_mq_unregister_hctx(hctx);
  
-       blk_mq_debugfs_unregister_hctxs(q);
+       blk_mq_debugfs_unregister_mq(q);
  
         kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
         kobject_del(&q->mq_kobj);
-       kobject_put(&q->mq_kobj);
-
         kobject_put(&dev->kobj);
  
         q->mq_sysfs_init_done = false;
@@ -267,9 +269,9 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
  
  void blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
  {
-       blk_mq_disable_hotplug();
+       mutex_lock(&q->sysfs_lock);
         __blk_mq_unregister_dev(dev, q);
-       blk_mq_enable_hotplug();
+       mutex_unlock(&q->sysfs_lock);
  }
  
  void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx)
@@ -277,7 +279,19 @@ void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx)
         kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
  }
  
-static void blk_mq_sysfs_init(struct request_queue *q)
+void blk_mq_sysfs_deinit(struct request_queue *q)
+{
+       struct blk_mq_ctx *ctx;
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               ctx = per_cpu_ptr(q->queue_ctx, cpu);
+               kobject_put(&ctx->kobj);
+       }
+       kobject_put(&q->mq_kobj);
+}
+
+void blk_mq_sysfs_init(struct request_queue *q)
  {
         struct blk_mq_ctx *ctx;
         int cpu;
@@ -290,14 +304,13 @@ static void blk_mq_sysfs_init(struct request_queue *q)
         }
  }
  
-int blk_mq_register_dev(struct device *dev, struct request_queue *q)
+int __blk_mq_register_dev(struct device *dev, struct request_queue *q)
  {
         struct blk_mq_hw_ctx *hctx;
         int ret, i;
  
-       blk_mq_disable_hotplug();
-
-       blk_mq_sysfs_init(q);
+       WARN_ON_ONCE(!q->kobj.parent);
+       lockdep_assert_held(&q->sysfs_lock);
  
         ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
         if (ret < 0)
@@ -305,20 +318,38 @@ int blk_mq_register_dev(struct device *dev, struct request_queue *q)
  
         kobject_uevent(&q->mq_kobj, KOBJ_ADD);
  
-       blk_mq_debugfs_register(q, kobject_name(&dev->kobj));
+       blk_mq_debugfs_register(q);
  
         queue_for_each_hw_ctx(q, hctx, i) {
                 ret = blk_mq_register_hctx(hctx);
                 if (ret)
-                       break;
+                       goto unreg;
         }
  
-       if (ret)
-               __blk_mq_unregister_dev(dev, q);
-       else
-               q->mq_sysfs_init_done = true;
+       q->mq_sysfs_init_done = true;
+
  out:
-       blk_mq_enable_hotplug();
+       return ret;
+
+unreg:
+       while (--i >= 0)
+               blk_mq_unregister_hctx(q->queue_hw_ctx[i]);
+
+       blk_mq_debugfs_unregister_mq(q);
+
+       kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
+       kobject_del(&q->mq_kobj);
+       kobject_put(&dev->kobj);
+       return ret;
+}
+
+int blk_mq_register_dev(struct device *dev, struct request_queue *q)
+{
+       int ret;
+
+       mutex_lock(&q->sysfs_lock);
+       ret = __blk_mq_register_dev(dev, q);
+       mutex_unlock(&q->sysfs_lock);
  
         return ret;
  }
@@ -329,13 +360,17 @@ void blk_mq_sysfs_unregister(struct request_queue *q)
         struct blk_mq_hw_ctx *hctx;
         int i;
  
+       mutex_lock(&q->sysfs_lock);
         if (!q->mq_sysfs_init_done)
-               return;
+               goto unlock;
  
-       blk_mq_debugfs_unregister_hctxs(q);
+       blk_mq_debugfs_unregister_mq(q);
  
         queue_for_each_hw_ctx(q, hctx, i)
                 blk_mq_unregister_hctx(hctx);
+
+unlock:
+       mutex_unlock(&q->sysfs_lock);
  }
  
  int blk_mq_sysfs_register(struct request_queue *q)
@@ -343,10 +378,11 @@ int blk_mq_sysfs_register(struct request_queue *q)
         struct blk_mq_hw_ctx *hctx;
         int i, ret = 0;
  
+       mutex_lock(&q->sysfs_lock);
         if (!q->mq_sysfs_init_done)
-               return ret;
+               goto unlock;
  
-       blk_mq_debugfs_register_hctxs(q);
+       blk_mq_debugfs_register_mq(q);
  
         queue_for_each_hw_ctx(q, hctx, i) {
                 ret = blk_mq_register_hctx(hctx);
@@ -354,5 +390,8 @@ int blk_mq_sysfs_register(struct request_queue *q)
                         break;
         }
  
+unlock:
+       mutex_unlock(&q->sysfs_lock);
+
         return ret;
  }
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c

index e48bc2c72615de016f013a2e98ea72cd49713a04..d0be72ccb0914c74fd9140ed112228832752ee6d 100644 (file)
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -96,7 +96,10 @@ static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
         if (!(data->flags & BLK_MQ_REQ_INTERNAL) &&
             !hctx_may_queue(data->hctx, bt))
                 return -1;
-       return __sbitmap_queue_get(bt);
+       if (data->shallow_depth)
+               return __sbitmap_queue_get_shallow(bt, data->shallow_depth);
+       else
+               return __sbitmap_queue_get(bt);
  }
  
  unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
@@ -295,6 +298,9 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set)
         for (i = 0; i < set->nr_hw_queues; i++) {
                 struct blk_mq_tags *tags = set->tags[i];
  
+               if (!tags)
+                       continue;
+
                 for (j = 0; j < tags->nr_tags; j++) {
                         if (!tags->static_rqs[j])
                                 continue;
diff --git a/block/blk-mq.c b/block/blk-mq.c

index b2fd175e84d79af071b28768e74de06cb673407b..bf90684a007a21c6aa5068b3b2e3746e8e1accff 100644 (file)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -39,6 +39,26 @@
  static DEFINE_MUTEX(all_q_mutex);
  static LIST_HEAD(all_q_list);
  
+static void blk_mq_poll_stats_start(struct request_queue *q);
+static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
+
+static int blk_mq_poll_stats_bkt(const struct request *rq)
+{
+       int ddir, bytes, bucket;
+
+       ddir = rq_data_dir(rq);
+       bytes = blk_rq_bytes(rq);
+
+       bucket = ddir + 2*(ilog2(bytes) - 9);
+
+       if (bucket < 0)
+               return -1;
+       else if (bucket >= BLK_MQ_POLL_STATS_BKTS)
+               return ddir + BLK_MQ_POLL_STATS_BKTS - 2;
+
+       return bucket;
+}
+
  /*
   * Check if any of the ctx's have pending work in this hardware queue
   */
@@ -65,7 +85,7 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
         sbitmap_clear_bit(&hctx->ctx_map, ctx->index_hw);
  }
  
-void blk_mq_freeze_queue_start(struct request_queue *q)
+void blk_freeze_queue_start(struct request_queue *q)
  {
         int freeze_depth;
  
@@ -75,7 +95,7 @@ void blk_mq_freeze_queue_start(struct request_queue *q)
                 blk_mq_run_hw_queues(q, false);
         }
  }
-EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start);
+EXPORT_SYMBOL_GPL(blk_freeze_queue_start);
  
  void blk_mq_freeze_queue_wait(struct request_queue *q)
  {
@@ -105,7 +125,7 @@ void blk_freeze_queue(struct request_queue *q)
          * no blk_unfreeze_queue(), and blk_freeze_queue() is not
          * exported to drivers as the only user for unfreeze is blk_mq.
          */
-       blk_mq_freeze_queue_start(q);
+       blk_freeze_queue_start(q);
         blk_mq_freeze_queue_wait(q);
  }
  
@@ -210,7 +230,6 @@ void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
  #endif
         rq->special = NULL;
         /* tag was already set */
-       rq->errors = 0;
         rq->extra_len = 0;
  
         INIT_LIST_HEAD(&rq->timeout_list);
@@ -321,7 +340,6 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
  
         rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data);
  
-       blk_mq_put_ctx(alloc_data.ctx);
         blk_queue_exit(q);
  
         if (!rq)
@@ -348,8 +366,8 @@ void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
         if (rq->tag != -1)
                 blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
         if (sched_tag != -1)
-               blk_mq_sched_completed_request(hctx, rq);
-       blk_mq_sched_restart_queues(hctx);
+               blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag);
+       blk_mq_sched_restart(hctx);
         blk_queue_exit(q);
  }
  
@@ -366,6 +384,7 @@ void blk_mq_finish_request(struct request *rq)
  {
         blk_mq_finish_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq);
  }
+EXPORT_SYMBOL_GPL(blk_mq_finish_request);
  
  void blk_mq_free_request(struct request *rq)
  {
@@ -403,12 +422,19 @@ static void __blk_mq_complete_request_remote(void *data)
         rq->q->softirq_done_fn(rq);
  }
  
-static void blk_mq_ipi_complete_request(struct request *rq)
+static void __blk_mq_complete_request(struct request *rq)
  {
         struct blk_mq_ctx *ctx = rq->mq_ctx;
         bool shared = false;
         int cpu;
  
+       if (rq->internal_tag != -1)
+               blk_mq_sched_completed_request(rq);
+       if (rq->rq_flags & RQF_STATS) {
+               blk_mq_poll_stats_start(rq->q);
+               blk_stat_add(rq);
+       }
+
         if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) {
                 rq->q->softirq_done_fn(rq);
                 return;
@@ -429,33 +455,6 @@ static void blk_mq_ipi_complete_request(struct request *rq)
         put_cpu();
  }
  
-static void blk_mq_stat_add(struct request *rq)
-{
-       if (rq->rq_flags & RQF_STATS) {
-               /*
-                * We could rq->mq_ctx here, but there's less of a risk
-                * of races if we have the completion event add the stats
-                * to the local software queue.
-                */
-               struct blk_mq_ctx *ctx;
-
-               ctx = __blk_mq_get_ctx(rq->q, raw_smp_processor_id());
-               blk_stat_add(&ctx->stat[rq_data_dir(rq)], rq);
-       }
-}
-
-static void __blk_mq_complete_request(struct request *rq)
-{
-       struct request_queue *q = rq->q;
-
-       blk_mq_stat_add(rq);
-
-       if (!q->softirq_done_fn)
-               blk_mq_end_request(rq, rq->errors);
-       else
-               blk_mq_ipi_complete_request(rq);
-}
-
  /**
   * blk_mq_complete_request - end I/O on a request
   * @rq:                the request being processed
@@ -464,16 +463,14 @@ static void __blk_mq_complete_request(struct request *rq)
   *     Ends all I/O on a request. It does not handle partial completions.
   *     The actual completion happens out-of-order, through a IPI handler.
   **/
-void blk_mq_complete_request(struct request *rq, int error)
+void blk_mq_complete_request(struct request *rq)
  {
         struct request_queue *q = rq->q;
  
         if (unlikely(blk_should_fake_timeout(q)))
                 return;
-       if (!blk_mark_rq_complete(rq)) {
-               rq->errors = error;
+       if (!blk_mark_rq_complete(rq))
                 __blk_mq_complete_request(rq);
-       }
  }
  EXPORT_SYMBOL(blk_mq_complete_request);
  
@@ -492,7 +489,7 @@ void blk_mq_start_request(struct request *rq)
         trace_block_rq_issue(q, rq);
  
         if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
-               blk_stat_set_issue_time(&rq->issue_stat);
+               blk_stat_set_issue(&rq->issue_stat, blk_rq_sectors(rq));
                 rq->rq_flags |= RQF_STATS;
                 wbt_issue(q->rq_wb, &rq->issue_stat);
         }
@@ -527,6 +524,15 @@ void blk_mq_start_request(struct request *rq)
  }
  EXPORT_SYMBOL(blk_mq_start_request);
  
+/*
+ * When we get here because the queue is busy, the REQ_ATOM_COMPLETE
+ * flag isn't set yet, so there may be a race with the timeout handler.
+ * However, given that rq->deadline has just been set in .queue_rq() in
+ * this situation, the race won't be possible in practice because
+ * rq->timeout should be set large enough to cover the window
+ * between blk_mq_start_request() being called from .queue_rq() and
+ * REQ_ATOM_STARTED being cleared here.
+ */
  static void __blk_mq_requeue_request(struct request *rq)
  {
         struct request_queue *q = rq->q;
@@ -634,8 +640,7 @@ void blk_mq_abort_requeue_list(struct request_queue *q)
  
                 rq = list_first_entry(&rq_list, struct request, queuelist);
                 list_del_init(&rq->queuelist);
-               rq->errors = -EIO;
-               blk_mq_end_request(rq, rq->errors);
+               blk_mq_end_request(rq, -EIO);
         }
  }
  EXPORT_SYMBOL(blk_mq_abort_requeue_list);
@@ -667,7 +672,7 @@ void blk_mq_rq_timed_out(struct request *req, bool reserved)
          * just be ignored. This can happen due to the bitflag ordering.
          * Timeout first checks if STARTED is set, and if it is, assumes
          * the request is active. But if we race with completion, then
-        * we both flags will get cleared. So check here again, and ignore
+        * both flags will get cleared. So check here again, and ignore
          * a timeout event with a request that isn't active.
          */
         if (!test_bit(REQ_ATOM_STARTED, &req->atomic_flags))
@@ -697,18 +702,22 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
  {
         struct blk_mq_timeout_data *data = priv;
  
-       if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
-               /*
-                * If a request wasn't started before the queue was
-                * marked dying, kill it here or it'll go unnoticed.
-                */
-               if (unlikely(blk_queue_dying(rq->q))) {
-                       rq->errors = -EIO;
-                       blk_mq_end_request(rq, rq->errors);
-               }
+       if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
                 return;
-       }
  
+       /*
+        * The rq being checked may already have been freed and
+        * reallocated by now. We avoid this race by checking rq->deadline
+        * and the REQ_ATOM_COMPLETE flag together:
+        *
+        * - if rq->deadline is observed with its new value because of
+        *   reuse, the rq won't be timed out because of timing.
+        * - if rq->deadline is observed with its previous value, the
+        *   REQ_ATOM_COMPLETE flag won't be cleared in the reuse path
+        *   because we put a barrier between setting rq->deadline
+        *   and clearing the flag in blk_mq_start_request(), so
+        *   this rq won't be timed out either.
+        */
         if (time_after_eq(jiffies, rq->deadline)) {
                 if (!blk_mark_rq_complete(rq))
                         blk_mq_rq_timed_out(rq, reserved);
@@ -737,7 +746,7 @@ static void blk_mq_timeout_work(struct work_struct *work)
          * percpu_ref_tryget directly, because we need to be able to
          * obtain a reference even in the short window between the queue
          * starting to freeze, by dropping the first reference in
-        * blk_mq_freeze_queue_start, and the moment the last request is
+        * blk_freeze_queue_start, and the moment the last request is
          * consumed, marked by the instant q_usage_counter reaches
          * zero.
          */
@@ -855,12 +864,10 @@ bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
                 .flags = wait ? 0 : BLK_MQ_REQ_NOWAIT,
         };
  
-       if (rq->tag != -1) {
-done:
-               if (hctx)
-                       *hctx = data.hctx;
-               return true;
-       }
+       might_sleep_if(wait);
+
+       if (rq->tag != -1)
+               goto done;
  
         if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag))
                 data.flags |= BLK_MQ_REQ_RESERVED;
@@ -872,10 +879,12 @@ done:
                         atomic_inc(&data.hctx->nr_active);
                 }
                 data.hctx->tags->rqs[rq->tag] = rq;
-               goto done;
         }
  
-       return false;
+done:
+       if (hctx)
+               *hctx = data.hctx;
+       return rq->tag != -1;
  }
  
  static void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
@@ -972,25 +981,20 @@ static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx *hctx)
         return true;
  }
  
-bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
+bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
  {
-       struct request_queue *q = hctx->queue;
+       struct blk_mq_hw_ctx *hctx;
         struct request *rq;
-       LIST_HEAD(driver_list);
-       struct list_head *dptr;
-       int queued, ret = BLK_MQ_RQ_QUEUE_OK;
+       int errors, queued, ret = BLK_MQ_RQ_QUEUE_OK;
  
-       /*
-        * Start off with dptr being NULL, so we start the first request
-        * immediately, even if we have more pending.
-        */
-       dptr = NULL;
+       if (list_empty(list))
+               return false;
  
         /*
          * Now process all the entries, sending them to the driver.
          */
-       queued = 0;
-       while (!list_empty(list)) {
+       errors = queued = 0;
+       do {
                 struct blk_mq_queue_data bd;
  
                 rq = list_first_entry(list, struct request, queuelist);
@@ -1002,23 +1006,21 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
                          * The initial allocation attempt failed, so we need to
                          * rerun the hardware queue when a tag is freed.
                          */
-                       if (blk_mq_dispatch_wait_add(hctx)) {
-                               /*
-                                * It's possible that a tag was freed in the
-                                * window between the allocation failure and
-                                * adding the hardware queue to the wait queue.
-                                */
-                               if (!blk_mq_get_driver_tag(rq, &hctx, false))
-                                       break;
-                       } else {
+                       if (!blk_mq_dispatch_wait_add(hctx))
+                               break;
+
+                       /*
+                        * It's possible that a tag was freed in the window
+                        * between the allocation failure and adding the
+                        * hardware queue to the wait queue.
+                        */
+                       if (!blk_mq_get_driver_tag(rq, &hctx, false))
                                 break;
-                       }
                 }
  
                 list_del_init(&rq->queuelist);
  
                 bd.rq = rq;
-               bd.list = dptr;
  
                 /*
                  * Flag last if we have no more requests, or if we have more
@@ -1046,21 +1048,14 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
                 default:
                         pr_err("blk-mq: bad return on queue: %d\n", ret);
                 case BLK_MQ_RQ_QUEUE_ERROR:
-                       rq->errors = -EIO;
-                       blk_mq_end_request(rq, rq->errors);
+                       errors++;
+                       blk_mq_end_request(rq, -EIO);
                         break;
                 }
  
                 if (ret == BLK_MQ_RQ_QUEUE_BUSY)
                         break;
-
-               /*
-                * We've done the first request. If we have more than 1
-                * left in the list, set dptr to defer issue.
-                */
-               if (!dptr && list->next != list->prev)
-                       dptr = &driver_list;
-       }
+       } while (!list_empty(list));
  
         hctx->dispatched[queued_to_index(queued)]++;
  
@@ -1070,8 +1065,8 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
          */
         if (!list_empty(list)) {
                 /*
-                * If we got a driver tag for the next request already,
-                * free it again.
+                * If an I/O scheduler has been configured and we got a driver
+                * tag for the next request already, free it again.
                  */
                 rq = list_first_entry(list, struct request, queuelist);
                 blk_mq_put_driver_tag(rq);
@@ -1081,23 +1076,31 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
                 spin_unlock(&hctx->lock);
  
                 /*
-                * the queue is expected stopped with BLK_MQ_RQ_QUEUE_BUSY, but
-                * it's possible the queue is stopped and restarted again
-                * before this. Queue restart will dispatch requests. And since
-                * requests in rq_list aren't added into hctx->dispatch yet,
-                * the requests in rq_list might get lost.
+                * If SCHED_RESTART was set by the caller of this function and
+                * it is no longer set that means that it was cleared by another
+                * thread and hence that a queue rerun is needed.
                  *
-                * blk_mq_run_hw_queue() already checks the STOPPED bit
+                * If TAG_WAITING is set that means that an I/O scheduler has
+                * been configured and another thread is waiting for a driver
+                * tag. To guarantee fairness, do not rerun this hardware queue
+                * but let the other thread grab the driver tag.
                  *
-                * If RESTART or TAG_WAITING is set, then let completion restart
-                * the queue instead of potentially looping here.
+                * If no I/O scheduler has been configured it is possible that
+                * the hardware queue got stopped and restarted before requests
+                * were pushed back onto the dispatch list. Rerun the queue to
+                * avoid starvation. Notes:
+                * - blk_mq_run_hw_queue() checks whether or not a queue has
+                *   been stopped before rerunning a queue.
+                * - Some but not all block drivers stop a queue before
+                *   returning BLK_MQ_RQ_QUEUE_BUSY. Two exceptions are scsi-mq
+                *   and dm-rq.
                  */
                 if (!blk_mq_sched_needs_restart(hctx) &&
                     !test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state))
                         blk_mq_run_hw_queue(hctx, true);
         }
  
-       return queued != 0;
+       return (queued + errors) != 0;
  }
  
  static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
@@ -1112,6 +1115,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
                 blk_mq_sched_dispatch_requests(hctx);
                 rcu_read_unlock();
         } else {
+               might_sleep();
+
                 srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
                 blk_mq_sched_dispatch_requests(hctx);
                 srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
@@ -1143,7 +1148,8 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
         return hctx->next_cpu;
  }
  
-void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
+static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
+                                       unsigned long msecs)
  {
         if (unlikely(blk_mq_hctx_stopped(hctx) ||
                      !blk_mq_hw_queue_mapped(hctx)))
@@ -1160,8 +1166,22 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
                 put_cpu();
         }
  
-       kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work);
+       kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
+                                        &hctx->run_work,
+                                        msecs_to_jiffies(msecs));
+}
+
+void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
+{
+       __blk_mq_delay_run_hw_queue(hctx, true, msecs);
  }
+EXPORT_SYMBOL(blk_mq_delay_run_hw_queue);
+
+void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
+{
+       __blk_mq_delay_run_hw_queue(hctx, async, 0);
+}
+EXPORT_SYMBOL(blk_mq_run_hw_queue);
  
  void blk_mq_run_hw_queues(struct request_queue *q, bool async)
  {
@@ -1200,8 +1220,7 @@ EXPORT_SYMBOL(blk_mq_queue_stopped);
  
  void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
  {
-       cancel_work(&hctx->run_work);
-       cancel_delayed_work(&hctx->delay_work);
+       cancel_delayed_work_sync(&hctx->run_work);
         set_bit(BLK_MQ_S_STOPPED, &hctx->state);
  }
  EXPORT_SYMBOL(blk_mq_stop_hw_queue);
@@ -1258,29 +1277,40 @@ static void blk_mq_run_work_fn(struct work_struct *work)
  {
         struct blk_mq_hw_ctx *hctx;
  
-       hctx = container_of(work, struct blk_mq_hw_ctx, run_work);
-
-       __blk_mq_run_hw_queue(hctx);
-}
+       hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work);
  
-static void blk_mq_delay_work_fn(struct work_struct *work)
-{
-       struct blk_mq_hw_ctx *hctx;
+       /*
+        * If we are stopped, don't run the queue. The exception is if
+        * BLK_MQ_S_START_ON_RUN is set. For that case, we auto-clear
+        * the STOPPED bit and run it.
+        */
+       if (test_bit(BLK_MQ_S_STOPPED, &hctx->state)) {
+               if (!test_bit(BLK_MQ_S_START_ON_RUN, &hctx->state))
+                       return;
  
-       hctx = container_of(work, struct blk_mq_hw_ctx, delay_work.work);
+               clear_bit(BLK_MQ_S_START_ON_RUN, &hctx->state);
+               clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
+       }
  
-       if (test_and_clear_bit(BLK_MQ_S_STOPPED, &hctx->state))
-               __blk_mq_run_hw_queue(hctx);
+       __blk_mq_run_hw_queue(hctx);
  }
  
+
  void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
  {
         if (unlikely(!blk_mq_hw_queue_mapped(hctx)))
                 return;
  
+       /*
+        * Stop the hw queue, then modify currently delayed work.
+        * This should prevent us from running the queue prematurely.
+        * Mark the queue as auto-clearing STOPPED when it runs.
+        */
         blk_mq_stop_hw_queue(hctx);
-       kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
-                       &hctx->delay_work, msecs_to_jiffies(msecs));
+       set_bit(BLK_MQ_S_START_ON_RUN, &hctx->state);
+       kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
+                                       &hctx->run_work,
+                                       msecs_to_jiffies(msecs));
  }
  EXPORT_SYMBOL(blk_mq_delay_queue);
  
@@ -1389,7 +1419,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
  
  static void blk_mq_bio_to_request(struct request *rq, struct bio *bio)
  {
-       init_request_from_bio(rq, bio);
+       blk_init_request_from_bio(rq, bio);
  
         blk_account_io_start(rq, true);
  }
@@ -1434,13 +1464,13 @@ static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
         return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true);
  }
  
-static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie)
+static void __blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie,
+                                     bool may_sleep)
  {
         struct request_queue *q = rq->q;
         struct blk_mq_queue_data bd = {
                 .rq = rq,
-               .list = NULL,
-               .last = 1
+               .last = true,
         };
         struct blk_mq_hw_ctx *hctx;
         blk_qc_t new_cookie;
@@ -1465,31 +1495,42 @@ static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie)
                 return;
         }
  
-       __blk_mq_requeue_request(rq);
-
         if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
                 *cookie = BLK_QC_T_NONE;
-               rq->errors = -EIO;
-               blk_mq_end_request(rq, rq->errors);
+               blk_mq_end_request(rq, -EIO);
                 return;
         }
  
+       __blk_mq_requeue_request(rq);
  insert:
-       blk_mq_sched_insert_request(rq, false, true, true, false);
+       blk_mq_sched_insert_request(rq, false, true, false, may_sleep);
+}
+
+static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
+               struct request *rq, blk_qc_t *cookie)
+{
+       if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
+               rcu_read_lock();
+               __blk_mq_try_issue_directly(rq, cookie, false);
+               rcu_read_unlock();
+       } else {
+               unsigned int srcu_idx;
+
+               might_sleep();
+
+               srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
+               __blk_mq_try_issue_directly(rq, cookie, true);
+               srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
+       }
  }
  
-/*
- * Multiple hardware queue variant. This will not use per-process plugs,
- * but will attempt to bypass the hctx queueing if we can go straight to
- * hardware for SYNC IO.
- */
  static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
  {
         const int is_sync = op_is_sync(bio->bi_opf);
         const int is_flush_fua = op_is_flush(bio->bi_opf);
         struct blk_mq_alloc_data data = { .flags = 0 };
         struct request *rq;
-       unsigned int request_count = 0, srcu_idx;
+       unsigned int request_count = 0;
         struct blk_plug *plug;
         struct request *same_queue_rq = NULL;
         blk_qc_t cookie;
@@ -1525,147 +1566,21 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
  
         cookie = request_to_qc_t(data.hctx, rq);
  
-       if (unlikely(is_flush_fua)) {
-               if (q->elevator)
-                       goto elv_insert;
-               blk_mq_bio_to_request(rq, bio);
-               blk_insert_flush(rq);
-               goto run_queue;
-       }
-
         plug = current->plug;
-       /*
-        * If the driver supports defer issued based on 'last', then
-        * queue it up like normal since we can potentially save some
-        * CPU this way.
-        */
-       if (((plug && !blk_queue_nomerges(q)) || is_sync) &&
-           !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) {
-               struct request *old_rq = NULL;
-
-               blk_mq_bio_to_request(rq, bio);
-
-               /*
-                * We do limited plugging. If the bio can be merged, do that.
-                * Otherwise the existing request in the plug list will be
-                * issued. So the plug list will have one request at most
-                */
-               if (plug) {
-                       /*
-                        * The plug list might get flushed before this. If that
-                        * happens, same_queue_rq is invalid and plug list is
-                        * empty
-                        */
-                       if (same_queue_rq && !list_empty(&plug->mq_list)) {
-                               old_rq = same_queue_rq;
-                               list_del_init(&old_rq->queuelist);
-                       }
-                       list_add_tail(&rq->queuelist, &plug->mq_list);
-               } else /* is_sync */
-                       old_rq = rq;
+       if (unlikely(is_flush_fua)) {
                 blk_mq_put_ctx(data.ctx);
-               if (!old_rq)
-                       goto done;
-
-               if (!(data.hctx->flags & BLK_MQ_F_BLOCKING)) {
-                       rcu_read_lock();
-                       blk_mq_try_issue_directly(old_rq, &cookie);
-                       rcu_read_unlock();
+               blk_mq_bio_to_request(rq, bio);
+               if (q->elevator) {
+                       blk_mq_sched_insert_request(rq, false, true, true,
+                                       true);
                 } else {
-                       srcu_idx = srcu_read_lock(&data.hctx->queue_rq_srcu);
-                       blk_mq_try_issue_directly(old_rq, &cookie);
-                       srcu_read_unlock(&data.hctx->queue_rq_srcu, srcu_idx);
+                       blk_insert_flush(rq);
+                       blk_mq_run_hw_queue(data.hctx, true);
                 }
-               goto done;
-       }
-
-       if (q->elevator) {
-elv_insert:
-               blk_mq_put_ctx(data.ctx);
-               blk_mq_bio_to_request(rq, bio);
-               blk_mq_sched_insert_request(rq, false, true,
-                                               !is_sync || is_flush_fua, true);
-               goto done;
-       }
-       if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
-               /*
-                * For a SYNC request, send it to the hardware immediately. For
-                * an ASYNC request, just ensure that we run it later on. The
-                * latter allows for merging opportunities and more efficient
-                * dispatching.
-                */
-run_queue:
-               blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
-       }
-       blk_mq_put_ctx(data.ctx);
-done:
-       return cookie;
-}
-
-/*
- * Single hardware queue variant. This will attempt to use any per-process
- * plug for merging and IO deferral.
- */
-static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
-{
-       const int is_sync = op_is_sync(bio->bi_opf);
-       const int is_flush_fua = op_is_flush(bio->bi_opf);
-       struct blk_plug *plug;
-       unsigned int request_count = 0;
-       struct blk_mq_alloc_data data = { .flags = 0 };
-       struct request *rq;
-       blk_qc_t cookie;
-       unsigned int wb_acct;
-
-       blk_queue_bounce(q, &bio);
-
-       if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-               bio_io_error(bio);
-               return BLK_QC_T_NONE;
-       }
-
-       blk_queue_split(q, &bio, q->bio_split);
-
-       if (!is_flush_fua && !blk_queue_nomerges(q)) {
-               if (blk_attempt_plug_merge(q, bio, &request_count, NULL))
-                       return BLK_QC_T_NONE;
-       } else
-               request_count = blk_plug_queued_count(q);
-
-       if (blk_mq_sched_bio_merge(q, bio))
-               return BLK_QC_T_NONE;
-
-       wb_acct = wbt_wait(q->rq_wb, bio, NULL);
-
-       trace_block_getrq(q, bio, bio->bi_opf);
-
-       rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data);
-       if (unlikely(!rq)) {
-               __wbt_done(q->rq_wb, wb_acct);
-               return BLK_QC_T_NONE;
-       }
-
-       wbt_track(&rq->issue_stat, wb_acct);
-
-       cookie = request_to_qc_t(data.hctx, rq);
-
-       if (unlikely(is_flush_fua)) {
-               if (q->elevator)
-                       goto elv_insert;
-               blk_mq_bio_to_request(rq, bio);
-               blk_insert_flush(rq);
-               goto run_queue;
-       }
-
-       /*
-        * A task plug currently exists. Since this is completely lockless,
-        * utilize that to temporarily store requests until the task is
-        * either done or scheduled away.
-        */
-       plug = current->plug;
-       if (plug) {
+       } else if (plug && q->nr_hw_queues == 1) {
                 struct request *last = NULL;
  
+               blk_mq_put_ctx(data.ctx);
                 blk_mq_bio_to_request(rq, bio);
  
                 /*
@@ -1674,13 +1589,14 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
                  */
                 if (list_empty(&plug->mq_list))
                         request_count = 0;
+               else if (blk_queue_nomerges(q))
+                       request_count = blk_plug_queued_count(q);
+
                 if (!request_count)
                         trace_block_plug(q);
                 else
                         last = list_entry_rq(plug->mq_list.prev);
  
-               blk_mq_put_ctx(data.ctx);
-
                 if (request_count >= BLK_MAX_REQUEST_COUNT || (last &&
                     blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
                         blk_flush_plug_list(plug, false);
@@ -1688,30 +1604,41 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
                 }
  
                 list_add_tail(&rq->queuelist, &plug->mq_list);
-               return cookie;
-       }
-
-       if (q->elevator) {
-elv_insert:
-               blk_mq_put_ctx(data.ctx);
+       } else if (plug && !blk_queue_nomerges(q)) {
                 blk_mq_bio_to_request(rq, bio);
-               blk_mq_sched_insert_request(rq, false, true,
-                                               !is_sync || is_flush_fua, true);
-               goto done;
-       }
-       if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
+
                 /*
-                * For a SYNC request, send it to the hardware immediately. For
-                * an ASYNC request, just ensure that we run it later on. The
-                * latter allows for merging opportunities and more efficient
-                * dispatching.
+                * We do limited plugging. If the bio can be merged, do that.
+                * Otherwise the existing request in the plug list will be
+                * issued. So the plug list will have one request at most.
+                * The plug list might get flushed before this. If that happens,
+                * the plug list is empty, and same_queue_rq is invalid.
                  */
-run_queue:
-               blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
-       }
+               if (list_empty(&plug->mq_list))
+                       same_queue_rq = NULL;
+               if (same_queue_rq)
+                       list_del_init(&same_queue_rq->queuelist);
+               list_add_tail(&rq->queuelist, &plug->mq_list);
+
+               blk_mq_put_ctx(data.ctx);
+
+               if (same_queue_rq)
+                       blk_mq_try_issue_directly(data.hctx, same_queue_rq,
+                                       &cookie);
+       } else if (q->nr_hw_queues > 1 && is_sync) {
+               blk_mq_put_ctx(data.ctx);
+               blk_mq_bio_to_request(rq, bio);
+               blk_mq_try_issue_directly(data.hctx, rq, &cookie);
+       } else if (q->elevator) {
+               blk_mq_put_ctx(data.ctx);
+               blk_mq_bio_to_request(rq, bio);
+               blk_mq_sched_insert_request(rq, false, true, true, true);
+       } else if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
+               blk_mq_put_ctx(data.ctx);
+               blk_mq_run_hw_queue(data.hctx, true);
+       } else
+               blk_mq_put_ctx(data.ctx);
  
-       blk_mq_put_ctx(data.ctx);
-done:
         return cookie;
  }
  
@@ -1931,6 +1858,8 @@ static void blk_mq_exit_hctx(struct request_queue *q,
                                        hctx->fq->flush_rq, hctx_idx,
                                        flush_start_tag + hctx_idx);
  
+       blk_mq_sched_exit_hctx(q, hctx, hctx_idx);
+
         if (set->ops->exit_hctx)
                 set->ops->exit_hctx(hctx, hctx_idx);
  
@@ -1955,16 +1884,6 @@ static void blk_mq_exit_hw_queues(struct request_queue *q,
         }
  }
  
-static void blk_mq_free_hw_queues(struct request_queue *q,
-               struct blk_mq_tag_set *set)
-{
-       struct blk_mq_hw_ctx *hctx;
-       unsigned int i;
-
-       queue_for_each_hw_ctx(q, hctx, i)
-               free_cpumask_var(hctx->cpumask);
-}
-
  static int blk_mq_init_hctx(struct request_queue *q,
                 struct blk_mq_tag_set *set,
                 struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
@@ -1976,8 +1895,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
         if (node == NUMA_NO_NODE)
                 node = hctx->numa_node = set->numa_node;
  
-       INIT_WORK(&hctx->run_work, blk_mq_run_work_fn);
-       INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn);
+       INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn);
         spin_lock_init(&hctx->lock);
         INIT_LIST_HEAD(&hctx->dispatch);
         hctx->queue = q;
@@ -2007,9 +1925,12 @@ static int blk_mq_init_hctx(struct request_queue *q,
             set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
                 goto free_bitmap;
  
+       if (blk_mq_sched_init_hctx(q, hctx, hctx_idx))
+               goto exit_hctx;
+
         hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
         if (!hctx->fq)
-               goto exit_hctx;
+               goto sched_exit_hctx;
  
         if (set->ops->init_request &&
             set->ops->init_request(set->driver_data,
@@ -2024,6 +1945,8 @@ static int blk_mq_init_hctx(struct request_queue *q,
  
   free_fq:
         kfree(hctx->fq);
+ sched_exit_hctx:
+       blk_mq_sched_exit_hctx(q, hctx, hctx_idx);
   exit_hctx:
         if (set->ops->exit_hctx)
                 set->ops->exit_hctx(hctx, hctx_idx);
@@ -2045,13 +1968,10 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
                 struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i);
                 struct blk_mq_hw_ctx *hctx;
  
-               memset(__ctx, 0, sizeof(*__ctx));
                 __ctx->cpu = i;
                 spin_lock_init(&__ctx->lock);
                 INIT_LIST_HEAD(&__ctx->rq_list);
                 __ctx->queue = q;
-               blk_stat_init(&__ctx->stat[BLK_STAT_READ]);
-               blk_stat_init(&__ctx->stat[BLK_STAT_WRITE]);
  
                 /* If the cpu isn't online, the cpu is mapped to first hctx */
                 if (!cpu_online(i))
@@ -2198,6 +2118,8 @@ static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared)
  {
         struct request_queue *q;
  
+       lockdep_assert_held(&set->tag_list_lock);
+
         list_for_each_entry(q, &set->tag_list, tag_set_list) {
                 blk_mq_freeze_queue(q);
                 queue_set_hctx_shared(q, shared);
@@ -2210,7 +2132,8 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
         struct blk_mq_tag_set *set = q->tag_set;
  
         mutex_lock(&set->tag_list_lock);
-       list_del_init(&q->tag_set_list);
+       list_del_rcu(&q->tag_set_list);
+       INIT_LIST_HEAD(&q->tag_set_list);
         if (list_is_singular(&set->tag_list)) {
                 /* just transitioned to unshared */
                 set->flags &= ~BLK_MQ_F_TAG_SHARED;
@@ -2218,6 +2141,8 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
                 blk_mq_update_tag_set_depth(set, false);
         }
         mutex_unlock(&set->tag_list_lock);
+
+       synchronize_rcu();
  }
  
  static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
@@ -2235,7 +2160,7 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
         }
         if (set->flags & BLK_MQ_F_TAG_SHARED)
                 queue_set_hctx_shared(q, true);
-       list_add_tail(&q->tag_set_list, &set->tag_list);
+       list_add_tail_rcu(&q->tag_set_list, &set->tag_list);
  
         mutex_unlock(&set->tag_list_lock);
  }
@@ -2251,21 +2176,23 @@ void blk_mq_release(struct request_queue *q)
         struct blk_mq_hw_ctx *hctx;
         unsigned int i;
  
-       blk_mq_sched_teardown(q);
-
         /* hctx kobj stays in hctx */
         queue_for_each_hw_ctx(q, hctx, i) {
                 if (!hctx)
                         continue;
-               kfree(hctx->ctxs);
-               kfree(hctx);
+               kobject_put(&hctx->kobj);
         }
  
         q->mq_map = NULL;
  
         kfree(q->queue_hw_ctx);
  
-       /* ctx kobj stays in queue_ctx */
+       /*
+        * release .mq_kobj and sw queue's kobject now because
+        * both share lifetime with request queue.
+        */
+       blk_mq_sysfs_deinit(q);
+
         free_percpu(q->queue_ctx);
  }
  
@@ -2330,10 +2257,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
                         if (hctx->tags)
                                 blk_mq_free_map_and_requests(set, j);
                         blk_mq_exit_hctx(q, set, hctx, j);
-                       free_cpumask_var(hctx->cpumask);
                         kobject_put(&hctx->kobj);
-                       kfree(hctx->ctxs);
-                       kfree(hctx);
                         hctxs[j] = NULL;
  
                 }
@@ -2348,10 +2272,19 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
         /* mark the queue as mq asap */
         q->mq_ops = set->ops;
  
+       q->poll_cb = blk_stat_alloc_callback(blk_mq_poll_stats_fn,
+                                            blk_mq_poll_stats_bkt,
+                                            BLK_MQ_POLL_STATS_BKTS, q);
+       if (!q->poll_cb)
+               goto err_exit;
+
         q->queue_ctx = alloc_percpu(struct blk_mq_ctx);
         if (!q->queue_ctx)
                 goto err_exit;
  
+       /* init q->mq_kobj and sw queues' kobjects */
+       blk_mq_sysfs_init(q);
+
         q->queue_hw_ctx = kzalloc_node(nr_cpu_ids * sizeof(*(q->queue_hw_ctx)),
                                                 GFP_KERNEL, set->numa_node);
         if (!q->queue_hw_ctx)
@@ -2379,10 +2312,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
         INIT_LIST_HEAD(&q->requeue_list);
         spin_lock_init(&q->requeue_lock);
  
-       if (q->nr_hw_queues > 1)
-               blk_queue_make_request(q, blk_mq_make_request);
-       else
-               blk_queue_make_request(q, blk_sq_make_request);
+       blk_queue_make_request(q, blk_mq_make_request);
  
         /*
          * Do this after blk_queue_make_request() overrides it...
@@ -2437,12 +2367,9 @@ void blk_mq_free_queue(struct request_queue *q)
         list_del_init(&q->all_q_node);
         mutex_unlock(&all_q_mutex);
  
-       wbt_exit(q);
-
         blk_mq_del_queue_tag_set(q);
  
         blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
-       blk_mq_free_hw_queues(q, set);
  }
  
  /* Basically redo blk_mq_init_queue with queue frozen */
@@ -2484,7 +2411,7 @@ static void blk_mq_queue_reinit_work(void)
          * take place in parallel.
          */
         list_for_each_entry(q, &all_q_list, all_q_node)
-               blk_mq_freeze_queue_start(q);
+               blk_freeze_queue_start(q);
         list_for_each_entry(q, &all_q_list, all_q_node)
                 blk_mq_freeze_queue_wait(q);
  
@@ -2580,6 +2507,14 @@ static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
         return 0;
  }
  
+static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
+{
+       if (set->ops->map_queues)
+               return set->ops->map_queues(set);
+       else
+               return blk_mq_map_queues(set);
+}
+
  /*
   * Alloc a tag set to be associated with one or more request queues.
   * May fail with EINVAL for various error conditions. May adjust the
@@ -2634,10 +2569,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
         if (!set->mq_map)
                 goto out_free_tags;
  
-       if (set->ops->map_queues)
-               ret = set->ops->map_queues(set);
-       else
-               ret = blk_mq_map_queues(set);
+       ret = blk_mq_update_queue_map(set);
         if (ret)
                 goto out_free_mq_map;
  
@@ -2720,6 +2652,8 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
  {
         struct request_queue *q;
  
+       lockdep_assert_held(&set->tag_list_lock);
+
         if (nr_hw_queues > nr_cpu_ids)
                 nr_hw_queues = nr_cpu_ids;
         if (nr_hw_queues < 1 || nr_hw_queues == set->nr_hw_queues)
@@ -2729,18 +2663,9 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
                 blk_mq_freeze_queue(q);
  
         set->nr_hw_queues = nr_hw_queues;
+       blk_mq_update_queue_map(set);
         list_for_each_entry(q, &set->tag_list, tag_set_list) {
                 blk_mq_realloc_hw_ctxs(set, q);
-
-               /*
-                * Manually set the make_request_fn as blk_queue_make_request
-                * resets a lot of the queue settings.
-                */
-               if (q->nr_hw_queues > 1)
-                       q->make_request_fn = blk_mq_make_request;
-               else
-                       q->make_request_fn = blk_sq_make_request;
-
                 blk_mq_queue_reinit(q, cpu_online_mask);
         }
  
@@ -2749,39 +2674,69 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
  }
  EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
  
+/* Enable polling stats and return whether they were already enabled. */
+static bool blk_poll_stats_enable(struct request_queue *q)
+{
+       if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags) ||
+           test_and_set_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
+               return true;
+       blk_stat_add_callback(q, q->poll_cb);
+       return false;
+}
+
+static void blk_mq_poll_stats_start(struct request_queue *q)
+{
+       /*
+        * We don't arm the callback if polling stats are not enabled or the
+        * callback is already active.
+        */
+       if (!test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags) ||
+           blk_stat_is_active(q->poll_cb))
+               return;
+
+       blk_stat_activate_msecs(q->poll_cb, 100);
+}
+
+static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb)
+{
+       struct request_queue *q = cb->data;
+       int bucket;
+
+       for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS; bucket++) {
+               if (cb->stat[bucket].nr_samples)
+                       q->poll_stat[bucket] = cb->stat[bucket];
+       }
+}
+
  static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
                                        struct blk_mq_hw_ctx *hctx,
                                        struct request *rq)
  {
-       struct blk_rq_stat stat[2];
         unsigned long ret = 0;
+       int bucket;
  
         /*
          * If stats collection isn't on, don't sleep but turn it on for
          * future users
          */
-       if (!blk_stat_enable(q))
+       if (!blk_poll_stats_enable(q))
                 return 0;
  
-       /*
-        * We don't have to do this once per IO, should optimize this
-        * to just use the current window of stats until it changes
-        */
-       memset(&stat, 0, sizeof(stat));
-       blk_hctx_stat_get(hctx, stat);
-
         /*
          * As an optimistic guess, use half of the mean service time
          * for this type of request. We can (and should) make this smarter.
          * For instance, if the completion latencies are tight, we can
          * get closer than just half the mean. This is especially
          * important on devices where the completion latencies are longer
-        * than ~10 usec.
+        * than ~10 usec. We do use the stats for the relevant IO size
+        * if available, which does lead to better estimates.
          */
-       if (req_op(rq) == REQ_OP_READ && stat[BLK_STAT_READ].nr_samples)
-               ret = (stat[BLK_STAT_READ].mean + 1) / 2;
-       else if (req_op(rq) == REQ_OP_WRITE && stat[BLK_STAT_WRITE].nr_samples)
-               ret = (stat[BLK_STAT_WRITE].mean + 1) / 2;
+       bucket = blk_mq_poll_stats_bkt(rq);
+       if (bucket < 0)
+               return ret;
+
+       if (q->poll_stat[bucket].nr_samples)
+               ret = (q->poll_stat[bucket].mean + 1) / 2;
  
         return ret;
  }
@@ -2904,8 +2859,17 @@ bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie)
         hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)];
         if (!blk_qc_t_is_internal(cookie))
                 rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie));
-       else
+       else {
                 rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie));
+               /*
+                * With scheduling, if the request has completed, we'll
+                * get a NULL return here, as we clear the sched tag when
+                * that happens. The request still remains valid, like always,
+                * so we should be safe with just the NULL check.
+                */
+               if (!rq)
+                       return false;
+       }
  
         return __blk_mq_poll(hctx, rq);
  }
diff --git a/block/blk-mq.h b/block/blk-mq.h

index 088ced003c13d7282712b423ade0521c16aeebdc..2814a14e529cdd18c58b3bb0f18663f803596167 100644 (file)
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -20,7 +20,6 @@ struct blk_mq_ctx {
  
         /* incremented at completion time */
         unsigned long           ____cacheline_aligned_in_smp rq_completed[2];
-       struct blk_rq_stat      stat[2];
  
         struct request_queue    *queue;
         struct kobject          kobj;
@@ -31,7 +30,7 @@ void blk_mq_freeze_queue(struct request_queue *q);
  void blk_mq_free_queue(struct request_queue *q);
  int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
  void blk_mq_wake_waiters(struct request_queue *q);
-bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *);
+bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *);
  void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
  bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx);
  bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
@@ -77,6 +76,9 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
  /*
   * sysfs helpers
   */
+extern void blk_mq_sysfs_init(struct request_queue *q);
+extern void blk_mq_sysfs_deinit(struct request_queue *q);
+extern int __blk_mq_register_dev(struct device *dev, struct request_queue *q);
  extern int blk_mq_sysfs_register(struct request_queue *q);
  extern void blk_mq_sysfs_unregister(struct request_queue *q);
  extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
@@ -85,13 +87,12 @@ extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
   * debugfs helpers
   */
  #ifdef CONFIG_BLK_DEBUG_FS
-int blk_mq_debugfs_register(struct request_queue *q, const char *name);
+int blk_mq_debugfs_register(struct request_queue *q);
  void blk_mq_debugfs_unregister(struct request_queue *q);
-int blk_mq_debugfs_register_hctxs(struct request_queue *q);
-void blk_mq_debugfs_unregister_hctxs(struct request_queue *q);
+int blk_mq_debugfs_register_mq(struct request_queue *q);
+void blk_mq_debugfs_unregister_mq(struct request_queue *q);
  #else
-static inline int blk_mq_debugfs_register(struct request_queue *q,
-                                         const char *name)
+static inline int blk_mq_debugfs_register(struct request_queue *q)
  {
         return 0;
  }
@@ -100,12 +101,12 @@ static inline void blk_mq_debugfs_unregister(struct request_queue *q)
  {
  }
  
-static inline int blk_mq_debugfs_register_hctxs(struct request_queue *q)
+static inline int blk_mq_debugfs_register_mq(struct request_queue *q)
  {
         return 0;
  }
  
-static inline void blk_mq_debugfs_unregister_hctxs(struct request_queue *q)
+static inline void blk_mq_debugfs_unregister_mq(struct request_queue *q)
  {
  }
  #endif
@@ -140,6 +141,7 @@ struct blk_mq_alloc_data {
         /* input parameter */
         struct request_queue *q;
         unsigned int flags;
+       unsigned int shallow_depth;
  
         /* input & output parameter */
         struct blk_mq_ctx *ctx;
diff --git a/block/blk-settings.c b/block/blk-settings.c

index 1e7174ffc9d49d0757cf7cb7da1ffe822f7fbbd6..4fa81ed383cab4ec1f543fa33e0536cbac8d7540 100644 (file)
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -103,7 +103,6 @@ void blk_set_default_limits(struct queue_limits *lim)
         lim->discard_granularity = 0;
         lim->discard_alignment = 0;
         lim->discard_misaligned = 0;
-       lim->discard_zeroes_data = 0;
         lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
         lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
         lim->alignment_offset = 0;
@@ -127,7 +126,6 @@ void blk_set_stacking_limits(struct queue_limits *lim)
         blk_set_default_limits(lim);
  
         /* Inherit limits from component devices */
-       lim->discard_zeroes_data = 1;
         lim->max_segments = USHRT_MAX;
         lim->max_discard_segments = 1;
         lim->max_hw_sectors = UINT_MAX;
@@ -609,7 +607,6 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
         t->io_opt = lcm_not_zero(t->io_opt, b->io_opt);
  
         t->cluster &= b->cluster;
-       t->discard_zeroes_data &= b->discard_zeroes_data;
  
         /* Physical block size a multiple of the logical block size? */
         if (t->physical_block_size & (t->logical_block_size - 1)) {
diff --git a/block/blk-stat.c b/block/blk-stat.c

index 9b43efb8933fb9a0f352922029b86903e5b7c8e7..6c2f40940439c5b50a6aad0c0a560e8a2b2bf08b 100644 (file)
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -4,10 +4,27 @@
   * Copyright (C) 2016 Jens Axboe
   */
  #include <linux/kernel.h>
+#include <linux/rculist.h>
  #include <linux/blk-mq.h>
  
  #include "blk-stat.h"
  #include "blk-mq.h"
+#include "blk.h"
+
+#define BLK_RQ_STAT_BATCH      64
+
+struct blk_queue_stats {
+       struct list_head callbacks;
+       spinlock_t lock;
+       bool enable_accounting;
+};
+
+static void blk_stat_init(struct blk_rq_stat *stat)
+{
+       stat->min = -1ULL;
+       stat->max = stat->nr_samples = stat->mean = 0;
+       stat->batch = stat->nr_batch = 0;
+}
  
  static void blk_stat_flush_batch(struct blk_rq_stat *stat)
  {
@@ -30,11 +47,11 @@ static void blk_stat_flush_batch(struct blk_rq_stat *stat)
  
  static void blk_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src)
  {
+       blk_stat_flush_batch(src);
+
         if (!src->nr_samples)
                 return;
  
-       blk_stat_flush_batch(src);
-
         dst->min = min(dst->min, src->min);
         dst->max = max(dst->max, src->max);
  
@@ -48,209 +65,185 @@ static void blk_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src)
         dst->nr_samples += src->nr_samples;
  }
  
-static void blk_mq_stat_get(struct request_queue *q, struct blk_rq_stat *dst)
+static void __blk_stat_add(struct blk_rq_stat *stat, u64 value)
  {
-       struct blk_mq_hw_ctx *hctx;
-       struct blk_mq_ctx *ctx;
-       uint64_t latest = 0;
-       int i, j, nr;
-
-       blk_stat_init(&dst[BLK_STAT_READ]);
-       blk_stat_init(&dst[BLK_STAT_WRITE]);
-
-       nr = 0;
-       do {
-               uint64_t newest = 0;
-
-               queue_for_each_hw_ctx(q, hctx, i) {
-                       hctx_for_each_ctx(hctx, ctx, j) {
-                               blk_stat_flush_batch(&ctx->stat[BLK_STAT_READ]);
-                               blk_stat_flush_batch(&ctx->stat[BLK_STAT_WRITE]);
-
-                               if (!ctx->stat[BLK_STAT_READ].nr_samples &&
-                                   !ctx->stat[BLK_STAT_WRITE].nr_samples)
-                                       continue;
-                               if (ctx->stat[BLK_STAT_READ].time > newest)
-                                       newest = ctx->stat[BLK_STAT_READ].time;
-                               if (ctx->stat[BLK_STAT_WRITE].time > newest)
-                                       newest = ctx->stat[BLK_STAT_WRITE].time;
-                       }
-               }
+       stat->min = min(stat->min, value);
+       stat->max = max(stat->max, value);
  
-               /*
-                * No samples
-                */
-               if (!newest)
-                       break;
-
-               if (newest > latest)
-                       latest = newest;
-
-               queue_for_each_hw_ctx(q, hctx, i) {
-                       hctx_for_each_ctx(hctx, ctx, j) {
-                               if (ctx->stat[BLK_STAT_READ].time == newest) {
-                                       blk_stat_sum(&dst[BLK_STAT_READ],
-                                                    &ctx->stat[BLK_STAT_READ]);
-                                       nr++;
-                               }
-                               if (ctx->stat[BLK_STAT_WRITE].time == newest) {
-                                       blk_stat_sum(&dst[BLK_STAT_WRITE],
-                                                    &ctx->stat[BLK_STAT_WRITE]);
-                                       nr++;
-                               }
-                       }
-               }
-               /*
-                * If we race on finding an entry, just loop back again.
-                * Should be very rare.
-                */
-       } while (!nr);
+       if (stat->batch + value < stat->batch ||
+           stat->nr_batch + 1 == BLK_RQ_STAT_BATCH)
+               blk_stat_flush_batch(stat);
  
-       dst[BLK_STAT_READ].time = dst[BLK_STAT_WRITE].time = latest;
+       stat->batch += value;
+       stat->nr_batch++;
  }
  
-void blk_queue_stat_get(struct request_queue *q, struct blk_rq_stat *dst)
+void blk_stat_add(struct request *rq)
  {
-       if (q->mq_ops)
-               blk_mq_stat_get(q, dst);
-       else {
-               blk_stat_flush_batch(&q->rq_stats[BLK_STAT_READ]);
-               blk_stat_flush_batch(&q->rq_stats[BLK_STAT_WRITE]);
-               memcpy(&dst[BLK_STAT_READ], &q->rq_stats[BLK_STAT_READ],
-                               sizeof(struct blk_rq_stat));
-               memcpy(&dst[BLK_STAT_WRITE], &q->rq_stats[BLK_STAT_WRITE],
-                               sizeof(struct blk_rq_stat));
+       struct request_queue *q = rq->q;
+       struct blk_stat_callback *cb;
+       struct blk_rq_stat *stat;
+       int bucket;
+       s64 now, value;
+
+       now = __blk_stat_time(ktime_to_ns(ktime_get()));
+       if (now < blk_stat_time(&rq->issue_stat))
+               return;
+
+       value = now - blk_stat_time(&rq->issue_stat);
+
+       blk_throtl_stat_add(rq, value);
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(cb, &q->stats->callbacks, list) {
+               if (blk_stat_is_active(cb)) {
+                       bucket = cb->bucket_fn(rq);
+                       if (bucket < 0)
+                               continue;
+                       stat = &this_cpu_ptr(cb->cpu_stat)[bucket];
+                       __blk_stat_add(stat, value);
+               }
         }
+       rcu_read_unlock();
  }
  
-void blk_hctx_stat_get(struct blk_mq_hw_ctx *hctx, struct blk_rq_stat *dst)
+static void blk_stat_timer_fn(unsigned long data)
  {
-       struct blk_mq_ctx *ctx;
-       unsigned int i, nr;
+       struct blk_stat_callback *cb = (void *)data;
+       unsigned int bucket;
+       int cpu;
  
-       nr = 0;
-       do {
-               uint64_t newest = 0;
+       for (bucket = 0; bucket < cb->buckets; bucket++)
+               blk_stat_init(&cb->stat[bucket]);
  
-               hctx_for_each_ctx(hctx, ctx, i) {
-                       blk_stat_flush_batch(&ctx->stat[BLK_STAT_READ]);
-                       blk_stat_flush_batch(&ctx->stat[BLK_STAT_WRITE]);
+       for_each_online_cpu(cpu) {
+               struct blk_rq_stat *cpu_stat;
  
-                       if (!ctx->stat[BLK_STAT_READ].nr_samples &&
-                           !ctx->stat[BLK_STAT_WRITE].nr_samples)
-                               continue;
-
-                       if (ctx->stat[BLK_STAT_READ].time > newest)
-                               newest = ctx->stat[BLK_STAT_READ].time;
-                       if (ctx->stat[BLK_STAT_WRITE].time > newest)
-                               newest = ctx->stat[BLK_STAT_WRITE].time;
+               cpu_stat = per_cpu_ptr(cb->cpu_stat, cpu);
+               for (bucket = 0; bucket < cb->buckets; bucket++) {
+                       blk_stat_sum(&cb->stat[bucket], &cpu_stat[bucket]);
+                       blk_stat_init(&cpu_stat[bucket]);
                 }
+       }
  
-               if (!newest)
-                       break;
-
-               hctx_for_each_ctx(hctx, ctx, i) {
-                       if (ctx->stat[BLK_STAT_READ].time == newest) {
-                               blk_stat_sum(&dst[BLK_STAT_READ],
-                                               &ctx->stat[BLK_STAT_READ]);
-                               nr++;
-                       }
-                       if (ctx->stat[BLK_STAT_WRITE].time == newest) {
-                               blk_stat_sum(&dst[BLK_STAT_WRITE],
-                                               &ctx->stat[BLK_STAT_WRITE]);
-                               nr++;
-                       }
-               }
-               /*
-                * If we race on finding an entry, just loop back again.
-                * Should be very rare, as the window is only updated
-                * occasionally
-                */
-       } while (!nr);
+       cb->timer_fn(cb);
  }
  
-static void __blk_stat_init(struct blk_rq_stat *stat, s64 time_now)
+struct blk_stat_callback *
+blk_stat_alloc_callback(void (*timer_fn)(struct blk_stat_callback *),
+                       int (*bucket_fn)(const struct request *),
+                       unsigned int buckets, void *data)
  {
-       stat->min = -1ULL;
-       stat->max = stat->nr_samples = stat->mean = 0;
-       stat->batch = stat->nr_batch = 0;
-       stat->time = time_now & BLK_STAT_NSEC_MASK;
-}
+       struct blk_stat_callback *cb;
  
-void blk_stat_init(struct blk_rq_stat *stat)
-{
-       __blk_stat_init(stat, ktime_to_ns(ktime_get()));
-}
+       cb = kmalloc(sizeof(*cb), GFP_KERNEL);
+       if (!cb)
+               return NULL;
  
-static bool __blk_stat_is_current(struct blk_rq_stat *stat, s64 now)
-{
-       return (now & BLK_STAT_NSEC_MASK) == (stat->time & BLK_STAT_NSEC_MASK);
+       cb->stat = kmalloc_array(buckets, sizeof(struct blk_rq_stat),
+                                GFP_KERNEL);
+       if (!cb->stat) {
+               kfree(cb);
+               return NULL;
+       }
+       cb->cpu_stat = __alloc_percpu(buckets * sizeof(struct blk_rq_stat),
+                                     __alignof__(struct blk_rq_stat));
+       if (!cb->cpu_stat) {
+               kfree(cb->stat);
+               kfree(cb);
+               return NULL;
+       }
+
+       cb->timer_fn = timer_fn;
+       cb->bucket_fn = bucket_fn;
+       cb->data = data;
+       cb->buckets = buckets;
+       setup_timer(&cb->timer, blk_stat_timer_fn, (unsigned long)cb);
+
+       return cb;
  }
+EXPORT_SYMBOL_GPL(blk_stat_alloc_callback);
  
-bool blk_stat_is_current(struct blk_rq_stat *stat)
+void blk_stat_add_callback(struct request_queue *q,
+                          struct blk_stat_callback *cb)
  {
-       return __blk_stat_is_current(stat, ktime_to_ns(ktime_get()));
+       unsigned int bucket;
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               struct blk_rq_stat *cpu_stat;
+
+               cpu_stat = per_cpu_ptr(cb->cpu_stat, cpu);
+               for (bucket = 0; bucket < cb->buckets; bucket++)
+                       blk_stat_init(&cpu_stat[bucket]);
+       }
+
+       spin_lock(&q->stats->lock);
+       list_add_tail_rcu(&cb->list, &q->stats->callbacks);
+       set_bit(QUEUE_FLAG_STATS, &q->queue_flags);
+       spin_unlock(&q->stats->lock);
  }
+EXPORT_SYMBOL_GPL(blk_stat_add_callback);
  
-void blk_stat_add(struct blk_rq_stat *stat, struct request *rq)
+void blk_stat_remove_callback(struct request_queue *q,
+                             struct blk_stat_callback *cb)
  {
-       s64 now, value;
+       spin_lock(&q->stats->lock);
+       list_del_rcu(&cb->list);
+       if (list_empty(&q->stats->callbacks) && !q->stats->enable_accounting)
+               clear_bit(QUEUE_FLAG_STATS, &q->queue_flags);
+       spin_unlock(&q->stats->lock);
  
-       now = __blk_stat_time(ktime_to_ns(ktime_get()));
-       if (now < blk_stat_time(&rq->issue_stat))
-               return;
-
-       if (!__blk_stat_is_current(stat, now))
-               __blk_stat_init(stat, now);
+       del_timer_sync(&cb->timer);
+}
+EXPORT_SYMBOL_GPL(blk_stat_remove_callback);
  
-       value = now - blk_stat_time(&rq->issue_stat);
-       if (value > stat->max)
-               stat->max = value;
-       if (value < stat->min)
-               stat->min = value;
+static void blk_stat_free_callback_rcu(struct rcu_head *head)
+{
+       struct blk_stat_callback *cb;
  
-       if (stat->batch + value < stat->batch ||
-           stat->nr_batch + 1 == BLK_RQ_STAT_BATCH)
-               blk_stat_flush_batch(stat);
+       cb = container_of(head, struct blk_stat_callback, rcu);
+       free_percpu(cb->cpu_stat);
+       kfree(cb->stat);
+       kfree(cb);
+}
  
-       stat->batch += value;
-       stat->nr_batch++;
+void blk_stat_free_callback(struct blk_stat_callback *cb)
+{
+       if (cb)
+               call_rcu(&cb->rcu, blk_stat_free_callback_rcu);
  }
+EXPORT_SYMBOL_GPL(blk_stat_free_callback);
  
-void blk_stat_clear(struct request_queue *q)
+void blk_stat_enable_accounting(struct request_queue *q)
  {
-       if (q->mq_ops) {
-               struct blk_mq_hw_ctx *hctx;
-               struct blk_mq_ctx *ctx;
-               int i, j;
-
-               queue_for_each_hw_ctx(q, hctx, i) {
-                       hctx_for_each_ctx(hctx, ctx, j) {
-                               blk_stat_init(&ctx->stat[BLK_STAT_READ]);
-                               blk_stat_init(&ctx->stat[BLK_STAT_WRITE]);
-                       }
-               }
-       } else {
-               blk_stat_init(&q->rq_stats[BLK_STAT_READ]);
-               blk_stat_init(&q->rq_stats[BLK_STAT_WRITE]);
-       }
+       spin_lock(&q->stats->lock);
+       q->stats->enable_accounting = true;
+       set_bit(QUEUE_FLAG_STATS, &q->queue_flags);
+       spin_unlock(&q->stats->lock);
  }
  
-void blk_stat_set_issue_time(struct blk_issue_stat *stat)
+struct blk_queue_stats *blk_alloc_queue_stats(void)
  {
-       stat->time = (stat->time & BLK_STAT_MASK) |
-                       (ktime_to_ns(ktime_get()) & BLK_STAT_TIME_MASK);
+       struct blk_queue_stats *stats;
+
+       stats = kmalloc(sizeof(*stats), GFP_KERNEL);
+       if (!stats)
+               return NULL;
+
+       INIT_LIST_HEAD(&stats->callbacks);
+       spin_lock_init(&stats->lock);
+       stats->enable_accounting = false;
+
+       return stats;
  }
  
-/*
- * Enable stat tracking, return whether it was enabled
- */
-bool blk_stat_enable(struct request_queue *q)
+void blk_free_queue_stats(struct blk_queue_stats *stats)
  {
-       if (!test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
-               set_bit(QUEUE_FLAG_STATS, &q->queue_flags);
-               return false;
-       }
+       if (!stats)
+               return;
+
+       WARN_ON(!list_empty(&stats->callbacks));
  
-       return true;
+       kfree(stats);
  }
diff --git a/block/blk-stat.h b/block/blk-stat.h

index a2050a0a5314bba690cfe6b1f6526e285c892f0d..2fb20d1a341a8db97afb11ce366e740cd29d3bcc 100644 (file)
--- a/block/blk-stat.h
+++ b/block/blk-stat.h
@@ -1,33 +1,85 @@
  #ifndef BLK_STAT_H
  #define BLK_STAT_H
  
-/*
- * ~0.13s window as a power-of-2 (2^27 nsecs)
- */
-#define BLK_STAT_NSEC          134217728ULL
-#define BLK_STAT_NSEC_MASK     ~(BLK_STAT_NSEC - 1)
+#include <linux/kernel.h>
+#include <linux/blkdev.h>
+#include <linux/ktime.h>
+#include <linux/rcupdate.h>
+#include <linux/timer.h>
  
  /*
- * Upper 3 bits can be used elsewhere
+ * Bit layout, from the most significant bit down:
+ * 3 bits: reserved for other usage
+ * 12 bits: size
+ * 49 bits: time
   */
  #define BLK_STAT_RES_BITS      3
-#define BLK_STAT_SHIFT         (64 - BLK_STAT_RES_BITS)
-#define BLK_STAT_TIME_MASK     ((1ULL << BLK_STAT_SHIFT) - 1)
-#define BLK_STAT_MASK          ~BLK_STAT_TIME_MASK
+#define BLK_STAT_SIZE_BITS     12
+#define BLK_STAT_RES_SHIFT     (64 - BLK_STAT_RES_BITS)
+#define BLK_STAT_SIZE_SHIFT    (BLK_STAT_RES_SHIFT - BLK_STAT_SIZE_BITS)
+#define BLK_STAT_TIME_MASK     ((1ULL << BLK_STAT_SIZE_SHIFT) - 1)
+#define BLK_STAT_SIZE_MASK     \
+       (((1ULL << BLK_STAT_SIZE_BITS) - 1) << BLK_STAT_SIZE_SHIFT)
+#define BLK_STAT_RES_MASK      (~((1ULL << BLK_STAT_RES_SHIFT) - 1))
+
+/**
+ * struct blk_stat_callback - Block statistics callback.
+ *
+ * A &struct blk_stat_callback is associated with a &struct request_queue. While
+ * @timer is active, that queue's request completion latencies are sorted into
+ * buckets by @bucket_fn and added to a per-cpu buffer, @cpu_stat. When the
+ * timer fires, @cpu_stat is flushed to @stat and @timer_fn is invoked.
+ */
+struct blk_stat_callback {
+       /**
+        * @list: RCU list of callbacks for a &struct request_queue.
+        */
+       struct list_head list;
+
+       /**
+        * @timer: Timer for the next callback invocation.
+        */
+       struct timer_list timer;
+
+       /**
+        * @cpu_stat: Per-cpu statistics buckets.
+        */
+       struct blk_rq_stat __percpu *cpu_stat;
+
+       /**
+        * @bucket_fn: Given a request, returns which statistics bucket it
+        * should be accounted under. Return -1 for no bucket for this
+        * request.
+        */
+       int (*bucket_fn)(const struct request *);
+
+       /**
+        * @buckets: Number of statistics buckets.
+        */
+       unsigned int buckets;
+
+       /**
+        * @stat: Array of statistics buckets.
+        */
+       struct blk_rq_stat *stat;
+
+       /**
+        * @timer_fn: Callback function.
+        */
+       void (*timer_fn)(struct blk_stat_callback *);
+
+       /**
+        * @data: Private pointer for the user.
+        */
+       void *data;
  
-enum {
-       BLK_STAT_READ   = 0,
-       BLK_STAT_WRITE,
+       struct rcu_head rcu;
  };
  
-void blk_stat_add(struct blk_rq_stat *, struct request *);
-void blk_hctx_stat_get(struct blk_mq_hw_ctx *, struct blk_rq_stat *);
-void blk_queue_stat_get(struct request_queue *, struct blk_rq_stat *);
-void blk_stat_clear(struct request_queue *);
-void blk_stat_init(struct blk_rq_stat *);
-bool blk_stat_is_current(struct blk_rq_stat *);
-void blk_stat_set_issue_time(struct blk_issue_stat *);
-bool blk_stat_enable(struct request_queue *);
+struct blk_queue_stats *blk_alloc_queue_stats(void);
+void blk_free_queue_stats(struct blk_queue_stats *);
+
+void blk_stat_add(struct request *);
  
  static inline u64 __blk_stat_time(u64 time)
  {
@@ -36,7 +88,117 @@ static inline u64 __blk_stat_time(u64 time)
  
  static inline u64 blk_stat_time(struct blk_issue_stat *stat)
  {
-       return __blk_stat_time(stat->time);
+       return __blk_stat_time(stat->stat);
+}
+
+static inline sector_t blk_capped_size(sector_t size)
+{
+       return size & ((1ULL << BLK_STAT_SIZE_BITS) - 1);
+}
+
+static inline sector_t blk_stat_size(struct blk_issue_stat *stat)
+{
+       return (stat->stat & BLK_STAT_SIZE_MASK) >> BLK_STAT_SIZE_SHIFT;
+}
+
+static inline void blk_stat_set_issue(struct blk_issue_stat *stat,
+       sector_t size)
+{
+       stat->stat = (stat->stat & BLK_STAT_RES_MASK) |
+               (ktime_to_ns(ktime_get()) & BLK_STAT_TIME_MASK) |
+               (((u64)blk_capped_size(size)) << BLK_STAT_SIZE_SHIFT);
+}
+
+/* Record time/size info in the request without adding a callback. */
+void blk_stat_enable_accounting(struct request_queue *q);
+
+/**
+ * blk_stat_alloc_callback() - Allocate a block statistics callback.
+ * @timer_fn: Timer callback function.
+ * @bucket_fn: Bucket callback function.
+ * @buckets: Number of statistics buckets.
+ * @data: Value for the @data field of the &struct blk_stat_callback.
+ *
+ * See &struct blk_stat_callback for details on the callback functions.
+ *
+ * Return: &struct blk_stat_callback on success or NULL on ENOMEM.
+ */
+struct blk_stat_callback *
+blk_stat_alloc_callback(void (*timer_fn)(struct blk_stat_callback *),
+                       int (*bucket_fn)(const struct request *),
+                       unsigned int buckets, void *data);
+
+/**
+ * blk_stat_add_callback() - Add a block statistics callback to be run on a
+ * request queue.
+ * @q: The request queue.
+ * @cb: The callback.
+ *
+ * Note that a single &struct blk_stat_callback can only be added to a single
+ * &struct request_queue.
+ */
+void blk_stat_add_callback(struct request_queue *q,
+                          struct blk_stat_callback *cb);
+
+/**
+ * blk_stat_remove_callback() - Remove a block statistics callback from a
+ * request queue.
+ * @q: The request queue.
+ * @cb: The callback.
+ *
+ * When this returns, the callback is not running on any CPUs and will not be
+ * called again unless readded.
+ */
+void blk_stat_remove_callback(struct request_queue *q,
+                             struct blk_stat_callback *cb);
+
+/**
+ * blk_stat_free_callback() - Free a block statistics callback.
+ * @cb: The callback.
+ *
+ * @cb may be NULL, in which case this does nothing. If it is not NULL, @cb must
+ * not be associated with a request queue. I.e., if it was previously added with
+ * blk_stat_add_callback(), it must also have been removed since then with
+ * blk_stat_remove_callback().
+ */
+void blk_stat_free_callback(struct blk_stat_callback *cb);
+
+/**
+ * blk_stat_is_active() - Check if a block statistics callback is currently
+ * gathering statistics.
+ * @cb: The callback.
+ */
+static inline bool blk_stat_is_active(struct blk_stat_callback *cb)
+{
+       return timer_pending(&cb->timer);
+}
+
+/**
+ * blk_stat_activate_nsecs() - Gather block statistics during a time window in
+ * nanoseconds.
+ * @cb: The callback.
+ * @nsecs: Number of nanoseconds to gather statistics for.
+ *
+ * The timer callback will be called when the window expires.
+ */
+static inline void blk_stat_activate_nsecs(struct blk_stat_callback *cb,
+                                          u64 nsecs)
+{
+       mod_timer(&cb->timer, jiffies + nsecs_to_jiffies(nsecs));
+}
+
+/**
+ * blk_stat_activate_msecs() - Gather block statistics during a time window in
+ * milliseconds.
+ * @cb: The callback.
+ * @msecs: Number of milliseconds to gather statistics for.
+ *
+ * The timer callback will be called when the window expires.
+ */
+static inline void blk_stat_activate_msecs(struct blk_stat_callback *cb,
+                                          unsigned int msecs)
+{
+       mod_timer(&cb->timer, jiffies + msecs_to_jiffies(msecs));
  }
  
  #endif
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c

index c44b321335f3ebbcc662f0f70b7605f5019c60b7..3f37813ccbafdf418f6875e69dbb8f5c4022e96e 100644 (file)
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -208,7 +208,7 @@ static ssize_t queue_discard_max_store(struct request_queue *q,
  
  static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page)
  {
-       return queue_var_show(queue_discard_zeroes_data(q), page);
+       return queue_var_show(0, page);
  }
  
  static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
@@ -503,26 +503,6 @@ static ssize_t queue_dax_show(struct request_queue *q, char *page)
         return queue_var_show(blk_queue_dax(q), page);
  }
  
-static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre)
-{
-       return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n",
-                       pre, (long long) stat->nr_samples,
-                       (long long) stat->mean, (long long) stat->min,
-                       (long long) stat->max);
-}
-
-static ssize_t queue_stats_show(struct request_queue *q, char *page)
-{
-       struct blk_rq_stat stat[2];
-       ssize_t ret;
-
-       blk_queue_stat_get(q, stat);
-
-       ret = print_stat(page, &stat[BLK_STAT_READ], "read :");
-       ret += print_stat(page + ret, &stat[BLK_STAT_WRITE], "write:");
-       return ret;
-}
-
  static struct queue_sysfs_entry queue_requests_entry = {
         .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
         .show = queue_requests_show,
@@ -691,17 +671,20 @@ static struct queue_sysfs_entry queue_dax_entry = {
         .show = queue_dax_show,
  };
  
-static struct queue_sysfs_entry queue_stats_entry = {
-       .attr = {.name = "stats", .mode = S_IRUGO },
-       .show = queue_stats_show,
-};
-
  static struct queue_sysfs_entry queue_wb_lat_entry = {
         .attr = {.name = "wbt_lat_usec", .mode = S_IRUGO | S_IWUSR },
         .show = queue_wb_lat_show,
         .store = queue_wb_lat_store,
  };
  
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+static struct queue_sysfs_entry throtl_sample_time_entry = {
+       .attr = {.name = "throttle_sample_time", .mode = S_IRUGO | S_IWUSR },
+       .show = blk_throtl_sample_time_show,
+       .store = blk_throtl_sample_time_store,
+};
+#endif
+
  static struct attribute *default_attrs[] = {
         &queue_requests_entry.attr,
         &queue_ra_entry.attr,
@@ -733,9 +716,11 @@ static struct attribute *default_attrs[] = {
         &queue_poll_entry.attr,
         &queue_wc_entry.attr,
         &queue_dax_entry.attr,
-       &queue_stats_entry.attr,
         &queue_wb_lat_entry.attr,
         &queue_poll_delay_entry.attr,
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+       &throtl_sample_time_entry.attr,
+#endif
         NULL,
  };
  
@@ -810,15 +795,19 @@ static void blk_release_queue(struct kobject *kobj)
         struct request_queue *q =
                 container_of(kobj, struct request_queue, kobj);
  
-       wbt_exit(q);
+       if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
+               blk_stat_remove_callback(q, q->poll_cb);
+       blk_stat_free_callback(q->poll_cb);
         bdi_put(q->backing_dev_info);
         blkcg_exit_queue(q);
  
         if (q->elevator) {
                 ioc_clear_queue(q);
-               elevator_exit(q->elevator);
+               elevator_exit(q, q->elevator);
         }
  
+       blk_free_queue_stats(q->stats);
+
         blk_exit_rl(&q->root_rl);
  
         if (q->queue_tags)
@@ -855,23 +844,6 @@ struct kobj_type blk_queue_ktype = {
         .release        = blk_release_queue,
  };
  
-static void blk_wb_init(struct request_queue *q)
-{
-#ifndef CONFIG_BLK_WBT_MQ
-       if (q->mq_ops)
-               return;
-#endif
-#ifndef CONFIG_BLK_WBT_SQ
-       if (q->request_fn)
-               return;
-#endif
-
-       /*
-        * If this fails, we don't get throttling
-        */
-       wbt_init(q);
-}
-
  int blk_register_queue(struct gendisk *disk)
  {
         int ret;
@@ -881,6 +853,11 @@ int blk_register_queue(struct gendisk *disk)
         if (WARN_ON(!q))
                 return -ENXIO;
  
+       WARN_ONCE(test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags),
+                 "%s is registering an already registered queue\n",
+                 kobject_name(&dev->kobj));
+       queue_flag_set_unlocked(QUEUE_FLAG_REGISTERED, q);
+
         /*
          * SCSI probing may synchronously create and destroy a lot of
          * request_queues for non-existent devices.  Shutting down a fully
@@ -900,9 +877,6 @@ int blk_register_queue(struct gendisk *disk)
         if (ret)
                 return ret;
  
-       if (q->mq_ops)
-               blk_mq_register_dev(dev, q);
-
         /* Prevent changes through sysfs until registration is completed. */
         mutex_lock(&q->sysfs_lock);
  
@@ -912,9 +886,14 @@ int blk_register_queue(struct gendisk *disk)
                 goto unlock;
         }
  
+       if (q->mq_ops)
+               __blk_mq_register_dev(dev, q);
+
         kobject_uevent(&q->kobj, KOBJ_ADD);
  
-       blk_wb_init(q);
+       wbt_enable_default(q);
+
+       blk_throtl_register_queue(q);
  
         if (q->request_fn || (q->mq_ops && q->elevator)) {
                 ret = elv_register_queue(q);
@@ -939,6 +918,11 @@ void blk_unregister_queue(struct gendisk *disk)
         if (WARN_ON(!q))
                 return;
  
+       queue_flag_clear_unlocked(QUEUE_FLAG_REGISTERED, q);
+
+       wbt_exit(q);
+
+
         if (q->mq_ops)
                 blk_mq_unregister_dev(disk_to_dev(disk), q);
  
diff --git a/block/blk-throttle.c b/block/blk-throttle.c

index 8fab716e40596199d680dba33230d8c01d37b45c..b78db2e5fdff1e158ea52c179313ff3eba282015 100644 (file)
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -18,8 +18,17 @@ static int throtl_grp_quantum = 8;
  /* Total max dispatch from all groups in one round */
  static int throtl_quantum = 32;
  
-/* Throttling is performed over 100ms slice and after that slice is renewed */
-static unsigned long throtl_slice = HZ/10;     /* 100 ms */
+/* Throttling is performed over a slice and after that slice is renewed */
+#define DFL_THROTL_SLICE_HD (HZ / 10)
+#define DFL_THROTL_SLICE_SSD (HZ / 50)
+#define MAX_THROTL_SLICE (HZ)
+#define DFL_IDLE_THRESHOLD_SSD (1000L) /* 1 ms */
+#define DFL_IDLE_THRESHOLD_HD (100L * 1000) /* 100 ms */
+#define MAX_IDLE_TIME (5L * 1000 * 1000) /* 5 s */
+/* default latency target is 0, i.e., guarantee IO latency by default */
+#define DFL_LATENCY_TARGET (0)
+
+#define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT)
  
  static struct blkcg_policy blkcg_policy_throtl;
  
@@ -83,6 +92,12 @@ enum tg_state_flags {
  
  #define rb_entry_tg(node)      rb_entry((node), struct throtl_grp, rb_node)
  
+enum {
+       LIMIT_LOW,
+       LIMIT_MAX,
+       LIMIT_CNT,
+};
+
  struct throtl_grp {
         /* must be the first member */
         struct blkg_policy_data pd;
@@ -119,20 +134,54 @@ struct throtl_grp {
         /* are there any throtl rules between this group and td? */
         bool has_rules[2];
  
-       /* bytes per second rate limits */
-       uint64_t bps[2];
+       /* internally used bytes per second rate limits */
+       uint64_t bps[2][LIMIT_CNT];
+       /* user configured bps limits */
+       uint64_t bps_conf[2][LIMIT_CNT];
  
-       /* IOPS limits */
-       unsigned int iops[2];
+       /* internally used IOPS limits */
+       unsigned int iops[2][LIMIT_CNT];
+       /* user configured IOPS limits */
+       unsigned int iops_conf[2][LIMIT_CNT];
  
         /* Number of bytes disptached in current slice */
         uint64_t bytes_disp[2];
         /* Number of bio's dispatched in current slice */
         unsigned int io_disp[2];
  
+       unsigned long last_low_overflow_time[2];
+
+       uint64_t last_bytes_disp[2];
+       unsigned int last_io_disp[2];
+
+       unsigned long last_check_time;
+
+       unsigned long latency_target; /* us */
         /* When did we start a new slice */
         unsigned long slice_start[2];
         unsigned long slice_end[2];
+
+       unsigned long last_finish_time; /* ns / 1024 */
+       unsigned long checked_last_finish_time; /* ns / 1024 */
+       unsigned long avg_idletime; /* ns / 1024 */
+       unsigned long idletime_threshold; /* us */
+
+       unsigned int bio_cnt; /* total bios */
+       unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
+       unsigned long bio_cnt_reset_time;
+};
+
+/* We measure latency for request size from <= 4k to >= 1M */
+#define LATENCY_BUCKET_SIZE 9
+
+struct latency_bucket {
+       unsigned long total_latency; /* ns / 1024 */
+       int samples;
+};
+
+struct avg_latency_bucket {
+       unsigned long latency; /* ns / 1024 */
+       bool valid;
  };
  
  struct throtl_data
@@ -145,8 +194,26 @@ struct throtl_data
         /* Total Number of queued bios on READ and WRITE lists */
         unsigned int nr_queued[2];
  
+       unsigned int throtl_slice;
+
         /* Work for dispatching throttled bios */
         struct work_struct dispatch_work;
+       unsigned int limit_index;
+       bool limit_valid[LIMIT_CNT];
+
+       unsigned long dft_idletime_threshold; /* us */
+
+       unsigned long low_upgrade_time;
+       unsigned long low_downgrade_time;
+
+       unsigned int scale;
+
+       struct latency_bucket tmp_buckets[LATENCY_BUCKET_SIZE];
+       struct avg_latency_bucket avg_buckets[LATENCY_BUCKET_SIZE];
+       struct latency_bucket __percpu *latency_buckets;
+       unsigned long last_calculate_time;
+
+       bool track_bio_latency;
  };
  
  static void throtl_pending_timer_fn(unsigned long arg);
@@ -198,6 +265,76 @@ static struct throtl_data *sq_to_td(struct throtl_service_queue *sq)
                 return container_of(sq, struct throtl_data, service_queue);
  }
  
+/*
+ * A cgroup's limit in the LIMIT_MAX state is scaled if a low limit is set.
+ * The scaling makes the IO dispatch smoother.
+ * Scale up: linearly scale up according to the time elapsed since the
+ *           upgrade. For every throtl_slice, the limit grows by 1/2 of the
+ *           .low limit until it hits the .max limit (callers cap the result)
+ * Scale down: exponentially scale down if a cgroup doesn't hit its .low limit
+ *
+ * @low: the low limit (bps or iops) to scale up
+ * @td:  per-queue throttle data; td->scale may be updated as a side effect
+ *
+ * Returns low + (low >> 1) * td->scale.
+ */
+static uint64_t throtl_adjusted_limit(uint64_t low, struct throtl_data *td)
+{
+       /* 4096 is arbitrary: stop recomputing the scale once it gets that big */
+       if (td->scale < 4096 && time_after_eq(jiffies,
+           td->low_upgrade_time + td->scale * td->throtl_slice))
+               td->scale = (jiffies - td->low_upgrade_time) / td->throtl_slice;
+
+       return low + (low >> 1) * td->scale;
+}
+
+/*
+ * Return the bytes-per-second limit currently in effect for @tg in direction
+ * @rw, as selected by td->limit_index:
+ *  - a blkg without a parent on the default hierarchy is never limited
+ *  - in the LIMIT_LOW state a zero low limit falls back to the max limit
+ *  - in the LIMIT_MAX state, when a low limit is set and differs from the
+ *    max limit, the result is the scaled-up low limit capped at the max limit
+ */
+static uint64_t tg_bps_limit(struct throtl_grp *tg, int rw)
+{
+       struct blkcg_gq *blkg = tg_to_blkg(tg);
+       struct throtl_data *td;
+       uint64_t ret;
+
+       if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent)
+               return U64_MAX;
+
+       td = tg->td;
+       ret = tg->bps[rw][td->limit_index];
+       if (ret == 0 && td->limit_index == LIMIT_LOW)
+               return tg->bps[rw][LIMIT_MAX];
+
+       if (td->limit_index == LIMIT_MAX && tg->bps[rw][LIMIT_LOW] &&
+           tg->bps[rw][LIMIT_LOW] != tg->bps[rw][LIMIT_MAX]) {
+               uint64_t adjusted;
+
+               /* may also refresh td->scale */
+               adjusted = throtl_adjusted_limit(tg->bps[rw][LIMIT_LOW], td);
+               ret = min(tg->bps[rw][LIMIT_MAX], adjusted);
+       }
+       return ret;
+}
+
+/*
+ * Return the IO-per-second limit currently in effect for @tg in direction
+ * @rw. Mirrors tg_bps_limit(), with UINT_MAX meaning "unlimited":
+ *  - a blkg without a parent on the default hierarchy is never limited
+ *  - in the LIMIT_LOW state a zero low limit falls back to the max limit
+ *  - in the LIMIT_MAX state, when a low limit is set and differs from the
+ *    max limit, the result is the scaled-up low limit capped at the max limit
+ */
+static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
+{
+       struct blkcg_gq *blkg = tg_to_blkg(tg);
+       struct throtl_data *td;
+       unsigned int ret;
+
+       if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent)
+               return UINT_MAX;
+       td = tg->td;
+       ret = tg->iops[rw][td->limit_index];
+       if (ret == 0 && tg->td->limit_index == LIMIT_LOW)
+               return tg->iops[rw][LIMIT_MAX];
+
+       if (td->limit_index == LIMIT_MAX && tg->iops[rw][LIMIT_LOW] &&
+           tg->iops[rw][LIMIT_LOW] != tg->iops[rw][LIMIT_MAX]) {
+               uint64_t adjusted;
+
+               adjusted = throtl_adjusted_limit(tg->iops[rw][LIMIT_LOW], td);
+               /* clamp first so the 64-bit value survives the cast below */
+               if (adjusted > UINT_MAX)
+                       adjusted = UINT_MAX;
+               ret = min_t(unsigned int, tg->iops[rw][LIMIT_MAX], adjusted);
+       }
+       return ret;
+}
+
+#define request_bucket_index(sectors) \
+       clamp_t(int, order_base_2(sectors) - 3, 0, LATENCY_BUCKET_SIZE - 1)
+
  /**
   * throtl_log - log debug message via blktrace
   * @sq: the service_queue being reported
@@ -334,10 +471,17 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
         }
  
         RB_CLEAR_NODE(&tg->rb_node);
-       tg->bps[READ] = -1;
-       tg->bps[WRITE] = -1;
-       tg->iops[READ] = -1;
-       tg->iops[WRITE] = -1;
+       tg->bps[READ][LIMIT_MAX] = U64_MAX;
+       tg->bps[WRITE][LIMIT_MAX] = U64_MAX;
+       tg->iops[READ][LIMIT_MAX] = UINT_MAX;
+       tg->iops[WRITE][LIMIT_MAX] = UINT_MAX;
+       tg->bps_conf[READ][LIMIT_MAX] = U64_MAX;
+       tg->bps_conf[WRITE][LIMIT_MAX] = U64_MAX;
+       tg->iops_conf[READ][LIMIT_MAX] = UINT_MAX;
+       tg->iops_conf[WRITE][LIMIT_MAX] = UINT_MAX;
+       /* LIMIT_LOW will have default value 0 */
+
+       tg->latency_target = DFL_LATENCY_TARGET;
  
         return &tg->pd;
  }
@@ -366,6 +510,8 @@ static void throtl_pd_init(struct blkg_policy_data *pd)
         if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent)
                 sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue;
         tg->td = td;
+
+       tg->idletime_threshold = td->dft_idletime_threshold;
  }
  
  /*
@@ -376,20 +522,59 @@ static void throtl_pd_init(struct blkg_policy_data *pd)
  static void tg_update_has_rules(struct throtl_grp *tg)
  {
         struct throtl_grp *parent_tg = sq_to_tg(tg->service_queue.parent_sq);
+       struct throtl_data *td = tg->td;
         int rw;
  
         for (rw = READ; rw <= WRITE; rw++)
                 tg->has_rules[rw] = (parent_tg && parent_tg->has_rules[rw]) ||
-                                   (tg->bps[rw] != -1 || tg->iops[rw] != -1);
+                       (td->limit_valid[td->limit_index] &&
+                        (tg_bps_limit(tg, rw) != U64_MAX ||
+                         tg_iops_limit(tg, rw) != UINT_MAX));
  }
  
  static void throtl_pd_online(struct blkg_policy_data *pd)
  {
+       struct throtl_grp *tg = pd_to_tg(pd);
         /*
          * We don't want new groups to escape the limits of its ancestors.
          * Update has_rules[] after a new group is brought online.
          */
-       tg_update_has_rules(pd_to_tg(pd));
+       tg_update_has_rules(tg);
+}
+
+/*
+ * Recompute td->limit_valid[LIMIT_LOW]: it becomes true if any group visited
+ * by the post-order walk from the queue's root blkg has a non-zero low bps
+ * or iops limit in either direction, and false otherwise.
+ */
+static void blk_throtl_update_limit_valid(struct throtl_data *td)
+{
+       struct cgroup_subsys_state *pos_css;
+       struct blkcg_gq *blkg;
+       bool low_valid = false;
+
+       rcu_read_lock();
+       blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
+               struct throtl_grp *tg = blkg_to_tg(blkg);
+
+               if (tg->bps[READ][LIMIT_LOW] || tg->bps[WRITE][LIMIT_LOW] ||
+                   tg->iops[READ][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW])
+                       low_valid = true;
+       }
+       rcu_read_unlock();
+
+       td->limit_valid[LIMIT_LOW] = low_valid;
+}
+
+static void throtl_upgrade_state(struct throtl_data *td);
+/*
+ * Policy-data offline callback: drop this group's low limits, re-evaluate
+ * whether any low limit is still configured on the queue, and upgrade the
+ * limit state if the current limit_index is no longer valid.
+ */
+static void throtl_pd_offline(struct blkg_policy_data *pd)
+{
+       struct throtl_grp *tg = pd_to_tg(pd);
+
+       /* a group going offline must not keep LIMIT_LOW marked valid */
+       tg->bps[READ][LIMIT_LOW] = 0;
+       tg->bps[WRITE][LIMIT_LOW] = 0;
+       tg->iops[READ][LIMIT_LOW] = 0;
+       tg->iops[WRITE][LIMIT_LOW] = 0;
+
+       blk_throtl_update_limit_valid(tg->td);
+
+       if (!tg->td->limit_valid[tg->td->limit_index])
+               throtl_upgrade_state(tg->td);
  }
  
  static void throtl_pd_free(struct blkg_policy_data *pd)
@@ -499,6 +684,17 @@ static void throtl_dequeue_tg(struct throtl_grp *tg)
  static void throtl_schedule_pending_timer(struct throtl_service_queue *sq,
                                           unsigned long expires)
  {
+       unsigned long max_expire = jiffies + 8 * sq_to_tg(sq)->td->throtl_slice;
+
+       /*
+        * Since we are adjusting the throttle limit dynamically, the sleep
+        * time calculated according to previous limit might be invalid. It's
+        * possible the cgroup sleep time is very long and no other cgroups
+        * have IO running to notify it of the limit change. Make sure the
+        * cgroup doesn't sleep too long to avoid the missed notification.
+        */
+       if (time_after(expires, max_expire))
+               expires = max_expire;
         mod_timer(&sq->pending_timer, expires);
         throtl_log(sq, "schedule timer. delay=%lu jiffies=%lu",
                    expires - jiffies, jiffies);
@@ -556,7 +752,7 @@ static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
         if (time_after_eq(start, tg->slice_start[rw]))
                 tg->slice_start[rw] = start;
  
-       tg->slice_end[rw] = jiffies + throtl_slice;
+       tg->slice_end[rw] = jiffies + tg->td->throtl_slice;
         throtl_log(&tg->service_queue,
                    "[%c] new slice with credit start=%lu end=%lu jiffies=%lu",
                    rw == READ ? 'R' : 'W', tg->slice_start[rw],
@@ -568,7 +764,7 @@ static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw)
         tg->bytes_disp[rw] = 0;
         tg->io_disp[rw] = 0;
         tg->slice_start[rw] = jiffies;
-       tg->slice_end[rw] = jiffies + throtl_slice;
+       tg->slice_end[rw] = jiffies + tg->td->throtl_slice;
         throtl_log(&tg->service_queue,
                    "[%c] new slice start=%lu end=%lu jiffies=%lu",
                    rw == READ ? 'R' : 'W', tg->slice_start[rw],
@@ -578,13 +774,13 @@ static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw)
  static inline void throtl_set_slice_end(struct throtl_grp *tg, bool rw,
                                         unsigned long jiffy_end)
  {
-       tg->slice_end[rw] = roundup(jiffy_end, throtl_slice);
+       tg->slice_end[rw] = roundup(jiffy_end, tg->td->throtl_slice);
  }
  
  static inline void throtl_extend_slice(struct throtl_grp *tg, bool rw,
                                        unsigned long jiffy_end)
  {
-       tg->slice_end[rw] = roundup(jiffy_end, throtl_slice);
+       tg->slice_end[rw] = roundup(jiffy_end, tg->td->throtl_slice);
         throtl_log(&tg->service_queue,
                    "[%c] extend slice start=%lu end=%lu jiffies=%lu",
                    rw == READ ? 'R' : 'W', tg->slice_start[rw],
@@ -624,19 +820,20 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
          * is bad because it does not allow new slice to start.
          */
  
-       throtl_set_slice_end(tg, rw, jiffies + throtl_slice);
+       throtl_set_slice_end(tg, rw, jiffies + tg->td->throtl_slice);
  
         time_elapsed = jiffies - tg->slice_start[rw];
  
-       nr_slices = time_elapsed / throtl_slice;
+       nr_slices = time_elapsed / tg->td->throtl_slice;
  
         if (!nr_slices)
                 return;
-       tmp = tg->bps[rw] * throtl_slice * nr_slices;
+       tmp = tg_bps_limit(tg, rw) * tg->td->throtl_slice * nr_slices;
         do_div(tmp, HZ);
         bytes_trim = tmp;
  
-       io_trim = (tg->iops[rw] * throtl_slice * nr_slices)/HZ;
+       io_trim = (tg_iops_limit(tg, rw) * tg->td->throtl_slice * nr_slices) /
+               HZ;
  
         if (!bytes_trim && !io_trim)
                 return;
@@ -651,7 +848,7 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
         else
                 tg->io_disp[rw] = 0;
  
-       tg->slice_start[rw] += nr_slices * throtl_slice;
+       tg->slice_start[rw] += nr_slices * tg->td->throtl_slice;
  
         throtl_log(&tg->service_queue,
                    "[%c] trim slice nr=%lu bytes=%llu io=%lu start=%lu end=%lu jiffies=%lu",
@@ -671,9 +868,9 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
  
         /* Slice has just started. Consider one slice interval */
         if (!jiffy_elapsed)
-               jiffy_elapsed_rnd = throtl_slice;
+               jiffy_elapsed_rnd = tg->td->throtl_slice;
  
-       jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, throtl_slice);
+       jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
  
         /*
          * jiffy_elapsed_rnd should not be a big value as minimum iops can be
@@ -682,7 +879,7 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
          * have been trimmed.
          */
  
-       tmp = (u64)tg->iops[rw] * jiffy_elapsed_rnd;
+       tmp = (u64)tg_iops_limit(tg, rw) * jiffy_elapsed_rnd;
         do_div(tmp, HZ);
  
         if (tmp > UINT_MAX)
@@ -697,7 +894,7 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
         }
  
         /* Calc approx time to dispatch */
-       jiffy_wait = ((tg->io_disp[rw] + 1) * HZ)/tg->iops[rw] + 1;
+       jiffy_wait = ((tg->io_disp[rw] + 1) * HZ) / tg_iops_limit(tg, rw) + 1;
  
         if (jiffy_wait > jiffy_elapsed)
                 jiffy_wait = jiffy_wait - jiffy_elapsed;
@@ -720,11 +917,11 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
  
         /* Slice has just started. Consider one slice interval */
         if (!jiffy_elapsed)
-               jiffy_elapsed_rnd = throtl_slice;
+               jiffy_elapsed_rnd = tg->td->throtl_slice;
  
-       jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, throtl_slice);
+       jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
  
-       tmp = tg->bps[rw] * jiffy_elapsed_rnd;
+       tmp = tg_bps_limit(tg, rw) * jiffy_elapsed_rnd;
         do_div(tmp, HZ);
         bytes_allowed = tmp;
  
@@ -736,7 +933,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
  
         /* Calc approx time to dispatch */
         extra_bytes = tg->bytes_disp[rw] + bio->bi_iter.bi_size - bytes_allowed;
-       jiffy_wait = div64_u64(extra_bytes * HZ, tg->bps[rw]);
+       jiffy_wait = div64_u64(extra_bytes * HZ, tg_bps_limit(tg, rw));
  
         if (!jiffy_wait)
                 jiffy_wait = 1;
@@ -771,7 +968,8 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
                bio != throtl_peek_queued(&tg->service_queue.queued[rw]));
  
         /* If tg->bps = -1, then BW is unlimited */
-       if (tg->bps[rw] == -1 && tg->iops[rw] == -1) {
+       if (tg_bps_limit(tg, rw) == U64_MAX &&
+           tg_iops_limit(tg, rw) == UINT_MAX) {
                 if (wait)
                         *wait = 0;
                 return true;
@@ -787,8 +985,10 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
         if (throtl_slice_used(tg, rw) && !(tg->service_queue.nr_queued[rw]))
                 throtl_start_new_slice(tg, rw);
         else {
-               if (time_before(tg->slice_end[rw], jiffies + throtl_slice))
-                       throtl_extend_slice(tg, rw, jiffies + throtl_slice);
+               if (time_before(tg->slice_end[rw],
+                   jiffies + tg->td->throtl_slice))
+                       throtl_extend_slice(tg, rw,
+                               jiffies + tg->td->throtl_slice);
         }
  
         if (tg_with_in_bps_limit(tg, bio, &bps_wait) &&
@@ -816,6 +1016,8 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
         /* Charge the bio to the group */
         tg->bytes_disp[rw] += bio->bi_iter.bi_size;
         tg->io_disp[rw]++;
+       tg->last_bytes_disp[rw] += bio->bi_iter.bi_size;
+       tg->last_io_disp[rw]++;
  
         /*
          * BIO_THROTTLED is used to prevent the same bio to be throttled
@@ -999,6 +1201,8 @@ static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
         return nr_disp;
  }
  
+static bool throtl_can_upgrade(struct throtl_data *td,
+       struct throtl_grp *this_tg);
  /**
   * throtl_pending_timer_fn - timer function for service_queue->pending_timer
   * @arg: the throtl_service_queue being serviced
@@ -1025,6 +1229,9 @@ static void throtl_pending_timer_fn(unsigned long arg)
         int ret;
  
         spin_lock_irq(q->queue_lock);
+       if (throtl_can_upgrade(td, NULL))
+               throtl_upgrade_state(td);
+
  again:
         parent_sq = sq->parent_sq;
         dispatched = false;
@@ -1112,7 +1319,7 @@ static u64 tg_prfill_conf_u64(struct seq_file *sf, struct blkg_policy_data *pd,
         struct throtl_grp *tg = pd_to_tg(pd);
         u64 v = *(u64 *)((void *)tg + off);
  
-       if (v == -1)
+       if (v == U64_MAX)
                 return 0;
         return __blkg_prfill_u64(sf, pd, v);
  }
@@ -1123,7 +1330,7 @@ static u64 tg_prfill_conf_uint(struct seq_file *sf, struct blkg_policy_data *pd,
         struct throtl_grp *tg = pd_to_tg(pd);
         unsigned int v = *(unsigned int *)((void *)tg + off);
  
-       if (v == -1)
+       if (v == UINT_MAX)
                 return 0;
         return __blkg_prfill_u64(sf, pd, v);
  }
@@ -1150,8 +1357,8 @@ static void tg_conf_updated(struct throtl_grp *tg)
  
         throtl_log(&tg->service_queue,
                    "limit change rbps=%llu wbps=%llu riops=%u wiops=%u",
-                  tg->bps[READ], tg->bps[WRITE],
-                  tg->iops[READ], tg->iops[WRITE]);
+                  tg_bps_limit(tg, READ), tg_bps_limit(tg, WRITE),
+                  tg_iops_limit(tg, READ), tg_iops_limit(tg, WRITE));
  
         /*
          * Update has_rules[] flags for the updated tg's subtree.  A tg is
@@ -1197,7 +1404,7 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
         if (sscanf(ctx.body, "%llu", &v) != 1)
                 goto out_finish;
         if (!v)
-               v = -1;
+               v = U64_MAX;
  
         tg = blkg_to_tg(ctx.blkg);
  
@@ -1228,25 +1435,25 @@ static ssize_t tg_set_conf_uint(struct kernfs_open_file *of,
  static struct cftype throtl_legacy_files[] = {
         {
                 .name = "throttle.read_bps_device",
-               .private = offsetof(struct throtl_grp, bps[READ]),
+               .private = offsetof(struct throtl_grp, bps[READ][LIMIT_MAX]),
                 .seq_show = tg_print_conf_u64,
                 .write = tg_set_conf_u64,
         },
         {
                 .name = "throttle.write_bps_device",
-               .private = offsetof(struct throtl_grp, bps[WRITE]),
+               .private = offsetof(struct throtl_grp, bps[WRITE][LIMIT_MAX]),
                 .seq_show = tg_print_conf_u64,
                 .write = tg_set_conf_u64,
         },
         {
                 .name = "throttle.read_iops_device",
-               .private = offsetof(struct throtl_grp, iops[READ]),
+               .private = offsetof(struct throtl_grp, iops[READ][LIMIT_MAX]),
                 .seq_show = tg_print_conf_uint,
                 .write = tg_set_conf_uint,
         },
         {
                 .name = "throttle.write_iops_device",
-               .private = offsetof(struct throtl_grp, iops[WRITE]),
+               .private = offsetof(struct throtl_grp, iops[WRITE][LIMIT_MAX]),
                 .seq_show = tg_print_conf_uint,
                 .write = tg_set_conf_uint,
         },
@@ -1263,48 +1470,87 @@ static struct cftype throtl_legacy_files[] = {
         { }     /* terminate */
  };
  
-static u64 tg_prfill_max(struct seq_file *sf, struct blkg_policy_data *pd,
+static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
                          int off)
  {
         struct throtl_grp *tg = pd_to_tg(pd);
         const char *dname = blkg_dev_name(pd->blkg);
         char bufs[4][21] = { "max", "max", "max", "max" };
+       u64 bps_dft;
+       unsigned int iops_dft;
+       char idle_time[26] = "";
+       char latency_time[26] = "";
  
         if (!dname)
                 return 0;
-       if (tg->bps[READ] == -1 && tg->bps[WRITE] == -1 &&
-           tg->iops[READ] == -1 && tg->iops[WRITE] == -1)
+
+       if (off == LIMIT_LOW) {
+               bps_dft = 0;
+               iops_dft = 0;
+       } else {
+               bps_dft = U64_MAX;
+               iops_dft = UINT_MAX;
+       }
+
+       if (tg->bps_conf[READ][off] == bps_dft &&
+           tg->bps_conf[WRITE][off] == bps_dft &&
+           tg->iops_conf[READ][off] == iops_dft &&
+           tg->iops_conf[WRITE][off] == iops_dft &&
+           (off != LIMIT_LOW ||
+            (tg->idletime_threshold == tg->td->dft_idletime_threshold &&
+             tg->latency_target == DFL_LATENCY_TARGET)))
                 return 0;
  
-       if (tg->bps[READ] != -1)
-               snprintf(bufs[0], sizeof(bufs[0]), "%llu", tg->bps[READ]);
-       if (tg->bps[WRITE] != -1)
-               snprintf(bufs[1], sizeof(bufs[1]), "%llu", tg->bps[WRITE]);
-       if (tg->iops[READ] != -1)
-               snprintf(bufs[2], sizeof(bufs[2]), "%u", tg->iops[READ]);
-       if (tg->iops[WRITE] != -1)
-               snprintf(bufs[3], sizeof(bufs[3]), "%u", tg->iops[WRITE]);
-
-       seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s\n",
-                  dname, bufs[0], bufs[1], bufs[2], bufs[3]);
+       if (tg->bps_conf[READ][off] != bps_dft)
+               snprintf(bufs[0], sizeof(bufs[0]), "%llu",
+                       tg->bps_conf[READ][off]);
+       if (tg->bps_conf[WRITE][off] != bps_dft)
+               snprintf(bufs[1], sizeof(bufs[1]), "%llu",
+                       tg->bps_conf[WRITE][off]);
+       if (tg->iops_conf[READ][off] != iops_dft)
+               snprintf(bufs[2], sizeof(bufs[2]), "%u",
+                       tg->iops_conf[READ][off]);
+       if (tg->iops_conf[WRITE][off] != iops_dft)
+               snprintf(bufs[3], sizeof(bufs[3]), "%u",
+                       tg->iops_conf[WRITE][off]);
+       if (off == LIMIT_LOW) {
+               if (tg->idletime_threshold == ULONG_MAX)
+                       strcpy(idle_time, " idle=max");
+               else
+                       snprintf(idle_time, sizeof(idle_time), " idle=%lu",
+                               tg->idletime_threshold);
+
+               if (tg->latency_target == ULONG_MAX)
+                       strcpy(latency_time, " latency=max");
+               else
+                       snprintf(latency_time, sizeof(latency_time),
+                               " latency=%lu", tg->latency_target);
+       }
+
+       seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s%s%s\n",
+                  dname, bufs[0], bufs[1], bufs[2], bufs[3], idle_time,
+                  latency_time);
         return 0;
  }
  
-static int tg_print_max(struct seq_file *sf, void *v)
+static int tg_print_limit(struct seq_file *sf, void *v)
  {
-       blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_max,
+       blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_limit,
                           &blkcg_policy_throtl, seq_cft(sf)->private, false);
         return 0;
  }
  
-static ssize_t tg_set_max(struct kernfs_open_file *of,
+static ssize_t tg_set_limit(struct kernfs_open_file *of,
                           char *buf, size_t nbytes, loff_t off)
  {
         struct blkcg *blkcg = css_to_blkcg(of_css(of));
         struct blkg_conf_ctx ctx;
         struct throtl_grp *tg;
         u64 v[4];
+       unsigned long idle_time;
+       unsigned long latency_time;
         int ret;
+       int index = of_cft(of)->private;
  
         ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
         if (ret)
@@ -1312,15 +1558,17 @@ static ssize_t tg_set_max(struct kernfs_open_file *of,
  
         tg = blkg_to_tg(ctx.blkg);
  
-       v[0] = tg->bps[READ];
-       v[1] = tg->bps[WRITE];
-       v[2] = tg->iops[READ];
-       v[3] = tg->iops[WRITE];
+       v[0] = tg->bps_conf[READ][index];
+       v[1] = tg->bps_conf[WRITE][index];
+       v[2] = tg->iops_conf[READ][index];
+       v[3] = tg->iops_conf[WRITE][index];
  
+       idle_time = tg->idletime_threshold;
+       latency_time = tg->latency_target;
         while (true) {
                 char tok[27];   /* wiops=18446744073709551616 */
                 char *p;
-               u64 val = -1;
+               u64 val = U64_MAX;
                 int len;
  
                 if (sscanf(ctx.body, "%26s%n", tok, &len) != 1)
@@ -1348,15 +1596,43 @@ static ssize_t tg_set_max(struct kernfs_open_file *of,
                         v[2] = min_t(u64, val, UINT_MAX);
                 else if (!strcmp(tok, "wiops"))
                         v[3] = min_t(u64, val, UINT_MAX);
+               else if (index == LIMIT_LOW && !strcmp(tok, "idle"))
+                       idle_time = val;        /* .low file only */
+               else if (index == LIMIT_LOW && !strcmp(tok, "latency"))
+                       latency_time = val;     /* .low file only */
                 else
                         goto out_finish;
         }
  
-       tg->bps[READ] = v[0];
-       tg->bps[WRITE] = v[1];
-       tg->iops[READ] = v[2];
-       tg->iops[WRITE] = v[3];
+       tg->bps_conf[READ][index] = v[0];
+       tg->bps_conf[WRITE][index] = v[1];
+       tg->iops_conf[READ][index] = v[2];
+       tg->iops_conf[WRITE][index] = v[3];
  
+       if (index == LIMIT_MAX) {
+               tg->bps[READ][index] = v[0];
+               tg->bps[WRITE][index] = v[1];
+               tg->iops[READ][index] = v[2];
+               tg->iops[WRITE][index] = v[3];
+       }
+       tg->bps[READ][LIMIT_LOW] = min(tg->bps_conf[READ][LIMIT_LOW],
+               tg->bps_conf[READ][LIMIT_MAX]);
+       tg->bps[WRITE][LIMIT_LOW] = min(tg->bps_conf[WRITE][LIMIT_LOW],
+               tg->bps_conf[WRITE][LIMIT_MAX]);
+       tg->iops[READ][LIMIT_LOW] = min(tg->iops_conf[READ][LIMIT_LOW],
+               tg->iops_conf[READ][LIMIT_MAX]);
+       tg->iops[WRITE][LIMIT_LOW] = min(tg->iops_conf[WRITE][LIMIT_LOW],
+               tg->iops_conf[WRITE][LIMIT_MAX]);
+
+       if (index == LIMIT_LOW) {
+               blk_throtl_update_limit_valid(tg->td);
+               if (tg->td->limit_valid[LIMIT_LOW])
+                       tg->td->limit_index = LIMIT_LOW;
+               tg->idletime_threshold = (idle_time == ULONG_MAX) ?
+                       ULONG_MAX : idle_time;
+               tg->latency_target = (latency_time == ULONG_MAX) ?
+                       ULONG_MAX : latency_time;
+       }
         tg_conf_updated(tg);
         ret = 0;
  out_finish:
@@ -1365,11 +1641,21 @@ out_finish:
  }
  
  static struct cftype throtl_files[] = {
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+       {
+               .name = "low",
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .seq_show = tg_print_limit,
+               .write = tg_set_limit,
+               .private = LIMIT_LOW,
+       },
+#endif
         {
                 .name = "max",
                 .flags = CFTYPE_NOT_ON_ROOT,
-               .seq_show = tg_print_max,
-               .write = tg_set_max,
+               .seq_show = tg_print_limit,
+               .write = tg_set_limit,
+               .private = LIMIT_MAX,
         },
         { }     /* terminate */
  };
@@ -1388,9 +1674,376 @@ static struct blkcg_policy blkcg_policy_throtl = {
         .pd_alloc_fn            = throtl_pd_alloc,
         .pd_init_fn             = throtl_pd_init,
         .pd_online_fn           = throtl_pd_online,
+       .pd_offline_fn          = throtl_pd_offline,
         .pd_free_fn             = throtl_pd_free,
  };
  
+/*
+ * Return the smaller of @tg's READ and WRITE last_low_overflow_time values.
+ * A direction that has neither a bps nor an iops low limit contributes the
+ * current jiffies instead of its recorded time.
+ */
+static unsigned long __tg_last_low_overflow_time(struct throtl_grp *tg)
+{
+       unsigned long rtime = jiffies, wtime = jiffies;
+
+       if (tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW])
+               rtime = tg->last_low_overflow_time[READ];
+       if (tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW])
+               wtime = tg->last_low_overflow_time[WRITE];
+       return min(rtime, wtime);
+}
+
+/*
+ * Return the most recent low-limit overflow time among @tg and those of its
+ * ancestors that have a low limit configured.
+ *
+ * tg should not be an intermediate node
+ */
+static unsigned long tg_last_low_overflow_time(struct throtl_grp *tg)
+{
+       struct throtl_service_queue *parent_sq;
+       struct throtl_grp *parent = tg;
+       unsigned long ret = __tg_last_low_overflow_time(tg);
+
+       /* walk up the service queue hierarchy until there is no parent group */
+       while (true) {
+               parent_sq = parent->service_queue.parent_sq;
+               parent = sq_to_tg(parent_sq);
+               if (!parent)
+                       break;
+
+               /*
+                * A parent without a low limit always reaches its low limit,
+                * so its overflow time is useless for its children
+                */
+               if (!parent->bps[READ][LIMIT_LOW] &&
+                   !parent->iops[READ][LIMIT_LOW] &&
+                   !parent->bps[WRITE][LIMIT_LOW] &&
+                   !parent->iops[WRITE][LIMIT_LOW])
+                       continue;
+               if (time_after(__tg_last_low_overflow_time(parent), ret))
+                       ret = __tg_last_low_overflow_time(parent);
+       }
+       return ret;
+}
+
+/* Return true if @tg is considered idle by any of the criteria below. */
+static bool throtl_tg_is_idle(struct throtl_grp *tg)
+{
+       /*
+        * cgroup is idle if:
+        * - a single idle period is too long: longer than a fixed value (in
+        *   case the user configured too big a threshold) or 4 times the
+        *   slice, whichever is smaller
+        * - average think time is more than threshold
+        * - IO latency is largely below threshold (fewer than 1/5 of the
+        *   counted bios are bad ones)
+        */
+       unsigned long time = jiffies_to_usecs(4 * tg->td->throtl_slice);
+
+       time = min_t(unsigned long, MAX_IDLE_TIME, time);
+       /* ns >> 10 is compared directly against the usec value in 'time' */
+       return (ktime_get_ns() >> 10) - tg->last_finish_time > time ||
+              tg->avg_idletime > tg->idletime_threshold ||
+              (tg->latency_target && tg->bio_cnt &&
+               tg->bad_bio_cnt * 5 < tg->bio_cnt);
+}
+
+/*
+ * Return true if @tg does not object to upgrading to the next limit:
+ *  - it has no low limit at all, or
+ *  - every direction that has a low limit has bios queued, or
+ *  - at least one throtl_slice has passed since its last low-limit overflow
+ *    time and the group is idle
+ */
+static bool throtl_tg_can_upgrade(struct throtl_grp *tg)
+{
+       struct throtl_service_queue *sq = &tg->service_queue;
+       bool read_limit, write_limit;
+
+       /*
+        * If the cgroup reaches its low limit (a low limit of 0 is always
+        * reached), it's ok to upgrade to the next limit
+        */
+       read_limit = tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW];
+       write_limit = tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW];
+       if (!read_limit && !write_limit)
+               return true;
+       if (read_limit && sq->nr_queued[READ] &&
+           (!write_limit || sq->nr_queued[WRITE]))
+               return true;
+       if (write_limit && sq->nr_queued[WRITE] &&
+           (!read_limit || sq->nr_queued[READ]))
+               return true;
+
+       if (time_after_eq(jiffies,
+               tg_last_low_overflow_time(tg) + tg->td->throtl_slice) &&
+           throtl_tg_is_idle(tg))
+               return true;
+       return false;
+}
+
+/*
+ * Walk from @tg towards the root and return true as soon as one group on the
+ * path can upgrade. The walk stops, returning false, when there is no parent
+ * group or the next group's blkg has no parent; that group is not tested.
+ */
+static bool throtl_hierarchy_can_upgrade(struct throtl_grp *tg)
+{
+       while (true) {
+               if (throtl_tg_can_upgrade(tg))
+                       return true;
+               tg = sq_to_tg(tg->service_queue.parent_sq);
+               if (!tg || !tg_to_blkg(tg)->parent)
+                       return false;
+       }
+       return false; /* not reached: the loop above only exits via return */
+}
+
+/*
+ * Decide whether the whole queue may leave the LIMIT_LOW state.
+ * @td:      per-queue throttle data
+ * @this_tg: group to leave out of the check, or NULL to check every group
+ *
+ * Returns false if the queue is not in LIMIT_LOW, if less than one
+ * throtl_slice has passed since the last downgrade, or if any checked group's
+ * hierarchy cannot upgrade. Groups whose blkcg has children are not checked.
+ */
+static bool throtl_can_upgrade(struct throtl_data *td,
+       struct throtl_grp *this_tg)
+{
+       struct cgroup_subsys_state *pos_css;
+       struct blkcg_gq *blkg;
+
+       if (td->limit_index != LIMIT_LOW)
+               return false;
+
+       if (time_before(jiffies, td->low_downgrade_time + td->throtl_slice))
+               return false;
+
+       rcu_read_lock();
+       blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
+               struct throtl_grp *tg = blkg_to_tg(blkg);
+
+               if (tg == this_tg)
+                       continue;
+               /* skip groups whose blkcg has child cgroups */
+               if (!list_empty(&tg_to_blkg(tg)->blkcg->css.children))
+                       continue;
+               if (!throtl_hierarchy_can_upgrade(tg)) {
+                       rcu_read_unlock();
+                       return false;
+               }
+       }
+       rcu_read_unlock();
+       return true;
+}
+
+/*
+ * Upgrade the queue out of the LIMIT_LOW state if possible. The check runs at
+ * most once per throtl_slice for a given @tg (tracked in last_check_time) and
+ * only once a full throtl_slice has passed since @tg's own last low-limit
+ * overflow time.
+ */
+static void throtl_upgrade_check(struct throtl_grp *tg)
+{
+       unsigned long now = jiffies;
+
+       if (tg->td->limit_index != LIMIT_LOW)
+               return;
+
+       if (time_after(tg->last_check_time + tg->td->throtl_slice, now))
+               return;
+
+       tg->last_check_time = now;
+
+       if (!time_after_eq(now,
+            __tg_last_low_overflow_time(tg) + tg->td->throtl_slice))
+               return;
+
+       if (throtl_can_upgrade(tg->td, NULL))
+               throtl_upgrade_state(tg->td);
+}
+
+/*
+ * Switch the queue to the LIMIT_MAX state: record the upgrade time, reset the
+ * scale to 0, then run dispatch selection and reschedule the next dispatch
+ * for every descendant group and for the queue's own service queue, and
+ * finally queue the dispatch work.
+ */
+static void throtl_upgrade_state(struct throtl_data *td)
+{
+       struct cgroup_subsys_state *pos_css;
+       struct blkcg_gq *blkg;
+
+       td->limit_index = LIMIT_MAX;
+       td->low_upgrade_time = jiffies;
+       td->scale = 0;
+       rcu_read_lock();
+       blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
+               struct throtl_grp *tg = blkg_to_tg(blkg);
+               struct throtl_service_queue *sq = &tg->service_queue;
+
+               /* NOTE(review): presumably marks the group as already due */
+               tg->disptime = jiffies - 1;
+               throtl_select_dispatch(sq);
+               throtl_schedule_next_dispatch(sq, false);
+       }
+       rcu_read_unlock();
+       throtl_select_dispatch(&td->service_queue);
+       throtl_schedule_next_dispatch(&td->service_queue, false);
+       queue_work(kthrotld_workqueue, &td->dispatch_work);
+}
+
+/*
+ * Step the queue down towards limit state @new. The scale is halved first;
+ * while it is still non-zero only low_upgrade_time is moved back to match the
+ * reduced scale and limit_index is left alone. Once the scale reaches zero,
+ * limit_index is set to @new and the downgrade time is recorded.
+ */
+static void throtl_downgrade_state(struct throtl_data *td, int new)
+{
+       td->scale /= 2;
+
+       if (td->scale) {
+               td->low_upgrade_time = jiffies - td->scale * td->throtl_slice;
+               return;
+       }
+
+       td->limit_index = new;
+       td->low_downgrade_time = jiffies;
+}
+
+/*
+ * Return true if @tg calls for a downgrade: at least one throtl_slice has
+ * passed since the last upgrade and since the group's last low-limit
+ * overflow time, and the group is either not idle or its blkcg has children.
+ */
+static bool throtl_tg_can_downgrade(struct throtl_grp *tg)
+{
+       struct throtl_data *td = tg->td;
+       unsigned long now = jiffies;
+
+       /*
+        * If the cgroup is below its low limit, consider a downgrade so that
+        * other cgroups get throttled
+        */
+       if (time_after_eq(now, td->low_upgrade_time + td->throtl_slice) &&
+           time_after_eq(now, tg_last_low_overflow_time(tg) +
+                                       td->throtl_slice) &&
+           (!throtl_tg_is_idle(tg) ||
+            !list_empty(&tg_to_blkg(tg)->blkcg->css.children)))
+               return true;
+       return false;
+}
+
+static bool throtl_hierarchy_can_downgrade(struct throtl_grp *tg)
+{
+       while (true) {
+               if (!throtl_tg_can_downgrade(tg))
+                       return false;
+               tg = sq_to_tg(tg->service_queue.parent_sq);
+               if (!tg || !tg_to_blkg(tg)->parent)
+                       break;
+       }
+       return true;
+}
+
+static void throtl_downgrade_check(struct throtl_grp *tg)
+{
+       uint64_t bps;
+       unsigned int iops;
+       unsigned long elapsed_time;
+       unsigned long now = jiffies;
+
+       if (tg->td->limit_index != LIMIT_MAX ||
+           !tg->td->limit_valid[LIMIT_LOW])
+               return;
+       if (!list_empty(&tg_to_blkg(tg)->blkcg->css.children))
+               return;
+       if (time_after(tg->last_check_time + tg->td->throtl_slice, now))
+               return;
+
+       elapsed_time = now - tg->last_check_time;
+       tg->last_check_time = now;
+
+       if (time_before(now, tg_last_low_overflow_time(tg) +
+                       tg->td->throtl_slice))
+               return;
+
+       if (tg->bps[READ][LIMIT_LOW]) {
+               bps = tg->last_bytes_disp[READ] * HZ;
+               do_div(bps, elapsed_time);
+               if (bps >= tg->bps[READ][LIMIT_LOW])
+                       tg->last_low_overflow_time[READ] = now;
+       }
+
+       if (tg->bps[WRITE][LIMIT_LOW]) {
+               bps = tg->last_bytes_disp[WRITE] * HZ;
+               do_div(bps, elapsed_time);
+               if (bps >= tg->bps[WRITE][LIMIT_LOW])
+                       tg->last_low_overflow_time[WRITE] = now;
+       }
+
+       if (tg->iops[READ][LIMIT_LOW]) {
+               iops = tg->last_io_disp[READ] * HZ / elapsed_time;
+               if (iops >= tg->iops[READ][LIMIT_LOW])
+                       tg->last_low_overflow_time[READ] = now;
+       }
+
+       if (tg->iops[WRITE][LIMIT_LOW]) {
+               iops = tg->last_io_disp[WRITE] * HZ / elapsed_time;
+               if (iops >= tg->iops[WRITE][LIMIT_LOW])
+                       tg->last_low_overflow_time[WRITE] = now;
+       }
+
+       /*
+        * If cgroup is below low limit, consider downgrade and throttle other
+        * cgroups
+        */
+       if (throtl_hierarchy_can_downgrade(tg))
+               throtl_downgrade_state(tg->td, LIMIT_LOW);
+
+       tg->last_bytes_disp[READ] = 0;
+       tg->last_bytes_disp[WRITE] = 0;
+       tg->last_io_disp[READ] = 0;
+       tg->last_io_disp[WRITE] = 0;
+}
+
+static void blk_throtl_update_idletime(struct throtl_grp *tg)
+{
+       unsigned long now = ktime_get_ns() >> 10;
+       unsigned long last_finish_time = tg->last_finish_time;
+
+       if (now <= last_finish_time || last_finish_time == 0 ||
+           last_finish_time == tg->checked_last_finish_time)
+               return;
+
+       tg->avg_idletime = (tg->avg_idletime * 7 + now - last_finish_time) >> 3;
+       tg->checked_last_finish_time = last_finish_time;
+}
+
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+static void throtl_update_latency_buckets(struct throtl_data *td)
+{
+       struct avg_latency_bucket avg_latency[LATENCY_BUCKET_SIZE];
+       int i, cpu;
+       unsigned long last_latency = 0;
+       unsigned long latency;
+
+       if (!blk_queue_nonrot(td->queue))
+               return;
+       if (time_before(jiffies, td->last_calculate_time + HZ))
+               return;
+       td->last_calculate_time = jiffies;
+
+       memset(avg_latency, 0, sizeof(avg_latency));
+       for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
+               struct latency_bucket *tmp = &td->tmp_buckets[i];
+
+               for_each_possible_cpu(cpu) {
+                       struct latency_bucket *bucket;
+
+                       /* this isn't race free, but ok in practice */
+                       bucket = per_cpu_ptr(td->latency_buckets, cpu);
+                       tmp->total_latency += bucket[i].total_latency;
+                       tmp->samples += bucket[i].samples;
+                       bucket[i].total_latency = 0;
+                       bucket[i].samples = 0;
+               }
+
+               if (tmp->samples >= 32) {
+                       int samples = tmp->samples;
+
+                       latency = tmp->total_latency;
+
+                       tmp->total_latency = 0;
+                       tmp->samples = 0;
+                       latency /= samples;
+                       if (latency == 0)
+                               continue;
+                       avg_latency[i].latency = latency;
+               }
+       }
+
+       for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
+               if (!avg_latency[i].latency) {
+                       if (td->avg_buckets[i].latency < last_latency)
+                               td->avg_buckets[i].latency = last_latency;
+                       continue;
+               }
+
+               if (!td->avg_buckets[i].valid)
+                       latency = avg_latency[i].latency;
+               else
+                       latency = (td->avg_buckets[i].latency * 7 +
+                               avg_latency[i].latency) >> 3;
+
+               td->avg_buckets[i].latency = max(latency, last_latency);
+               td->avg_buckets[i].valid = true;
+               last_latency = td->avg_buckets[i].latency;
+       }
+}
+#else
+static inline void throtl_update_latency_buckets(struct throtl_data *td)
+{
+}
+#endif
+
+static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
+{
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+       int ret;
+
+       ret = bio_associate_current(bio);
+       if (ret == 0 || ret == -EBUSY)
+               bio->bi_cg_private = tg;
+       blk_stat_set_issue(&bio->bi_issue_stat, bio_sectors(bio));
+#else
+       bio_associate_current(bio);
+#endif
+}
+
  bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
                     struct bio *bio)
  {
@@ -1399,6 +2052,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
         struct throtl_service_queue *sq;
         bool rw = bio_data_dir(bio);
         bool throttled = false;
+       struct throtl_data *td = tg->td;
  
         WARN_ON_ONCE(!rcu_read_lock_held());
  
@@ -1408,19 +2062,35 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
  
         spin_lock_irq(q->queue_lock);
  
+       throtl_update_latency_buckets(td);
+
         if (unlikely(blk_queue_bypass(q)))
                 goto out_unlock;
  
+       blk_throtl_assoc_bio(tg, bio);
+       blk_throtl_update_idletime(tg);
+
         sq = &tg->service_queue;
  
+again:
         while (true) {
+               if (tg->last_low_overflow_time[rw] == 0)
+                       tg->last_low_overflow_time[rw] = jiffies;
+               throtl_downgrade_check(tg);
+               throtl_upgrade_check(tg);
                 /* throtl is FIFO - if bios are already queued, should queue */
                 if (sq->nr_queued[rw])
                         break;
  
                 /* if above limits, break to queue */
-               if (!tg_may_dispatch(tg, bio, NULL))
+               if (!tg_may_dispatch(tg, bio, NULL)) {
+                       tg->last_low_overflow_time[rw] = jiffies;
+                       if (throtl_can_upgrade(td, tg)) {
+                               throtl_upgrade_state(td);
+                               goto again;
+                       }
                         break;
+               }
  
                 /* within limits, let's charge and dispatch directly */
                 throtl_charge_bio(tg, bio);
@@ -1453,12 +2123,14 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
         /* out-of-limit, queue to @tg */
         throtl_log(sq, "[%c] bio. bdisp=%llu sz=%u bps=%llu iodisp=%u iops=%u queued=%d/%d",
                    rw == READ ? 'R' : 'W',
-                  tg->bytes_disp[rw], bio->bi_iter.bi_size, tg->bps[rw],
-                  tg->io_disp[rw], tg->iops[rw],
+                  tg->bytes_disp[rw], bio->bi_iter.bi_size,
+                  tg_bps_limit(tg, rw),
+                  tg->io_disp[rw], tg_iops_limit(tg, rw),
                    sq->nr_queued[READ], sq->nr_queued[WRITE]);
  
-       bio_associate_current(bio);
-       tg->td->nr_queued[rw]++;
+       tg->last_low_overflow_time[rw] = jiffies;
+
+       td->nr_queued[rw]++;
         throtl_add_bio_tg(bio, qn, tg);
         throttled = true;
  
@@ -1483,9 +2155,94 @@ out:
          */
         if (!throttled)
                 bio_clear_flag(bio, BIO_THROTTLED);
+
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+       if (throttled || !td->track_bio_latency)
+               bio->bi_issue_stat.stat |= SKIP_LATENCY;
+#endif
         return throttled;
  }
  
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+static void throtl_track_latency(struct throtl_data *td, sector_t size,
+       int op, unsigned long time)
+{
+       struct latency_bucket *latency;
+       int index;
+
+       if (!td || td->limit_index != LIMIT_LOW || op != REQ_OP_READ ||
+           !blk_queue_nonrot(td->queue))
+               return;
+
+       index = request_bucket_index(size);
+
+       latency = get_cpu_ptr(td->latency_buckets);
+       latency[index].total_latency += time;
+       latency[index].samples++;
+       put_cpu_ptr(td->latency_buckets);
+}
+
+void blk_throtl_stat_add(struct request *rq, u64 time_ns)
+{
+       struct request_queue *q = rq->q;
+       struct throtl_data *td = q->td;
+
+       throtl_track_latency(td, blk_stat_size(&rq->issue_stat),
+               req_op(rq), time_ns >> 10);
+}
+
+void blk_throtl_bio_endio(struct bio *bio)
+{
+       struct throtl_grp *tg;
+       u64 finish_time_ns;
+       unsigned long finish_time;
+       unsigned long start_time;
+       unsigned long lat;
+
+       tg = bio->bi_cg_private;
+       if (!tg)
+               return;
+       bio->bi_cg_private = NULL;
+
+       finish_time_ns = ktime_get_ns();
+       tg->last_finish_time = finish_time_ns >> 10;
+
+       start_time = blk_stat_time(&bio->bi_issue_stat) >> 10;
+       finish_time = __blk_stat_time(finish_time_ns) >> 10;
+       if (!start_time || finish_time <= start_time)
+               return;
+
+       lat = finish_time - start_time;
+       /* this is only for bio based driver */
+       if (!(bio->bi_issue_stat.stat & SKIP_LATENCY))
+               throtl_track_latency(tg->td, blk_stat_size(&bio->bi_issue_stat),
+                       bio_op(bio), lat);
+
+       if (tg->latency_target) {
+               int bucket;
+               unsigned int threshold;
+
+               bucket = request_bucket_index(
+                       blk_stat_size(&bio->bi_issue_stat));
+               threshold = tg->td->avg_buckets[bucket].latency +
+                       tg->latency_target;
+               if (lat > threshold)
+                       tg->bad_bio_cnt++;
+               /*
+                * Not race free, could get wrong count, which means cgroups
+                * will be throttled
+                */
+               tg->bio_cnt++;
+       }
+
+       if (time_after(jiffies, tg->bio_cnt_reset_time) || tg->bio_cnt > 1024) {
+               tg->bio_cnt_reset_time = tg->td->throtl_slice + jiffies;
+               tg->bio_cnt /= 2;
+               tg->bad_bio_cnt /= 2;
+       }
+}
+#endif
+
  /*
   * Dispatch all bios from all children tg's queued on @parent_sq.  On
   * return, @parent_sq is guaranteed to not have any active children tg's
@@ -1558,6 +2315,12 @@ int blk_throtl_init(struct request_queue *q)
         td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
         if (!td)
                 return -ENOMEM;
+       td->latency_buckets = __alloc_percpu(sizeof(struct latency_bucket) *
+               LATENCY_BUCKET_SIZE, __alignof__(u64));
+       if (!td->latency_buckets) {
+               kfree(td);
+               return -ENOMEM;
+       }
  
         INIT_WORK(&td->dispatch_work, blk_throtl_dispatch_work_fn);
         throtl_service_queue_init(&td->service_queue);
@@ -1565,10 +2328,17 @@ int blk_throtl_init(struct request_queue *q)
         q->td = td;
         td->queue = q;
  
+       td->limit_valid[LIMIT_MAX] = true;
+       td->limit_index = LIMIT_MAX;
+       td->low_upgrade_time = jiffies;
+       td->low_downgrade_time = jiffies;
+
         /* activate policy */
         ret = blkcg_activate_policy(q, &blkcg_policy_throtl);
-       if (ret)
+       if (ret) {
+               free_percpu(td->latency_buckets);
                 kfree(td);
+       }
         return ret;
  }
  
@@ -1577,9 +2347,74 @@ void blk_throtl_exit(struct request_queue *q)
         BUG_ON(!q->td);
         throtl_shutdown_wq(q);
         blkcg_deactivate_policy(q, &blkcg_policy_throtl);
+       free_percpu(q->td->latency_buckets);
         kfree(q->td);
  }
  
+void blk_throtl_register_queue(struct request_queue *q)
+{
+       struct throtl_data *td;
+       struct cgroup_subsys_state *pos_css;
+       struct blkcg_gq *blkg;
+
+       td = q->td;
+       BUG_ON(!td);
+
+       if (blk_queue_nonrot(q)) {
+               td->throtl_slice = DFL_THROTL_SLICE_SSD;
+               td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_SSD;
+       } else {
+               td->throtl_slice = DFL_THROTL_SLICE_HD;
+               td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_HD;
+       }
+#ifndef CONFIG_BLK_DEV_THROTTLING_LOW
+       /* if no low limit, use previous default */
+       td->throtl_slice = DFL_THROTL_SLICE_HD;
+#endif
+
+       td->track_bio_latency = !q->mq_ops && !q->request_fn;
+       if (!td->track_bio_latency)
+               blk_stat_enable_accounting(q);
+
+       /*
+        * some tg are created before queue is fully initialized, eg, nonrot
+        * isn't initialized yet
+        */
+       rcu_read_lock();
+       blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) {
+               struct throtl_grp *tg = blkg_to_tg(blkg);
+
+               tg->idletime_threshold = td->dft_idletime_threshold;
+       }
+       rcu_read_unlock();
+}
+
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+ssize_t blk_throtl_sample_time_show(struct request_queue *q, char *page)
+{
+       if (!q->td)
+               return -EINVAL;
+       return sprintf(page, "%u\n", jiffies_to_msecs(q->td->throtl_slice));
+}
+
+ssize_t blk_throtl_sample_time_store(struct request_queue *q,
+       const char *page, size_t count)
+{
+       unsigned long v;
+       unsigned long t;
+
+       if (!q->td)
+               return -EINVAL;
+       if (kstrtoul(page, 10, &v))
+               return -EINVAL;
+       t = msecs_to_jiffies(v);
+       if (t == 0 || t > MAX_THROTL_SLICE)
+               return -EINVAL;
+       q->td->throtl_slice = t;
+       return count;
+}
+#endif
+
  static int __init throtl_init(void)
  {
         kthrotld_workqueue = alloc_workqueue("kthrotld", WQ_MEM_RECLAIM, 0);
diff --git a/block/blk-timeout.c b/block/blk-timeout.c

index a30441a200c0952eec31f6694dd05641a54227fa..cbff183f3d9f963441a6cfa4f9b8bd3d863a2c50 100644 (file)
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -89,7 +89,6 @@ static void blk_rq_timed_out(struct request *req)
                 ret = q->rq_timed_out_fn(req);
         switch (ret) {
         case BLK_EH_HANDLED:
-               /* Can we use req->errors here? */
                 __blk_complete_request(req);
                 break;
         case BLK_EH_RESET_TIMER:
diff --git a/block/blk-wbt.c b/block/blk-wbt.c

index 1aedb1f7ee0c7fde717d7701d3ee74cc90c21d17..17676f4d7fd157b2c889b093dd869866c2e468d1 100644 (file)
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -255,8 +255,8 @@ static inline bool stat_sample_valid(struct blk_rq_stat *stat)
          * that it's writes impacting us, and not just some sole read on
          * a device that is in a lower power state.
          */
-       return stat[BLK_STAT_READ].nr_samples >= 1 &&
-               stat[BLK_STAT_WRITE].nr_samples >= RWB_MIN_WRITE_SAMPLES;
+       return (stat[READ].nr_samples >= 1 &&
+               stat[WRITE].nr_samples >= RWB_MIN_WRITE_SAMPLES);
  }
  
  static u64 rwb_sync_issue_lat(struct rq_wb *rwb)
@@ -277,7 +277,7 @@ enum {
         LAT_EXCEEDED,
  };
  
-static int __latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
+static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
  {
         struct backing_dev_info *bdi = rwb->queue->backing_dev_info;
         u64 thislat;
@@ -293,7 +293,7 @@ static int __latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
          */
         thislat = rwb_sync_issue_lat(rwb);
         if (thislat > rwb->cur_win_nsec ||
-           (thislat > rwb->min_lat_nsec && !stat[BLK_STAT_READ].nr_samples)) {
+           (thislat > rwb->min_lat_nsec && !stat[READ].nr_samples)) {
                 trace_wbt_lat(bdi, thislat);
                 return LAT_EXCEEDED;
         }
@@ -308,8 +308,8 @@ static int __latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
                  * waited or still has writes in flights, consider us doing
                  * just writes as well.
                  */
-               if ((stat[BLK_STAT_WRITE].nr_samples && blk_stat_is_current(stat)) ||
-                   wb_recent_wait(rwb) || wbt_inflight(rwb))
+               if (stat[WRITE].nr_samples || wb_recent_wait(rwb) ||
+                   wbt_inflight(rwb))
                         return LAT_UNKNOWN_WRITES;
                 return LAT_UNKNOWN;
         }
@@ -317,8 +317,8 @@ static int __latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
         /*
          * If the 'min' latency exceeds our target, step down.
          */
-       if (stat[BLK_STAT_READ].min > rwb->min_lat_nsec) {
-               trace_wbt_lat(bdi, stat[BLK_STAT_READ].min);
+       if (stat[READ].min > rwb->min_lat_nsec) {
+               trace_wbt_lat(bdi, stat[READ].min);
                 trace_wbt_stat(bdi, stat);
                 return LAT_EXCEEDED;
         }
@@ -329,14 +329,6 @@ static int __latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
         return LAT_OK;
  }
  
-static int latency_exceeded(struct rq_wb *rwb)
-{
-       struct blk_rq_stat stat[2];
-
-       blk_queue_stat_get(rwb->queue, stat);
-       return __latency_exceeded(rwb, stat);
-}
-
  static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
  {
         struct backing_dev_info *bdi = rwb->queue->backing_dev_info;
@@ -355,7 +347,6 @@ static void scale_up(struct rq_wb *rwb)
  
         rwb->scale_step--;
         rwb->unknown_cnt = 0;
-       blk_stat_clear(rwb->queue);
  
         rwb->scaled_max = calc_wb_limits(rwb);
  
@@ -385,15 +376,12 @@ static void scale_down(struct rq_wb *rwb, bool hard_throttle)
  
         rwb->scaled_max = false;
         rwb->unknown_cnt = 0;
-       blk_stat_clear(rwb->queue);
         calc_wb_limits(rwb);
         rwb_trace_step(rwb, "step down");
  }
  
  static void rwb_arm_timer(struct rq_wb *rwb)
  {
-       unsigned long expires;
-
         if (rwb->scale_step > 0) {
                 /*
                  * We should speed this up, using some variant of a fast
@@ -411,17 +399,16 @@ static void rwb_arm_timer(struct rq_wb *rwb)
                 rwb->cur_win_nsec = rwb->win_nsec;
         }
  
-       expires = jiffies + nsecs_to_jiffies(rwb->cur_win_nsec);
-       mod_timer(&rwb->window_timer, expires);
+       blk_stat_activate_nsecs(rwb->cb, rwb->cur_win_nsec);
  }
  
-static void wb_timer_fn(unsigned long data)
+static void wb_timer_fn(struct blk_stat_callback *cb)
  {
-       struct rq_wb *rwb = (struct rq_wb *) data;
+       struct rq_wb *rwb = cb->data;
         unsigned int inflight = wbt_inflight(rwb);
         int status;
  
-       status = latency_exceeded(rwb);
+       status = latency_exceeded(rwb, cb->stat);
  
         trace_wbt_timer(rwb->queue->backing_dev_info, status, rwb->scale_step,
                         inflight);
@@ -614,7 +601,7 @@ enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock)
  
         __wbt_wait(rwb, bio->bi_opf, lock);
  
-       if (!timer_pending(&rwb->window_timer))
+       if (!blk_stat_is_active(rwb->cb))
                 rwb_arm_timer(rwb);
  
         if (current_is_kswapd())
@@ -666,22 +653,37 @@ void wbt_set_write_cache(struct rq_wb *rwb, bool write_cache_on)
                 rwb->wc = write_cache_on;
  }
  
- /*
- * Disable wbt, if enabled by default. Only called from CFQ, if we have
- * cgroups enabled
+/*
+ * Disable wbt, if enabled by default. Only called from CFQ.
   */
  void wbt_disable_default(struct request_queue *q)
  {
         struct rq_wb *rwb = q->rq_wb;
  
-       if (rwb && rwb->enable_state == WBT_STATE_ON_DEFAULT) {
-               del_timer_sync(&rwb->window_timer);
-               rwb->win_nsec = rwb->min_lat_nsec = 0;
-               wbt_update_limits(rwb);
-       }
+       if (rwb && rwb->enable_state == WBT_STATE_ON_DEFAULT)
+               wbt_exit(q);
  }
  EXPORT_SYMBOL_GPL(wbt_disable_default);
  
+/*
+ * Enable wbt if defaults are configured that way
+ */
+void wbt_enable_default(struct request_queue *q)
+{
+       /* Throttling already enabled? */
+       if (q->rq_wb)
+               return;
+
+       /* Queue not registered? Maybe shutting down... */
+       if (!test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags))
+               return;
+
+       if ((q->mq_ops && IS_ENABLED(CONFIG_BLK_WBT_MQ)) ||
+           (q->request_fn && IS_ENABLED(CONFIG_BLK_WBT_SQ)))
+               wbt_init(q);
+}
+EXPORT_SYMBOL_GPL(wbt_enable_default);
+
  u64 wbt_default_latency_nsec(struct request_queue *q)
  {
         /*
@@ -694,29 +696,33 @@ u64 wbt_default_latency_nsec(struct request_queue *q)
                 return 75000000ULL;
  }
  
+static int wbt_data_dir(const struct request *rq)
+{
+       return rq_data_dir(rq);
+}
+
  int wbt_init(struct request_queue *q)
  {
         struct rq_wb *rwb;
         int i;
  
-       /*
-        * For now, we depend on the stats window being larger than
-        * our monitoring window. Ensure that this isn't inadvertently
-        * violated.
-        */
-       BUILD_BUG_ON(RWB_WINDOW_NSEC > BLK_STAT_NSEC);
         BUILD_BUG_ON(WBT_NR_BITS > BLK_STAT_RES_BITS);
  
         rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
         if (!rwb)
                 return -ENOMEM;
  
+       rwb->cb = blk_stat_alloc_callback(wb_timer_fn, wbt_data_dir, 2, rwb);
+       if (!rwb->cb) {
+               kfree(rwb);
+               return -ENOMEM;
+       }
+
         for (i = 0; i < WBT_NUM_RWQ; i++) {
                 atomic_set(&rwb->rq_wait[i].inflight, 0);
                 init_waitqueue_head(&rwb->rq_wait[i].wait);
         }
  
-       setup_timer(&rwb->window_timer, wb_timer_fn, (unsigned long) rwb);
         rwb->wc = 1;
         rwb->queue_depth = RWB_DEF_DEPTH;
         rwb->last_comp = rwb->last_issue = jiffies;
@@ -726,10 +732,10 @@ int wbt_init(struct request_queue *q)
         wbt_update_limits(rwb);
  
         /*
-        * Assign rwb, and turn on stats tracking for this queue
+        * Assign rwb and add the stats callback.
          */
         q->rq_wb = rwb;
-       blk_stat_enable(q);
+       blk_stat_add_callback(q, rwb->cb);
  
         rwb->min_lat_nsec = wbt_default_latency_nsec(q);
  
@@ -744,7 +750,8 @@ void wbt_exit(struct request_queue *q)
         struct rq_wb *rwb = q->rq_wb;
  
         if (rwb) {
-               del_timer_sync(&rwb->window_timer);
+               blk_stat_remove_callback(q, rwb->cb);
+               blk_stat_free_callback(rwb->cb);
                 q->rq_wb = NULL;
                 kfree(rwb);
         }
diff --git a/block/blk-wbt.h b/block/blk-wbt.h

index 65f1de519f67ebd72780a07cd12a87a17eafe501..df6de50c5d594847b16aabb9d88e16c5ca2312d0 100644 (file)
--- a/block/blk-wbt.h
+++ b/block/blk-wbt.h
@@ -32,27 +32,27 @@ enum {
  
  static inline void wbt_clear_state(struct blk_issue_stat *stat)
  {
-       stat->time &= BLK_STAT_TIME_MASK;
+       stat->stat &= ~BLK_STAT_RES_MASK;
  }
  
  static inline enum wbt_flags wbt_stat_to_mask(struct blk_issue_stat *stat)
  {
-       return (stat->time & BLK_STAT_MASK) >> BLK_STAT_SHIFT;
+       return (stat->stat & BLK_STAT_RES_MASK) >> BLK_STAT_RES_SHIFT;
  }
  
  static inline void wbt_track(struct blk_issue_stat *stat, enum wbt_flags wb_acct)
  {
-       stat->time |= ((u64) wb_acct) << BLK_STAT_SHIFT;
+       stat->stat |= ((u64) wb_acct) << BLK_STAT_RES_SHIFT;
  }
  
  static inline bool wbt_is_tracked(struct blk_issue_stat *stat)
  {
-       return (stat->time >> BLK_STAT_SHIFT) & WBT_TRACKED;
+       return (stat->stat >> BLK_STAT_RES_SHIFT) & WBT_TRACKED;
  }
  
  static inline bool wbt_is_read(struct blk_issue_stat *stat)
  {
-       return (stat->time >> BLK_STAT_SHIFT) & WBT_READ;
+       return (stat->stat >> BLK_STAT_RES_SHIFT) & WBT_READ;
  }
  
  struct rq_wait {
@@ -81,7 +81,7 @@ struct rq_wb {
         u64 win_nsec;                           /* default window size */
         u64 cur_win_nsec;                       /* current window size */
  
-       struct timer_list window_timer;
+       struct blk_stat_callback *cb;
  
         s64 sync_issue;
         void *sync_cookie;
@@ -117,6 +117,7 @@ void wbt_update_limits(struct rq_wb *);
  void wbt_requeue(struct rq_wb *, struct blk_issue_stat *);
  void wbt_issue(struct rq_wb *, struct blk_issue_stat *);
  void wbt_disable_default(struct request_queue *);
+void wbt_enable_default(struct request_queue *);
  
  void wbt_set_queue_depth(struct rq_wb *, unsigned int);
  void wbt_set_write_cache(struct rq_wb *, bool);
@@ -155,6 +156,9 @@ static inline void wbt_issue(struct rq_wb *rwb, struct blk_issue_stat *stat)
  static inline void wbt_disable_default(struct request_queue *q)
  {
  }
+static inline void wbt_enable_default(struct request_queue *q)
+{
+}
  static inline void wbt_set_queue_depth(struct rq_wb *rwb, unsigned int depth)
  {
  }
diff --git a/block/blk.h b/block/blk.h

index d1ea4bd9b9a3f8f24eba17f4e35fbddaebd23c7d..2ed70228e44fc706e6efee71ca000e5e47433217 100644 (file)
--- a/block/blk.h
+++ b/block/blk.h
@@ -60,15 +60,12 @@ void blk_free_flush_queue(struct blk_flush_queue *q);
  int blk_init_rl(struct request_list *rl, struct request_queue *q,
                 gfp_t gfp_mask);
  void blk_exit_rl(struct request_list *rl);
-void init_request_from_bio(struct request *req, struct bio *bio);
  void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
                         struct bio *bio);
  void blk_queue_bypass_start(struct request_queue *q);
  void blk_queue_bypass_end(struct request_queue *q);
  void blk_dequeue_request(struct request *rq);
  void __blk_queue_free_tags(struct request_queue *q);
-bool __blk_end_bidi_request(struct request *rq, int error,
-                           unsigned int nr_bytes, unsigned int bidi_bytes);
  void blk_freeze_queue(struct request_queue *q);
  
  static inline void blk_queue_enter_live(struct request_queue *q)
@@ -319,10 +316,22 @@ static inline struct io_context *create_io_context(gfp_t gfp_mask, int node)
  extern void blk_throtl_drain(struct request_queue *q);
  extern int blk_throtl_init(struct request_queue *q);
  extern void blk_throtl_exit(struct request_queue *q);
+extern void blk_throtl_register_queue(struct request_queue *q);
  #else /* CONFIG_BLK_DEV_THROTTLING */
  static inline void blk_throtl_drain(struct request_queue *q) { }
  static inline int blk_throtl_init(struct request_queue *q) { return 0; }
  static inline void blk_throtl_exit(struct request_queue *q) { }
+static inline void blk_throtl_register_queue(struct request_queue *q) { }
  #endif /* CONFIG_BLK_DEV_THROTTLING */
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+extern ssize_t blk_throtl_sample_time_show(struct request_queue *q, char *page);
+extern ssize_t blk_throtl_sample_time_store(struct request_queue *q,
+       const char *page, size_t count);
+extern void blk_throtl_bio_endio(struct bio *bio);
+extern void blk_throtl_stat_add(struct request *rq, u64 time);
+#else
+static inline void blk_throtl_bio_endio(struct bio *bio) { }
+static inline void blk_throtl_stat_add(struct request *rq, u64 time) { }
+#endif
  
  #endif /* BLK_INTERNAL_H */
diff --git a/block/bsg-lib.c b/block/bsg-lib.c

index cd15f9dbb1474ad5aeb7b387420f9a6dbd021f95..0a23dbba2d3018edf10c49c774ebd5dd3ae79c87 100644 (file)
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -37,7 +37,7 @@ static void bsg_destroy_job(struct kref *kref)
         struct bsg_job *job = container_of(kref, struct bsg_job, kref);
         struct request *rq = job->req;
  
-       blk_end_request_all(rq, rq->errors);
+       blk_end_request_all(rq, scsi_req(rq)->result);
  
         put_device(job->dev);   /* release reference for the request */
  
@@ -74,7 +74,7 @@ void bsg_job_done(struct bsg_job *job, int result,
         struct scsi_request *rq = scsi_req(req);
         int err;
  
-       err = job->req->errors = result;
+       err = scsi_req(job->req)->result = result;
         if (err < 0)
                 /* we're only returning the result field in the reply */
                 rq->sense_len = sizeof(u32);
@@ -177,7 +177,7 @@ failjob_rls_job:
   * @q: request queue to manage
   *
   * On error the create_bsg_job function should return a -Exyz error value
- * that will be set to the req->errors.
+ * that will be set to ->result.
   *
   * Drivers/subsys should pass this to the queue init function.
   */
@@ -201,7 +201,7 @@ static void bsg_request_fn(struct request_queue *q)
  
                 ret = bsg_create_job(dev, req);
                 if (ret) {
-                       req->errors = ret;
+                       scsi_req(req)->result = ret;
                         blk_end_request_all(req, ret);
                         spin_lock_irq(q->queue_lock);
                         continue;
diff --git a/block/bsg.c b/block/bsg.c

index 74835dbf0c47c3fc1c6f2f80c9da53ee112402da..d9da1b613cedf6329ce3283de08a0b2f7a1fcb5d 100644 (file)
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -391,13 +391,13 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
         struct scsi_request *req = scsi_req(rq);
         int ret = 0;
  
-       dprintk("rq %p bio %p 0x%x\n", rq, bio, rq->errors);
+       dprintk("rq %p bio %p 0x%x\n", rq, bio, req->result);
         /*
          * fill in all the output members
          */
-       hdr->device_status = rq->errors & 0xff;
-       hdr->transport_status = host_byte(rq->errors);
-       hdr->driver_status = driver_byte(rq->errors);
+       hdr->device_status = req->result & 0xff;
+       hdr->transport_status = host_byte(req->result);
+       hdr->driver_status = driver_byte(req->result);
         hdr->info = 0;
         if (hdr->device_status || hdr->transport_status || hdr->driver_status)
                 hdr->info |= SG_INFO_CHECK;
@@ -431,8 +431,8 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
          * just a protocol response (i.e. non negative), that gets
          * processed above.
          */
-       if (!ret && rq->errors < 0)
-               ret = rq->errors;
+       if (!ret && req->result < 0)
+               ret = req->result;
  
         blk_rq_unmap_user(bio);
         scsi_req_free_cmd(req);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c

index 440b95ee593c97a81b749f02809dbc5e2ce42b72..da69b079725fbf62a407db76f7c5c430c52be3f9 100644 (file)
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3761,16 +3761,14 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
  }
  
  #ifdef CONFIG_CFQ_GROUP_IOSCHED
-static bool check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
+static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
  {
         struct cfq_data *cfqd = cic_to_cfqd(cic);
         struct cfq_queue *cfqq;
         uint64_t serial_nr;
-       bool nonroot_cg;
  
         rcu_read_lock();
         serial_nr = bio_blkcg(bio)->css.serial_nr;
-       nonroot_cg = bio_blkcg(bio) != &blkcg_root;
         rcu_read_unlock();
  
         /*
@@ -3778,7 +3776,7 @@ static bool check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
          * spuriously on a newly created cic but there's no harm.
          */
         if (unlikely(!cfqd) || likely(cic->blkcg_serial_nr == serial_nr))
-               return nonroot_cg;
+               return;
  
         /*
          * Drop reference to queues.  New queues will be assigned in new
@@ -3799,12 +3797,10 @@ static bool check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
         }
  
         cic->blkcg_serial_nr = serial_nr;
-       return nonroot_cg;
  }
  #else
-static inline bool check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
+static inline void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
  {
-       return false;
  }
  #endif  /* CONFIG_CFQ_GROUP_IOSCHED */
  
@@ -4449,12 +4445,11 @@ cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio,
         const int rw = rq_data_dir(rq);
         const bool is_sync = rq_is_sync(rq);
         struct cfq_queue *cfqq;
-       bool disable_wbt;
  
         spin_lock_irq(q->queue_lock);
  
         check_ioprio_changed(cic, bio);
-       disable_wbt = check_blkcg_changed(cic, bio);
+       check_blkcg_changed(cic, bio);
  new_queue:
         cfqq = cic_to_cfqq(cic, is_sync);
         if (!cfqq || cfqq == &cfqd->oom_cfqq) {
@@ -4491,9 +4486,6 @@ new_queue:
         rq->elv.priv[1] = cfqq->cfqg;
         spin_unlock_irq(q->queue_lock);
  
-       if (disable_wbt)
-               wbt_disable_default(q);
-
         return 0;
  }
  
@@ -4706,6 +4698,7 @@ static void cfq_registered_queue(struct request_queue *q)
          */
         if (blk_queue_nonrot(q))
                 cfqd->cfq_slice_idle = 0;
+       wbt_disable_default(q);
  }
  
  /*
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c

index 570021a0dc1ca5b903c990c1ae26d5913e71cd47..04325b81c2b410ed560b43af82a4a906e040f16d 100644 (file)
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -685,7 +685,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
         case BLKALIGNOFF:
                 return compat_put_int(arg, bdev_alignment_offset(bdev));
         case BLKDISCARDZEROES:
-               return compat_put_uint(arg, bdev_discard_zeroes_data(bdev));
+               return compat_put_uint(arg, 0);
         case BLKFLSBUF:
         case BLKROSET:
         case BLKDISCARD:
diff --git a/block/elevator.c b/block/elevator.c

index 01139f549b5be73047f346153f5b8fedcb23b3d0..bf11e70f008b10692cdc290e14fc351f30f972c9 100644 (file)
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -41,6 +41,7 @@
  
  #include "blk.h"
  #include "blk-mq-sched.h"
+#include "blk-wbt.h"
  
  static DEFINE_SPINLOCK(elv_list_lock);
  static LIST_HEAD(elv_list);
@@ -242,26 +243,21 @@ int elevator_init(struct request_queue *q, char *name)
                 }
         }
  
-       if (e->uses_mq) {
-               err = blk_mq_sched_setup(q);
-               if (!err)
-                       err = e->ops.mq.init_sched(q, e);
-       } else
+       if (e->uses_mq)
+               err = blk_mq_init_sched(q, e);
+       else
                 err = e->ops.sq.elevator_init_fn(q, e);
-       if (err) {
-               if (e->uses_mq)
-                       blk_mq_sched_teardown(q);
+       if (err)
                 elevator_put(e);
-       }
         return err;
  }
  EXPORT_SYMBOL(elevator_init);
  
-void elevator_exit(struct elevator_queue *e)
+void elevator_exit(struct request_queue *q, struct elevator_queue *e)
  {
         mutex_lock(&e->sysfs_lock);
         if (e->uses_mq && e->type->ops.mq.exit_sched)
-               e->type->ops.mq.exit_sched(e);
+               blk_mq_exit_sched(q, e);
         else if (!e->uses_mq && e->type->ops.sq.elevator_exit_fn)
                 e->type->ops.sq.elevator_exit_fn(e);
         mutex_unlock(&e->sysfs_lock);
@@ -882,6 +878,8 @@ void elv_unregister_queue(struct request_queue *q)
                 kobject_uevent(&e->kobj, KOBJ_REMOVE);
                 kobject_del(&e->kobj);
                 e->registered = 0;
+               /* Re-enable throttling in case elevator disabled it */
+               wbt_enable_default(q);
         }
  }
  EXPORT_SYMBOL(elv_unregister_queue);
@@ -946,6 +944,45 @@ void elv_unregister(struct elevator_type *e)
  }
  EXPORT_SYMBOL_GPL(elv_unregister);
  
+static int elevator_switch_mq(struct request_queue *q,
+                             struct elevator_type *new_e)
+{
+       int ret;
+
+       blk_mq_freeze_queue(q);
+       blk_mq_quiesce_queue(q);
+
+       if (q->elevator) {
+               if (q->elevator->registered)
+                       elv_unregister_queue(q);
+               ioc_clear_queue(q);
+               elevator_exit(q, q->elevator);
+       }
+
+       ret = blk_mq_init_sched(q, new_e);
+       if (ret)
+               goto out;
+
+       if (new_e) {
+               ret = elv_register_queue(q);
+               if (ret) {
+                       elevator_exit(q, q->elevator);
+                       goto out;
+               }
+       }
+
+       if (new_e)
+               blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
+       else
+               blk_add_trace_msg(q, "elv switch: none");
+
+out:
+       blk_mq_unfreeze_queue(q);
+       blk_mq_start_stopped_hw_queues(q, true);
+       return ret;
+
+}
+
  /*
   * switch to new_e io scheduler. be careful not to introduce deadlocks -
   * we don't free the old io scheduler, before we have allocated what we
@@ -958,10 +995,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
         bool old_registered = false;
         int err;
  
-       if (q->mq_ops) {
-               blk_mq_freeze_queue(q);
-               blk_mq_quiesce_queue(q);
-       }
+       if (q->mq_ops)
+               return elevator_switch_mq(q, new_e);
  
         /*
          * Turn on BYPASS and drain all requests w/ elevator private data.
@@ -973,11 +1008,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
         if (old) {
                 old_registered = old->registered;
  
-               if (old->uses_mq)
-                       blk_mq_sched_teardown(q);
-
-               if (!q->mq_ops)
-                       blk_queue_bypass_start(q);
+               blk_queue_bypass_start(q);
  
                 /* unregister and clear all auxiliary data of the old elevator */
                 if (old_registered)
@@ -987,56 +1018,32 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
         }
  
         /* allocate, init and register new elevator */
-       if (new_e) {
-               if (new_e->uses_mq) {
-                       err = blk_mq_sched_setup(q);
-                       if (!err)
-                               err = new_e->ops.mq.init_sched(q, new_e);
-               } else
-                       err = new_e->ops.sq.elevator_init_fn(q, new_e);
-               if (err)
-                       goto fail_init;
+       err = new_e->ops.sq.elevator_init_fn(q, new_e);
+       if (err)
+               goto fail_init;
  
-               err = elv_register_queue(q);
-               if (err)
-                       goto fail_register;
-       } else
-               q->elevator = NULL;
+       err = elv_register_queue(q);
+       if (err)
+               goto fail_register;
  
         /* done, kill the old one and finish */
         if (old) {
-               elevator_exit(old);
-               if (!q->mq_ops)
-                       blk_queue_bypass_end(q);
+               elevator_exit(q, old);
+               blk_queue_bypass_end(q);
         }
  
-       if (q->mq_ops) {
-               blk_mq_unfreeze_queue(q);
-               blk_mq_start_stopped_hw_queues(q, true);
-       }
-
-       if (new_e)
-               blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
-       else
-               blk_add_trace_msg(q, "elv switch: none");
+       blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
  
         return 0;
  
  fail_register:
-       if (q->mq_ops)
-               blk_mq_sched_teardown(q);
-       elevator_exit(q->elevator);
+       elevator_exit(q, q->elevator);
  fail_init:
         /* switch failed, restore and re-register old elevator */
         if (old) {
                 q->elevator = old;
                 elv_register_queue(q);
-               if (!q->mq_ops)
-                       blk_queue_bypass_end(q);
-       }
-       if (q->mq_ops) {
-               blk_mq_unfreeze_queue(q);
-               blk_mq_start_stopped_hw_queues(q, true);
+               blk_queue_bypass_end(q);
         }
  
         return err;
@@ -1094,12 +1101,20 @@ int elevator_change(struct request_queue *q, const char *name)
  }
  EXPORT_SYMBOL(elevator_change);
  
+static inline bool elv_support_iosched(struct request_queue *q)
+{
+       if (q->mq_ops && q->tag_set && (q->tag_set->flags &
+                               BLK_MQ_F_NO_SCHED))
+               return false;
+       return true;
+}
+
  ssize_t elv_iosched_store(struct request_queue *q, const char *name,
                           size_t count)
  {
         int ret;
  
-       if (!(q->mq_ops || q->request_fn))
+       if (!(q->mq_ops || q->request_fn) || !elv_support_iosched(q))
                 return count;
  
         ret = __elevator_change(q, name);
@@ -1131,7 +1146,7 @@ ssize_t elv_iosched_show(struct request_queue *q, char *name)
                         len += sprintf(name+len, "[%s] ", elv->elevator_name);
                         continue;
                 }
-               if (__e->uses_mq && q->mq_ops)
+               if (__e->uses_mq && q->mq_ops && elv_support_iosched(q))
                         len += sprintf(name+len, "%s ", __e->elevator_name);
                 else if (!__e->uses_mq && !q->mq_ops)
                         len += sprintf(name+len, "%s ", __e->elevator_name);
diff --git a/block/genhd.c b/block/genhd.c

index b26a5ea115d00b51f20c8e59f09a420b2dbadc9f..9a2d01abfa3b453058dd1c44f36cf4647757d7c4 100644 (file)
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -572,20 +572,6 @@ exit:
         disk_part_iter_exit(&piter);
  }
  
-void put_disk_devt(struct disk_devt *disk_devt)
-{
-       if (disk_devt && atomic_dec_and_test(&disk_devt->count))
-               disk_devt->release(disk_devt);
-}
-EXPORT_SYMBOL(put_disk_devt);
-
-void get_disk_devt(struct disk_devt *disk_devt)
-{
-       if (disk_devt)
-               atomic_inc(&disk_devt->count);
-}
-EXPORT_SYMBOL(get_disk_devt);
-
  /**
   * device_add_disk - add partitioning information to kernel list
   * @parent: parent device for the disk
@@ -626,13 +612,6 @@ void device_add_disk(struct device *parent, struct gendisk *disk)
  
         disk_alloc_events(disk);
  
-       /*
-        * Take a reference on the devt and assign it to queue since it
-        * must not be reallocated while the bdi is registered
-        */
-       disk->queue->disk_devt = disk->disk_devt;
-       get_disk_devt(disk->disk_devt);
-
         /* Register BDI before referencing it from bdev */
         bdi = disk->queue->backing_dev_info;
         bdi_register_owner(bdi, disk_to_dev(disk));
@@ -681,12 +660,16 @@ void del_gendisk(struct gendisk *disk)
         disk->flags &= ~GENHD_FL_UP;
  
         sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
-       /*
-        * Unregister bdi before releasing device numbers (as they can get
-        * reused and we'd get clashes in sysfs).
-        */
-       bdi_unregister(disk->queue->backing_dev_info);
-       blk_unregister_queue(disk);
+       if (disk->queue) {
+               /*
+                * Unregister bdi before releasing device numbers (as they can
+                * get reused and we'd get clashes in sysfs).
+                */
+               bdi_unregister(disk->queue->backing_dev_info);
+               blk_unregister_queue(disk);
+       } else {
+               WARN_ON(1);
+       }
         blk_unregister_region(disk_devt(disk), disk->minors);
  
         part_stat_set_all(&disk->part0, 0);
@@ -1077,8 +1060,19 @@ static struct attribute *disk_attrs[] = {
         NULL
  };
  
+static umode_t disk_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+       struct device *dev = container_of(kobj, typeof(*dev), kobj);
+       struct gendisk *disk = dev_to_disk(dev);
+
+       if (a == &dev_attr_badblocks.attr && !disk->bb)
+               return 0;
+       return a->mode;
+}
+
  static struct attribute_group disk_attr_group = {
         .attrs = disk_attrs,
+       .is_visible = disk_visible,
  };
  
  static const struct attribute_group *disk_attr_groups[] = {
@@ -1369,7 +1363,7 @@ struct kobject *get_disk(struct gendisk *disk)
         owner = disk->fops->owner;
         if (owner && !try_module_get(owner))
                 return NULL;
-       kobj = kobject_get(&disk_to_dev(disk)->kobj);
+       kobj = kobject_get_unless_zero(&disk_to_dev(disk)->kobj);
         if (kobj == NULL) {
                 module_put(owner);
                 return NULL;
diff --git a/block/ioctl.c b/block/ioctl.c

index 7b88820b93d9d92a8368d29d3bda2d2e51f5f905..0de02ee67eed89282a7651605d622db4206cf5fa 100644 (file)
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -255,7 +255,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
         truncate_inode_pages_range(mapping, start, end);
  
         return blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL,
-                                   false);
+                       BLKDEV_ZERO_NOUNMAP);
  }
  
  static int put_ushort(unsigned long arg, unsigned short val)
@@ -547,7 +547,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
         case BLKALIGNOFF:
                 return put_int(arg, bdev_alignment_offset(bdev));
         case BLKDISCARDZEROES:
-               return put_uint(arg, bdev_discard_zeroes_data(bdev));
+               return put_uint(arg, 0);
         case BLKSECTGET:
                 max_sectors = min_t(unsigned int, USHRT_MAX,
                                     queue_max_sectors(bdev_get_queue(bdev)));
diff --git a/block/ioprio.c b/block/ioprio.c

index 0c47a00f92a852542dc11be132a30d44e9af055c..4b120c9cf7e8b1bd5ce45a55371454d9785f0c37 100644 (file)
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -163,22 +163,12 @@ out:
  
  int ioprio_best(unsigned short aprio, unsigned short bprio)
  {
-       unsigned short aclass;
-       unsigned short bclass;
-
         if (!ioprio_valid(aprio))
                 aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
         if (!ioprio_valid(bprio))
                 bprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
  
-       aclass = IOPRIO_PRIO_CLASS(aprio);
-       bclass = IOPRIO_PRIO_CLASS(bprio);
-       if (aclass == bclass)
-               return min(aprio, bprio);
-       if (aclass > bclass)
-               return bprio;
-       else
-               return aprio;
+       return min(aprio, bprio);
  }
  
  SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c

new file mode 100644 (file)

index 0000000..3b0090b
--- /dev/null
+++ b/block/kyber-iosched.c
@@ -0,0 +1,719 @@
+/*
+ * The Kyber I/O scheduler. Controls latency by throttling queue depths using
+ * scalable techniques.
+ *
+ * Copyright (C) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
+#include <linux/elevator.h>
+#include <linux/module.h>
+#include <linux/sbitmap.h>
+
+#include "blk.h"
+#include "blk-mq.h"
+#include "blk-mq-sched.h"
+#include "blk-mq-tag.h"
+#include "blk-stat.h"
+
+/* Scheduling domains. */
+enum {
+       KYBER_READ,
+       KYBER_SYNC_WRITE,
+       KYBER_OTHER, /* Async writes, discard, etc. */
+       KYBER_NUM_DOMAINS,
+};
+
+enum {
+       KYBER_MIN_DEPTH = 256,
+
+       /*
+        * In order to prevent starvation of synchronous requests by a flood of
+        * asynchronous requests, we reserve 25% of requests for synchronous
+        * operations.
+        */
+       KYBER_ASYNC_PERCENT = 75,
+};
+
+/*
+ * Initial device-wide depths for each scheduling domain.
+ *
+ * Even for fast devices with lots of tags like NVMe, you can saturate
+ * the device with only a fraction of the maximum possible queue depth.
+ * So, we cap these to a reasonable value.
+ */
+static const unsigned int kyber_depth[] = {
+       [KYBER_READ] = 256,
+       [KYBER_SYNC_WRITE] = 128,
+       [KYBER_OTHER] = 64,
+};
+
+/*
+ * Scheduling domain batch sizes. We favor reads.
+ */
+static const unsigned int kyber_batch_size[] = {
+       [KYBER_READ] = 16,
+       [KYBER_SYNC_WRITE] = 8,
+       [KYBER_OTHER] = 8,
+};
+
+struct kyber_queue_data {
+       struct request_queue *q;
+
+       struct blk_stat_callback *cb;
+
+       /*
+        * The device is divided into multiple scheduling domains based on the
+        * request type. Each domain has a fixed number of in-flight requests of
+        * that type device-wide, limited by these tokens.
+        */
+       struct sbitmap_queue domain_tokens[KYBER_NUM_DOMAINS];
+
+       /*
+        * Async request percentage, converted to per-word depth for
+        * sbitmap_get_shallow().
+        */
+       unsigned int async_depth;
+
+       /* Target latencies in nanoseconds. */
+       u64 read_lat_nsec, write_lat_nsec;
+};
+
+struct kyber_hctx_data {
+       spinlock_t lock;
+       struct list_head rqs[KYBER_NUM_DOMAINS];
+       unsigned int cur_domain;
+       unsigned int batching;
+       wait_queue_t domain_wait[KYBER_NUM_DOMAINS];
+       atomic_t wait_index[KYBER_NUM_DOMAINS];
+};
+
+static int rq_sched_domain(const struct request *rq)
+{
+       unsigned int op = rq->cmd_flags;
+
+       if ((op & REQ_OP_MASK) == REQ_OP_READ)
+               return KYBER_READ;
+       else if ((op & REQ_OP_MASK) == REQ_OP_WRITE && op_is_sync(op))
+               return KYBER_SYNC_WRITE;
+       else
+               return KYBER_OTHER;
+}
+
+enum {
+       NONE = 0,
+       GOOD = 1,
+       GREAT = 2,
+       BAD = -1,
+       AWFUL = -2,
+};
+
+#define IS_GOOD(status) ((status) > 0)
+#define IS_BAD(status) ((status) < 0)
+
+static int kyber_lat_status(struct blk_stat_callback *cb,
+                           unsigned int sched_domain, u64 target)
+{
+       u64 latency;
+
+       if (!cb->stat[sched_domain].nr_samples)
+               return NONE;
+
+       latency = cb->stat[sched_domain].mean;
+       if (latency >= 2 * target)
+               return AWFUL;
+       else if (latency > target)
+               return BAD;
+       else if (latency <= target / 2)
+               return GREAT;
+       else /* (latency <= target) */
+               return GOOD;
+}
+
+/*
+ * Adjust the read or synchronous write depth given the status of reads and
+ * writes. The goal is that the latencies of the two domains are fair (i.e., if
+ * one is good, then the other is good).
+ */
+static void kyber_adjust_rw_depth(struct kyber_queue_data *kqd,
+                                 unsigned int sched_domain, int this_status,
+                                 int other_status)
+{
+       unsigned int orig_depth, depth;
+
+       /*
+        * If this domain had no samples, or reads and writes are both good or
+        * both bad, don't adjust the depth.
+        */
+       if (this_status == NONE ||
+           (IS_GOOD(this_status) && IS_GOOD(other_status)) ||
+           (IS_BAD(this_status) && IS_BAD(other_status)))
+               return;
+
+       orig_depth = depth = kqd->domain_tokens[sched_domain].sb.depth;
+
+       if (other_status == NONE) {
+               depth++;
+       } else {
+               switch (this_status) {
+               case GOOD:
+                       if (other_status == AWFUL)
+                               depth -= max(depth / 4, 1U);
+                       else
+                               depth -= max(depth / 8, 1U);
+                       break;
+               case GREAT:
+                       if (other_status == AWFUL)
+                               depth /= 2;
+                       else
+                               depth -= max(depth / 4, 1U);
+                       break;
+               case BAD:
+                       depth++;
+                       break;
+               case AWFUL:
+                       if (other_status == GREAT)
+                               depth += 2;
+                       else
+                               depth++;
+                       break;
+               }
+       }
+
+       depth = clamp(depth, 1U, kyber_depth[sched_domain]);
+       if (depth != orig_depth)
+               sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth);
+}
+
+/*
+ * Adjust the depth of other requests given the status of reads and synchronous
+ * writes. As long as either domain is doing fine, we don't throttle, but if
+ * both domains are doing badly, we throttle heavily.
+ */
+static void kyber_adjust_other_depth(struct kyber_queue_data *kqd,
+                                    int read_status, int write_status,
+                                    bool have_samples)
+{
+       unsigned int orig_depth, depth;
+       int status;
+
+       orig_depth = depth = kqd->domain_tokens[KYBER_OTHER].sb.depth;
+
+       if (read_status == NONE && write_status == NONE) {
+               depth += 2;
+       } else if (have_samples) {
+               if (read_status == NONE)
+                       status = write_status;
+               else if (write_status == NONE)
+                       status = read_status;
+               else
+                       status = max(read_status, write_status);
+               switch (status) {
+               case GREAT:
+                       depth += 2;
+                       break;
+               case GOOD:
+                       depth++;
+                       break;
+               case BAD:
+                       depth -= max(depth / 4, 1U);
+                       break;
+               case AWFUL:
+                       depth /= 2;
+                       break;
+               }
+       }
+
+       depth = clamp(depth, 1U, kyber_depth[KYBER_OTHER]);
+       if (depth != orig_depth)
+               sbitmap_queue_resize(&kqd->domain_tokens[KYBER_OTHER], depth);
+}
+
+/*
+ * Apply heuristics for limiting queue depths based on gathered latency
+ * statistics.
+ */
+static void kyber_stat_timer_fn(struct blk_stat_callback *cb)
+{
+       struct kyber_queue_data *kqd = cb->data;
+       int read_status, write_status;
+
+       read_status = kyber_lat_status(cb, KYBER_READ, kqd->read_lat_nsec);
+       write_status = kyber_lat_status(cb, KYBER_SYNC_WRITE, kqd->write_lat_nsec);
+
+       kyber_adjust_rw_depth(kqd, KYBER_READ, read_status, write_status);
+       kyber_adjust_rw_depth(kqd, KYBER_SYNC_WRITE, write_status, read_status);
+       kyber_adjust_other_depth(kqd, read_status, write_status,
+                                cb->stat[KYBER_OTHER].nr_samples != 0);
+
+       /*
+        * Continue monitoring latencies if we aren't hitting the targets or
+        * we're still throttling other requests.
+        */
+       if (!blk_stat_is_active(kqd->cb) &&
+           ((IS_BAD(read_status) || IS_BAD(write_status) ||
+             kqd->domain_tokens[KYBER_OTHER].sb.depth < kyber_depth[KYBER_OTHER])))
+               blk_stat_activate_msecs(kqd->cb, 100);
+}
+
+static unsigned int kyber_sched_tags_shift(struct kyber_queue_data *kqd)
+{
+       /*
+        * All of the hardware queues have the same depth, so we can just grab
+        * the shift of the first one.
+        */
+       return kqd->q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift;
+}
+
+static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
+{
+       struct kyber_queue_data *kqd;
+       unsigned int max_tokens;
+       unsigned int shift;
+       int ret = -ENOMEM;
+       int i;
+
+       kqd = kmalloc_node(sizeof(*kqd), GFP_KERNEL, q->node);
+       if (!kqd)
+               goto err;
+       kqd->q = q;
+
+       kqd->cb = blk_stat_alloc_callback(kyber_stat_timer_fn, rq_sched_domain,
+                                         KYBER_NUM_DOMAINS, kqd);
+       if (!kqd->cb)
+               goto err_kqd;
+
+       /*
+        * The maximum number of tokens for any scheduling domain is at least
+        * the queue depth of a single hardware queue. If the hardware doesn't
+        * have many tags, still provide a reasonable number.
+        */
+       max_tokens = max_t(unsigned int, q->tag_set->queue_depth,
+                          KYBER_MIN_DEPTH);
+       for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
+               WARN_ON(!kyber_depth[i]);
+               WARN_ON(!kyber_batch_size[i]);
+               ret = sbitmap_queue_init_node(&kqd->domain_tokens[i],
+                                             max_tokens, -1, false, GFP_KERNEL,
+                                             q->node);
+               if (ret) {
+                       while (--i >= 0)
+                               sbitmap_queue_free(&kqd->domain_tokens[i]);
+                       goto err_cb;
+               }
+               sbitmap_queue_resize(&kqd->domain_tokens[i], kyber_depth[i]);
+       }
+
+       shift = kyber_sched_tags_shift(kqd);
+       kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
+
+       kqd->read_lat_nsec = 2000000ULL;
+       kqd->write_lat_nsec = 10000000ULL;
+
+       return kqd;
+
+err_cb:
+       blk_stat_free_callback(kqd->cb);
+err_kqd:
+       kfree(kqd);
+err:
+       return ERR_PTR(ret);
+}
+
+static int kyber_init_sched(struct request_queue *q, struct elevator_type *e)
+{
+       struct kyber_queue_data *kqd;
+       struct elevator_queue *eq;
+
+       eq = elevator_alloc(q, e);
+       if (!eq)
+               return -ENOMEM;
+
+       kqd = kyber_queue_data_alloc(q);
+       if (IS_ERR(kqd)) {
+               kobject_put(&eq->kobj);
+               return PTR_ERR(kqd);
+       }
+
+       eq->elevator_data = kqd;
+       q->elevator = eq;
+
+       blk_stat_add_callback(q, kqd->cb);
+
+       return 0;
+}
+
+static void kyber_exit_sched(struct elevator_queue *e)
+{
+       struct kyber_queue_data *kqd = e->elevator_data;
+       struct request_queue *q = kqd->q;
+       int i;
+
+       blk_stat_remove_callback(q, kqd->cb);
+
+       for (i = 0; i < KYBER_NUM_DOMAINS; i++)
+               sbitmap_queue_free(&kqd->domain_tokens[i]);
+       blk_stat_free_callback(kqd->cb);
+       kfree(kqd);
+}
+
+static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
+{
+       struct kyber_hctx_data *khd;
+       int i;
+
+       khd = kmalloc_node(sizeof(*khd), GFP_KERNEL, hctx->numa_node);
+       if (!khd)
+               return -ENOMEM;
+
+       spin_lock_init(&khd->lock);
+
+       for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
+               INIT_LIST_HEAD(&khd->rqs[i]);
+               INIT_LIST_HEAD(&khd->domain_wait[i].task_list);
+               atomic_set(&khd->wait_index[i], 0);
+       }
+
+       khd->cur_domain = 0;
+       khd->batching = 0;
+
+       hctx->sched_data = khd;
+
+       return 0;
+}
+
+static void kyber_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
+{
+       kfree(hctx->sched_data);
+}
+
+static int rq_get_domain_token(struct request *rq)
+{
+       return (long)rq->elv.priv[0];
+}
+
+static void rq_set_domain_token(struct request *rq, int token)
+{
+       rq->elv.priv[0] = (void *)(long)token;
+}
+
+static void rq_clear_domain_token(struct kyber_queue_data *kqd,
+                                 struct request *rq)
+{
+       unsigned int sched_domain;
+       int nr;
+
+       nr = rq_get_domain_token(rq);
+       if (nr != -1) {
+               sched_domain = rq_sched_domain(rq);
+               sbitmap_queue_clear(&kqd->domain_tokens[sched_domain], nr,
+                                   rq->mq_ctx->cpu);
+       }
+}
+
+static struct request *kyber_get_request(struct request_queue *q,
+                                        unsigned int op,
+                                        struct blk_mq_alloc_data *data)
+{
+       struct kyber_queue_data *kqd = q->elevator->elevator_data;
+       struct request *rq;
+
+       /*
+        * We use the scheduler tags as per-hardware queue queueing tokens.
+        * Async requests can be limited at this stage.
+        */
+       if (!op_is_sync(op))
+               data->shallow_depth = kqd->async_depth;
+
+       rq = __blk_mq_alloc_request(data, op);
+       if (rq)
+               rq_set_domain_token(rq, -1);
+       return rq;
+}
+
+static void kyber_put_request(struct request *rq)
+{
+       struct request_queue *q = rq->q;
+       struct kyber_queue_data *kqd = q->elevator->elevator_data;
+
+       rq_clear_domain_token(kqd, rq);
+       blk_mq_finish_request(rq);
+}
+
+static void kyber_completed_request(struct request *rq)
+{
+       struct request_queue *q = rq->q;
+       struct kyber_queue_data *kqd = q->elevator->elevator_data;
+       unsigned int sched_domain;
+       u64 now, latency, target;
+
+       /*
+        * Check if this request met our latency goal. If not, quickly gather
+        * some statistics and start throttling.
+        */
+       sched_domain = rq_sched_domain(rq);
+       switch (sched_domain) {
+       case KYBER_READ:
+               target = kqd->read_lat_nsec;
+               break;
+       case KYBER_SYNC_WRITE:
+               target = kqd->write_lat_nsec;
+               break;
+       default:
+               return;
+       }
+
+       /* If we are already monitoring latencies, don't check again. */
+       if (blk_stat_is_active(kqd->cb))
+               return;
+
+       now = __blk_stat_time(ktime_to_ns(ktime_get()));
+       if (now < blk_stat_time(&rq->issue_stat))
+               return;
+
+       latency = now - blk_stat_time(&rq->issue_stat);
+
+       if (latency > target)
+               blk_stat_activate_msecs(kqd->cb, 10);
+}
+
+static void kyber_flush_busy_ctxs(struct kyber_hctx_data *khd,
+                                 struct blk_mq_hw_ctx *hctx)
+{
+       LIST_HEAD(rq_list);
+       struct request *rq, *next;
+
+       blk_mq_flush_busy_ctxs(hctx, &rq_list);
+       list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
+               unsigned int sched_domain;
+
+               sched_domain = rq_sched_domain(rq);
+               list_move_tail(&rq->queuelist, &khd->rqs[sched_domain]);
+       }
+}
+
+static int kyber_domain_wake(wait_queue_t *wait, unsigned mode, int flags,
+                            void *key)
+{
+       struct blk_mq_hw_ctx *hctx = READ_ONCE(wait->private);
+
+       list_del_init(&wait->task_list);
+       blk_mq_run_hw_queue(hctx, true);
+       return 1;
+}
+/*
+ * Try to take a token for the current domain (khd->cur_domain) from
+ * kqd->domain_tokens[].
+ *
+ * Returns the token number (>= 0) on success. If the first attempt fails and
+ * this hctx's wait entry for the domain is not already queued, the entry is
+ * added to one of the token bitmap's wait queues (with kyber_domain_wake()
+ * as its callback) and the allocation is retried once. A negative return
+ * value means no token could be obtained.
+ */
+static int kyber_get_domain_token(struct kyber_queue_data *kqd,
+                                 struct kyber_hctx_data *khd,
+                                 struct blk_mq_hw_ctx *hctx)
+{
+       unsigned int sched_domain = khd->cur_domain;
+       struct sbitmap_queue *domain_tokens = &kqd->domain_tokens[sched_domain];
+       wait_queue_t *wait = &khd->domain_wait[sched_domain];
+       struct sbq_wait_state *ws;
+       int nr;
+
+       nr = __sbitmap_queue_get(domain_tokens);
+       if (nr >= 0)
+               return nr;
+
+       /*
+        * If we failed to get a domain token, make sure the hardware queue is
+        * run when one becomes available. Note that this is serialized on
+        * khd->lock, but we still need to be careful about the waker.
+        */
+       if (list_empty_careful(&wait->task_list)) {
+               init_waitqueue_func_entry(wait, kyber_domain_wake);
+               wait->private = hctx;
+               ws = sbq_wait_ptr(domain_tokens,
+                                 &khd->wait_index[sched_domain]);
+               add_wait_queue(&ws->wait, wait);
+
+               /*
+                * Try again in case a token was freed before we got on the wait
+                * queue.
+                */
+               nr = __sbitmap_queue_get(domain_tokens);
+       }
+       return nr; /* token from the retry, or still negative */
+}
+/*
+ * Try to dispatch the first pending request of khd->cur_domain.
+ *
+ * @flushed is an in/out flag shared across calls from one dispatch pass: if
+ * the domain list is empty and *flushed is still false, the software queues
+ * are flushed into the per-domain lists once and *flushed is set.
+ *
+ * Returns the request, removed from its list and with a domain token
+ * assigned, or NULL when there is no pending request or no token.
+ */
+static struct request *
+kyber_dispatch_cur_domain(struct kyber_queue_data *kqd,
+                         struct kyber_hctx_data *khd,
+                         struct blk_mq_hw_ctx *hctx,
+                         bool *flushed)
+{
+       struct list_head *rqs;
+       struct request *rq;
+       int nr;
+
+       rqs = &khd->rqs[khd->cur_domain];
+       rq = list_first_entry_or_null(rqs, struct request, queuelist);
+
+       /*
+        * If there wasn't already a pending request and we haven't flushed the
+        * software queues yet, flush the software queues and check again.
+        */
+       if (!rq && !*flushed) {
+               kyber_flush_busy_ctxs(khd, hctx);
+               *flushed = true;
+               rq = list_first_entry_or_null(rqs, struct request, queuelist);
+       }
+
+       if (rq) {
+               nr = kyber_get_domain_token(kqd, khd, hctx);
+               if (nr >= 0) {
+                       khd->batching++; /* checked in kyber_dispatch_request() */
+                       rq_set_domain_token(rq, nr);
+                       list_del_init(&rq->queuelist);
+                       return rq;
+               }
+       }
+
+       /* There were either no pending requests or no tokens. */
+       return NULL;
+}
+/*
+ * Dispatch hook (wired up as .dispatch_request in kyber_sched). Picks the
+ * next request for @hctx while holding khd->lock.
+ *
+ * The current domain keeps being served while khd->batching is below its
+ * kyber_batch_size[] entry. Otherwise the batch counter is reset and the
+ * domains are tried in round-robin order, starting with the one after the
+ * current domain. Returns the chosen request, or NULL if no domain could
+ * dispatch one.
+ */
+static struct request *kyber_dispatch_request(struct blk_mq_hw_ctx *hctx)
+{
+       struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
+       struct kyber_hctx_data *khd = hctx->sched_data;
+       bool flushed = false;
+       struct request *rq;
+       int i;
+
+       spin_lock(&khd->lock);
+
+       /*
+        * First, if we are still entitled to batch, try to dispatch a request
+        * from the batch.
+        */
+       if (khd->batching < kyber_batch_size[khd->cur_domain]) {
+               rq = kyber_dispatch_cur_domain(kqd, khd, hctx, &flushed);
+               if (rq)
+                       goto out;
+       }
+
+       /*
+        * Either,
+        * 1. We were no longer entitled to a batch.
+        * 2. The domain we were batching didn't have any requests.
+        * 3. The domain we were batching was out of tokens.
+        *
+        * Start another batch. Note that this wraps back around to the original
+        * domain if no other domains have requests or tokens.
+        */
+       khd->batching = 0;
+       for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
+               if (khd->cur_domain == KYBER_NUM_DOMAINS - 1)
+                       khd->cur_domain = 0;
+               else
+                       khd->cur_domain++;
+
+               rq = kyber_dispatch_cur_domain(kqd, khd, hctx, &flushed);
+               if (rq)
+                       goto out;
+       }
+
+       rq = NULL; /* every domain was tried without success */
+out:
+       spin_unlock(&khd->lock);
+       return rq;
+}
+/*
+ * has_work hook (wired up as .has_work in kyber_sched). Returns true if any
+ * of the per-domain lists in khd->rqs[] is non-empty. khd->lock is not taken
+ * here; the lists are only inspected with list_empty_careful().
+ */
+static bool kyber_has_work(struct blk_mq_hw_ctx *hctx)
+{
+       struct kyber_hctx_data *khd = hctx->sched_data;
+       int i;
+
+       for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
+               if (!list_empty_careful(&khd->rqs[i]))
+                       return true;
+       }
+       return false;
+}
+/*
+ * Generates the kyber_OP_lat_show() / kyber_OP_lat_store() attribute
+ * handlers for kqd->OP_lat_nsec; instantiated below for "read" and "write".
+ *
+ * show:  prints the current value as a decimal number followed by a newline.
+ * store: parses a base-10 unsigned long long with kstrtoull(); returns the
+ *        kstrtoull() error on failure, otherwise stores the value and
+ *        returns count.
+ */
+#define KYBER_LAT_SHOW_STORE(op)                                       \
+static ssize_t kyber_##op##_lat_show(struct elevator_queue *e,         \
+                                    char *page)                        \
+{                                                                      \
+       struct kyber_queue_data *kqd = e->elevator_data;                \
+                                                                       \
+       return sprintf(page, "%llu\n", kqd->op##_lat_nsec);             \
+}                                                                      \
+                                                                       \
+static ssize_t kyber_##op##_lat_store(struct elevator_queue *e,                \
+                                     const char *page, size_t count)   \
+{                                                                      \
+       struct kyber_queue_data *kqd = e->elevator_data;                \
+       unsigned long long nsec;                                        \
+       int ret;                                                        \
+                                                                       \
+       ret = kstrtoull(page, 10, &nsec);                               \
+       if (ret)                                                        \
+               return ret;                                             \
+                                                                       \
+       kqd->op##_lat_nsec = nsec;                                      \
+                                                                       \
+       return count;                                                   \
+}
+KYBER_LAT_SHOW_STORE(read);
+KYBER_LAT_SHOW_STORE(write);
+#undef KYBER_LAT_SHOW_STORE
+/*
+ * Elevator attribute table: read_lat_nsec and write_lat_nsec, both mode
+ * 0644, bound to the kyber_read_lat_* and kyber_write_lat_* show/store
+ * handlers. The table is terminated by __ATTR_NULL.
+ */
+#define KYBER_LAT_ATTR(op) __ATTR(op##_lat_nsec, 0644, kyber_##op##_lat_show, kyber_##op##_lat_store)
+static struct elv_fs_entry kyber_sched_attrs[] = {
+       KYBER_LAT_ATTR(read),
+       KYBER_LAT_ATTR(write),
+       __ATTR_NULL
+};
+#undef KYBER_LAT_ATTR
+/*
+ * Elevator type descriptor for the "kyber" scheduler. It is marked as a
+ * blk-mq scheduler (.uses_mq) and points the .ops.mq hooks at the kyber_*
+ * callbacks and .elevator_attrs at kyber_sched_attrs.
+ */
+static struct elevator_type kyber_sched = {
+       .ops.mq = {
+               .init_sched = kyber_init_sched,
+               .exit_sched = kyber_exit_sched,
+               .init_hctx = kyber_init_hctx,
+               .exit_hctx = kyber_exit_hctx,
+               .get_request = kyber_get_request,
+               .put_request = kyber_put_request,
+               .completed_request = kyber_completed_request,
+               .dispatch_request = kyber_dispatch_request,
+               .has_work = kyber_has_work,
+       },
+       .uses_mq = true,
+       .elevator_attrs = kyber_sched_attrs,
+       .elevator_name = "kyber",
+       .elevator_owner = THIS_MODULE,
+};
+/*
+ * Module init: register the kyber elevator type and return the result of
+ * elv_register().
+ */
+static int __init kyber_init(void)
+{
+       return elv_register(&kyber_sched);
+}
+/* Module exit: unregister the kyber elevator type. */
+static void __exit kyber_exit(void)
+{
+       elv_unregister(&kyber_sched);
+}
+/* Module entry and exit points, followed by the module metadata. */
+module_init(kyber_init);
+module_exit(kyber_exit);
+
+MODULE_AUTHOR("Omar Sandoval");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Kyber I/O scheduler");
diff --git a/block/partition-generic.c b/block/partition-generic.c

index 7afb9907821fb7abe61d76ac1f3ce408bb14dbd5..0171a2faad6814754a42ed29ec001e9aacdc844d 100644 (file)
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -497,7 +497,6 @@ rescan:
  
         if (disk->fops->revalidate_disk)
                 disk->fops->revalidate_disk(disk);
-       blk_integrity_revalidate(disk);
         check_disk_size_change(disk, bdev);
         bdev->bd_invalidated = 0;
         if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c

index 2a2fc768b27ad81583424252f13b89303269b755..4a294a5f7fab2002e105a6b2ca475ad0e9738b83 100644 (file)
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -262,11 +262,11 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
         /*
          * fill in all the output members
          */
-       hdr->status = rq->errors & 0xff;
-       hdr->masked_status = status_byte(rq->errors);
-       hdr->msg_status = msg_byte(rq->errors);
-       hdr->host_status = host_byte(rq->errors);
-       hdr->driver_status = driver_byte(rq->errors);
+       hdr->status = req->result & 0xff;
+       hdr->masked_status = status_byte(req->result);
+       hdr->msg_status = msg_byte(req->result);
+       hdr->host_status = host_byte(req->result);
+       hdr->driver_status = driver_byte(req->result);
         hdr->info = 0;
         if (hdr->masked_status || hdr->host_status || hdr->driver_status)
                 hdr->info |= SG_INFO_CHECK;
@@ -362,7 +362,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
                 goto out_free_cdb;
  
         bio = rq->bio;
-       rq->retries = 0;
+       req->retries = 0;
  
         start_time = jiffies;
  
@@ -476,13 +476,13 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
                 goto error;
  
         /* default.  possible overriden later */
-       rq->retries = 5;
+       req->retries = 5;
  
         switch (opcode) {
         case SEND_DIAGNOSTIC:
         case FORMAT_UNIT:
                 rq->timeout = FORMAT_UNIT_TIMEOUT;
-               rq->retries = 1;
+               req->retries = 1;
                 break;
         case START_STOP:
                 rq->timeout = START_STOP_TIMEOUT;
@@ -495,7 +495,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
                 break;
         case READ_DEFECT_DATA:
                 rq->timeout = READ_DEFECT_DATA_TIMEOUT;
-               rq->retries = 1;
+               req->retries = 1;
                 break;
         default:
                 rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
@@ -509,7 +509,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
  
         blk_execute_rq(q, disk, rq, 0);
  
-       err = rq->errors & 0xff;        /* only 8 bit SCSI status */
+       err = req->result & 0xff;       /* only 8 bit SCSI status */
         if (err) {
                 if (req->sense_len && req->sense) {
                         bytes = (OMAX_SB_LEN > req->sense_len) ?
@@ -547,7 +547,8 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
         scsi_req(rq)->cmd[0] = cmd;
         scsi_req(rq)->cmd[4] = data;
         scsi_req(rq)->cmd_len = 6;
-       err = blk_execute_rq(q, bd_disk, rq, 0);
+       blk_execute_rq(q, bd_disk, rq, 0);
+       err = scsi_req(rq)->result ? -EIO : 0;
         blk_put_request(rq);
  
         return err;
diff --git a/block/sed-opal.c b/block/sed-opal.c

index 1e18dca360fc501033762d4c505c2e32c4674ee6..9b30ae5ab843b0508494cae08a78ae831c67e58b 100644 (file)
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -275,8 +275,8 @@ static bool check_tper(const void *data)
         u8 flags = tper->supported_features;
  
         if (!(flags & TPER_SYNC_SUPPORTED)) {
-               pr_err("TPer sync not supported. flags = %d\n",
-                      tper->supported_features);
+               pr_debug("TPer sync not supported. flags = %d\n",
+                        tper->supported_features);
                 return false;
         }
  
@@ -289,7 +289,7 @@ static bool check_sum(const void *data)
         u32 nlo = be32_to_cpu(sum->num_locking_objects);
  
         if (nlo == 0) {
-               pr_err("Need at least one locking object.\n");
+               pr_debug("Need at least one locking object.\n");
                 return false;
         }
  
@@ -385,9 +385,9 @@ static int next(struct opal_dev *dev)
  
                 error = step->fn(dev, step->data);
                 if (error) {
-                       pr_err("Error on step function: %d with error %d: %s\n",
-                              state, error,
-                              opal_error_to_human(error));
+                       pr_debug("Error on step function: %d with error %d: %s\n",
+                                state, error,
+                                opal_error_to_human(error));
  
                         /* For each OPAL command we do a discovery0 then we
                          * start some sort of session.
@@ -419,8 +419,8 @@ static int opal_discovery0_end(struct opal_dev *dev)
         print_buffer(dev->resp, hlen);
  
         if (hlen > IO_BUFFER_LENGTH - sizeof(*hdr)) {
-               pr_warn("Discovery length overflows buffer (%zu+%u)/%u\n",
-                       sizeof(*hdr), hlen, IO_BUFFER_LENGTH);
+               pr_debug("Discovery length overflows buffer (%zu+%u)/%u\n",
+                        sizeof(*hdr), hlen, IO_BUFFER_LENGTH);
                 return -EFAULT;
         }
  
@@ -503,7 +503,7 @@ static void add_token_u8(int *err, struct opal_dev *cmd, u8 tok)
         if (*err)
                 return;
         if (cmd->pos >= IO_BUFFER_LENGTH - 1) {
-               pr_err("Error adding u8: end of buffer.\n");
+               pr_debug("Error adding u8: end of buffer.\n");
                 *err = -ERANGE;
                 return;
         }
@@ -553,7 +553,7 @@ static void add_token_u64(int *err, struct opal_dev *cmd, u64 number)
         len = DIV_ROUND_UP(msb, 4);
  
         if (cmd->pos >= IO_BUFFER_LENGTH - len - 1) {
-               pr_err("Error adding u64: end of buffer.\n");
+               pr_debug("Error adding u64: end of buffer.\n");
                 *err = -ERANGE;
                 return;
         }
@@ -579,7 +579,7 @@ static void add_token_bytestring(int *err, struct opal_dev *cmd,
         }
  
         if (len >= IO_BUFFER_LENGTH - cmd->pos - header_len) {
-               pr_err("Error adding bytestring: end of buffer.\n");
+               pr_debug("Error adding bytestring: end of buffer.\n");
                 *err = -ERANGE;
                 return;
         }
@@ -597,7 +597,7 @@ static void add_token_bytestring(int *err, struct opal_dev *cmd,
  static int build_locking_range(u8 *buffer, size_t length, u8 lr)
  {
         if (length > OPAL_UID_LENGTH) {
-               pr_err("Can't build locking range. Length OOB\n");
+               pr_debug("Can't build locking range. Length OOB\n");
                 return -ERANGE;
         }
  
@@ -614,7 +614,7 @@ static int build_locking_range(u8 *buffer, size_t length, u8 lr)
  static int build_locking_user(u8 *buffer, size_t length, u8 lr)
  {
         if (length > OPAL_UID_LENGTH) {
-               pr_err("Can't build locking range user, Length OOB\n");
+               pr_debug("Can't build locking range user, Length OOB\n");
                 return -ERANGE;
         }
  
@@ -648,7 +648,7 @@ static int cmd_finalize(struct opal_dev *cmd, u32 hsn, u32 tsn)
         add_token_u8(&err, cmd, OPAL_ENDLIST);
  
         if (err) {
-               pr_err("Error finalizing command.\n");
+               pr_debug("Error finalizing command.\n");
                 return -EFAULT;
         }
  
@@ -660,7 +660,7 @@ static int cmd_finalize(struct opal_dev *cmd, u32 hsn, u32 tsn)
         hdr->subpkt.length = cpu_to_be32(cmd->pos - sizeof(*hdr));
         while (cmd->pos % 4) {
                 if (cmd->pos >= IO_BUFFER_LENGTH) {
-                       pr_err("Error: Buffer overrun\n");
+                       pr_debug("Error: Buffer overrun\n");
                         return -ERANGE;
                 }
                 cmd->cmd[cmd->pos++] = 0;
@@ -679,14 +679,14 @@ static const struct opal_resp_tok *response_get_token(
         const struct opal_resp_tok *tok;
  
         if (n >= resp->num) {
-               pr_err("Token number doesn't exist: %d, resp: %d\n",
-                      n, resp->num);
+               pr_debug("Token number doesn't exist: %d, resp: %d\n",
+                        n, resp->num);
                 return ERR_PTR(-EINVAL);
         }
  
         tok = &resp->toks[n];
         if (tok->len == 0) {
-               pr_err("Token length must be non-zero\n");
+               pr_debug("Token length must be non-zero\n");
                 return ERR_PTR(-EINVAL);
         }
  
@@ -727,7 +727,7 @@ static ssize_t response_parse_short(struct opal_resp_tok *tok,
  
                 tok->type = OPAL_DTA_TOKENID_UINT;
                 if (tok->len > 9) {
-                       pr_warn("uint64 with more than 8 bytes\n");
+                       pr_debug("uint64 with more than 8 bytes\n");
                         return -EINVAL;
                 }
                 for (i = tok->len - 1; i > 0; i--) {
@@ -814,8 +814,8 @@ static int response_parse(const u8 *buf, size_t length,
  
         if (clen == 0 || plen == 0 || slen == 0 ||
             slen > IO_BUFFER_LENGTH - sizeof(*hdr)) {
-               pr_err("Bad header length. cp: %u, pkt: %u, subpkt: %u\n",
-                      clen, plen, slen);
+               pr_debug("Bad header length. cp: %u, pkt: %u, subpkt: %u\n",
+                        clen, plen, slen);
                 print_buffer(pos, sizeof(*hdr));
                 return -EINVAL;
         }
@@ -848,7 +848,7 @@ static int response_parse(const u8 *buf, size_t length,
         }
  
         if (num_entries == 0) {
-               pr_err("Couldn't parse response.\n");
+               pr_debug("Couldn't parse response.\n");
                 return -EINVAL;
         }
         resp->num = num_entries;
@@ -861,18 +861,18 @@ static size_t response_get_string(const struct parsed_resp *resp, int n,
  {
         *store = NULL;
         if (!resp) {
-               pr_err("Response is NULL\n");
+               pr_debug("Response is NULL\n");
                 return 0;
         }
  
         if (n > resp->num) {
-               pr_err("Response has %d tokens. Can't access %d\n",
-                      resp->num, n);
+               pr_debug("Response has %d tokens. Can't access %d\n",
+                        resp->num, n);
                 return 0;
         }
  
         if (resp->toks[n].type != OPAL_DTA_TOKENID_BYTESTRING) {
-               pr_err("Token is not a byte string!\n");
+               pr_debug("Token is not a byte string!\n");
                 return 0;
         }
  
@@ -883,26 +883,26 @@ static size_t response_get_string(const struct parsed_resp *resp, int n,
  static u64 response_get_u64(const struct parsed_resp *resp, int n)
  {
         if (!resp) {
-               pr_err("Response is NULL\n");
+               pr_debug("Response is NULL\n");
                 return 0;
         }
  
         if (n > resp->num) {
-               pr_err("Response has %d tokens. Can't access %d\n",
-                      resp->num, n);
+               pr_debug("Response has %d tokens. Can't access %d\n",
+                        resp->num, n);
                 return 0;
         }
  
         if (resp->toks[n].type != OPAL_DTA_TOKENID_UINT) {
-               pr_err("Token is not unsigned it: %d\n",
-                      resp->toks[n].type);
+               pr_debug("Token is not unsigned it: %d\n",
+                        resp->toks[n].type);
                 return 0;
         }
  
         if (!(resp->toks[n].width == OPAL_WIDTH_TINY ||
               resp->toks[n].width == OPAL_WIDTH_SHORT)) {
-               pr_err("Atom is not short or tiny: %d\n",
-                      resp->toks[n].width);
+               pr_debug("Atom is not short or tiny: %d\n",
+                        resp->toks[n].width);
                 return 0;
         }
  
@@ -949,7 +949,7 @@ static int parse_and_check_status(struct opal_dev *dev)
  
         error = response_parse(dev->resp, IO_BUFFER_LENGTH, &dev->parsed);
         if (error) {
-               pr_err("Couldn't parse response.\n");
+               pr_debug("Couldn't parse response.\n");
                 return error;
         }
  
@@ -975,7 +975,7 @@ static int start_opal_session_cont(struct opal_dev *dev)
         tsn = response_get_u64(&dev->parsed, 5);
  
         if (hsn == 0 && tsn == 0) {
-               pr_err("Couldn't authenticate session\n");
+               pr_debug("Couldn't authenticate session\n");
                 return -EPERM;
         }
  
@@ -1012,7 +1012,7 @@ static int finalize_and_send(struct opal_dev *dev, cont_fn cont)
  
         ret = cmd_finalize(dev, dev->hsn, dev->tsn);
         if (ret) {
-               pr_err("Error finalizing command buffer: %d\n", ret);
+               pr_debug("Error finalizing command buffer: %d\n", ret);
                 return ret;
         }
  
@@ -1023,7 +1023,6 @@ static int finalize_and_send(struct opal_dev *dev, cont_fn cont)
  
  static int gen_key(struct opal_dev *dev, void *data)
  {
-       const u8 *method;
         u8 uid[OPAL_UID_LENGTH];
         int err = 0;
  
@@ -1031,7 +1030,6 @@ static int gen_key(struct opal_dev *dev, void *data)
         set_comid(dev, dev->comid);
  
         memcpy(uid, dev->prev_data, min(sizeof(uid), dev->prev_d_len));
-       method = opalmethod[OPAL_GENKEY];
         kfree(dev->prev_data);
         dev->prev_data = NULL;
  
@@ -1043,7 +1041,7 @@ static int gen_key(struct opal_dev *dev, void *data)
         add_token_u8(&err, dev, OPAL_ENDLIST);
  
         if (err) {
-               pr_err("Error building gen key command\n");
+               pr_debug("Error building gen key command\n");
                 return err;
  
         }
@@ -1061,8 +1059,8 @@ static int get_active_key_cont(struct opal_dev *dev)
                 return error;
         keylen = response_get_string(&dev->parsed, 4, &activekey);
         if (!activekey) {
-               pr_err("%s: Couldn't extract the Activekey from the response\n",
-                      __func__);
+               pr_debug("%s: Couldn't extract the Activekey from the response\n",
+                        __func__);
                 return OPAL_INVAL_PARAM;
         }
         dev->prev_data = kmemdup(activekey, keylen, GFP_KERNEL);
@@ -1105,7 +1103,7 @@ static int get_active_key(struct opal_dev *dev, void *data)
         add_token_u8(&err, dev, OPAL_ENDLIST);
         add_token_u8(&err, dev, OPAL_ENDLIST);
         if (err) {
-               pr_err("Error building get active key command\n");
+               pr_debug("Error building get active key command\n");
                 return err;
         }
  
@@ -1161,7 +1159,7 @@ static inline int enable_global_lr(struct opal_dev *dev, u8 *uid,
         err = generic_lr_enable_disable(dev, uid, !!setup->RLE, !!setup->WLE,
                                         0, 0);
         if (err)
-               pr_err("Failed to create enable global lr command\n");
+               pr_debug("Failed to create enable global lr command\n");
         return err;
  }
  
@@ -1219,7 +1217,7 @@ static int setup_locking_range(struct opal_dev *dev, void *data)
  
         }
         if (err) {
-               pr_err("Error building Setup Locking range command.\n");
+               pr_debug("Error building Setup Locking range command.\n");
                 return err;
  
         }
@@ -1236,11 +1234,8 @@ static int start_generic_opal_session(struct opal_dev *dev,
         u32 hsn;
         int err = 0;
  
-       if (key == NULL && auth != OPAL_ANYBODY_UID) {
-               pr_err("%s: Attempted to open ADMIN_SP Session without a Host" \
-                      "Challenge, and not as the Anybody UID\n", __func__);
+       if (key == NULL && auth != OPAL_ANYBODY_UID)
                 return OPAL_INVAL_PARAM;
-       }
  
         clear_opal_cmd(dev);
  
@@ -1275,12 +1270,12 @@ static int start_generic_opal_session(struct opal_dev *dev,
                 add_token_u8(&err, dev, OPAL_ENDLIST);
                 break;
         default:
-               pr_err("Cannot start Admin SP session with auth %d\n", auth);
+               pr_debug("Cannot start Admin SP session with auth %d\n", auth);
                 return OPAL_INVAL_PARAM;
         }
  
         if (err) {
-               pr_err("Error building start adminsp session command.\n");
+               pr_debug("Error building start adminsp session command.\n");
                 return err;
         }
  
@@ -1371,7 +1366,7 @@ static int start_auth_opal_session(struct opal_dev *dev, void *data)
         add_token_u8(&err, dev, OPAL_ENDLIST);
  
         if (err) {
-               pr_err("Error building STARTSESSION command.\n");
+               pr_debug("Error building STARTSESSION command.\n");
                 return err;
         }
  
@@ -1393,7 +1388,7 @@ static int revert_tper(struct opal_dev *dev, void *data)
         add_token_u8(&err, dev, OPAL_STARTLIST);
         add_token_u8(&err, dev, OPAL_ENDLIST);
         if (err) {
-               pr_err("Error building REVERT TPER command.\n");
+               pr_debug("Error building REVERT TPER command.\n");
                 return err;
         }
  
@@ -1428,7 +1423,7 @@ static int internal_activate_user(struct opal_dev *dev, void *data)
         add_token_u8(&err, dev, OPAL_ENDLIST);
  
         if (err) {
-               pr_err("Error building Activate UserN command.\n");
+               pr_debug("Error building Activate UserN command.\n");
                 return err;
         }
  
@@ -1455,7 +1450,7 @@ static int erase_locking_range(struct opal_dev *dev, void *data)
         add_token_u8(&err, dev, OPAL_ENDLIST);
  
         if (err) {
-               pr_err("Error building Erase Locking Range Command.\n");
+               pr_debug("Error building Erase Locking Range Command.\n");
                 return err;
         }
         return finalize_and_send(dev, parse_and_check_status);
@@ -1486,7 +1481,7 @@ static int set_mbr_done(struct opal_dev *dev, void *data)
         add_token_u8(&err, dev, OPAL_ENDLIST);
  
         if (err) {
-               pr_err("Error Building set MBR Done command\n");
+               pr_debug("Error Building set MBR Done command\n");
                 return err;
         }
  
@@ -1518,7 +1513,7 @@ static int set_mbr_enable_disable(struct opal_dev *dev, void *data)
         add_token_u8(&err, dev, OPAL_ENDLIST);
  
         if (err) {
-               pr_err("Error Building set MBR done command\n");
+               pr_debug("Error Building set MBR done command\n");
                 return err;
         }
  
@@ -1569,7 +1564,7 @@ static int set_new_pw(struct opal_dev *dev, void *data)
  
         if (generic_pw_cmd(usr->opal_key.key, usr->opal_key.key_len,
                            cpin_uid, dev)) {
-               pr_err("Error building set password command.\n");
+               pr_debug("Error building set password command.\n");
                 return -ERANGE;
         }
  
@@ -1584,7 +1579,7 @@ static int set_sid_cpin_pin(struct opal_dev *dev, void *data)
         memcpy(cpin_uid, opaluid[OPAL_C_PIN_SID], OPAL_UID_LENGTH);
  
         if (generic_pw_cmd(key->key, key->key_len, cpin_uid, dev)) {
-               pr_err("Error building Set SID cpin\n");
+               pr_debug("Error building Set SID cpin\n");
                 return -ERANGE;
         }
         return finalize_and_send(dev, parse_and_check_status);
@@ -1659,7 +1654,7 @@ static int add_user_to_lr(struct opal_dev *dev, void *data)
         add_token_u8(&err, dev, OPAL_ENDLIST);
  
         if (err) {
-               pr_err("Error building add user to locking range command.\n");
+               pr_debug("Error building add user to locking range command.\n");
                 return err;
         }
  
@@ -1669,7 +1664,6 @@ static int add_user_to_lr(struct opal_dev *dev, void *data)
  static int lock_unlock_locking_range(struct opal_dev *dev, void *data)
  {
         u8 lr_buffer[OPAL_UID_LENGTH];
-       const u8 *method;
         struct opal_lock_unlock *lkul = data;
         u8 read_locked = 1, write_locked = 1;
         int err = 0;
@@ -1677,7 +1671,6 @@ static int lock_unlock_locking_range(struct opal_dev *dev, void *data)
         clear_opal_cmd(dev);
         set_comid(dev, dev->comid);
  
-       method = opalmethod[OPAL_SET];
         if (build_locking_range(lr_buffer, sizeof(lr_buffer),
                                 lkul->session.opal_key.lr) < 0)
                 return -ERANGE;
@@ -1695,7 +1688,7 @@ static int lock_unlock_locking_range(struct opal_dev *dev, void *data)
                 /* vars are initalized to locked */
                 break;
         default:
-               pr_err("Tried to set an invalid locking state... returning to uland\n");
+               pr_debug("Tried to set an invalid locking state... returning to uland\n");
                 return OPAL_INVAL_PARAM;
         }
  
@@ -1722,7 +1715,7 @@ static int lock_unlock_locking_range(struct opal_dev *dev, void *data)
         add_token_u8(&err, dev, OPAL_ENDLIST);
  
         if (err) {
-               pr_err("Error building SET command.\n");
+               pr_debug("Error building SET command.\n");
                 return err;
         }
         return finalize_and_send(dev, parse_and_check_status);
@@ -1733,14 +1726,12 @@ static int lock_unlock_locking_range_sum(struct opal_dev *dev, void *data)
  {
         u8 lr_buffer[OPAL_UID_LENGTH];
         u8 read_locked = 1, write_locked = 1;
-       const u8 *method;
         struct opal_lock_unlock *lkul = data;
         int ret;
  
         clear_opal_cmd(dev);
         set_comid(dev, dev->comid);
  
-       method = opalmethod[OPAL_SET];
         if (build_locking_range(lr_buffer, sizeof(lr_buffer),
                                 lkul->session.opal_key.lr) < 0)
                 return -ERANGE;
@@ -1758,14 +1749,14 @@ static int lock_unlock_locking_range_sum(struct opal_dev *dev, void *data)
                 /* vars are initalized to locked */
                 break;
         default:
-               pr_err("Tried to set an invalid locking state.\n");
+               pr_debug("Tried to set an invalid locking state.\n");
                 return OPAL_INVAL_PARAM;
         }
         ret = generic_lr_enable_disable(dev, lr_buffer, 1, 1,
                                         read_locked, write_locked);
  
         if (ret < 0) {
-               pr_err("Error building SET command.\n");
+               pr_debug("Error building SET command.\n");
                 return ret;
         }
         return finalize_and_send(dev, parse_and_check_status);
@@ -1817,7 +1808,7 @@ static int activate_lsp(struct opal_dev *dev, void *data)
         }
  
         if (err) {
-               pr_err("Error building Activate LockingSP command.\n");
+               pr_debug("Error building Activate LockingSP command.\n");
                 return err;
         }
  
@@ -1837,7 +1828,7 @@ static int get_lsp_lifecycle_cont(struct opal_dev *dev)
         /* 0x08 is Manufacured Inactive */
         /* 0x09 is Manufactured */
         if (lc_status != OPAL_MANUFACTURED_INACTIVE) {
-               pr_err("Couldn't determine the status of the Lifcycle state\n");
+               pr_debug("Couldn't determine the status of the Lifecycle state\n");
                 return -ENODEV;
         }
  
@@ -1874,7 +1865,7 @@ static int get_lsp_lifecycle(struct opal_dev *dev, void *data)
         add_token_u8(&err, dev, OPAL_ENDLIST);
  
         if (err) {
-               pr_err("Error Building GET Lifecycle Status command\n");
+               pr_debug("Error Building GET Lifecycle Status command\n");
                 return err;
         }
  
@@ -1893,7 +1884,7 @@ static int get_msid_cpin_pin_cont(struct opal_dev *dev)
  
         strlen = response_get_string(&dev->parsed, 4, &msid_pin);
         if (!msid_pin) {
-               pr_err("%s: Couldn't extract PIN from response\n", __func__);
+               pr_debug("%s: Couldn't extract PIN from response\n", __func__);
                 return OPAL_INVAL_PARAM;
         }
  
@@ -1935,7 +1926,7 @@ static int get_msid_cpin_pin(struct opal_dev *dev, void *data)
         add_token_u8(&err, dev, OPAL_ENDLIST);
  
         if (err) {
-               pr_err("Error building Get MSID CPIN PIN command.\n");
+               pr_debug("Error building Get MSID CPIN PIN command.\n");
                 return err;
         }
  
@@ -2130,18 +2121,18 @@ static int opal_add_user_to_lr(struct opal_dev *dev,
  
         if (lk_unlk->l_state != OPAL_RO &&
             lk_unlk->l_state != OPAL_RW) {
-               pr_err("Locking state was not RO or RW\n");
+               pr_debug("Locking state was not RO or RW\n");
                 return -EINVAL;
         }
-       if (lk_unlk->session.who < OPAL_USER1 &&
+       if (lk_unlk->session.who < OPAL_USER1 ||
             lk_unlk->session.who > OPAL_USER9) {
-               pr_err("Authority was not within the range of users: %d\n",
-                      lk_unlk->session.who);
+               pr_debug("Authority was not within the range of users: %d\n",
+                        lk_unlk->session.who);
                 return -EINVAL;
         }
         if (lk_unlk->session.sum) {
-               pr_err("%s not supported in sum. Use setup locking range\n",
-                      __func__);
+               pr_debug("%s not supported in sum. Use setup locking range\n",
+                        __func__);
                 return -EINVAL;
         }
  
@@ -2316,9 +2307,9 @@ static int opal_activate_user(struct opal_dev *dev,
         int ret;
  
         /* We can't activate Admin1 it's active as manufactured */
-       if (opal_session->who < OPAL_USER1 &&
+       if (opal_session->who < OPAL_USER1 ||
             opal_session->who > OPAL_USER9) {
-               pr_err("Who was not a valid user: %d\n", opal_session->who);
+               pr_debug("Who was not a valid user: %d\n", opal_session->who);
                 return -EINVAL;
         }
  
@@ -2349,9 +2340,9 @@ bool opal_unlock_from_suspend(struct opal_dev *dev)
  
                 ret = __opal_lock_unlock(dev, &suspend->unlk);
                 if (ret) {
-                       pr_warn("Failed to unlock LR %hhu with sum %d\n",
-                               suspend->unlk.session.opal_key.lr,
-                               suspend->unlk.session.sum);
+                       pr_debug("Failed to unlock LR %hhu with sum %d\n",
+                                suspend->unlk.session.opal_key.lr,
+                                suspend->unlk.session.sum);
                         was_failure = true;
                 }
         }
@@ -2369,10 +2360,8 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
                 return -EACCES;
         if (!dev)
                 return -ENOTSUPP;
-       if (!dev->supported) {
-               pr_err("Not supported\n");
+       if (!dev->supported)
                 return -ENOTSUPP;
-       }
  
         p = memdup_user(arg, _IOC_SIZE(cmd));
         if (IS_ERR(p))
@@ -2416,7 +2405,7 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
                 ret = opal_secure_erase_locking_range(dev, p);
                 break;
         default:
-               pr_warn("No such Opal Ioctl %u\n", cmd);
+               break;
         }
  
         kfree(p);
diff --git a/block/t10-pi.c b/block/t10-pi.c

index 2c97912335a90944e04927eb7be8219f361e92e0..680c6d63629831c8e8cc6687f6e8593924105463 100644 (file)
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -160,28 +160,28 @@ static int t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
         return t10_pi_verify(iter, t10_pi_ip_fn, 3);
  }
  
-struct blk_integrity_profile t10_pi_type1_crc = {
+const struct blk_integrity_profile t10_pi_type1_crc = {
         .name                   = "T10-DIF-TYPE1-CRC",
         .generate_fn            = t10_pi_type1_generate_crc,
         .verify_fn              = t10_pi_type1_verify_crc,
  };
  EXPORT_SYMBOL(t10_pi_type1_crc);
  
-struct blk_integrity_profile t10_pi_type1_ip = {
+const struct blk_integrity_profile t10_pi_type1_ip = {
         .name                   = "T10-DIF-TYPE1-IP",
         .generate_fn            = t10_pi_type1_generate_ip,
         .verify_fn              = t10_pi_type1_verify_ip,
  };
  EXPORT_SYMBOL(t10_pi_type1_ip);
  
-struct blk_integrity_profile t10_pi_type3_crc = {
+const struct blk_integrity_profile t10_pi_type3_crc = {
         .name                   = "T10-DIF-TYPE3-CRC",
         .generate_fn            = t10_pi_type3_generate_crc,
         .verify_fn              = t10_pi_type3_verify_crc,
  };
  EXPORT_SYMBOL(t10_pi_type3_crc);
  
-struct blk_integrity_profile t10_pi_type3_ip = {
+const struct blk_integrity_profile t10_pi_type3_ip = {
         .name                   = "T10-DIF-TYPE3-IP",
         .generate_fn            = t10_pi_type3_generate_ip,
         .verify_fn              = t10_pi_type3_verify_ip,
diff --git a/crypto/af_alg.c b/crypto/af_alg.c

index f5e18c2a48527bb3f5bbdc5202b37577689710b3..690deca17c35287c00171466f7b06e53262b0601 100644 (file)
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -266,7 +266,7 @@ unlock:
         return err;
  }
  
-int af_alg_accept(struct sock *sk, struct socket *newsock)
+int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern)
  {
         struct alg_sock *ask = alg_sk(sk);
         const struct af_alg_type *type;
@@ -281,7 +281,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock)
         if (!type)
                 goto unlock;
  
-       sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, 0);
+       sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, kern);
         err = -ENOMEM;
         if (!sk2)
                 goto unlock;
@@ -323,9 +323,10 @@ unlock:
  }
  EXPORT_SYMBOL_GPL(af_alg_accept);
  
-static int alg_accept(struct socket *sock, struct socket *newsock, int flags)
+static int alg_accept(struct socket *sock, struct socket *newsock, int flags,
+                     bool kern)
  {
-       return af_alg_accept(sock->sk, newsock);
+       return af_alg_accept(sock->sk, newsock, kern);
  }
  
  static const struct proto_ops alg_proto_ops = {
diff --git a/crypto/ahash.c b/crypto/ahash.c

index e58c4970c22b7cc1cdb5e8f08875d9c1e7714568..826cd7ab4d4a2ec830438b40e987e230bd03f32f 100644 (file)
--- a/crypto/ahash.c
+++ b/crypto/ahash.c
@@ -32,6 +32,7 @@ struct ahash_request_priv {
         crypto_completion_t complete;
         void *data;
         u8 *result;
+       u32 flags;
         void *ubuf[] CRYPTO_MINALIGN_ATTR;
  };
  
@@ -253,6 +254,8 @@ static int ahash_save_req(struct ahash_request *req, crypto_completion_t cplt)
         priv->result = req->result;
         priv->complete = req->base.complete;
         priv->data = req->base.data;
+       priv->flags = req->base.flags;
+
         /*
          * WARNING: We do not backup req->priv here! The req->priv
          *          is for internal use of the Crypto API and the
@@ -267,38 +270,44 @@ static int ahash_save_req(struct ahash_request *req, crypto_completion_t cplt)
         return 0;
  }
  
-static void ahash_restore_req(struct ahash_request *req)
+static void ahash_restore_req(struct ahash_request *req, int err)
  {
         struct ahash_request_priv *priv = req->priv;
  
+       if (!err)
+               memcpy(priv->result, req->result,
+                      crypto_ahash_digestsize(crypto_ahash_reqtfm(req)));
+
         /* Restore the original crypto request. */
         req->result = priv->result;
-       req->base.complete = priv->complete;
-       req->base.data = priv->data;
+
+       ahash_request_set_callback(req, priv->flags,
+                                  priv->complete, priv->data);
         req->priv = NULL;
  
         /* Free the req->priv.priv from the ADJUSTED request. */
         kzfree(priv);
  }
  
-static void ahash_op_unaligned_finish(struct ahash_request *req, int err)
+static void ahash_notify_einprogress(struct ahash_request *req)
  {
         struct ahash_request_priv *priv = req->priv;
+       struct crypto_async_request oreq;
  
-       if (err == -EINPROGRESS)
-               return;
-
-       if (!err)
-               memcpy(priv->result, req->result,
-                      crypto_ahash_digestsize(crypto_ahash_reqtfm(req)));
+       oreq.data = priv->data;
  
-       ahash_restore_req(req);
+       priv->complete(&oreq, -EINPROGRESS);
  }
  
  static void ahash_op_unaligned_done(struct crypto_async_request *req, int err)
  {
         struct ahash_request *areq = req->data;
  
+       if (err == -EINPROGRESS) {
+               ahash_notify_einprogress(areq);
+               return;
+       }
+
         /*
          * Restore the original request, see ahash_op_unaligned() for what
          * goes where.
@@ -309,7 +318,7 @@ static void ahash_op_unaligned_done(struct crypto_async_request *req, int err)
          */
  
         /* First copy req->result into req->priv.result */
-       ahash_op_unaligned_finish(areq, err);
+       ahash_restore_req(areq, err);
  
         /* Complete the ORIGINAL request. */
         areq->base.complete(&areq->base, err);
@@ -325,7 +334,12 @@ static int ahash_op_unaligned(struct ahash_request *req,
                 return err;
  
         err = op(req);
-       ahash_op_unaligned_finish(req, err);
+       if (err == -EINPROGRESS ||
+           (err == -EBUSY && (ahash_request_flags(req) &
+                              CRYPTO_TFM_REQ_MAY_BACKLOG)))
+               return err;
+
+       ahash_restore_req(req, err);
  
         return err;
  }
@@ -360,25 +374,14 @@ int crypto_ahash_digest(struct ahash_request *req)
  }
  EXPORT_SYMBOL_GPL(crypto_ahash_digest);
  
-static void ahash_def_finup_finish2(struct ahash_request *req, int err)
+static void ahash_def_finup_done2(struct crypto_async_request *req, int err)
  {
-       struct ahash_request_priv *priv = req->priv;
+       struct ahash_request *areq = req->data;
  
         if (err == -EINPROGRESS)
                 return;
  
-       if (!err)
-               memcpy(priv->result, req->result,
-                      crypto_ahash_digestsize(crypto_ahash_reqtfm(req)));
-
-       ahash_restore_req(req);
-}
-
-static void ahash_def_finup_done2(struct crypto_async_request *req, int err)
-{
-       struct ahash_request *areq = req->data;
-
-       ahash_def_finup_finish2(areq, err);
+       ahash_restore_req(areq, err);
  
         areq->base.complete(&areq->base, err);
  }
@@ -389,11 +392,15 @@ static int ahash_def_finup_finish1(struct ahash_request *req, int err)
                 goto out;
  
         req->base.complete = ahash_def_finup_done2;
-       req->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
         err = crypto_ahash_reqtfm(req)->final(req);
+       if (err == -EINPROGRESS ||
+           (err == -EBUSY && (ahash_request_flags(req) &
+                              CRYPTO_TFM_REQ_MAY_BACKLOG)))
+               return err;
  
  out:
-       ahash_def_finup_finish2(req, err);
+       ahash_restore_req(req, err);
         return err;
  }
  
@@ -401,7 +408,16 @@ static void ahash_def_finup_done1(struct crypto_async_request *req, int err)
  {
         struct ahash_request *areq = req->data;
  
+       if (err == -EINPROGRESS) {
+               ahash_notify_einprogress(areq);
+               return;
+       }
+
+       areq->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
         err = ahash_def_finup_finish1(areq, err);
+       if (areq->priv)
+               return;
  
         areq->base.complete(&areq->base, err);
  }
@@ -416,6 +432,11 @@ static int ahash_def_finup(struct ahash_request *req)
                 return err;
  
         err = tfm->update(req);
+       if (err == -EINPROGRESS ||
+           (err == -EBUSY && (ahash_request_flags(req) &
+                              CRYPTO_TFM_REQ_MAY_BACKLOG)))
+               return err;
+
         return ahash_def_finup_finish1(req, err);
  }
  
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c

index 5a805375865731f4cc7e94790362c208dd58d05b..ef59d9926ee99bccfbdc6077451cb95008d07651 100644 (file)
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -40,6 +40,7 @@ struct aead_async_req {
         struct aead_async_rsgl first_rsgl;
         struct list_head list;
         struct kiocb *iocb;
+       struct sock *sk;
         unsigned int tsgls;
         char iv[];
  };
@@ -379,12 +380,10 @@ unlock:
  
  static void aead_async_cb(struct crypto_async_request *_req, int err)
  {
-       struct sock *sk = _req->data;
-       struct alg_sock *ask = alg_sk(sk);
-       struct aead_ctx *ctx = ask->private;
-       struct crypto_aead *tfm = crypto_aead_reqtfm(&ctx->aead_req);
-       struct aead_request *req = aead_request_cast(_req);
+       struct aead_request *req = _req->data;
+       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
         struct aead_async_req *areq = GET_ASYM_REQ(req, tfm);
+       struct sock *sk = areq->sk;
         struct scatterlist *sg = areq->tsgl;
         struct aead_async_rsgl *rsgl;
         struct kiocb *iocb = areq->iocb;
@@ -447,11 +446,12 @@ static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
         memset(&areq->first_rsgl, '\0', sizeof(areq->first_rsgl));
         INIT_LIST_HEAD(&areq->list);
         areq->iocb = msg->msg_iocb;
+       areq->sk = sk;
         memcpy(areq->iv, ctx->iv, crypto_aead_ivsize(tfm));
         aead_request_set_tfm(req, tfm);
         aead_request_set_ad(req, ctx->aead_assoclen);
         aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-                                 aead_async_cb, sk);
+                                 aead_async_cb, req);
         used -= ctx->aead_assoclen;
  
         /* take over all tx sgls from ctx */
diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c

index 54fc90e8339ce83edab95908adb16ddc1e9d7d32..5e92bd275ef38e8dfe47d9ca50a0e5ced51b2d0e 100644 (file)
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -239,7 +239,8 @@ unlock:
         return err ?: len;
  }
  
-static int hash_accept(struct socket *sock, struct socket *newsock, int flags)
+static int hash_accept(struct socket *sock, struct socket *newsock, int flags,
+                      bool kern)
  {
         struct sock *sk = sock->sk;
         struct alg_sock *ask = alg_sk(sk);
@@ -260,7 +261,7 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags)
         if (err)
                 return err;
  
-       err = af_alg_accept(ask->parent, newsock);
+       err = af_alg_accept(ask->parent, newsock, kern);
         if (err)
                 return err;
  
@@ -378,7 +379,7 @@ static int hash_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
  }
  
  static int hash_accept_nokey(struct socket *sock, struct socket *newsock,
-                            int flags)
+                            int flags, bool kern)
  {
         int err;
  
@@ -386,7 +387,7 @@ static int hash_accept_nokey(struct socket *sock, struct socket *newsock,
         if (err)
                 return err;
  
-       return hash_accept(sock, newsock, flags);
+       return hash_accept(sock, newsock, flags, kern);
  }
  
  static struct proto_ops algif_hash_ops_nokey = {
diff --git a/crypto/lrw.c b/crypto/lrw.c

index ecd8474018e3bde6d16d44c66da6844b6c78ad26..a8bfae4451bfcbfd26e25bbb39280818dcc139d4 100644 (file)
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -286,8 +286,11 @@ static int init_crypt(struct skcipher_request *req, crypto_completion_t done)
  
         subreq->cryptlen = LRW_BUFFER_SIZE;
         if (req->cryptlen > LRW_BUFFER_SIZE) {
-               subreq->cryptlen = min(req->cryptlen, (unsigned)PAGE_SIZE);
-               rctx->ext = kmalloc(subreq->cryptlen, gfp);
+               unsigned int n = min(req->cryptlen, (unsigned int)PAGE_SIZE);
+
+               rctx->ext = kmalloc(n, gfp);
+               if (rctx->ext)
+                       subreq->cryptlen = n;
         }
  
         rctx->src = req->src;
@@ -342,6 +345,13 @@ static void encrypt_done(struct crypto_async_request *areq, int err)
         struct rctx *rctx;
  
         rctx = skcipher_request_ctx(req);
+
+       if (err == -EINPROGRESS) {
+               if (rctx->left != req->cryptlen)
+                       return;
+               goto out;
+       }
+
         subreq = &rctx->subreq;
         subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
  
@@ -349,6 +359,7 @@ static void encrypt_done(struct crypto_async_request *areq, int err)
         if (rctx->left)
                 return;
  
+out:
         skcipher_request_complete(req, err);
  }
  
@@ -386,6 +397,13 @@ static void decrypt_done(struct crypto_async_request *areq, int err)
         struct rctx *rctx;
  
         rctx = skcipher_request_ctx(req);
+
+       if (err == -EINPROGRESS) {
+               if (rctx->left != req->cryptlen)
+                       return;
+               goto out;
+       }
+
         subreq = &rctx->subreq;
         subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
  
@@ -393,6 +411,7 @@ static void decrypt_done(struct crypto_async_request *areq, int err)
         if (rctx->left)
                 return;
  
+out:
         skcipher_request_complete(req, err);
  }
  
diff --git a/crypto/xts.c b/crypto/xts.c

index baeb34dd8582ebd11473ebb8e2ed5eadd7d2f71b..89ace5ebc2da88df3e049e89e1d2640b2e361e3d 100644 (file)
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -230,8 +230,11 @@ static int init_crypt(struct skcipher_request *req, crypto_completion_t done)
  
         subreq->cryptlen = XTS_BUFFER_SIZE;
         if (req->cryptlen > XTS_BUFFER_SIZE) {
-               subreq->cryptlen = min(req->cryptlen, (unsigned)PAGE_SIZE);
-               rctx->ext = kmalloc(subreq->cryptlen, gfp);
+               unsigned int n = min(req->cryptlen, (unsigned int)PAGE_SIZE);
+
+               rctx->ext = kmalloc(n, gfp);
+               if (rctx->ext)
+                       subreq->cryptlen = n;
         }
  
         rctx->src = req->src;
@@ -283,6 +286,13 @@ static void encrypt_done(struct crypto_async_request *areq, int err)
         struct rctx *rctx;
  
         rctx = skcipher_request_ctx(req);
+
+       if (err == -EINPROGRESS) {
+               if (rctx->left != req->cryptlen)
+                       return;
+               goto out;
+       }
+
         subreq = &rctx->subreq;
         subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
  
@@ -290,6 +300,7 @@ static void encrypt_done(struct crypto_async_request *areq, int err)
         if (rctx->left)
                 return;
  
+out:
         skcipher_request_complete(req, err);
  }
  
@@ -327,6 +338,13 @@ static void decrypt_done(struct crypto_async_request *areq, int err)
         struct rctx *rctx;
  
         rctx = skcipher_request_ctx(req);
+
+       if (err == -EINPROGRESS) {
+               if (rctx->left != req->cryptlen)
+                       return;
+               goto out;
+       }
+
         subreq = &rctx->subreq;
         subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
  
@@ -334,6 +352,7 @@ static void decrypt_done(struct crypto_async_request *areq, int err)
         if (rctx->left)
                 return;
  
+out:
         skcipher_request_complete(req, err);
  }
  
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig

index 83e5f7e1a20d3f7c9fcfd3822f9102673f7c12e5..c6db511b5f73f3e90f1d90a5c8d13290b989b559 100644 (file)
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -256,7 +256,7 @@ config ACPI_PROCESSOR
  
  config ACPI_IPMI
         tristate "IPMI"
-       depends on IPMI_SI
+       depends on IPMI_HANDLER
         default n
         help
           This driver enables the ACPI to access the BMC controller. And it
@@ -469,9 +469,8 @@ config ACPI_WATCHDOG
  
  config ACPI_EXTLOG
         tristate "Extended Error Log support"
-       depends on X86_MCE && X86_LOCAL_APIC
+       depends on X86_MCE && X86_LOCAL_APIC && EDAC
         select UEFI_CPER
-       select RAS
         default n
         help
           Certain usages such as Predictive Failure Analysis (PFA) require
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile

index a391bbc48105ae6cf3504775c60b00b4dcd5705e..d94f92f88ca1c9afb7e04b4f96901f9518a4a2c1 100644 (file)
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -2,7 +2,6 @@
  # Makefile for the Linux ACPI interpreter
  #
  
-ccflags-y                      := -Os
  ccflags-$(CONFIG_ACPI_DEBUG)   += -DACPI_DEBUG_OUTPUT
  
  #
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c

index a15270a806fcab41bae160937cb09c11515056f2..502ea4dc208060d4daa2d1c296bf1b3545b4278a 100644 (file)
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -229,7 +229,7 @@ static int __init extlog_init(void)
         if (!(cap & MCG_ELOG_P) || !extlog_get_l1addr())
                 return -ENODEV;
  
-       if (get_edac_report_status() == EDAC_REPORTING_FORCE) {
+       if (edac_get_report_status() == EDAC_REPORTING_FORCE) {
                 pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n");
                 return -EPERM;
         }
@@ -285,8 +285,8 @@ static int __init extlog_init(void)
          * eMCA event report method has higher priority than EDAC method,
          * unless EDAC event report method is mandatory.
          */
-       old_edac_report_status = get_edac_report_status();
-       set_edac_report_status(EDAC_REPORTING_DISABLED);
+       old_edac_report_status = edac_get_report_status();
+       edac_set_report_status(EDAC_REPORTING_DISABLED);
         mce_register_decode_chain(&extlog_mce_dec);
         /* enable OS to be involved to take over management from BIOS */
         ((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN;
@@ -308,7 +308,7 @@ err:
  
  static void __exit extlog_exit(void)
  {
-       set_edac_report_status(old_edac_report_status);
+       edac_set_report_status(old_edac_report_status);
         mce_unregister_decode_chain(&extlog_mce_dec);
         ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN;
         if (extlog_l1_addr)
diff --git a/drivers/acpi/acpi_ipmi.c b/drivers/acpi/acpi_ipmi.c

index 747c2ba98534f102bc90fa26ee86e29652af7318..1b64419e2fec0ef6f36fe82f85765918e85df0fe 100644 (file)
--- a/drivers/acpi/acpi_ipmi.c
+++ b/drivers/acpi/acpi_ipmi.c
@@ -429,8 +429,7 @@ static void ipmi_msg_handler(struct ipmi_recv_msg *msg, void *user_msg_data)
         if (msg->recv_type == IPMI_RESPONSE_RECV_TYPE &&
             msg->msg.data_len == 1) {
                 if (msg->msg.data[0] == IPMI_TIMEOUT_COMPLETION_CODE) {
-                       dev_WARN_ONCE(dev, true,
-                                     "Unexpected response (timeout).\n");
+                       dev_dbg_once(dev, "Unexpected response (timeout).\n");
                         tx_msg->msg_done = ACPI_IPMI_TIMEOUT;
                 }
                 goto out_comp;
diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c

index b4c1a6a51da482a953051959279fc9d39cd29d49..03250e1f11039b99e6a25e1650b9d67c3cee41c9 100644 (file)
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -25,9 +25,11 @@
  ACPI_MODULE_NAME("platform");
  
  static const struct acpi_device_id forbidden_id_list[] = {
-       {"PNP0000", 0}, /* PIC */
-       {"PNP0100", 0}, /* Timer */
-       {"PNP0200", 0}, /* AT DMA Controller */
+       {"PNP0000",  0},        /* PIC */
+       {"PNP0100",  0},        /* Timer */
+       {"PNP0200",  0},        /* AT DMA Controller */
+       {"ACPI0009", 0},        /* IOxAPIC */
+       {"ACPI000A", 0},        /* IOAPIC */
         {"", 0},
  };
  
diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c

index 4467a8089ab890695ccf7072220d9c43d1f29c2d..0143135b3abe3749d8a3bab492eb67b2e63a5d01 100644 (file)
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -182,11 +182,6 @@ int __weak arch_register_cpu(int cpu)
  
  void __weak arch_unregister_cpu(int cpu) {}
  
-int __weak acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
-{
-       return -ENODEV;
-}
-
  static int acpi_processor_hotadd_init(struct acpi_processor *pr)
  {
         unsigned long long sta;
@@ -285,6 +280,13 @@ static int acpi_processor_get_info(struct acpi_device *device)
                 pr->acpi_id = value;
         }
  
+       if (acpi_duplicate_processor_id(pr->acpi_id)) {
+               dev_err(&device->dev,
+                       "Failed to get unique processor _UID (0x%x)\n",
+                       pr->acpi_id);
+               return -ENODEV;
+       }
+
         pr->phys_id = acpi_get_phys_id(pr->handle, device_declaration,
                                         pr->acpi_id);
         if (invalid_phys_cpuid(pr->phys_id))
@@ -585,7 +587,7 @@ static struct acpi_scan_handler processor_container_handler = {
  static int nr_unique_ids __initdata;
  
  /* The number of the duplicate processor IDs */
-static int nr_duplicate_ids __initdata;
+static int nr_duplicate_ids;
  
  /* Used to store the unique processor IDs */
  static int unique_processor_ids[] __initdata = {
@@ -593,7 +595,7 @@ static int unique_processor_ids[] __initdata = {
  };
  
  /* Used to store the duplicate processor IDs */
-static int duplicate_processor_ids[] __initdata = {
+static int duplicate_processor_ids[] = {
         [0 ... NR_CPUS - 1] = -1,
  };
  
@@ -638,28 +640,53 @@ static acpi_status __init acpi_processor_ids_walk(acpi_handle handle,
                                                   void **rv)
  {
         acpi_status status;
+       acpi_object_type acpi_type;
+       unsigned long long uid;
         union acpi_object object = { 0 };
         struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
  
-       status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
+       status = acpi_get_type(handle, &acpi_type);
         if (ACPI_FAILURE(status))
-               acpi_handle_info(handle, "Not get the processor object\n");
-       else
-               processor_validated_ids_update(object.processor.proc_id);
+               return false;
+
+       switch (acpi_type) {
+       case ACPI_TYPE_PROCESSOR:
+               status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
+               if (ACPI_FAILURE(status))
+                       goto err;
+               uid = object.processor.proc_id;
+               break;
+
+       case ACPI_TYPE_DEVICE:
+               status = acpi_evaluate_integer(handle, "_UID", NULL, &uid);
+               if (ACPI_FAILURE(status))
+                       goto err;
+               break;
+       default:
+               goto err;
+       }
+
+       processor_validated_ids_update(uid);
+       return true;
+
+err:
+       acpi_handle_info(handle, "Invalid processor object\n");
+       return false;
  
-       return AE_OK;
  }
  
-static void __init acpi_processor_check_duplicates(void)
+void __init acpi_processor_check_duplicates(void)
  {
-       /* Search all processor nodes in ACPI namespace */
+       /* check the correctness for all processors in ACPI namespace */
         acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
                                                 ACPI_UINT32_MAX,
                                                 acpi_processor_ids_walk,
                                                 NULL, NULL, NULL);
+       acpi_get_devices(ACPI_PROCESSOR_DEVICE_HID, acpi_processor_ids_walk,
+                                               NULL, NULL);
  }
  
-bool __init acpi_processor_validate_proc_id(int proc_id)
+bool acpi_duplicate_processor_id(int proc_id)
  {
         int i;
  
diff --git a/drivers/acpi/acpica/utresrc.c b/drivers/acpi/acpica/utresrc.c

index c86bae7b1d0fc4d0fce5832a1f2716f59d91a7e6..ff096d9755b925d9f72105f42993ebcc7c0522e1 100644 (file)
--- a/drivers/acpi/acpica/utresrc.c
+++ b/drivers/acpi/acpica/utresrc.c
@@ -421,10 +421,8 @@ acpi_ut_walk_aml_resources(struct acpi_walk_state *walk_state,
  
         ACPI_FUNCTION_TRACE(ut_walk_aml_resources);
  
-       /*
-        * The absolute minimum resource template is one end_tag descriptor.
-        * However, we will treat a lone end_tag as just a simple buffer.
-        */
+       /* The absolute minimum resource template is one end_tag descriptor */
+
         if (aml_length < sizeof(struct aml_resource_end_tag)) {
                 return_ACPI_STATUS(AE_AML_NO_RESOURCE_END_TAG);
         }
@@ -456,8 +454,9 @@ acpi_ut_walk_aml_resources(struct acpi_walk_state *walk_state,
                 /* Invoke the user function */
  
                 if (user_function) {
-                       status = user_function(aml, length, offset,
-                                              resource_index, context);
+                       status =
+                           user_function(aml, length, offset, resource_index,
+                                         context);
                         if (ACPI_FAILURE(status)) {
                                 return_ACPI_STATUS(status);
                         }
@@ -481,12 +480,6 @@ acpi_ut_walk_aml_resources(struct acpi_walk_state *walk_state,
                                 *context = aml;
                         }
  
-                       /* Check if buffer is defined to be longer than the resource length */
-
-                       if (aml_length > (offset + length)) {
-                               return_ACPI_STATUS(AE_AML_NO_RESOURCE_END_TAG);
-                       }
-
                         /* Normal exit */
  
                         return_ACPI_STATUS(AE_OK);
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c

index b192b42a835105a68038ce13d568fad7154497c4..79b3c9c5a3bc9497ea7e9ef5e26fced8b55617b2 100644 (file)
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -1073,6 +1073,7 @@ static int ghes_remove(struct platform_device *ghes_dev)
                 if (list_empty(&ghes_sci))
                         unregister_acpi_hed_notifier(&ghes_notifier_sci);
                 mutex_unlock(&ghes_list_mutex);
+               synchronize_rcu();
                 break;
         case ACPI_HEST_NOTIFY_NMI:
                 ghes_nmi_remove(ghes);
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c

index 80cb5eb75b633db8aa278b5e709cfddd697f9a7e..34fbe027e73a26f195f981d2fbd373608f724415 100644 (file)
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1249,7 +1249,6 @@ static int __init acpi_init(void)
         acpi_wakeup_device_init();
         acpi_debugger_init();
         acpi_setup_sb_notify_handler();
-       acpi_set_processor_mapping();
         return 0;
  }
  
diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c

index fb19e1cdb6415ac8d1913e7017106911dcb7a7dd..edc8663b5db313dc83e47b58cdec33544f236f67 100644 (file)
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -99,13 +99,13 @@ static int find_child_checks(struct acpi_device *adev, bool check_children)
                 return -ENODEV;
  
         /*
-        * If the device has a _HID (or _CID) returning a valid ACPI/PNP
-        * device ID, it is better to make it look less attractive here, so that
-        * the other device with the same _ADR value (that may not have a valid
-        * device ID) can be matched going forward.  [This means a second spec
-        * violation in a row, so whatever we do here is best effort anyway.]
+        * If the device has a _HID returning a valid ACPI/PNP device ID, it is
+        * better to make it look less attractive here, so that the other device
+        * with the same _ADR value (that may not have a valid device ID) can be
+        * matched going forward.  [This means a second spec violation in a row,
+        * so whatever we do here is best effort anyway.]
          */
-       return sta_present && list_empty(&adev->pnp.ids) ?
+       return sta_present && !adev->pnp.type.platform_id ?
                         FIND_CHILD_MAX_SCORE : FIND_CHILD_MIN_SCORE;
  }
  
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h

index 219b90bc092297c753639f84972939710de25298..f15900132912a4349ecc5b6efe6a2d2e8ff6530f 100644 (file)
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -41,8 +41,10 @@ void acpi_gpe_apply_masked_gpes(void);
  void acpi_container_init(void);
  void acpi_memory_hotplug_init(void);
  #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
+void pci_ioapic_remove(struct acpi_pci_root *root);
  int acpi_ioapic_remove(struct acpi_pci_root *root);
  #else
+static inline void pci_ioapic_remove(struct acpi_pci_root *root) { return; }
  static inline int acpi_ioapic_remove(struct acpi_pci_root *root) { return 0; }
  #endif
  #ifdef CONFIG_ACPI_DOCK
diff --git a/drivers/acpi/ioapic.c b/drivers/acpi/ioapic.c

index 6d7ce6e12aaa6662b360c391f8c3b5e84a84ddc7..7e4fbf9a53a3ccd19488b2eb17de2d1be7dfe4de 100644 (file)
--- a/drivers/acpi/ioapic.c
+++ b/drivers/acpi/ioapic.c
@@ -45,6 +45,12 @@ static acpi_status setup_res(struct acpi_resource *acpi_res, void *data)
         struct resource *res = data;
         struct resource_win win;
  
+       /*
+        * We might assign this to 'res' later, make sure all pointers are
+        * cleared before the resource is added to the global list
+        */
+       memset(&win, 0, sizeof(win));
+
         res->flags = 0;
         if (acpi_dev_filter_resource_type(acpi_res, IORESOURCE_MEM))
                 return AE_OK;
@@ -206,24 +212,34 @@ int acpi_ioapic_add(acpi_handle root_handle)
         return ACPI_SUCCESS(status) && ACPI_SUCCESS(retval) ? 0 : -ENODEV;
  }
  
-int acpi_ioapic_remove(struct acpi_pci_root *root)
+void pci_ioapic_remove(struct acpi_pci_root *root)
  {
-       int retval = 0;
         struct acpi_pci_ioapic *ioapic, *tmp;
  
         mutex_lock(&ioapic_list_lock);
         list_for_each_entry_safe(ioapic, tmp, &ioapic_list, list) {
                 if (root->device->handle != ioapic->root_handle)
                         continue;
-
-               if (acpi_unregister_ioapic(ioapic->handle, ioapic->gsi_base))
-                       retval = -EBUSY;
-
                 if (ioapic->pdev) {
                         pci_release_region(ioapic->pdev, 0);
                         pci_disable_device(ioapic->pdev);
                         pci_dev_put(ioapic->pdev);
                 }
+       }
+       mutex_unlock(&ioapic_list_lock);
+}
+
+int acpi_ioapic_remove(struct acpi_pci_root *root)
+{
+       int retval = 0;
+       struct acpi_pci_ioapic *ioapic, *tmp;
+
+       mutex_lock(&ioapic_list_lock);
+       list_for_each_entry_safe(ioapic, tmp, &ioapic_list, list) {
+               if (root->device->handle != ioapic->root_handle)
+                       continue;
+               if (acpi_unregister_ioapic(ioapic->handle, ioapic->gsi_base))
+                       retval = -EBUSY;
                 if (ioapic->res.flags && ioapic->res.parent)
                         release_resource(&ioapic->res);
                 list_del(&ioapic->list);
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c

index 662036bdc65eca8d531886cc658fd9829cc60c00..c8ea9d698cd0f30546d731df6429b3f556140a76 100644 (file)
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1617,7 +1617,11 @@ static int cmp_map(const void *m0, const void *m1)
         const struct nfit_set_info_map *map0 = m0;
         const struct nfit_set_info_map *map1 = m1;
  
-       return map0->region_offset - map1->region_offset;
+       if (map0->region_offset < map1->region_offset)
+               return -1;
+       else if (map0->region_offset > map1->region_offset)
+               return 1;
+       return 0;
  }
  
  /* Retrieve the nth entry referencing this spa */
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c

index bf601d4df8cfcbb6e579b00cbff75efcb24d8071..919be0aa2578760d466031f866a0e8772adf7f97 100644 (file)
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -648,12 +648,12 @@ static void acpi_pci_root_remove(struct acpi_device *device)
  
         pci_stop_root_bus(root->bus);
  
-       WARN_ON(acpi_ioapic_remove(root));
-
+       pci_ioapic_remove(root);
         device_set_run_wake(root->bus->bridge, false);
         pci_acpi_remove_bus_pm_notifier(device);
  
         pci_remove_root_bus(root->bus);
+       WARN_ON(acpi_ioapic_remove(root));
  
         dmar_device_remove(device->handle);
  
diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c

index fcd4ce6f78d5d387080343d96738c3b7275d0324..1c2b846c577604d0b471e87d19cecea6caa286f9 100644 (file)
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -200,6 +200,7 @@ static int acpi_power_get_list_state(struct list_head *list, int *state)
                 return -EINVAL;
  
         /* The state of the list is 'on' IFF all resources are 'on'. */
+       cur_state = 0;
         list_for_each_entry(entry, list, node) {
                 struct acpi_power_resource *resource = entry->resource;
                 acpi_handle handle = resource->device.handle;
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c

index 611a5585a9024a728c71e60ada951b3a73936708..b933061b6b607c467e20317412c63c78728396fc 100644 (file)
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -32,12 +32,12 @@ static struct acpi_table_madt *get_madt_table(void)
  }
  
  static int map_lapic_id(struct acpi_subtable_header *entry,
-                u32 acpi_id, phys_cpuid_t *apic_id, bool ignore_disabled)
+                u32 acpi_id, phys_cpuid_t *apic_id)
  {
         struct acpi_madt_local_apic *lapic =
                 container_of(entry, struct acpi_madt_local_apic, header);
  
-       if (ignore_disabled && !(lapic->lapic_flags & ACPI_MADT_ENABLED))
+       if (!(lapic->lapic_flags & ACPI_MADT_ENABLED))
                 return -ENODEV;
  
         if (lapic->processor_id != acpi_id)
@@ -48,13 +48,12 @@ static int map_lapic_id(struct acpi_subtable_header *entry,
  }
  
  static int map_x2apic_id(struct acpi_subtable_header *entry,
-               int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id,
-               bool ignore_disabled)
+               int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id)
  {
         struct acpi_madt_local_x2apic *apic =
                 container_of(entry, struct acpi_madt_local_x2apic, header);
  
-       if (ignore_disabled && !(apic->lapic_flags & ACPI_MADT_ENABLED))
+       if (!(apic->lapic_flags & ACPI_MADT_ENABLED))
                 return -ENODEV;
  
         if (device_declaration && (apic->uid == acpi_id)) {
@@ -66,13 +65,12 @@ static int map_x2apic_id(struct acpi_subtable_header *entry,
  }
  
  static int map_lsapic_id(struct acpi_subtable_header *entry,
-               int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id,
-               bool ignore_disabled)
+               int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id)
  {
         struct acpi_madt_local_sapic *lsapic =
                 container_of(entry, struct acpi_madt_local_sapic, header);
  
-       if (ignore_disabled && !(lsapic->lapic_flags & ACPI_MADT_ENABLED))
+       if (!(lsapic->lapic_flags & ACPI_MADT_ENABLED))
                 return -ENODEV;
  
         if (device_declaration) {
@@ -89,13 +87,12 @@ static int map_lsapic_id(struct acpi_subtable_header *entry,
   * Retrieve the ARM CPU physical identifier (MPIDR)
   */
  static int map_gicc_mpidr(struct acpi_subtable_header *entry,
-               int device_declaration, u32 acpi_id, phys_cpuid_t *mpidr,
-               bool ignore_disabled)
+               int device_declaration, u32 acpi_id, phys_cpuid_t *mpidr)
  {
         struct acpi_madt_generic_interrupt *gicc =
             container_of(entry, struct acpi_madt_generic_interrupt, header);
  
-       if (ignore_disabled && !(gicc->flags & ACPI_MADT_ENABLED))
+       if (!(gicc->flags & ACPI_MADT_ENABLED))
                 return -ENODEV;
  
         /* device_declaration means Device object in DSDT, in the
@@ -112,7 +109,7 @@ static int map_gicc_mpidr(struct acpi_subtable_header *entry,
  }
  
  static phys_cpuid_t map_madt_entry(struct acpi_table_madt *madt,
-                                  int type, u32 acpi_id, bool ignore_disabled)
+                                  int type, u32 acpi_id)
  {
         unsigned long madt_end, entry;
         phys_cpuid_t phys_id = PHYS_CPUID_INVALID;      /* CPU hardware ID */
@@ -130,20 +127,16 @@ static phys_cpuid_t map_madt_entry(struct acpi_table_madt *madt,
                 struct acpi_subtable_header *header =
                         (struct acpi_subtable_header *)entry;
                 if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) {
-                       if (!map_lapic_id(header, acpi_id, &phys_id,
-                                         ignore_disabled))
+                       if (!map_lapic_id(header, acpi_id, &phys_id))
                                 break;
                 } else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC) {
-                       if (!map_x2apic_id(header, type, acpi_id, &phys_id,
-                                          ignore_disabled))
+                       if (!map_x2apic_id(header, type, acpi_id, &phys_id))
                                 break;
                 } else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) {
-                       if (!map_lsapic_id(header, type, acpi_id, &phys_id,
-                                          ignore_disabled))
+                       if (!map_lsapic_id(header, type, acpi_id, &phys_id))
                                 break;
                 } else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT) {
-                       if (!map_gicc_mpidr(header, type, acpi_id, &phys_id,
-                                           ignore_disabled))
+                       if (!map_gicc_mpidr(header, type, acpi_id, &phys_id))
                                 break;
                 }
                 entry += header->length;
@@ -161,15 +154,14 @@ phys_cpuid_t __init acpi_map_madt_entry(u32 acpi_id)
         if (!madt)
                 return PHYS_CPUID_INVALID;
  
-       rv = map_madt_entry(madt, 1, acpi_id, true);
+       rv = map_madt_entry(madt, 1, acpi_id);
  
         acpi_put_table((struct acpi_table_header *)madt);
  
         return rv;
  }
  
-static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id,
-                                 bool ignore_disabled)
+static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id)
  {
         struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
         union acpi_object *obj;
@@ -190,38 +182,30 @@ static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id,
  
         header = (struct acpi_subtable_header *)obj->buffer.pointer;
         if (header->type == ACPI_MADT_TYPE_LOCAL_APIC)
-               map_lapic_id(header, acpi_id, &phys_id, ignore_disabled);
+               map_lapic_id(header, acpi_id, &phys_id);
         else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC)
-               map_lsapic_id(header, type, acpi_id, &phys_id, ignore_disabled);
+               map_lsapic_id(header, type, acpi_id, &phys_id);
         else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC)
-               map_x2apic_id(header, type, acpi_id, &phys_id, ignore_disabled);
+               map_x2apic_id(header, type, acpi_id, &phys_id);
         else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT)
-               map_gicc_mpidr(header, type, acpi_id, &phys_id,
-                              ignore_disabled);
+               map_gicc_mpidr(header, type, acpi_id, &phys_id);
  
  exit:
         kfree(buffer.pointer);
         return phys_id;
  }
  
-static phys_cpuid_t __acpi_get_phys_id(acpi_handle handle, int type,
-                                      u32 acpi_id, bool ignore_disabled)
+phys_cpuid_t acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id)
  {
         phys_cpuid_t phys_id;
  
-       phys_id = map_mat_entry(handle, type, acpi_id, ignore_disabled);
+       phys_id = map_mat_entry(handle, type, acpi_id);
         if (invalid_phys_cpuid(phys_id))
-               phys_id = map_madt_entry(get_madt_table(), type, acpi_id,
-                                          ignore_disabled);
+               phys_id = map_madt_entry(get_madt_table(), type, acpi_id);
  
         return phys_id;
  }
  
-phys_cpuid_t acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id)
-{
-       return __acpi_get_phys_id(handle, type, acpi_id, true);
-}
-
  int acpi_map_cpuid(phys_cpuid_t phys_id, u32 acpi_id)
  {
  #ifdef CONFIG_SMP
@@ -278,79 +262,6 @@ int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id)
  }
  EXPORT_SYMBOL_GPL(acpi_get_cpuid);
  
-#ifdef CONFIG_ACPI_HOTPLUG_CPU
-static bool __init
-map_processor(acpi_handle handle, phys_cpuid_t *phys_id, int *cpuid)
-{
-       int type, id;
-       u32 acpi_id;
-       acpi_status status;
-       acpi_object_type acpi_type;
-       unsigned long long tmp;
-       union acpi_object object = { 0 };
-       struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
-
-       status = acpi_get_type(handle, &acpi_type);
-       if (ACPI_FAILURE(status))
-               return false;
-
-       switch (acpi_type) {
-       case ACPI_TYPE_PROCESSOR:
-               status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
-               if (ACPI_FAILURE(status))
-                       return false;
-               acpi_id = object.processor.proc_id;
-
-               /* validate the acpi_id */
-               if(acpi_processor_validate_proc_id(acpi_id))
-                       return false;
-               break;
-       case ACPI_TYPE_DEVICE:
-               status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp);
-               if (ACPI_FAILURE(status))
-                       return false;
-               acpi_id = tmp;
-               break;
-       default:
-               return false;
-       }
-
-       type = (acpi_type == ACPI_TYPE_DEVICE) ? 1 : 0;
-
-       *phys_id = __acpi_get_phys_id(handle, type, acpi_id, false);
-       id = acpi_map_cpuid(*phys_id, acpi_id);
-
-       if (id < 0)
-               return false;
-       *cpuid = id;
-       return true;
-}
-
-static acpi_status __init
-set_processor_node_mapping(acpi_handle handle, u32 lvl, void *context,
-                          void **rv)
-{
-       phys_cpuid_t phys_id;
-       int cpu_id;
-
-       if (!map_processor(handle, &phys_id, &cpu_id))
-               return AE_ERROR;
-
-       acpi_map_cpu2node(handle, cpu_id, phys_id);
-       return AE_OK;
-}
-
-void __init acpi_set_processor_mapping(void)
-{
-       /* Set persistent cpu <-> node mapping for all processors. */
-       acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
-                           ACPI_UINT32_MAX, set_processor_node_mapping,
-                           NULL, NULL, NULL);
-}
-#else
-void __init acpi_set_processor_mapping(void) {}
-#endif /* CONFIG_ACPI_HOTPLUG_CPU */
-
  #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
  static int get_ioapic_id(struct acpi_subtable_header *entry, u32 gsi_base,
                          u64 *phys_addr, int *ioapic_id)
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c

index 192691880d55c9499e1d77e68058a56d8e5318f3..2433569b02ef5cf40dd82cebec6fc83186f4dc3b 100644 (file)
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1857,15 +1857,20 @@ static void acpi_bus_attach(struct acpi_device *device)
                 return;
  
         device->flags.match_driver = true;
-       if (!ret) {
-               ret = device_attach(&device->dev);
-               if (ret < 0)
-                       return;
-
-               if (!ret && device->pnp.type.platform_id)
-                       acpi_default_enumeration(device);
+       if (ret > 0) {
+               acpi_device_set_enumerated(device);
+               goto ok;
         }
  
+       ret = device_attach(&device->dev);
+       if (ret < 0)
+               return;
+
+       if (ret > 0 || !device->pnp.type.platform_id)
+               acpi_device_set_enumerated(device);
+       else
+               acpi_default_enumeration(device);
+
   ok:
         list_for_each_entry(child, &device->children, node)
                 acpi_bus_attach(child);
diff --git a/drivers/acpi/spcr.c b/drivers/acpi/spcr.c

index 01c94669a2b0ad91976daf9f3c7ef3338b48e965..3afa8c1fa12702c251d5d3654e2026ba0ebfcd62 100644 (file)
--- a/drivers/acpi/spcr.c
+++ b/drivers/acpi/spcr.c
@@ -30,7 +30,7 @@ static bool qdf2400_erratum_44_present(struct acpi_table_header *h)
                 return true;
  
         if (!memcmp(h->oem_table_id, "QDF2400 ", ACPI_OEM_TABLE_ID_SIZE) &&
-                       h->oem_revision == 0)
+                       h->oem_revision == 1)
                 return true;
  
         return false;
diff --git a/drivers/ata/ahci_qoriq.c b/drivers/ata/ahci_qoriq.c

index 85d833289f28f85de9aa98efe52a05a921cdc3bd..4c96f3ac4976d9bbc306010ca4fd76bb7d33015e 100644 (file)
--- a/drivers/ata/ahci_qoriq.c
+++ b/drivers/ata/ahci_qoriq.c
@@ -177,7 +177,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
         case AHCI_LS1043A:
                 if (!qpriv->ecc_addr)
                         return -EINVAL;
-               writel(ECC_DIS_ARMV8_CH2, qpriv->ecc_addr);
+               writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2,
+                               qpriv->ecc_addr);
                 writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
                 writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
                 if (qpriv->is_dmacoherent)
@@ -194,7 +195,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
         case AHCI_LS1046A:
                 if (!qpriv->ecc_addr)
                         return -EINVAL;
-               writel(ECC_DIS_ARMV8_CH2, qpriv->ecc_addr);
+               writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2,
+                               qpriv->ecc_addr);
                 writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
                 writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
                 if (qpriv->is_dmacoherent)
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c

index 2bd92dca3e6204027f6c1b5fb07ba519cbe039fa..274d6d7193d7caa9b57f111962aa6e245ebc8f7c 100644 (file)
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -1482,7 +1482,6 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
                 break;
  
         default:
-               WARN_ON_ONCE(1);
                 return AC_ERR_SYSTEM;
         }
  
diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c

index 46698232e6bff069293200a5da842e196a4a0730..19e6e539a061b93a6ac81975a69b3cdba7b1d699 100644 (file)
--- a/drivers/ata/libata-transport.c
+++ b/drivers/ata/libata-transport.c
@@ -224,7 +224,6 @@ static DECLARE_TRANSPORT_CLASS(ata_port_class,
  
  static void ata_tport_release(struct device *dev)
  {
-       put_device(dev->parent);
  }
  
  /**
@@ -284,7 +283,7 @@ int ata_tport_add(struct device *parent,
         device_initialize(dev);
         dev->type = &ata_port_type;
  
-       dev->parent = get_device(parent);
+       dev->parent = parent;
         dev->release = ata_tport_release;
         dev_set_name(dev, "ata%d", ap->print_id);
         transport_setup_device(dev);
@@ -348,7 +347,6 @@ static DECLARE_TRANSPORT_CLASS(ata_link_class,
  
  static void ata_tlink_release(struct device *dev)
  {
-       put_device(dev->parent);
  }
  
  /**
@@ -410,7 +408,7 @@ int ata_tlink_add(struct ata_link *link)
         int error;
  
         device_initialize(dev);
-       dev->parent = get_device(&ap->tdev);
+       dev->parent = &ap->tdev;
         dev->release = ata_tlink_release;
         if (ata_is_host_link(link))
                 dev_set_name(dev, "link%d", ap->print_id);
@@ -589,7 +587,6 @@ static DECLARE_TRANSPORT_CLASS(ata_dev_class,
  
  static void ata_tdev_release(struct device *dev)
  {
-       put_device(dev->parent);
  }
  
  /**
@@ -662,7 +659,7 @@ static int ata_tdev_add(struct ata_device *ata_dev)
         int error;
  
         device_initialize(dev);
-       dev->parent = get_device(&link->tdev);
+       dev->parent = &link->tdev;
         dev->release = ata_tdev_release;
         if (ata_is_host_link(link))
                 dev_set_name(dev, "dev%d.%d", ap->print_id,ata_dev->devno);
diff --git a/drivers/ata/pata_atiixp.c b/drivers/ata/pata_atiixp.c

index 6c9aa95a9a050cc070ab222a382f5dabd55ec7ea..49d705c9f0f7b9c6b2ef2549769b6901438c2854 100644 (file)
--- a/drivers/ata/pata_atiixp.c
+++ b/drivers/ata/pata_atiixp.c
@@ -278,11 +278,6 @@ static int atiixp_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
         };
         const struct ata_port_info *ppi[] = { &info, &info };
  
-       /* SB600/700 don't have secondary port wired */
-       if ((pdev->device == PCI_DEVICE_ID_ATI_IXP600_IDE) ||
-               (pdev->device == PCI_DEVICE_ID_ATI_IXP700_IDE))
-               ppi[1] = &ata_dummy_port_info;
-
         return ata_pci_bmdma_init_one(pdev, ppi, &atiixp_sht, NULL,
                                       ATA_HOST_PARALLEL_SCAN);
  }
diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c

index 0636d84fbefe0acc889004b39e83c0137fc0f6a0..f3f538eec7b3bb85b682368ffb42496989c97263 100644 (file)
--- a/drivers/ata/sata_via.c
+++ b/drivers/ata/sata_via.c
@@ -644,14 +644,16 @@ static void svia_configure(struct pci_dev *pdev, int board_id,
                 pci_write_config_byte(pdev, SATA_NATIVE_MODE, tmp8);
         }
  
-       /* enable IRQ on hotplug */
-       pci_read_config_byte(pdev, SVIA_MISC_3, &tmp8);
-       if ((tmp8 & SATA_HOTPLUG) != SATA_HOTPLUG) {
-               dev_dbg(&pdev->dev,
-                       "enabling SATA hotplug (0x%x)\n",
-                       (int) tmp8);
-               tmp8 |= SATA_HOTPLUG;
-               pci_write_config_byte(pdev, SVIA_MISC_3, tmp8);
+       if (board_id == vt6421) {
+               /* enable IRQ on hotplug */
+               pci_read_config_byte(pdev, SVIA_MISC_3, &tmp8);
+               if ((tmp8 & SATA_HOTPLUG) != SATA_HOTPLUG) {
+                       dev_dbg(&pdev->dev,
+                               "enabling SATA hotplug (0x%x)\n",
+                               (int) tmp8);
+                       tmp8 |= SATA_HOTPLUG;
+                       pci_write_config_byte(pdev, SVIA_MISC_3, tmp8);
+               }
         }
  
         /*
diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c

index bf43b5d2aafcafb2b0bc32652cacac6750c5bff9..83f1439e57fd8cb5d40e0a9d435a8a7fe62939c4 100644 (file)
--- a/drivers/auxdisplay/img-ascii-lcd.c
+++ b/drivers/auxdisplay/img-ascii-lcd.c
@@ -218,6 +218,7 @@ static const struct of_device_id img_ascii_lcd_matches[] = {
         { .compatible = "img,boston-lcd", .data = &boston_config },
         { .compatible = "mti,malta-lcd", .data = &malta_config },
         { .compatible = "mti,sead3-lcd", .data = &sead3_config },
+       { /* sentinel */ }
  };
  
  /**
diff --git a/drivers/base/core.c b/drivers/base/core.c

index 684bda4d14a187b41ff453bf33ad8df4774c977f..6bb60fb6a30b7b9b4fd42e2872261317b38c22b5 100644 (file)
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -639,11 +639,6 @@ int lock_device_hotplug_sysfs(void)
         return restart_syscall();
  }
  
-void assert_held_device_hotplug(void)
-{
-       lockdep_assert_held(&device_hotplug_lock);
-}
-
  #ifdef CONFIG_BLOCK
  static inline int device_is_not_partition(struct device *dev)
  {
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig

index f744de7a0f9b27744b52a565becff82707d0274e..19df4918e37ea6a59ef1ed5c4a4cd0110191886e 100644 (file)
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -312,22 +312,6 @@ config BLK_DEV_SKD
  
         Use device /dev/skd$N amd /dev/skd$Np$M.
  
-config BLK_DEV_OSD
-       tristate "OSD object-as-blkdev support"
-       depends on SCSI_OSD_ULD
-       ---help---
-         Saying Y or M here will allow the exporting of a single SCSI
-         OSD (object-based storage) object as a Linux block device.
-
-         For example, if you create a 2G object on an OSD device,
-         you can then use this module to present that 2G object as
-         a Linux block device.
-
-         To compile this driver as a module, choose M here: the
-         module will be called osdblk.
-
-         If unsure, say N.
-
  config BLK_DEV_SX8
         tristate "Promise SATA SX8 support"
         depends on PCI
@@ -434,23 +418,6 @@ config ATA_OVER_ETH
         This driver provides Support for ATA over Ethernet block
         devices like the Coraid EtherDrive (R) Storage Blade.
  
-config MG_DISK
-       tristate "mGine mflash, gflash support"
-       depends on ARM && GPIOLIB
-       help
-         mGine mFlash(gFlash) block device driver
-
-config MG_DISK_RES
-       int "Size of reserved area before MBR"
-       depends on MG_DISK
-       default 0
-       help
-         Define size of reserved area that usually used for boot. Unit is KB.
-         All of the block device operation will be taken this value as start
-         offset
-         Examples:
-                       1024 => 1 MB
-
  config SUNVDC
         tristate "Sun Virtual Disk Client support"
         depends on SUN_LDOMS
@@ -512,19 +479,7 @@ config VIRTIO_BLK_SCSI
           Enable support for SCSI passthrough (e.g. the SG_IO ioctl) on
           virtio-blk devices.  This is only supported for the legacy
           virtio protocol and not enabled by default by any hypervisor.
-         Your probably want to virtio-scsi instead.
-
-config BLK_DEV_HD
-       bool "Very old hard disk (MFM/RLL/IDE) driver"
-       depends on HAVE_IDE
-       depends on !ARM || ARCH_RPC || BROKEN
-       help
-         This is a very old hard disk driver that lacks the enhanced
-         functionality of the newer ones.
-
-         It is required for systems with ancient MFM/RLL/ESDI drives.
-
-         If unsure, say N.
+         You probably want to use virtio-scsi instead.
  
  config BLK_DEV_RBD
         tristate "Rados block device (RBD)"
diff --git a/drivers/block/Makefile b/drivers/block/Makefile

index 1e9661e26f294fcbfec07a064adb6aad1789101d..ec8c36897b753f1c558c5db801807820515b5b35 100644 (file)
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -19,10 +19,8 @@ obj-$(CONFIG_BLK_CPQ_CISS_DA)  += cciss.o
  obj-$(CONFIG_BLK_DEV_DAC960)   += DAC960.o
  obj-$(CONFIG_XILINX_SYSACE)    += xsysace.o
  obj-$(CONFIG_CDROM_PKTCDVD)    += pktcdvd.o
-obj-$(CONFIG_MG_DISK)          += mg_disk.o
  obj-$(CONFIG_SUNVDC)           += sunvdc.o
  obj-$(CONFIG_BLK_DEV_SKD)      += skd.o
-obj-$(CONFIG_BLK_DEV_OSD)      += osdblk.o
  
  obj-$(CONFIG_BLK_DEV_UMEM)     += umem.o
  obj-$(CONFIG_BLK_DEV_NBD)      += nbd.o
@@ -30,7 +28,6 @@ obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o
  obj-$(CONFIG_VIRTIO_BLK)       += virtio_blk.o
  
  obj-$(CONFIG_BLK_DEV_SX8)      += sx8.o
-obj-$(CONFIG_BLK_DEV_HD)       += hd.o
  
  obj-$(CONFIG_XEN_BLKDEV_FRONTEND)      += xen-blkfront.o
  obj-$(CONFIG_XEN_BLKDEV_BACKEND)       += xen-blkback/
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c

index 2104b1b4ccda276dd324b12b843468c322a39ed5..fa69ecd52cb57cb226e1f9f177ef0c464ee3155a 100644 (file)
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -617,12 +617,12 @@ static void fd_error( void )
         if (!fd_request)
                 return;
  
-       fd_request->errors++;
-       if (fd_request->errors >= MAX_ERRORS) {
+       fd_request->error_count++;
+       if (fd_request->error_count >= MAX_ERRORS) {
                 printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive );
                 fd_end_request_cur(-EIO);
         }
-       else if (fd_request->errors == RECALIBRATE_ERRORS) {
+       else if (fd_request->error_count == RECALIBRATE_ERRORS) {
                 printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive );
                 if (SelectedDrive != -1)
                         SUD.track = -1;
@@ -1386,7 +1386,7 @@ static void setup_req_params( int drive )
         ReqData = ReqBuffer + 512 * ReqCnt;
  
         if (UseTrackbuffer)
-               read_track = (ReqCmd == READ && fd_request->errors == 0);
+               read_track = (ReqCmd == READ && fd_request->error_count == 0);
         else
                 read_track = 0;
  
@@ -1409,8 +1409,10 @@ static struct request *set_next_request(void)
                         fdc_queue = 0;
                 if (q) {
                         rq = blk_fetch_request(q);
-                       if (rq)
+                       if (rq) {
+                               rq->error_count = 0;
                                 break;
+                       }
                 }
         } while (fdc_queue != old_pos);
  
diff --git a/drivers/block/brd.c b/drivers/block/brd.c

index 3adc32a3153b2366d2d4deb652f22a1c61e06756..4ec84d504780d9b606efe3dfdfb6910d1380d0bc 100644 (file)
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -134,28 +134,6 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
         return page;
  }
  
-static void brd_free_page(struct brd_device *brd, sector_t sector)
-{
-       struct page *page;
-       pgoff_t idx;
-
-       spin_lock(&brd->brd_lock);
-       idx = sector >> PAGE_SECTORS_SHIFT;
-       page = radix_tree_delete(&brd->brd_pages, idx);
-       spin_unlock(&brd->brd_lock);
-       if (page)
-               __free_page(page);
-}
-
-static void brd_zero_page(struct brd_device *brd, sector_t sector)
-{
-       struct page *page;
-
-       page = brd_lookup_page(brd, sector);
-       if (page)
-               clear_highpage(page);
-}
-
  /*
   * Free all backing store pages and radix tree. This must only be called when
   * there are no other users of the device.
@@ -212,24 +190,6 @@ static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
         return 0;
  }
  
-static void discard_from_brd(struct brd_device *brd,
-                       sector_t sector, size_t n)
-{
-       while (n >= PAGE_SIZE) {
-               /*
-                * Don't want to actually discard pages here because
-                * re-allocating the pages can result in writeback
-                * deadlocks under heavy load.
-                */
-               if (0)
-                       brd_free_page(brd, sector);
-               else
-                       brd_zero_page(brd, sector);
-               sector += PAGE_SIZE >> SECTOR_SHIFT;
-               n -= PAGE_SIZE;
-       }
-}
-
  /*
   * Copy n bytes from src to the brd starting at sector. Does not sleep.
   */
@@ -338,14 +298,6 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
         if (bio_end_sector(bio) > get_capacity(bdev->bd_disk))
                 goto io_error;
  
-       if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
-               if (sector & ((PAGE_SIZE >> SECTOR_SHIFT) - 1) ||
-                   bio->bi_iter.bi_size & ~PAGE_MASK)
-                       goto io_error;
-               discard_from_brd(brd, sector, bio->bi_iter.bi_size);
-               goto out;
-       }
-
         bio_for_each_segment(bvec, bio, iter) {
                 unsigned int len = bvec.bv_len;
                 int err;
@@ -357,7 +309,6 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
                 sector += len >> SECTOR_SHIFT;
         }
  
-out:
         bio_endio(bio);
         return BLK_QC_T_NONE;
  io_error:
@@ -464,11 +415,6 @@ static struct brd_device *brd_alloc(int i)
          *  is harmless)
          */
         blk_queue_physical_block_size(brd->brd_queue, PAGE_SIZE);
-
-       brd->brd_queue->limits.discard_granularity = PAGE_SIZE;
-       blk_queue_max_discard_sectors(brd->brd_queue, UINT_MAX);
-       brd->brd_queue->limits.discard_zeroes_data = 1;
-       queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue);
  #ifdef CONFIG_BLK_DEV_RAM_DAX
         queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue);
  #endif
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c

index 8e1a4554951c0d4f9374bb63d0cddb83c2a210d0..cd375503f7b0d83558280e9865ee17c967b706a8 100644 (file)
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1864,8 +1864,7 @@ static void cciss_softirq_done(struct request *rq)
         /* set the residual count for pc requests */
         if (blk_rq_is_passthrough(rq))
                 scsi_req(rq)->resid_len = c->err_info->ResidualCnt;
-
-       blk_end_request_all(rq, (rq->errors == 0) ? 0 : -EIO);
+       blk_end_request_all(rq, scsi_req(rq)->result ? -EIO : 0);
  
         spin_lock_irqsave(&h->lock, flags);
         cmd_free(h, c);
@@ -3140,18 +3139,19 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
  {
         int retry_cmd = 0;
         struct request *rq = cmd->rq;
+       struct scsi_request *sreq = scsi_req(rq);
  
-       rq->errors = 0;
+       sreq->result = 0;
  
         if (timeout)
-               rq->errors = make_status_bytes(0, 0, 0, DRIVER_TIMEOUT);
+               sreq->result = make_status_bytes(0, 0, 0, DRIVER_TIMEOUT);
  
         if (cmd->err_info->CommandStatus == 0)  /* no error has occurred */
                 goto after_error_processing;
  
         switch (cmd->err_info->CommandStatus) {
         case CMD_TARGET_STATUS:
-               rq->errors = evaluate_target_status(h, cmd, &retry_cmd);
+               sreq->result = evaluate_target_status(h, cmd, &retry_cmd);
                 break;
         case CMD_DATA_UNDERRUN:
                 if (!blk_rq_is_passthrough(cmd->rq)) {
@@ -3169,7 +3169,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
         case CMD_INVALID:
                 dev_warn(&h->pdev->dev, "cciss: cmd %p is "
                        "reported invalid\n", cmd);
-               rq->errors = make_status_bytes(SAM_STAT_GOOD,
+               sreq->result = make_status_bytes(SAM_STAT_GOOD,
                         cmd->err_info->CommandStatus, DRIVER_OK,
                         blk_rq_is_passthrough(cmd->rq) ?
                                 DID_PASSTHROUGH : DID_ERROR);
@@ -3177,7 +3177,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
         case CMD_PROTOCOL_ERR:
                 dev_warn(&h->pdev->dev, "cciss: cmd %p has "
                        "protocol error\n", cmd);
-               rq->errors = make_status_bytes(SAM_STAT_GOOD,
+               sreq->result = make_status_bytes(SAM_STAT_GOOD,
                         cmd->err_info->CommandStatus, DRIVER_OK,
                         blk_rq_is_passthrough(cmd->rq) ?
                                 DID_PASSTHROUGH : DID_ERROR);
@@ -3185,7 +3185,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
         case CMD_HARDWARE_ERR:
                 dev_warn(&h->pdev->dev, "cciss: cmd %p had "
                        " hardware error\n", cmd);
-               rq->errors = make_status_bytes(SAM_STAT_GOOD,
+               sreq->result = make_status_bytes(SAM_STAT_GOOD,
                         cmd->err_info->CommandStatus, DRIVER_OK,
                         blk_rq_is_passthrough(cmd->rq) ?
                                 DID_PASSTHROUGH : DID_ERROR);
@@ -3193,7 +3193,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
         case CMD_CONNECTION_LOST:
                 dev_warn(&h->pdev->dev, "cciss: cmd %p had "
                        "connection lost\n", cmd);
-               rq->errors = make_status_bytes(SAM_STAT_GOOD,
+               sreq->result = make_status_bytes(SAM_STAT_GOOD,
                         cmd->err_info->CommandStatus, DRIVER_OK,
                         blk_rq_is_passthrough(cmd->rq) ?
                                 DID_PASSTHROUGH : DID_ERROR);
@@ -3201,7 +3201,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
         case CMD_ABORTED:
                 dev_warn(&h->pdev->dev, "cciss: cmd %p was "
                        "aborted\n", cmd);
-               rq->errors = make_status_bytes(SAM_STAT_GOOD,
+               sreq->result = make_status_bytes(SAM_STAT_GOOD,
                         cmd->err_info->CommandStatus, DRIVER_OK,
                         blk_rq_is_passthrough(cmd->rq) ?
                                 DID_PASSTHROUGH : DID_ABORT);
@@ -3209,7 +3209,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
         case CMD_ABORT_FAILED:
                 dev_warn(&h->pdev->dev, "cciss: cmd %p reports "
                        "abort failed\n", cmd);
-               rq->errors = make_status_bytes(SAM_STAT_GOOD,
+               sreq->result = make_status_bytes(SAM_STAT_GOOD,
                         cmd->err_info->CommandStatus, DRIVER_OK,
                         blk_rq_is_passthrough(cmd->rq) ?
                                 DID_PASSTHROUGH : DID_ERROR);
@@ -3224,21 +3224,21 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
                 } else
                         dev_warn(&h->pdev->dev,
                                 "%p retried too many times\n", cmd);
-               rq->errors = make_status_bytes(SAM_STAT_GOOD,
+               sreq->result = make_status_bytes(SAM_STAT_GOOD,
                         cmd->err_info->CommandStatus, DRIVER_OK,
                         blk_rq_is_passthrough(cmd->rq) ?
                                 DID_PASSTHROUGH : DID_ABORT);
                 break;
         case CMD_TIMEOUT:
                 dev_warn(&h->pdev->dev, "cmd %p timedout\n", cmd);
-               rq->errors = make_status_bytes(SAM_STAT_GOOD,
+               sreq->result = make_status_bytes(SAM_STAT_GOOD,
                         cmd->err_info->CommandStatus, DRIVER_OK,
                         blk_rq_is_passthrough(cmd->rq) ?
                                 DID_PASSTHROUGH : DID_ERROR);
                 break;
         case CMD_UNABORTABLE:
                 dev_warn(&h->pdev->dev, "cmd %p unabortable\n", cmd);
-               rq->errors = make_status_bytes(SAM_STAT_GOOD,
+               sreq->result = make_status_bytes(SAM_STAT_GOOD,
                         cmd->err_info->CommandStatus, DRIVER_OK,
                         blk_rq_is_passthrough(cmd->rq) ?
                                 DID_PASSTHROUGH : DID_ERROR);
@@ -3247,7 +3247,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
                 dev_warn(&h->pdev->dev, "cmd %p returned "
                        "unknown status %x\n", cmd,
                        cmd->err_info->CommandStatus);
-               rq->errors = make_status_bytes(SAM_STAT_GOOD,
+               sreq->result = make_status_bytes(SAM_STAT_GOOD,
                         cmd->err_info->CommandStatus, DRIVER_OK,
                         blk_rq_is_passthrough(cmd->rq) ?
                                 DID_PASSTHROUGH : DID_ERROR);
@@ -3380,9 +3380,9 @@ static void do_cciss_request(struct request_queue *q)
                 if (dma_mapping_error(&h->pdev->dev, temp64.val)) {
                         dev_warn(&h->pdev->dev,
                                 "%s: error mapping page for DMA\n", __func__);
-                       creq->errors = make_status_bytes(SAM_STAT_GOOD,
-                                                       0, DRIVER_OK,
-                                                       DID_SOFT_ERROR);
+                       scsi_req(creq)->result =
+                               make_status_bytes(SAM_STAT_GOOD, 0, DRIVER_OK,
+                                                 DID_SOFT_ERROR);
                         cmd_free(h, c);
                         return;
                 }
@@ -3395,9 +3395,9 @@ static void do_cciss_request(struct request_queue *q)
                 if (cciss_map_sg_chain_block(h, c, h->cmd_sg_list[c->cmdindex],
                         (seg - (h->max_cmd_sgentries - 1)) *
                                 sizeof(SGDescriptor_struct))) {
-                       creq->errors = make_status_bytes(SAM_STAT_GOOD,
-                                                       0, DRIVER_OK,
-                                                       DID_SOFT_ERROR);
+                       scsi_req(creq)->result =
+                               make_status_bytes(SAM_STAT_GOOD, 0, DRIVER_OK,
+                                                 DID_SOFT_ERROR);
                         cmd_free(h, c);
                         return;
                 }
diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c

index de5c3ee8a7906555f28acb98b6d34987e4b0d82c..494837e59f232fc1d2d76026ed8f09c581b2b05d 100644 (file)
--- a/drivers/block/drbd/drbd_debugfs.c
+++ b/drivers/block/drbd/drbd_debugfs.c
@@ -236,9 +236,6 @@ static void seq_print_peer_request_flags(struct seq_file *m, struct drbd_peer_re
         seq_print_rq_state_bit(m, f & EE_CALL_AL_COMPLETE_IO, &sep, "in-AL");
         seq_print_rq_state_bit(m, f & EE_SEND_WRITE_ACK, &sep, "C");
         seq_print_rq_state_bit(m, f & EE_MAY_SET_IN_SYNC, &sep, "set-in-sync");
-
-       if (f & EE_IS_TRIM)
-               __seq_print_rq_state_bit(m, f & EE_IS_TRIM_USE_ZEROOUT, &sep, "zero-out", "trim");
         seq_print_rq_state_bit(m, f & EE_WRITE_SAME, &sep, "write-same");
         seq_putc(m, '\n');
  }
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h

index 724d1c50fc5283526f08ed5c23e1f866704742d2..d5da45bb03a663ef33f7bfb9cc6bb378a724393d 100644 (file)
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -437,9 +437,6 @@ enum {
  
         /* is this a TRIM aka REQ_DISCARD? */
         __EE_IS_TRIM,
-       /* our lower level cannot handle trim,
-        * and we want to fall back to zeroout instead */
-       __EE_IS_TRIM_USE_ZEROOUT,
  
         /* In case a barrier failed,
          * we need to resubmit without the barrier flag. */
@@ -482,7 +479,6 @@ enum {
  #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
  #define EE_MAY_SET_IN_SYNC     (1<<__EE_MAY_SET_IN_SYNC)
  #define EE_IS_TRIM             (1<<__EE_IS_TRIM)
-#define EE_IS_TRIM_USE_ZEROOUT (1<<__EE_IS_TRIM_USE_ZEROOUT)
  #define EE_RESUBMITTED         (1<<__EE_RESUBMITTED)
  #define EE_WAS_ERROR           (1<<__EE_WAS_ERROR)
  #define EE_HAS_DIGEST          (1<<__EE_HAS_DIGEST)
@@ -1561,8 +1557,6 @@ extern void start_resync_timer_fn(unsigned long data);
  extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
  
  /* drbd_receiver.c */
-extern int drbd_issue_discard_or_zero_out(struct drbd_device *device,
-               sector_t start, unsigned int nr_sectors, bool discard);
  extern int drbd_receiver(struct drbd_thread *thi);
  extern int drbd_ack_receiver(struct drbd_thread *thi);
  extern void drbd_send_ping_wf(struct work_struct *ws);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c

index 92c60cbd04ee8ce24d7e986dc3414ad57d637841..84455c365f578892e5ba2f7d0da714b974959ce6 100644 (file)
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -931,7 +931,6 @@ void assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p, struct r
                 p->qlim->io_min = cpu_to_be32(queue_io_min(q));
                 p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
                 p->qlim->discard_enabled = blk_queue_discard(q);
-               p->qlim->discard_zeroes_data = queue_discard_zeroes_data(q);
                 p->qlim->write_same_capable = !!q->limits.max_write_same_sectors;
         } else {
                 q = device->rq_queue;
@@ -941,7 +940,6 @@ void assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p, struct r
                 p->qlim->io_min = cpu_to_be32(queue_io_min(q));
                 p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
                 p->qlim->discard_enabled = 0;
-               p->qlim->discard_zeroes_data = 0;
                 p->qlim->write_same_capable = 0;
         }
  }
@@ -1668,7 +1666,8 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection,
                         (bio->bi_opf & REQ_FUA ? DP_FUA : 0) |
                         (bio->bi_opf & REQ_PREFLUSH ? DP_FLUSH : 0) |
                         (bio_op(bio) == REQ_OP_WRITE_SAME ? DP_WSAME : 0) |
-                       (bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0);
+                       (bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0) |
+                       (bio_op(bio) == REQ_OP_WRITE_ZEROES ? DP_DISCARD : 0);
         else
                 return bio->bi_opf & REQ_SYNC ? DP_RW_SYNC : 0;
  }
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c

index 908c704e20aa7ba57be68849b96b7797e848d9e4..02255a0d68b9ac9dde77887c030fb0fededbf7e2 100644 (file)
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1199,10 +1199,6 @@ static void decide_on_discard_support(struct drbd_device *device,
         struct drbd_connection *connection = first_peer_device(device)->connection;
         bool can_do = b ? blk_queue_discard(b) : true;
  
-       if (can_do && b && !b->limits.discard_zeroes_data && !discard_zeroes_if_aligned) {
-               can_do = false;
-               drbd_info(device, "discard_zeroes_data=0 and discard_zeroes_if_aligned=no: disabling discards\n");
-       }
         if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) {
                 can_do = false;
                 drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n");
@@ -1217,10 +1213,12 @@ static void decide_on_discard_support(struct drbd_device *device,
                 blk_queue_discard_granularity(q, 512);
                 q->limits.max_discard_sectors = drbd_max_discard_sectors(connection);
                 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+               q->limits.max_write_zeroes_sectors = drbd_max_discard_sectors(connection);
         } else {
                 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
                 blk_queue_discard_granularity(q, 0);
                 q->limits.max_discard_sectors = 0;
+               q->limits.max_write_zeroes_sectors = 0;
         }
  }
  
@@ -1482,8 +1480,7 @@ static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *dis
         if (disk_conf->al_extents > drbd_al_extents_max(nbc))
                 disk_conf->al_extents = drbd_al_extents_max(nbc);
  
-       if (!blk_queue_discard(q)
-           || (!q->limits.discard_zeroes_data && !disk_conf->discard_zeroes_if_aligned)) {
+       if (!blk_queue_discard(q)) {
                 if (disk_conf->rs_discard_granularity) {
                         disk_conf->rs_discard_granularity = 0; /* disable feature */
                         drbd_info(device, "rs_discard_granularity feature disabled\n");
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c

index aa6bf9692effecf576a4aa28124efef63b3b591f..1b0a2be24f39edc8e597ed4348545e08cb025c9c 100644 (file)
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1448,105 +1448,14 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
                 drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
  }
  
-/*
- * We *may* ignore the discard-zeroes-data setting, if so configured.
- *
- * Assumption is that it "discard_zeroes_data=0" is only because the backend
- * may ignore partial unaligned discards.
- *
- * LVM/DM thin as of at least
- *   LVM version:     2.02.115(2)-RHEL7 (2015-01-28)
- *   Library version: 1.02.93-RHEL7 (2015-01-28)
- *   Driver version:  4.29.0
- * still behaves this way.
- *
- * For unaligned (wrt. alignment and granularity) or too small discards,
- * we zero-out the initial (and/or) trailing unaligned partial chunks,
- * but discard all the aligned full chunks.
- *
- * At least for LVM/DM thin, the result is effectively "discard_zeroes_data=1".
- */
-int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, bool discard)
-{
-       struct block_device *bdev = device->ldev->backing_bdev;
-       struct request_queue *q = bdev_get_queue(bdev);
-       sector_t tmp, nr;
-       unsigned int max_discard_sectors, granularity;
-       int alignment;
-       int err = 0;
-
-       if (!discard)
-               goto zero_out;
-
-       /* Zero-sector (unknown) and one-sector granularities are the same.  */
-       granularity = max(q->limits.discard_granularity >> 9, 1U);
-       alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
-
-       max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22));
-       max_discard_sectors -= max_discard_sectors % granularity;
-       if (unlikely(!max_discard_sectors))
-               goto zero_out;
-
-       if (nr_sectors < granularity)
-               goto zero_out;
-
-       tmp = start;
-       if (sector_div(tmp, granularity) != alignment) {
-               if (nr_sectors < 2*granularity)
-                       goto zero_out;
-               /* start + gran - (start + gran - align) % gran */
-               tmp = start + granularity - alignment;
-               tmp = start + granularity - sector_div(tmp, granularity);
-
-               nr = tmp - start;
-               err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0);
-               nr_sectors -= nr;
-               start = tmp;
-       }
-       while (nr_sectors >= granularity) {
-               nr = min_t(sector_t, nr_sectors, max_discard_sectors);
-               err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0);
-               nr_sectors -= nr;
-               start += nr;
-       }
- zero_out:
-       if (nr_sectors) {
-               err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO, 0);
-       }
-       return err != 0;
-}
-
-static bool can_do_reliable_discards(struct drbd_device *device)
-{
-       struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
-       struct disk_conf *dc;
-       bool can_do;
-
-       if (!blk_queue_discard(q))
-               return false;
-
-       if (q->limits.discard_zeroes_data)
-               return true;
-
-       rcu_read_lock();
-       dc = rcu_dereference(device->ldev->disk_conf);
-       can_do = dc->discard_zeroes_if_aligned;
-       rcu_read_unlock();
-       return can_do;
-}
-
  static void drbd_issue_peer_discard(struct drbd_device *device, struct drbd_peer_request *peer_req)
  {
-       /* If the backend cannot discard, or does not guarantee
-        * read-back zeroes in discarded ranges, we fall back to
-        * zero-out.  Unless configuration specifically requested
-        * otherwise. */
-       if (!can_do_reliable_discards(device))
-               peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
+       struct block_device *bdev = device->ldev->backing_bdev;
  
-       if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector,
-           peer_req->i.size >> 9, !(peer_req->flags & EE_IS_TRIM_USE_ZEROOUT)))
+       if (blkdev_issue_zeroout(bdev, peer_req->i.sector, peer_req->i.size >> 9,
+                       GFP_NOIO, 0))
                 peer_req->flags |= EE_WAS_ERROR;
+
         drbd_endio_write_sec_final(peer_req);
  }
  
@@ -2376,7 +2285,7 @@ static unsigned long wire_flags_to_bio_flags(u32 dpf)
  static unsigned long wire_flags_to_bio_op(u32 dpf)
  {
         if (dpf & DP_DISCARD)
-               return REQ_OP_DISCARD;
+               return REQ_OP_WRITE_ZEROES;
         else
                 return REQ_OP_WRITE;
  }
@@ -2567,7 +2476,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
         op_flags = wire_flags_to_bio_flags(dp_flags);
         if (pi->cmd == P_TRIM) {
                 D_ASSERT(peer_device, peer_req->i.size > 0);
-               D_ASSERT(peer_device, op == REQ_OP_DISCARD);
+               D_ASSERT(peer_device, op == REQ_OP_WRITE_ZEROES);
                 D_ASSERT(peer_device, peer_req->pages == NULL);
         } else if (peer_req->pages == NULL) {
                 D_ASSERT(device, peer_req->i.size == 0);
@@ -4880,7 +4789,7 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac
  
         if (get_ldev(device)) {
                 struct drbd_peer_request *peer_req;
-               const int op = REQ_OP_DISCARD;
+               const int op = REQ_OP_WRITE_ZEROES;
  
                 peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector,
                                                size, 0, GFP_NOIO);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c

index 652114ae1a8aeba3abe05899e95139a2cd21753d..b5730e17b45584ad4109df8c1c6eac5a35e81a4f 100644 (file)
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -59,6 +59,7 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio
         drbd_req_make_private_bio(req, bio_src);
         req->rq_state = (bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0)
                       | (bio_op(bio_src) == REQ_OP_WRITE_SAME ? RQ_WSAME : 0)
+                     | (bio_op(bio_src) == REQ_OP_WRITE_ZEROES ? RQ_UNMAP : 0)
                       | (bio_op(bio_src) == REQ_OP_DISCARD ? RQ_UNMAP : 0);
         req->device = device;
         req->master_bio = bio_src;
@@ -1148,10 +1149,10 @@ static int drbd_process_write_request(struct drbd_request *req)
  
  static void drbd_process_discard_req(struct drbd_request *req)
  {
-       int err = drbd_issue_discard_or_zero_out(req->device,
-                               req->i.sector, req->i.size >> 9, true);
+       struct block_device *bdev = req->device->ldev->backing_bdev;
  
-       if (err)
+       if (blkdev_issue_zeroout(bdev, req->i.sector, req->i.size >> 9,
+                       GFP_NOIO, 0))
                 req->private_bio->bi_error = -EIO;
         bio_endio(req->private_bio);
  }
@@ -1180,7 +1181,8 @@ drbd_submit_req_private_bio(struct drbd_request *req)
         if (get_ldev(device)) {
                 if (drbd_insert_fault(device, type))
                         bio_io_error(bio);
-               else if (bio_op(bio) == REQ_OP_DISCARD)
+               else if (bio_op(bio) == REQ_OP_WRITE_ZEROES ||
+                        bio_op(bio) == REQ_OP_DISCARD)
                         drbd_process_discard_req(req);
                 else
                         generic_make_request(bio);
@@ -1234,7 +1236,8 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
         _drbd_start_io_acct(device, req);
  
         /* process discards always from our submitter thread */
-       if (bio_op(bio) & REQ_OP_DISCARD)
+       if ((bio_op(bio) & REQ_OP_WRITE_ZEROES) ||
+           (bio_op(bio) & REQ_OP_DISCARD))
                 goto queue_for_submitter_thread;
  
         if (rw == WRITE && req->private_bio && req->i.size
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c

index 3bff33f21435ce2cc0cf8c61920c829397447b80..1afcb4e02d8d98c0021dc38e2770306f30dc8883 100644 (file)
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -174,7 +174,8 @@ void drbd_peer_request_endio(struct bio *bio)
         struct drbd_peer_request *peer_req = bio->bi_private;
         struct drbd_device *device = peer_req->peer_device->device;
         bool is_write = bio_data_dir(bio) == WRITE;
-       bool is_discard = !!(bio_op(bio) == REQ_OP_DISCARD);
+       bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES ||
+                         bio_op(bio) == REQ_OP_DISCARD;
  
         if (bio->bi_error && __ratelimit(&drbd_ratelimit_state))
                 drbd_warn(device, "%s: error=%d s=%llus\n",
@@ -249,6 +250,7 @@ void drbd_request_endio(struct bio *bio)
         /* to avoid recursion in __req_mod */
         if (unlikely(bio->bi_error)) {
                 switch (bio_op(bio)) {
+               case REQ_OP_WRITE_ZEROES:
                 case REQ_OP_DISCARD:
                         if (bio->bi_error == -EOPNOTSUPP)
                                 what = DISCARD_COMPLETED_NOTSUPP;
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c

index 45b4384f650ccedeed20c2b4718fa1be49b87b06..60d4c765317833ec75ef6637104325b96084cc11 100644 (file)
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2805,8 +2805,10 @@ static int set_next_request(void)
                         fdc_queue = 0;
                 if (q) {
                         current_req = blk_fetch_request(q);
-                       if (current_req)
+                       if (current_req) {
+                               current_req->error_count = 0;
                                 break;
+                       }
                 }
         } while (fdc_queue != old_pos);
  
@@ -2866,7 +2868,7 @@ do_request:
                 _floppy = floppy_type + DP->autodetect[DRS->probed_format];
         } else
                 probing = 0;
-       errors = &(current_req->errors);
+       errors = &(current_req->error_count);
         tmp = make_raw_rw_request();
         if (tmp < 2) {
                 request_done(tmp);
@@ -4207,9 +4209,7 @@ static int __init do_floppy_init(void)
                 disks[drive]->fops = &floppy_fops;
                 sprintf(disks[drive]->disk_name, "fd%d", drive);
  
-               init_timer(&motor_off_timer[drive]);
-               motor_off_timer[drive].data = drive;
-               motor_off_timer[drive].function = motor_off_callback;
+               setup_timer(&motor_off_timer[drive], motor_off_callback, drive);
         }
  
         err = register_blkdev(FLOPPY_MAJOR, "fd");
diff --git a/drivers/block/hd.c b/drivers/block/hd.c

deleted file mode 100644 (file)

index 6043648..0000000
--- a/drivers/block/hd.c
+++ /dev/null
@@ -1,803 +0,0 @@
-/*
- *  Copyright (C) 1991, 1992  Linus Torvalds
- *
- * This is the low-level hd interrupt support. It traverses the
- * request-list, using interrupts to jump between functions. As
- * all the functions are called within interrupts, we may not
- * sleep. Special care is recommended.
- *
- *  modified by Drew Eckhardt to check nr of hd's from the CMOS.
- *
- *  Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug
- *  in the early extended-partition checks and added DM partitions
- *
- *  IRQ-unmask, drive-id, multiple-mode, support for ">16 heads",
- *  and general streamlining by Mark Lord.
- *
- *  Removed 99% of above. Use Mark's ide driver for those options.
- *  This is now a lightweight ST-506 driver. (Paul Gortmaker)
- *
- *  Modified 1995 Russell King for ARM processor.
- *
- *  Bugfix: max_sectors must be <= 255 or the wheels tend to come
- *  off in a hurry once you queue things up - Paul G. 02/2001
- */
-
-/* Uncomment the following if you want verbose error reports. */
-/* #define VERBOSE_ERRORS */
-
-#include <linux/blkdev.h>
-#include <linux/errno.h>
-#include <linux/signal.h>
-#include <linux/interrupt.h>
-#include <linux/timer.h>
-#include <linux/fs.h>
-#include <linux/kernel.h>
-#include <linux/genhd.h>
-#include <linux/string.h>
-#include <linux/ioport.h>
-#include <linux/init.h>
-#include <linux/blkpg.h>
-#include <linux/ata.h>
-#include <linux/hdreg.h>
-
-#define HD_IRQ 14
-
-#define REALLY_SLOW_IO
-#include <asm/io.h>
-#include <linux/uaccess.h>
-
-#ifdef __arm__
-#undef  HD_IRQ
-#endif
-#include <asm/irq.h>
-#ifdef __arm__
-#define HD_IRQ IRQ_HARDDISK
-#endif
-
-/* Hd controller regster ports */
-
-#define HD_DATA                0x1f0           /* _CTL when writing */
-#define HD_ERROR       0x1f1           /* see err-bits */
-#define HD_NSECTOR     0x1f2           /* nr of sectors to read/write */
-#define HD_SECTOR      0x1f3           /* starting sector */
-#define HD_LCYL                0x1f4           /* starting cylinder */
-#define HD_HCYL                0x1f5           /* high byte of starting cyl */
-#define HD_CURRENT     0x1f6           /* 101dhhhh , d=drive, hhhh=head */
-#define HD_STATUS      0x1f7           /* see status-bits */
-#define HD_FEATURE     HD_ERROR        /* same io address, read=error, write=feature */
-#define HD_PRECOMP     HD_FEATURE      /* obsolete use of this port - predates IDE */
-#define HD_COMMAND     HD_STATUS       /* same io address, read=status, write=cmd */
-
-#define HD_CMD         0x3f6           /* used for resets */
-#define HD_ALTSTATUS   0x3f6           /* same as HD_STATUS but doesn't clear irq */
-
-/* Bits of HD_STATUS */
-#define ERR_STAT               0x01
-#define INDEX_STAT             0x02
-#define ECC_STAT               0x04    /* Corrected error */
-#define DRQ_STAT               0x08
-#define SEEK_STAT              0x10
-#define SERVICE_STAT           SEEK_STAT
-#define WRERR_STAT             0x20
-#define READY_STAT             0x40
-#define BUSY_STAT              0x80
-
-/* Bits for HD_ERROR */
-#define MARK_ERR               0x01    /* Bad address mark */
-#define TRK0_ERR               0x02    /* couldn't find track 0 */
-#define ABRT_ERR               0x04    /* Command aborted */
-#define MCR_ERR                        0x08    /* media change request */
-#define ID_ERR                 0x10    /* ID field not found */
-#define MC_ERR                 0x20    /* media changed */
-#define ECC_ERR                        0x40    /* Uncorrectable ECC error */
-#define BBD_ERR                        0x80    /* pre-EIDE meaning:  block marked bad */
-#define ICRC_ERR               0x80    /* new meaning:  CRC error during transfer */
-
-static DEFINE_SPINLOCK(hd_lock);
-static struct request_queue *hd_queue;
-static struct request *hd_req;
-
-#define TIMEOUT_VALUE  (6*HZ)
-#define        HD_DELAY        0
-
-#define MAX_ERRORS     16      /* Max read/write errors/sector */
-#define RESET_FREQ      8      /* Reset controller every 8th retry */
-#define RECAL_FREQ      4      /* Recalibrate every 4th retry */
-#define MAX_HD         2
-
-#define STAT_OK                (READY_STAT|SEEK_STAT)
-#define OK_STATUS(s)   (((s)&(STAT_OK|(BUSY_STAT|WRERR_STAT|ERR_STAT)))==STAT_OK)
-
-static void recal_intr(void);
-static void bad_rw_intr(void);
-
-static int reset;
-static int hd_error;
-
-/*
- *  This struct defines the HD's and their types.
- */
-struct hd_i_struct {
-       unsigned int head, sect, cyl, wpcom, lzone, ctl;
-       int unit;
-       int recalibrate;
-       int special_op;
-};
-
-#ifdef HD_TYPE
-static struct hd_i_struct hd_info[] = { HD_TYPE };
-static int NR_HD = ARRAY_SIZE(hd_info);
-#else
-static struct hd_i_struct hd_info[MAX_HD];
-static int NR_HD;
-#endif
-
-static struct gendisk *hd_gendisk[MAX_HD];
-
-static struct timer_list device_timer;
-
-#define TIMEOUT_VALUE (6*HZ)
-
-#define SET_TIMER                                                      \
-       do {                                                            \
-               mod_timer(&device_timer, jiffies + TIMEOUT_VALUE);      \
-       } while (0)
-
-static void (*do_hd)(void) = NULL;
-#define SET_HANDLER(x) \
-if ((do_hd = (x)) != NULL) \
-       SET_TIMER; \
-else \
-       del_timer(&device_timer);
-
-
-#if (HD_DELAY > 0)
-
-#include <linux/i8253.h>
-
-unsigned long last_req;
-
-unsigned long read_timer(void)
-{
-       unsigned long t, flags;
-       int i;
-
-       raw_spin_lock_irqsave(&i8253_lock, flags);
-       t = jiffies * 11932;
-       outb_p(0, 0x43);
-       i = inb_p(0x40);
-       i |= inb(0x40) << 8;
-       raw_spin_unlock_irqrestore(&i8253_lock, flags);
-       return(t - i);
-}
-#endif
-
-static void __init hd_setup(char *str, int *ints)
-{
-       int hdind = 0;
-
-       if (ints[0] != 3)
-               return;
-       if (hd_info[0].head != 0)
-               hdind = 1;
-       hd_info[hdind].head = ints[2];
-       hd_info[hdind].sect = ints[3];
-       hd_info[hdind].cyl = ints[1];
-       hd_info[hdind].wpcom = 0;
-       hd_info[hdind].lzone = ints[1];
-       hd_info[hdind].ctl = (ints[2] > 8 ? 8 : 0);
-       NR_HD = hdind+1;
-}
-
-static bool hd_end_request(int err, unsigned int bytes)
-{
-       if (__blk_end_request(hd_req, err, bytes))
-               return true;
-       hd_req = NULL;
-       return false;
-}
-
-static bool hd_end_request_cur(int err)
-{
-       return hd_end_request(err, blk_rq_cur_bytes(hd_req));
-}
-
-static void dump_status(const char *msg, unsigned int stat)
-{
-       char *name = "hd?";
-       if (hd_req)
-               name = hd_req->rq_disk->disk_name;
-
-#ifdef VERBOSE_ERRORS
-       printk("%s: %s: status=0x%02x { ", name, msg, stat & 0xff);
-       if (stat & BUSY_STAT)   printk("Busy ");
-       if (stat & READY_STAT)  printk("DriveReady ");
-       if (stat & WRERR_STAT)  printk("WriteFault ");
-       if (stat & SEEK_STAT)   printk("SeekComplete ");
-       if (stat & DRQ_STAT)    printk("DataRequest ");
-       if (stat & ECC_STAT)    printk("CorrectedError ");
-       if (stat & INDEX_STAT)  printk("Index ");
-       if (stat & ERR_STAT)    printk("Error ");
-       printk("}\n");
-       if ((stat & ERR_STAT) == 0) {
-               hd_error = 0;
-       } else {
-               hd_error = inb(HD_ERROR);
-               printk("%s: %s: error=0x%02x { ", name, msg, hd_error & 0xff);
-               if (hd_error & BBD_ERR)         printk("BadSector ");
-               if (hd_error & ECC_ERR)         printk("UncorrectableError ");
-               if (hd_error & ID_ERR)          printk("SectorIdNotFound ");
-               if (hd_error & ABRT_ERR)        printk("DriveStatusError ");
-               if (hd_error & TRK0_ERR)        printk("TrackZeroNotFound ");
-               if (hd_error & MARK_ERR)        printk("AddrMarkNotFound ");
-               printk("}");
-               if (hd_error & (BBD_ERR|ECC_ERR|ID_ERR|MARK_ERR)) {
-                       printk(", CHS=%d/%d/%d", (inb(HD_HCYL)<<8) + inb(HD_LCYL),
-                               inb(HD_CURRENT) & 0xf, inb(HD_SECTOR));
-                       if (hd_req)
-                               printk(", sector=%ld", blk_rq_pos(hd_req));
-               }
-               printk("\n");
-       }
-#else
-       printk("%s: %s: status=0x%02x.\n", name, msg, stat & 0xff);
-       if ((stat & ERR_STAT) == 0) {
-               hd_error = 0;
-       } else {
-               hd_error = inb(HD_ERROR);
-               printk("%s: %s: error=0x%02x.\n", name, msg, hd_error & 0xff);
-       }
-#endif
-}
-
-static void check_status(void)
-{
-       int i = inb_p(HD_STATUS);
-
-       if (!OK_STATUS(i)) {
-               dump_status("check_status", i);
-               bad_rw_intr();
-       }
-}
-
-static int controller_busy(void)
-{
-       int retries = 100000;
-       unsigned char status;
-
-       do {
-               status = inb_p(HD_STATUS);
-       } while ((status & BUSY_STAT) && --retries);
-       return status;
-}
-
-static int status_ok(void)
-{
-       unsigned char status = inb_p(HD_STATUS);
-
-       if (status & BUSY_STAT)
-               return 1;       /* Ancient, but does it make sense??? */
-       if (status & WRERR_STAT)
-               return 0;
-       if (!(status & READY_STAT))
-               return 0;
-       if (!(status & SEEK_STAT))
-               return 0;
-       return 1;
-}
-
-static int controller_ready(unsigned int drive, unsigned int head)
-{
-       int retry = 100;
-
-       do {
-               if (controller_busy() & BUSY_STAT)
-                       return 0;
-               outb_p(0xA0 | (drive<<4) | head, HD_CURRENT);
-               if (status_ok())
-                       return 1;
-       } while (--retry);
-       return 0;
-}
-
-static void hd_out(struct hd_i_struct *disk,
-                  unsigned int nsect,
-                  unsigned int sect,
-                  unsigned int head,
-                  unsigned int cyl,
-                  unsigned int cmd,
-                  void (*intr_addr)(void))
-{
-       unsigned short port;
-
-#if (HD_DELAY > 0)
-       while (read_timer() - last_req < HD_DELAY)
-               /* nothing */;
-#endif
-       if (reset)
-               return;
-       if (!controller_ready(disk->unit, head)) {
-               reset = 1;
-               return;
-       }
-       SET_HANDLER(intr_addr);
-       outb_p(disk->ctl, HD_CMD);
-       port = HD_DATA;
-       outb_p(disk->wpcom >> 2, ++port);
-       outb_p(nsect, ++port);
-       outb_p(sect, ++port);
-       outb_p(cyl, ++port);
-       outb_p(cyl >> 8, ++port);
-       outb_p(0xA0 | (disk->unit << 4) | head, ++port);
-       outb_p(cmd, ++port);
-}
-
-static void hd_request (void);
-
-static int drive_busy(void)
-{
-       unsigned int i;
-       unsigned char c;
-
-       for (i = 0; i < 500000 ; i++) {
-               c = inb_p(HD_STATUS);
-               if ((c & (BUSY_STAT | READY_STAT | SEEK_STAT)) == STAT_OK)
-                       return 0;
-       }
-       dump_status("reset timed out", c);
-       return 1;
-}
-
-static void reset_controller(void)
-{
-       int     i;
-
-       outb_p(4, HD_CMD);
-       for (i = 0; i < 1000; i++) barrier();
-       outb_p(hd_info[0].ctl & 0x0f, HD_CMD);
-       for (i = 0; i < 1000; i++) barrier();
-       if (drive_busy())
-               printk("hd: controller still busy\n");
-       else if ((hd_error = inb(HD_ERROR)) != 1)
-               printk("hd: controller reset failed: %02x\n", hd_error);
-}
-
-static void reset_hd(void)
-{
-       static int i;
-
-repeat:
-       if (reset) {
-               reset = 0;
-               i = -1;
-               reset_controller();
-       } else {
-               check_status();
-               if (reset)
-                       goto repeat;
-       }
-       if (++i < NR_HD) {
-               struct hd_i_struct *disk = &hd_info[i];
-               disk->special_op = disk->recalibrate = 1;
-               hd_out(disk, disk->sect, disk->sect, disk->head-1,
-                       disk->cyl, ATA_CMD_INIT_DEV_PARAMS, &reset_hd);
-               if (reset)
-                       goto repeat;
-       } else
-               hd_request();
-}
-
-/*
- * Ok, don't know what to do with the unexpected interrupts: on some machines
- * doing a reset and a retry seems to result in an eternal loop. Right now I
- * ignore it, and just set the timeout.
- *
- * On laptops (and "green" PCs), an unexpected interrupt occurs whenever the
- * drive enters "idle", "standby", or "sleep" mode, so if the status looks
- * "good", we just ignore the interrupt completely.
- */
-static void unexpected_hd_interrupt(void)
-{
-       unsigned int stat = inb_p(HD_STATUS);
-
-       if (stat & (BUSY_STAT|DRQ_STAT|ECC_STAT|ERR_STAT)) {
-               dump_status("unexpected interrupt", stat);
-               SET_TIMER;
-       }
-}
-
-/*
- * bad_rw_intr() now tries to be a bit smarter and does things
- * according to the error returned by the controller.
- * -Mika Liljeberg (liljeber@cs.Helsinki.FI)
- */
-static void bad_rw_intr(void)
-{
-       struct request *req = hd_req;
-
-       if (req != NULL) {
-               struct hd_i_struct *disk = req->rq_disk->private_data;
-               if (++req->errors >= MAX_ERRORS || (hd_error & BBD_ERR)) {
-                       hd_end_request_cur(-EIO);
-                       disk->special_op = disk->recalibrate = 1;
-               } else if (req->errors % RESET_FREQ == 0)
-                       reset = 1;
-               else if ((hd_error & TRK0_ERR) || req->errors % RECAL_FREQ == 0)
-                       disk->special_op = disk->recalibrate = 1;
-               /* Otherwise just retry */
-       }
-}
-
-static inline int wait_DRQ(void)
-{
-       int retries;
-       int stat;
-
-       for (retries = 0; retries < 100000; retries++) {
-               stat = inb_p(HD_STATUS);
-               if (stat & DRQ_STAT)
-                       return 0;
-       }
-       dump_status("wait_DRQ", stat);
-       return -1;
-}
-
-static void read_intr(void)
-{
-       struct request *req;
-       int i, retries = 100000;
-
-       do {
-               i = (unsigned) inb_p(HD_STATUS);
-               if (i & BUSY_STAT)
-                       continue;
-               if (!OK_STATUS(i))
-                       break;
-               if (i & DRQ_STAT)
-                       goto ok_to_read;
-       } while (--retries > 0);
-       dump_status("read_intr", i);
-       bad_rw_intr();
-       hd_request();
-       return;
-
-ok_to_read:
-       req = hd_req;
-       insw(HD_DATA, bio_data(req->bio), 256);
-#ifdef DEBUG
-       printk("%s: read: sector %ld, remaining = %u, buffer=%p\n",
-              req->rq_disk->disk_name, blk_rq_pos(req) + 1,
-              blk_rq_sectors(req) - 1, bio_data(req->bio)+512);
-#endif
-       if (hd_end_request(0, 512)) {
-               SET_HANDLER(&read_intr);
-               return;
-       }
-
-       (void) inb_p(HD_STATUS);
-#if (HD_DELAY > 0)
-       last_req = read_timer();
-#endif
-       hd_request();
-}
-
-static void write_intr(void)
-{
-       struct request *req = hd_req;
-       int i;
-       int retries = 100000;
-
-       do {
-               i = (unsigned) inb_p(HD_STATUS);
-               if (i & BUSY_STAT)
-                       continue;
-               if (!OK_STATUS(i))
-                       break;
-               if ((blk_rq_sectors(req) <= 1) || (i & DRQ_STAT))
-                       goto ok_to_write;
-       } while (--retries > 0);
-       dump_status("write_intr", i);
-       bad_rw_intr();
-       hd_request();
-       return;
-
-ok_to_write:
-       if (hd_end_request(0, 512)) {
-               SET_HANDLER(&write_intr);
-               outsw(HD_DATA, bio_data(req->bio), 256);
-               return;
-       }
-
-#if (HD_DELAY > 0)
-       last_req = read_timer();
-#endif
-       hd_request();
-}
-
-static void recal_intr(void)
-{
-       check_status();
-#if (HD_DELAY > 0)
-       last_req = read_timer();
-#endif
-       hd_request();
-}
-
-/*
- * This is another of the error-routines I don't know what to do with. The
- * best idea seems to just set reset, and start all over again.
- */
-static void hd_times_out(unsigned long dummy)
-{
-       char *name;
-
-       do_hd = NULL;
-
-       if (!hd_req)
-               return;
-
-       spin_lock_irq(hd_queue->queue_lock);
-       reset = 1;
-       name = hd_req->rq_disk->disk_name;
-       printk("%s: timeout\n", name);
-       if (++hd_req->errors >= MAX_ERRORS) {
-#ifdef DEBUG
-               printk("%s: too many errors\n", name);
-#endif
-               hd_end_request_cur(-EIO);
-       }
-       hd_request();
-       spin_unlock_irq(hd_queue->queue_lock);
-}
-
-static int do_special_op(struct hd_i_struct *disk, struct request *req)
-{
-       if (disk->recalibrate) {
-               disk->recalibrate = 0;
-               hd_out(disk, disk->sect, 0, 0, 0, ATA_CMD_RESTORE, &recal_intr);
-               return reset;
-       }
-       if (disk->head > 16) {
-               printk("%s: cannot handle device with more than 16 heads - giving up\n", req->rq_disk->disk_name);
-               hd_end_request_cur(-EIO);
-       }
-       disk->special_op = 0;
-       return 1;
-}
-
-/*
- * The driver enables interrupts as much as possible.  In order to do this,
- * (a) the device-interrupt is disabled before entering hd_request(),
- * and (b) the timeout-interrupt is disabled before the sti().
- *
- * Interrupts are still masked (by default) whenever we are exchanging
- * data/cmds with a drive, because some drives seem to have very poor
- * tolerance for latency during I/O. The IDE driver has support to unmask
- * interrupts for non-broken hardware, so use that driver if required.
- */
-static void hd_request(void)
-{
-       unsigned int block, nsect, sec, track, head, cyl;
-       struct hd_i_struct *disk;
-       struct request *req;
-
-       if (do_hd)
-               return;
-repeat:
-       del_timer(&device_timer);
-
-       if (!hd_req) {
-               hd_req = blk_fetch_request(hd_queue);
-               if (!hd_req) {
-                       do_hd = NULL;
-                       return;
-               }
-       }
-       req = hd_req;
-
-       if (reset) {
-               reset_hd();
-               return;
-       }
-       disk = req->rq_disk->private_data;
-       block = blk_rq_pos(req);
-       nsect = blk_rq_sectors(req);
-       if (block >= get_capacity(req->rq_disk) ||
-           ((block+nsect) > get_capacity(req->rq_disk))) {
-               printk("%s: bad access: block=%d, count=%d\n",
-                       req->rq_disk->disk_name, block, nsect);
-               hd_end_request_cur(-EIO);
-               goto repeat;
-       }
-
-       if (disk->special_op) {
-               if (do_special_op(disk, req))
-                       goto repeat;
-               return;
-       }
-       sec   = block % disk->sect + 1;
-       track = block / disk->sect;
-       head  = track % disk->head;
-       cyl   = track / disk->head;
-#ifdef DEBUG
-       printk("%s: %sing: CHS=%d/%d/%d, sectors=%d, buffer=%p\n",
-               req->rq_disk->disk_name,
-               req_data_dir(req) == READ ? "read" : "writ",
-               cyl, head, sec, nsect, bio_data(req->bio));
-#endif
-
-       switch (req_op(req)) {
-       case REQ_OP_READ:
-               hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_READ,
-                       &read_intr);
-               if (reset)
-                       goto repeat;
-               break;
-       case REQ_OP_WRITE:
-               hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_WRITE,
-                       &write_intr);
-               if (reset)
-                       goto repeat;
-               if (wait_DRQ()) {
-                       bad_rw_intr();
-                       goto repeat;
-               }
-               outsw(HD_DATA, bio_data(req->bio), 256);
-               break;
-       default:
-               printk("unknown hd-command\n");
-               hd_end_request_cur(-EIO);
-               break;
-       }
-}
-
-static void do_hd_request(struct request_queue *q)
-{
-       hd_request();
-}
-
-static int hd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
-       struct hd_i_struct *disk = bdev->bd_disk->private_data;
-
-       geo->heads = disk->head;
-       geo->sectors = disk->sect;
-       geo->cylinders = disk->cyl;
-       return 0;
-}
-
-/*
- * Releasing a block device means we sync() it, so that it can safely
- * be forgotten about...
- */
-
-static irqreturn_t hd_interrupt(int irq, void *dev_id)
-{
-       void (*handler)(void) = do_hd;
-
-       spin_lock(hd_queue->queue_lock);
-
-       do_hd = NULL;
-       del_timer(&device_timer);
-       if (!handler)
-               handler = unexpected_hd_interrupt;
-       handler();
-
-       spin_unlock(hd_queue->queue_lock);
-
-       return IRQ_HANDLED;
-}
-
-static const struct block_device_operations hd_fops = {
-       .getgeo =       hd_getgeo,
-};
-
-static int __init hd_init(void)
-{
-       int drive;
-
-       if (register_blkdev(HD_MAJOR, "hd"))
-               return -1;
-
-       hd_queue = blk_init_queue(do_hd_request, &hd_lock);
-       if (!hd_queue) {
-               unregister_blkdev(HD_MAJOR, "hd");
-               return -ENOMEM;
-       }
-
-       blk_queue_max_hw_sectors(hd_queue, 255);
-       init_timer(&device_timer);
-       device_timer.function = hd_times_out;
-       blk_queue_logical_block_size(hd_queue, 512);
-
-       if (!NR_HD) {
-               /*
-                * We don't know anything about the drive.  This means
-                * that you *MUST* specify the drive parameters to the
-                * kernel yourself.
-                *
-                * If we were on an i386, we used to read this info from
-                * the BIOS or CMOS.  This doesn't work all that well,
-                * since this assumes that this is a primary or secondary
-                * drive, and if we're using this legacy driver, it's
-                * probably an auxiliary controller added to recover
-                * legacy data off an ST-506 drive.  Either way, it's
-                * definitely safest to have the user explicitly specify
-                * the information.
-                */
-               printk("hd: no drives specified - use hd=cyl,head,sectors"
-                       " on kernel command line\n");
-               goto out;
-       }
-
-       for (drive = 0 ; drive < NR_HD ; drive++) {
-               struct gendisk *disk = alloc_disk(64);
-               struct hd_i_struct *p = &hd_info[drive];
-               if (!disk)
-                       goto Enomem;
-               disk->major = HD_MAJOR;
-               disk->first_minor = drive << 6;
-               disk->fops = &hd_fops;
-               sprintf(disk->disk_name, "hd%c", 'a'+drive);
-               disk->private_data = p;
-               set_capacity(disk, p->head * p->sect * p->cyl);
-               disk->queue = hd_queue;
-               p->unit = drive;
-               hd_gendisk[drive] = disk;
-               printk("%s: %luMB, CHS=%d/%d/%d\n",
-                       disk->disk_name, (unsigned long)get_capacity(disk)/2048,
-                       p->cyl, p->head, p->sect);
-       }
-
-       if (request_irq(HD_IRQ, hd_interrupt, 0, "hd", NULL)) {
-               printk("hd: unable to get IRQ%d for the hard disk driver\n",
-                       HD_IRQ);
-               goto out1;
-       }
-       if (!request_region(HD_DATA, 8, "hd")) {
-               printk(KERN_WARNING "hd: port 0x%x busy\n", HD_DATA);
-               goto out2;
-       }
-       if (!request_region(HD_CMD, 1, "hd(cmd)")) {
-               printk(KERN_WARNING "hd: port 0x%x busy\n", HD_CMD);
-               goto out3;
-       }
-
-       /* Let them fly */
-       for (drive = 0; drive < NR_HD; drive++)
-               add_disk(hd_gendisk[drive]);
-
-       return 0;
-
-out3:
-       release_region(HD_DATA, 8);
-out2:
-       free_irq(HD_IRQ, NULL);
-out1:
-       for (drive = 0; drive < NR_HD; drive++)
-               put_disk(hd_gendisk[drive]);
-       NR_HD = 0;
-out:
-       del_timer(&device_timer);
-       unregister_blkdev(HD_MAJOR, "hd");
-       blk_cleanup_queue(hd_queue);
-       return -1;
-Enomem:
-       while (drive--)
-               put_disk(hd_gendisk[drive]);
-       goto out;
-}
-
-static int __init parse_hd_setup(char *line)
-{
-       int ints[6];
-
-       (void) get_options(line, ARRAY_SIZE(ints), ints);
-       hd_setup(NULL, ints);
-
-       return 1;
-}
-__setup("hd=", parse_hd_setup);
-
-late_initcall(hd_init);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c

index 0ecb6461ed81e20b5483b0622850d0cb64e3b642..994403efee19de163238b62e722fb6a478b5f7d5 100644 (file)
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -445,32 +445,27 @@ static int lo_req_flush(struct loop_device *lo, struct request *rq)
         return ret;
  }
  
-static inline void handle_partial_read(struct loop_cmd *cmd, long bytes)
+static void lo_complete_rq(struct request *rq)
  {
-       if (bytes < 0 || op_is_write(req_op(cmd->rq)))
-               return;
+       struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq);
  
-       if (unlikely(bytes < blk_rq_bytes(cmd->rq))) {
+       if (unlikely(req_op(cmd->rq) == REQ_OP_READ && cmd->use_aio &&
+                    cmd->ret >= 0 && cmd->ret < blk_rq_bytes(cmd->rq))) {
                 struct bio *bio = cmd->rq->bio;
  
-               bio_advance(bio, bytes);
+               bio_advance(bio, cmd->ret);
                 zero_fill_bio(bio);
         }
+
+       blk_mq_end_request(rq, cmd->ret < 0 ? -EIO : 0);
  }
  
  static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
  {
         struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb);
-       struct request *rq = cmd->rq;
-
-       handle_partial_read(cmd, ret);
  
-       if (ret > 0)
-               ret = 0;
-       else if (ret < 0)
-               ret = -EIO;
-
-       blk_mq_complete_request(rq, ret);
+       cmd->ret = ret;
+       blk_mq_complete_request(cmd->rq);
  }
  
  static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
@@ -528,6 +523,7 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
         case REQ_OP_FLUSH:
                 return lo_req_flush(lo, rq);
         case REQ_OP_DISCARD:
+       case REQ_OP_WRITE_ZEROES:
                 return lo_discard(lo, rq, pos);
         case REQ_OP_WRITE:
                 if (lo->transfer)
@@ -826,7 +822,7 @@ static void loop_config_discard(struct loop_device *lo)
                 q->limits.discard_granularity = 0;
                 q->limits.discard_alignment = 0;
                 blk_queue_max_discard_sectors(q, 0);
-               q->limits.discard_zeroes_data = 0;
+               blk_queue_max_write_zeroes_sectors(q, 0);
                 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
                 return;
         }
@@ -834,7 +830,7 @@ static void loop_config_discard(struct loop_device *lo)
         q->limits.discard_granularity = inode->i_sb->s_blocksize;
         q->limits.discard_alignment = 0;
         blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
-       q->limits.discard_zeroes_data = 1;
+       blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
         queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
  }
  
@@ -1660,6 +1656,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
         switch (req_op(cmd->rq)) {
         case REQ_OP_FLUSH:
         case REQ_OP_DISCARD:
+       case REQ_OP_WRITE_ZEROES:
                 cmd->use_aio = false;
                 break;
         default:
@@ -1686,8 +1683,10 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
         ret = do_req_filebacked(lo, cmd->rq);
   failed:
         /* complete non-aio request */
-       if (!cmd->use_aio || ret)
-               blk_mq_complete_request(cmd->rq, ret ? -EIO : 0);
+       if (!cmd->use_aio || ret) {
+               cmd->ret = ret ? -EIO : 0;
+               blk_mq_complete_request(cmd->rq);
+       }
  }
  
  static void loop_queue_work(struct kthread_work *work)
@@ -1710,9 +1709,10 @@ static int loop_init_request(void *data, struct request *rq,
         return 0;
  }
  
-static struct blk_mq_ops loop_mq_ops = {
+static const struct blk_mq_ops loop_mq_ops = {
         .queue_rq       = loop_queue_rq,
         .init_request   = loop_init_request,
+       .complete       = lo_complete_rq,
  };
  
  static int loop_add(struct loop_device **l, int i)
diff --git a/drivers/block/loop.h b/drivers/block/loop.h

index fb2237c73e618ed78eaa5ea2835817e9a53222cb..fecd3f97ef8c7cd9f825e6c58777a1a2bc6f3461 100644 (file)
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -70,6 +70,7 @@ struct loop_cmd {
         struct request *rq;
         struct list_head list;
         bool use_aio;           /* use AIO interface to handle I/O */
+       long ret;
         struct kiocb iocb;
  };
  
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c

deleted file mode 100644 (file)

index 286f276..0000000
--- a/drivers/block/mg_disk.c
+++ /dev/null
@@ -1,1112 +0,0 @@
-/*
- *  drivers/block/mg_disk.c
- *
- *  Support for the mGine m[g]flash IO mode.
- *  Based on legacy hd.c
- *
- * (c) 2008 mGine Co.,LTD
- * (c) 2008 unsik Kim <donari75@gmail.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/blkdev.h>
-#include <linux/hdreg.h>
-#include <linux/ata.h>
-#include <linux/interrupt.h>
-#include <linux/delay.h>
-#include <linux/platform_device.h>
-#include <linux/gpio.h>
-#include <linux/mg_disk.h>
-#include <linux/slab.h>
-
-#define MG_RES_SEC (CONFIG_MG_DISK_RES << 1)
-
-/* name for block device */
-#define MG_DISK_NAME "mgd"
-
-#define MG_DISK_MAJ 0
-#define MG_DISK_MAX_PART 16
-#define MG_SECTOR_SIZE 512
-#define MG_MAX_SECTS 256
-
-/* Register offsets */
-#define MG_BUFF_OFFSET                 0x8000
-#define MG_REG_OFFSET                  0xC000
-#define MG_REG_FEATURE                 (MG_REG_OFFSET + 2)     /* write case */
-#define MG_REG_ERROR                   (MG_REG_OFFSET + 2)     /* read case */
-#define MG_REG_SECT_CNT                        (MG_REG_OFFSET + 4)
-#define MG_REG_SECT_NUM                        (MG_REG_OFFSET + 6)
-#define MG_REG_CYL_LOW                 (MG_REG_OFFSET + 8)
-#define MG_REG_CYL_HIGH                        (MG_REG_OFFSET + 0xA)
-#define MG_REG_DRV_HEAD                        (MG_REG_OFFSET + 0xC)
-#define MG_REG_COMMAND                 (MG_REG_OFFSET + 0xE)   /* write case */
-#define MG_REG_STATUS                  (MG_REG_OFFSET + 0xE)   /* read  case */
-#define MG_REG_DRV_CTRL                        (MG_REG_OFFSET + 0x10)
-#define MG_REG_BURST_CTRL              (MG_REG_OFFSET + 0x12)
-
-/* handy status */
-#define MG_STAT_READY  (ATA_DRDY | ATA_DSC)
-#define MG_READY_OK(s) (((s) & (MG_STAT_READY | (ATA_BUSY | ATA_DF | \
-                                ATA_ERR))) == MG_STAT_READY)
-
-/* error code for others */
-#define MG_ERR_NONE            0
-#define MG_ERR_TIMEOUT         0x100
-#define MG_ERR_INIT_STAT       0x101
-#define MG_ERR_TRANSLATION     0x102
-#define MG_ERR_CTRL_RST                0x103
-#define MG_ERR_INV_STAT                0x104
-#define MG_ERR_RSTOUT          0x105
-
-#define MG_MAX_ERRORS  6       /* Max read/write errors */
-
-/* command */
-#define MG_CMD_RD 0x20
-#define MG_CMD_WR 0x30
-#define MG_CMD_SLEEP 0x99
-#define MG_CMD_WAKEUP 0xC3
-#define MG_CMD_ID 0xEC
-#define MG_CMD_WR_CONF 0x3C
-#define MG_CMD_RD_CONF 0x40
-
-/* operation mode */
-#define MG_OP_CASCADE (1 << 0)
-#define MG_OP_CASCADE_SYNC_RD (1 << 1)
-#define MG_OP_CASCADE_SYNC_WR (1 << 2)
-#define MG_OP_INTERLEAVE (1 << 3)
-
-/* synchronous */
-#define MG_BURST_LAT_4 (3 << 4)
-#define MG_BURST_LAT_5 (4 << 4)
-#define MG_BURST_LAT_6 (5 << 4)
-#define MG_BURST_LAT_7 (6 << 4)
-#define MG_BURST_LAT_8 (7 << 4)
-#define MG_BURST_LEN_4 (1 << 1)
-#define MG_BURST_LEN_8 (2 << 1)
-#define MG_BURST_LEN_16 (3 << 1)
-#define MG_BURST_LEN_32 (4 << 1)
-#define MG_BURST_LEN_CONT (0 << 1)
-
-/* timeout value (unit: ms) */
-#define MG_TMAX_CONF_TO_CMD    1
-#define MG_TMAX_WAIT_RD_DRQ    10
-#define MG_TMAX_WAIT_WR_DRQ    500
-#define MG_TMAX_RST_TO_BUSY    10
-#define MG_TMAX_HDRST_TO_RDY   500
-#define MG_TMAX_SWRST_TO_RDY   500
-#define MG_TMAX_RSTOUT         3000
-
-#define MG_DEV_MASK (MG_BOOT_DEV | MG_STORAGE_DEV | MG_STORAGE_DEV_SKIP_RST)
-
-/* main structure for mflash driver */
-struct mg_host {
-       struct device *dev;
-
-       struct request_queue *breq;
-       struct request *req;
-       spinlock_t lock;
-       struct gendisk *gd;
-
-       struct timer_list timer;
-       void (*mg_do_intr) (struct mg_host *);
-
-       u16 id[ATA_ID_WORDS];
-
-       u16 cyls;
-       u16 heads;
-       u16 sectors;
-       u32 n_sectors;
-       u32 nres_sectors;
-
-       void __iomem *dev_base;
-       unsigned int irq;
-       unsigned int rst;
-       unsigned int rstout;
-
-       u32 major;
-       u32 error;
-};
-
-/*
- * Debugging macro and defines
- */
-#undef DO_MG_DEBUG
-#ifdef DO_MG_DEBUG
-#  define MG_DBG(fmt, args...) \
-       printk(KERN_DEBUG "%s:%d "fmt, __func__, __LINE__, ##args)
-#else /* CONFIG_MG_DEBUG */
-#  define MG_DBG(fmt, args...) do { } while (0)
-#endif /* CONFIG_MG_DEBUG */
-
-static void mg_request(struct request_queue *);
-
-static bool mg_end_request(struct mg_host *host, int err, unsigned int nr_bytes)
-{
-       if (__blk_end_request(host->req, err, nr_bytes))
-               return true;
-
-       host->req = NULL;
-       return false;
-}
-
-static bool mg_end_request_cur(struct mg_host *host, int err)
-{
-       return mg_end_request(host, err, blk_rq_cur_bytes(host->req));
-}
-
-static void mg_dump_status(const char *msg, unsigned int stat,
-               struct mg_host *host)
-{
-       char *name = MG_DISK_NAME;
-
-       if (host->req)
-               name = host->req->rq_disk->disk_name;
-
-       printk(KERN_ERR "%s: %s: status=0x%02x { ", name, msg, stat & 0xff);
-       if (stat & ATA_BUSY)
-               printk("Busy ");
-       if (stat & ATA_DRDY)
-               printk("DriveReady ");
-       if (stat & ATA_DF)
-               printk("WriteFault ");
-       if (stat & ATA_DSC)
-               printk("SeekComplete ");
-       if (stat & ATA_DRQ)
-               printk("DataRequest ");
-       if (stat & ATA_CORR)
-               printk("CorrectedError ");
-       if (stat & ATA_ERR)
-               printk("Error ");
-       printk("}\n");
-       if ((stat & ATA_ERR) == 0) {
-               host->error = 0;
-       } else {
-               host->error = inb((unsigned long)host->dev_base + MG_REG_ERROR);
-               printk(KERN_ERR "%s: %s: error=0x%02x { ", name, msg,
-                               host->error & 0xff);
-               if (host->error & ATA_BBK)
-                       printk("BadSector ");
-               if (host->error & ATA_UNC)
-                       printk("UncorrectableError ");
-               if (host->error & ATA_IDNF)
-                       printk("SectorIdNotFound ");
-               if (host->error & ATA_ABORTED)
-                       printk("DriveStatusError ");
-               if (host->error & ATA_AMNF)
-                       printk("AddrMarkNotFound ");
-               printk("}");
-               if (host->error & (ATA_BBK | ATA_UNC | ATA_IDNF | ATA_AMNF)) {
-                       if (host->req)
-                               printk(", sector=%u",
-                                      (unsigned int)blk_rq_pos(host->req));
-               }
-               printk("\n");
-       }
-}
-
-static unsigned int mg_wait(struct mg_host *host, u32 expect, u32 msec)
-{
-       u8 status;
-       unsigned long expire, cur_jiffies;
-       struct mg_drv_data *prv_data = host->dev->platform_data;
-
-       host->error = MG_ERR_NONE;
-       expire = jiffies + msecs_to_jiffies(msec);
-
-       /* These 2 times dummy status read prevents reading invalid
-        * status. A very little time (3 times of mflash operating clk)
-        * is required for busy bit is set. Use dummy read instead of
-        * busy wait, because mflash's PLL is machine dependent.
-        */
-       if (prv_data->use_polling) {
-               status = inb((unsigned long)host->dev_base + MG_REG_STATUS);
-               status = inb((unsigned long)host->dev_base + MG_REG_STATUS);
-       }
-
-       status = inb((unsigned long)host->dev_base + MG_REG_STATUS);
-
-       do {
-               cur_jiffies = jiffies;
-               if (status & ATA_BUSY) {
-                       if (expect == ATA_BUSY)
-                               break;
-               } else {
-                       /* Check the error condition! */
-                       if (status & ATA_ERR) {
-                               mg_dump_status("mg_wait", status, host);
-                               break;
-                       }
-
-                       if (expect == MG_STAT_READY)
-                               if (MG_READY_OK(status))
-                                       break;
-
-                       if (expect == ATA_DRQ)
-                               if (status & ATA_DRQ)
-                                       break;
-               }
-               if (!msec) {
-                       mg_dump_status("not ready", status, host);
-                       return MG_ERR_INV_STAT;
-               }
-
-               status = inb((unsigned long)host->dev_base + MG_REG_STATUS);
-       } while (time_before(cur_jiffies, expire));
-
-       if (time_after_eq(cur_jiffies, expire) && msec)
-               host->error = MG_ERR_TIMEOUT;
-
-       return host->error;
-}
-
-static unsigned int mg_wait_rstout(u32 rstout, u32 msec)
-{
-       unsigned long expire;
-
-       expire = jiffies + msecs_to_jiffies(msec);
-       while (time_before(jiffies, expire)) {
-               if (gpio_get_value(rstout) == 1)
-                       return MG_ERR_NONE;
-               msleep(10);
-       }
-
-       return MG_ERR_RSTOUT;
-}
-
-static void mg_unexpected_intr(struct mg_host *host)
-{
-       u32 status = inb((unsigned long)host->dev_base + MG_REG_STATUS);
-
-       mg_dump_status("mg_unexpected_intr", status, host);
-}
-
-static irqreturn_t mg_irq(int irq, void *dev_id)
-{
-       struct mg_host *host = dev_id;
-       void (*handler)(struct mg_host *) = host->mg_do_intr;
-
-       spin_lock(&host->lock);
-
-       host->mg_do_intr = NULL;
-       del_timer(&host->timer);
-       if (!handler)
-               handler = mg_unexpected_intr;
-       handler(host);
-
-       spin_unlock(&host->lock);
-
-       return IRQ_HANDLED;
-}
-
-/* local copy of ata_id_string() */
-static void mg_id_string(const u16 *id, unsigned char *s,
-                        unsigned int ofs, unsigned int len)
-{
-       unsigned int c;
-
-       BUG_ON(len & 1);
-
-       while (len > 0) {
-               c = id[ofs] >> 8;
-               *s = c;
-               s++;
-
-               c = id[ofs] & 0xff;
-               *s = c;
-               s++;
-
-               ofs++;
-               len -= 2;
-       }
-}
-
-/* local copy of ata_id_c_string() */
-static void mg_id_c_string(const u16 *id, unsigned char *s,
-                          unsigned int ofs, unsigned int len)
-{
-       unsigned char *p;
-
-       mg_id_string(id, s, ofs, len - 1);
-
-       p = s + strnlen(s, len - 1);
-       while (p > s && p[-1] == ' ')
-               p--;
-       *p = '\0';
-}
-
-static int mg_get_disk_id(struct mg_host *host)
-{
-       u32 i;
-       s32 err;
-       const u16 *id = host->id;
-       struct mg_drv_data *prv_data = host->dev->platform_data;
-       char fwrev[ATA_ID_FW_REV_LEN + 1];
-       char model[ATA_ID_PROD_LEN + 1];
-       char serial[ATA_ID_SERNO_LEN + 1];
-
-       if (!prv_data->use_polling)
-               outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
-
-       outb(MG_CMD_ID, (unsigned long)host->dev_base + MG_REG_COMMAND);
-       err = mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_RD_DRQ);
-       if (err)
-               return err;
-
-       for (i = 0; i < (MG_SECTOR_SIZE >> 1); i++)
-               host->id[i] = le16_to_cpu(inw((unsigned long)host->dev_base +
-                                       MG_BUFF_OFFSET + i * 2));
-
-       outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND);
-       err = mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD);
-       if (err)
-               return err;
-
-       if ((id[ATA_ID_FIELD_VALID] & 1) == 0)
-               return MG_ERR_TRANSLATION;
-
-       host->n_sectors = ata_id_u32(id, ATA_ID_LBA_CAPACITY);
-       host->cyls = id[ATA_ID_CYLS];
-       host->heads = id[ATA_ID_HEADS];
-       host->sectors = id[ATA_ID_SECTORS];
-
-       if (MG_RES_SEC && host->heads && host->sectors) {
-               /* modify cyls, n_sectors */
-               host->cyls = (host->n_sectors - MG_RES_SEC) /
-                       host->heads / host->sectors;
-               host->nres_sectors = host->n_sectors - host->cyls *
-                       host->heads * host->sectors;
-               host->n_sectors -= host->nres_sectors;
-       }
-
-       mg_id_c_string(id, fwrev, ATA_ID_FW_REV, sizeof(fwrev));
-       mg_id_c_string(id, model, ATA_ID_PROD, sizeof(model));
-       mg_id_c_string(id, serial, ATA_ID_SERNO, sizeof(serial));
-       printk(KERN_INFO "mg_disk: model: %s\n", model);
-       printk(KERN_INFO "mg_disk: firm: %.8s\n", fwrev);
-       printk(KERN_INFO "mg_disk: serial: %s\n", serial);
-       printk(KERN_INFO "mg_disk: %d + reserved %d sectors\n",
-                       host->n_sectors, host->nres_sectors);
-
-       if (!prv_data->use_polling)
-               outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
-
-       return err;
-}
-
-
-static int mg_disk_init(struct mg_host *host)
-{
-       struct mg_drv_data *prv_data = host->dev->platform_data;
-       s32 err;
-       u8 init_status;
-
-       /* hdd rst low */
-       gpio_set_value(host->rst, 0);
-       err = mg_wait(host, ATA_BUSY, MG_TMAX_RST_TO_BUSY);
-       if (err)
-               return err;
-
-       /* hdd rst high */
-       gpio_set_value(host->rst, 1);
-       err = mg_wait(host, MG_STAT_READY, MG_TMAX_HDRST_TO_RDY);
-       if (err)
-               return err;
-
-       /* soft reset on */
-       outb(ATA_SRST | (prv_data->use_polling ? ATA_NIEN : 0),
-                       (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
-       err = mg_wait(host, ATA_BUSY, MG_TMAX_RST_TO_BUSY);
-       if (err)
-               return err;
-
-       /* soft reset off */
-       outb(prv_data->use_polling ? ATA_NIEN : 0,
-                       (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
-       err = mg_wait(host, MG_STAT_READY, MG_TMAX_SWRST_TO_RDY);
-       if (err)
-               return err;
-
-       init_status = inb((unsigned long)host->dev_base + MG_REG_STATUS) & 0xf;
-
-       if (init_status == 0xf)
-               return MG_ERR_INIT_STAT;
-
-       return err;
-}
-
-static void mg_bad_rw_intr(struct mg_host *host)
-{
-       if (host->req)
-               if (++host->req->errors >= MG_MAX_ERRORS ||
-                   host->error == MG_ERR_TIMEOUT)
-                       mg_end_request_cur(host, -EIO);
-}
-
-static unsigned int mg_out(struct mg_host *host,
-               unsigned int sect_num,
-               unsigned int sect_cnt,
-               unsigned int cmd,
-               void (*intr_addr)(struct mg_host *))
-{
-       struct mg_drv_data *prv_data = host->dev->platform_data;
-
-       if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD))
-               return host->error;
-
-       if (!prv_data->use_polling) {
-               host->mg_do_intr = intr_addr;
-               mod_timer(&host->timer, jiffies + 3 * HZ);
-       }
-       if (MG_RES_SEC)
-               sect_num += MG_RES_SEC;
-       outb((u8)sect_cnt, (unsigned long)host->dev_base + MG_REG_SECT_CNT);
-       outb((u8)sect_num, (unsigned long)host->dev_base + MG_REG_SECT_NUM);
-       outb((u8)(sect_num >> 8), (unsigned long)host->dev_base +
-                       MG_REG_CYL_LOW);
-       outb((u8)(sect_num >> 16), (unsigned long)host->dev_base +
-                       MG_REG_CYL_HIGH);
-       outb((u8)((sect_num >> 24) | ATA_LBA | ATA_DEVICE_OBS),
-                       (unsigned long)host->dev_base + MG_REG_DRV_HEAD);
-       outb(cmd, (unsigned long)host->dev_base + MG_REG_COMMAND);
-       return MG_ERR_NONE;
-}
-
-static void mg_read_one(struct mg_host *host, struct request *req)
-{
-       u16 *buff = (u16 *)bio_data(req->bio);
-       u32 i;
-
-       for (i = 0; i < MG_SECTOR_SIZE >> 1; i++)
-               *buff++ = inw((unsigned long)host->dev_base + MG_BUFF_OFFSET +
-                             (i << 1));
-}
-
-static void mg_read(struct request *req)
-{
-       struct mg_host *host = req->rq_disk->private_data;
-
-       if (mg_out(host, blk_rq_pos(req), blk_rq_sectors(req),
-                  MG_CMD_RD, NULL) != MG_ERR_NONE)
-               mg_bad_rw_intr(host);
-
-       MG_DBG("requested %d sects (from %ld), buffer=0x%p\n",
-              blk_rq_sectors(req), blk_rq_pos(req), bio_data(req->bio));
-
-       do {
-               if (mg_wait(host, ATA_DRQ,
-                           MG_TMAX_WAIT_RD_DRQ) != MG_ERR_NONE) {
-                       mg_bad_rw_intr(host);
-                       return;
-               }
-
-               mg_read_one(host, req);
-
-               outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base +
-                               MG_REG_COMMAND);
-       } while (mg_end_request(host, 0, MG_SECTOR_SIZE));
-}
-
-static void mg_write_one(struct mg_host *host, struct request *req)
-{
-       u16 *buff = (u16 *)bio_data(req->bio);
-       u32 i;
-
-       for (i = 0; i < MG_SECTOR_SIZE >> 1; i++)
-               outw(*buff++, (unsigned long)host->dev_base + MG_BUFF_OFFSET +
-                    (i << 1));
-}
-
-static void mg_write(struct request *req)
-{
-       struct mg_host *host = req->rq_disk->private_data;
-       unsigned int rem = blk_rq_sectors(req);
-
-       if (mg_out(host, blk_rq_pos(req), rem,
-                  MG_CMD_WR, NULL) != MG_ERR_NONE) {
-               mg_bad_rw_intr(host);
-               return;
-       }
-
-       MG_DBG("requested %d sects (from %ld), buffer=0x%p\n",
-              rem, blk_rq_pos(req), bio_data(req->bio));
-
-       if (mg_wait(host, ATA_DRQ,
-                   MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) {
-               mg_bad_rw_intr(host);
-               return;
-       }
-
-       do {
-               mg_write_one(host, req);
-
-               outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base +
-                               MG_REG_COMMAND);
-
-               rem--;
-               if (rem > 1 && mg_wait(host, ATA_DRQ,
-                                       MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) {
-                       mg_bad_rw_intr(host);
-                       return;
-               } else if (mg_wait(host, MG_STAT_READY,
-                                       MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) {
-                       mg_bad_rw_intr(host);
-                       return;
-               }
-       } while (mg_end_request(host, 0, MG_SECTOR_SIZE));
-}
-
-static void mg_read_intr(struct mg_host *host)
-{
-       struct request *req = host->req;
-       u32 i;
-
-       /* check status */
-       do {
-               i = inb((unsigned long)host->dev_base + MG_REG_STATUS);
-               if (i & ATA_BUSY)
-                       break;
-               if (!MG_READY_OK(i))
-                       break;
-               if (i & ATA_DRQ)
-                       goto ok_to_read;
-       } while (0);
-       mg_dump_status("mg_read_intr", i, host);
-       mg_bad_rw_intr(host);
-       mg_request(host->breq);
-       return;
-
-ok_to_read:
-       mg_read_one(host, req);
-
-       MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n",
-              blk_rq_pos(req), blk_rq_sectors(req) - 1, bio_data(req->bio));
-
-       /* send read confirm */
-       outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND);
-
-       if (mg_end_request(host, 0, MG_SECTOR_SIZE)) {
-               /* set handler if read remains */
-               host->mg_do_intr = mg_read_intr;
-               mod_timer(&host->timer, jiffies + 3 * HZ);
-       } else /* goto next request */
-               mg_request(host->breq);
-}
-
-static void mg_write_intr(struct mg_host *host)
-{
-       struct request *req = host->req;
-       u32 i;
-       bool rem;
-
-       /* check status */
-       do {
-               i = inb((unsigned long)host->dev_base + MG_REG_STATUS);
-               if (i & ATA_BUSY)
-                       break;
-               if (!MG_READY_OK(i))
-                       break;
-               if ((blk_rq_sectors(req) <= 1) || (i & ATA_DRQ))
-                       goto ok_to_write;
-       } while (0);
-       mg_dump_status("mg_write_intr", i, host);
-       mg_bad_rw_intr(host);
-       mg_request(host->breq);
-       return;
-
-ok_to_write:
-       if ((rem = mg_end_request(host, 0, MG_SECTOR_SIZE))) {
-               /* write 1 sector and set handler if remains */
-               mg_write_one(host, req);
-               MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n",
-                      blk_rq_pos(req), blk_rq_sectors(req), bio_data(req->bio));
-               host->mg_do_intr = mg_write_intr;
-               mod_timer(&host->timer, jiffies + 3 * HZ);
-       }
-
-       /* send write confirm */
-       outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND);
-
-       if (!rem)
-               mg_request(host->breq);
-}
-
-static void mg_times_out(unsigned long data)
-{
-       struct mg_host *host = (struct mg_host *)data;
-       char *name;
-
-       spin_lock_irq(&host->lock);
-
-       if (!host->req)
-               goto out_unlock;
-
-       host->mg_do_intr = NULL;
-
-       name = host->req->rq_disk->disk_name;
-       printk(KERN_DEBUG "%s: timeout\n", name);
-
-       host->error = MG_ERR_TIMEOUT;
-       mg_bad_rw_intr(host);
-
-out_unlock:
-       mg_request(host->breq);
-       spin_unlock_irq(&host->lock);
-}
-
-static void mg_request_poll(struct request_queue *q)
-{
-       struct mg_host *host = q->queuedata;
-
-       while (1) {
-               if (!host->req) {
-                       host->req = blk_fetch_request(q);
-                       if (!host->req)
-                               break;
-               }
-
-               switch (req_op(host->req)) {
-               case REQ_OP_READ:
-                       mg_read(host->req);
-                       break;
-               case REQ_OP_WRITE:
-                       mg_write(host->req);
-                       break;
-               default:
-                       mg_end_request_cur(host, -EIO);
-                       break;
-               }
-       }
-}
-
-static unsigned int mg_issue_req(struct request *req,
-               struct mg_host *host,
-               unsigned int sect_num,
-               unsigned int sect_cnt)
-{
-       switch (req_op(host->req)) {
-       case REQ_OP_READ:
-               if (mg_out(host, sect_num, sect_cnt, MG_CMD_RD, &mg_read_intr)
-                               != MG_ERR_NONE) {
-                       mg_bad_rw_intr(host);
-                       return host->error;
-               }
-               break;
-       case REQ_OP_WRITE:
-               /* TODO : handler */
-               outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
-               if (mg_out(host, sect_num, sect_cnt, MG_CMD_WR, &mg_write_intr)
-                               != MG_ERR_NONE) {
-                       mg_bad_rw_intr(host);
-                       return host->error;
-               }
-               del_timer(&host->timer);
-               mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_WR_DRQ);
-               outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
-               if (host->error) {
-                       mg_bad_rw_intr(host);
-                       return host->error;
-               }
-               mg_write_one(host, req);
-               mod_timer(&host->timer, jiffies + 3 * HZ);
-               outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base +
-                               MG_REG_COMMAND);
-               break;
-       default:
-               mg_end_request_cur(host, -EIO);
-               break;
-       }
-       return MG_ERR_NONE;
-}
-
-/* This function also called from IRQ context */
-static void mg_request(struct request_queue *q)
-{
-       struct mg_host *host = q->queuedata;
-       struct request *req;
-       u32 sect_num, sect_cnt;
-
-       while (1) {
-               if (!host->req) {
-                       host->req = blk_fetch_request(q);
-                       if (!host->req)
-                               break;
-               }
-               req = host->req;
-
-               /* check unwanted request call */
-               if (host->mg_do_intr)
-                       return;
-
-               del_timer(&host->timer);
-
-               sect_num = blk_rq_pos(req);
-               /* deal whole segments */
-               sect_cnt = blk_rq_sectors(req);
-
-               /* sanity check */
-               if (sect_num >= get_capacity(req->rq_disk) ||
-                               ((sect_num + sect_cnt) >
-                                get_capacity(req->rq_disk))) {
-                       printk(KERN_WARNING
-                                       "%s: bad access: sector=%d, count=%d\n",
-                                       req->rq_disk->disk_name,
-                                       sect_num, sect_cnt);
-                       mg_end_request_cur(host, -EIO);
-                       continue;
-               }
-
-               if (!mg_issue_req(req, host, sect_num, sect_cnt))
-                       return;
-       }
-}
-
-static int mg_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
-       struct mg_host *host = bdev->bd_disk->private_data;
-
-       geo->cylinders = (unsigned short)host->cyls;
-       geo->heads = (unsigned char)host->heads;
-       geo->sectors = (unsigned char)host->sectors;
-       return 0;
-}
-
-static const struct block_device_operations mg_disk_ops = {
-       .getgeo = mg_getgeo
-};
-
-#ifdef CONFIG_PM_SLEEP
-static int mg_suspend(struct device *dev)
-{
-       struct mg_drv_data *prv_data = dev->platform_data;
-       struct mg_host *host = prv_data->host;
-
-       if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD))
-               return -EIO;
-
-       if (!prv_data->use_polling)
-               outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
-
-       outb(MG_CMD_SLEEP, (unsigned long)host->dev_base + MG_REG_COMMAND);
-       /* wait until mflash deep sleep */
-       msleep(1);
-
-       if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD)) {
-               if (!prv_data->use_polling)
-                       outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
-               return -EIO;
-       }
-
-       return 0;
-}
-
-static int mg_resume(struct device *dev)
-{
-       struct mg_drv_data *prv_data = dev->platform_data;
-       struct mg_host *host = prv_data->host;
-
-       if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD))
-               return -EIO;
-
-       outb(MG_CMD_WAKEUP, (unsigned long)host->dev_base + MG_REG_COMMAND);
-       /* wait until mflash wakeup */
-       msleep(1);
-
-       if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD))
-               return -EIO;
-
-       if (!prv_data->use_polling)
-               outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
-
-       return 0;
-}
-#endif
-
-static SIMPLE_DEV_PM_OPS(mg_pm, mg_suspend, mg_resume);
-
-static int mg_probe(struct platform_device *plat_dev)
-{
-       struct mg_host *host;
-       struct resource *rsc;
-       struct mg_drv_data *prv_data = plat_dev->dev.platform_data;
-       int err = 0;
-
-       if (!prv_data) {
-               printk(KERN_ERR "%s:%d fail (no driver_data)\n",
-                               __func__, __LINE__);
-               err = -EINVAL;
-               goto probe_err;
-       }
-
-       /* alloc mg_host */
-       host = kzalloc(sizeof(struct mg_host), GFP_KERNEL);
-       if (!host) {
-               printk(KERN_ERR "%s:%d fail (no memory for mg_host)\n",
-                               __func__, __LINE__);
-               err = -ENOMEM;
-               goto probe_err;
-       }
-       host->major = MG_DISK_MAJ;
-
-       /* link each other */
-       prv_data->host = host;
-       host->dev = &plat_dev->dev;
-
-       /* io remap */
-       rsc = platform_get_resource(plat_dev, IORESOURCE_MEM, 0);
-       if (!rsc) {
-               printk(KERN_ERR "%s:%d platform_get_resource fail\n",
-                               __func__, __LINE__);
-               err = -EINVAL;
-               goto probe_err_2;
-       }
-       host->dev_base = ioremap(rsc->start, resource_size(rsc));
-       if (!host->dev_base) {
-               printk(KERN_ERR "%s:%d ioremap fail\n",
-                               __func__, __LINE__);
-               err = -EIO;
-               goto probe_err_2;
-       }
-       MG_DBG("dev_base = 0x%x\n", (u32)host->dev_base);
-
-       /* get reset pin */
-       rsc = platform_get_resource_byname(plat_dev, IORESOURCE_IO,
-                       MG_RST_PIN);
-       if (!rsc) {
-               printk(KERN_ERR "%s:%d get reset pin fail\n",
-                               __func__, __LINE__);
-               err = -EIO;
-               goto probe_err_3;
-       }
-       host->rst = rsc->start;
-
-       /* init rst pin */
-       err = gpio_request(host->rst, MG_RST_PIN);
-       if (err)
-               goto probe_err_3;
-       gpio_direction_output(host->rst, 1);
-
-       /* reset out pin */
-       if (!(prv_data->dev_attr & MG_DEV_MASK)) {
-               err = -EINVAL;
-               goto probe_err_3a;
-       }
-
-       if (prv_data->dev_attr != MG_BOOT_DEV) {
-               rsc = platform_get_resource_byname(plat_dev, IORESOURCE_IO,
-                               MG_RSTOUT_PIN);
-               if (!rsc) {
-                       printk(KERN_ERR "%s:%d get reset-out pin fail\n",
-                                       __func__, __LINE__);
-                       err = -EIO;
-                       goto probe_err_3a;
-               }
-               host->rstout = rsc->start;
-               err = gpio_request(host->rstout, MG_RSTOUT_PIN);
-               if (err)
-                       goto probe_err_3a;
-               gpio_direction_input(host->rstout);
-       }
-
-       /* disk reset */
-       if (prv_data->dev_attr == MG_STORAGE_DEV) {
-               /* If POR seq. not yet finished, wait */
-               err = mg_wait_rstout(host->rstout, MG_TMAX_RSTOUT);
-               if (err)
-                       goto probe_err_3b;
-               err = mg_disk_init(host);
-               if (err) {
-                       printk(KERN_ERR "%s:%d fail (err code : %d)\n",
-                                       __func__, __LINE__, err);
-                       err = -EIO;
-                       goto probe_err_3b;
-               }
-       }
-
-       /* get irq resource */
-       if (!prv_data->use_polling) {
-               host->irq = platform_get_irq(plat_dev, 0);
-               if (host->irq == -ENXIO) {
-                       err = host->irq;
-                       goto probe_err_3b;
-               }
-               err = request_irq(host->irq, mg_irq,
-                               IRQF_TRIGGER_RISING,
-                               MG_DEV_NAME, host);
-               if (err) {
-                       printk(KERN_ERR "%s:%d fail (request_irq err=%d)\n",
-                                       __func__, __LINE__, err);
-                       goto probe_err_3b;
-               }
-
-       }
-
-       /* get disk id */
-       err = mg_get_disk_id(host);
-       if (err) {
-               printk(KERN_ERR "%s:%d fail (err code : %d)\n",
-                               __func__, __LINE__, err);
-               err = -EIO;
-               goto probe_err_4;
-       }
-
-       err = register_blkdev(host->major, MG_DISK_NAME);
-       if (err < 0) {
-               printk(KERN_ERR "%s:%d register_blkdev fail (err code : %d)\n",
-                               __func__, __LINE__, err);
-               goto probe_err_4;
-       }
-       if (!host->major)
-               host->major = err;
-
-       spin_lock_init(&host->lock);
-
-       if (prv_data->use_polling)
-               host->breq = blk_init_queue(mg_request_poll, &host->lock);
-       else
-               host->breq = blk_init_queue(mg_request, &host->lock);
-
-       if (!host->breq) {
-               err = -ENOMEM;
-               printk(KERN_ERR "%s:%d (blk_init_queue) fail\n",
-                               __func__, __LINE__);
-               goto probe_err_5;
-       }
-       host->breq->queuedata = host;
-
-       /* mflash is random device, thanx for the noop */
-       err = elevator_change(host->breq, "noop");
-       if (err) {
-               printk(KERN_ERR "%s:%d (elevator_init) fail\n",
-                               __func__, __LINE__);
-               goto probe_err_6;
-       }
-       blk_queue_max_hw_sectors(host->breq, MG_MAX_SECTS);
-       blk_queue_logical_block_size(host->breq, MG_SECTOR_SIZE);
-
-       init_timer(&host->timer);
-       host->timer.function = mg_times_out;
-       host->timer.data = (unsigned long)host;
-
-       host->gd = alloc_disk(MG_DISK_MAX_PART);
-       if (!host->gd) {
-               printk(KERN_ERR "%s:%d (alloc_disk) fail\n",
-                               __func__, __LINE__);
-               err = -ENOMEM;
-               goto probe_err_7;
-       }
-       host->gd->major = host->major;
-       host->gd->first_minor = 0;
-       host->gd->fops = &mg_disk_ops;
-       host->gd->queue = host->breq;
-       host->gd->private_data = host;
-       sprintf(host->gd->disk_name, MG_DISK_NAME"a");
-
-       set_capacity(host->gd, host->n_sectors);
-
-       add_disk(host->gd);
-
-       return err;
-
-probe_err_7:
-       del_timer_sync(&host->timer);
-probe_err_6:
-       blk_cleanup_queue(host->breq);
-probe_err_5:
-       unregister_blkdev(host->major, MG_DISK_NAME);
-probe_err_4:
-       if (!prv_data->use_polling)
-               free_irq(host->irq, host);
-probe_err_3b:
-       gpio_free(host->rstout);
-probe_err_3a:
-       gpio_free(host->rst);
-probe_err_3:
-       iounmap(host->dev_base);
-probe_err_2:
-       kfree(host);
-probe_err:
-       return err;
-}
-
-static int mg_remove(struct platform_device *plat_dev)
-{
-       struct mg_drv_data *prv_data = plat_dev->dev.platform_data;
-       struct mg_host *host = prv_data->host;
-       int err = 0;
-
-       /* delete timer */
-       del_timer_sync(&host->timer);
-
-       /* remove disk */
-       if (host->gd) {
-               del_gendisk(host->gd);
-               put_disk(host->gd);
-       }
-       /* remove queue */
-       if (host->breq)
-               blk_cleanup_queue(host->breq);
-
-       /* unregister blk device */
-       unregister_blkdev(host->major, MG_DISK_NAME);
-
-       /* free irq */
-       if (!prv_data->use_polling)
-               free_irq(host->irq, host);
-
-       /* free reset-out pin */
-       if (prv_data->dev_attr != MG_BOOT_DEV)
-               gpio_free(host->rstout);
-
-       /* free rst pin */
-       if (host->rst)
-               gpio_free(host->rst);
-
-       /* unmap io */
-       if (host->dev_base)
-               iounmap(host->dev_base);
-
-       /* free mg_host */
-       kfree(host);
-
-       return err;
-}
-
-static struct platform_driver mg_disk_driver = {
-       .probe = mg_probe,
-       .remove = mg_remove,
-       .driver = {
-               .name = MG_DEV_NAME,
-               .pm = &mg_pm,
-       }
-};
-
-/****************************************************************************
- *
- * Module stuff
- *
- ****************************************************************************/
-
-static int __init mg_init(void)
-{
-       printk(KERN_INFO "mGine mflash driver, (c) 2008 mGine Co.\n");
-       return platform_driver_register(&mg_disk_driver);
-}
-
-static void __exit mg_exit(void)
-{
-       printk(KERN_INFO "mflash driver : bye bye\n");
-       platform_driver_unregister(&mg_disk_driver);
-}
-
-module_init(mg_init);
-module_exit(mg_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("unsik Kim <donari75@gmail.com>");
-MODULE_DESCRIPTION("mGine m[g]flash device driver");
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c

index f96ab717534c4c8fe60e830c4020c6bd0517cf07..02804cc79d82879f0bce6f9386eb9324b9289242 100644 (file)
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -169,6 +169,25 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev)
         return false; /* device present */
  }
  
+/* we have to use runtime tag to setup command header */
+static void mtip_init_cmd_header(struct request *rq)
+{
+       struct driver_data *dd = rq->q->queuedata;
+       struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
+       u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64;
+
+       /* Point the command headers at the command tables. */
+       cmd->command_header = dd->port->command_list +
+                               (sizeof(struct mtip_cmd_hdr) * rq->tag);
+       cmd->command_header_dma = dd->port->command_list_dma +
+                               (sizeof(struct mtip_cmd_hdr) * rq->tag);
+
+       if (host_cap_64)
+               cmd->command_header->ctbau = __force_bit2int cpu_to_le32((cmd->command_dma >> 16) >> 16);
+
+       cmd->command_header->ctba = __force_bit2int cpu_to_le32(cmd->command_dma & 0xFFFFFFFF);
+}
+
  static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd)
  {
         struct request *rq;
@@ -180,6 +199,9 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd)
         if (IS_ERR(rq))
                 return NULL;
  
+       /* Internal cmd isn't submitted via .queue_rq */
+       mtip_init_cmd_header(rq);
+
         return blk_mq_rq_to_pdu(rq);
  }
  
@@ -241,7 +263,8 @@ static void mtip_async_complete(struct mtip_port *port,
  
         rq = mtip_rq_from_tag(dd, tag);
  
-       blk_mq_complete_request(rq, status);
+       cmd->status = status;
+       blk_mq_complete_request(rq);
  }
  
  /*
@@ -2910,18 +2933,19 @@ static void mtip_softirq_done_fn(struct request *rq)
         if (unlikely(cmd->unaligned))
                 up(&dd->port->cmd_slot_unal);
  
-       blk_mq_end_request(rq, rq->errors);
+       blk_mq_end_request(rq, cmd->status);
  }
  
  static void mtip_abort_cmd(struct request *req, void *data,
                                                         bool reserved)
  {
+       struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
         struct driver_data *dd = data;
  
         dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);
  
         clear_bit(req->tag, dd->port->cmds_to_issue);
-       req->errors = -EIO;
+       cmd->status = -EIO;
         mtip_softirq_done_fn(req);
  }
  
@@ -3807,6 +3831,8 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
         struct request *rq = bd->rq;
         int ret;
  
+       mtip_init_cmd_header(rq);
+
         if (unlikely(mtip_check_unal_depth(hctx, rq)))
                 return BLK_MQ_RQ_QUEUE_BUSY;
  
@@ -3816,7 +3842,6 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
         if (likely(!ret))
                 return BLK_MQ_RQ_QUEUE_OK;
  
-       rq->errors = ret;
         return BLK_MQ_RQ_QUEUE_ERROR;
  }
  
@@ -3838,7 +3863,6 @@ static int mtip_init_cmd(void *data, struct request *rq, unsigned int hctx_idx,
  {
         struct driver_data *dd = data;
         struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
-       u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64;
  
         /*
          * For flush requests, request_idx starts at the end of the
@@ -3855,17 +3879,6 @@ static int mtip_init_cmd(void *data, struct request *rq, unsigned int hctx_idx,
  
         memset(cmd->command, 0, CMD_DMA_ALLOC_SZ);
  
-       /* Point the command headers at the command tables. */
-       cmd->command_header = dd->port->command_list +
-                               (sizeof(struct mtip_cmd_hdr) * request_idx);
-       cmd->command_header_dma = dd->port->command_list_dma +
-                               (sizeof(struct mtip_cmd_hdr) * request_idx);
-
-       if (host_cap_64)
-               cmd->command_header->ctbau = __force_bit2int cpu_to_le32((cmd->command_dma >> 16) >> 16);
-
-       cmd->command_header->ctba = __force_bit2int cpu_to_le32(cmd->command_dma & 0xFFFFFFFF);
-
         sg_init_table(cmd->sg, MTIP_MAX_SG);
         return 0;
  }
@@ -3889,7 +3902,7 @@ exit_handler:
         return BLK_EH_RESET_TIMER;
  }
  
-static struct blk_mq_ops mtip_mq_ops = {
+static const struct blk_mq_ops mtip_mq_ops = {
         .queue_rq       = mtip_queue_rq,
         .init_request   = mtip_init_cmd,
         .exit_request   = mtip_free_cmd,
@@ -3969,7 +3982,7 @@ static int mtip_block_initialize(struct driver_data *dd)
         dd->tags.reserved_tags = 1;
         dd->tags.cmd_size = sizeof(struct mtip_cmd);
         dd->tags.numa_node = dd->numa_node;
-       dd->tags.flags = BLK_MQ_F_SHOULD_MERGE;
+       dd->tags.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_NO_SCHED;
         dd->tags.driver_data = dd;
         dd->tags.timeout = MTIP_NCQ_CMD_TIMEOUT_MS;
  
@@ -4025,7 +4038,6 @@ skip_create_disk:
                 dd->queue->limits.discard_granularity = 4096;
                 blk_queue_max_discard_sectors(dd->queue,
                         MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES);
-               dd->queue->limits.discard_zeroes_data = 0;
         }
  
         /* Set the capacity of the device in 512 byte sectors. */
@@ -4107,9 +4119,11 @@ static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv)
         struct driver_data *dd = (struct driver_data *)data;
         struct mtip_cmd *cmd;
  
-       if (likely(!reserv))
-               blk_mq_complete_request(rq, -ENODEV);
-       else if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &dd->port->flags)) {
+       if (likely(!reserv)) {
+               cmd = blk_mq_rq_to_pdu(rq);
+               cmd->status = -ENODEV;
+               blk_mq_complete_request(rq);
+       } else if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &dd->port->flags)) {
  
                 cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
                 if (cmd->comp_func)
@@ -4162,7 +4176,7 @@ static int mtip_block_remove(struct driver_data *dd)
                 dev_info(&dd->pdev->dev, "device %s surprise removal\n",
                                                 dd->disk->disk_name);
  
-       blk_mq_freeze_queue_start(dd->queue);
+       blk_freeze_queue_start(dd->queue);
         blk_mq_stop_hw_queues(dd->queue);
         blk_mq_tagset_busy_iter(&dd->tags, mtip_no_dev_cleanup, dd);
  
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h

index 7617888f79449d55ac9402bfef7f01788ef138cf..57b41528a8248b27410b327a90853ac7ccdf645f 100644 (file)
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -352,6 +352,7 @@ struct mtip_cmd {
         int retries; /* The number of retries left for this command. */
  
         int direction; /* Data transfer direction */
+       int status;
  };
  
  /* Structure used to describe a port. */
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c

index 7e4287bc19e52991a82cf218906fe7384112921b..ac376b9b852d69677f292eba1afb15df419fe769 100644 (file)
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -40,47 +40,82 @@
  #include <asm/types.h>
  
  #include <linux/nbd.h>
+#include <linux/nbd-netlink.h>
+#include <net/genetlink.h>
  
  static DEFINE_IDR(nbd_index_idr);
  static DEFINE_MUTEX(nbd_index_mutex);
+static int nbd_total_devices = 0;
  
  struct nbd_sock {
         struct socket *sock;
         struct mutex tx_lock;
+       struct request *pending;
+       int sent;
+       bool dead;
+       int fallback_index;
+       int cookie;
+};
+
+struct recv_thread_args {
+       struct work_struct work;
+       struct nbd_device *nbd;
+       int index;
+};
+
+struct link_dead_args {
+       struct work_struct work;
+       int index;
  };
  
  #define NBD_TIMEDOUT                   0
  #define NBD_DISCONNECT_REQUESTED       1
  #define NBD_DISCONNECTED               2
-#define NBD_RUNNING                    3
+#define NBD_HAS_PID_FILE               3
+#define NBD_HAS_CONFIG_REF             4
+#define NBD_BOUND                      5
+#define NBD_DESTROY_ON_DISCONNECT      6
  
-struct nbd_device {
+struct nbd_config {
         u32 flags;
         unsigned long runtime_flags;
-       struct nbd_sock **socks;
-       int magic;
+       u64 dead_conn_timeout;
  
-       struct blk_mq_tag_set tag_set;
-
-       struct mutex config_lock;
-       struct gendisk *disk;
+       struct nbd_sock **socks;
         int num_connections;
+       atomic_t live_connections;
+       wait_queue_head_t conn_wait;
+
         atomic_t recv_threads;
         wait_queue_head_t recv_wq;
         loff_t blksize;
         loff_t bytesize;
-
-       struct task_struct *task_recv;
-       struct task_struct *task_setup;
-
  #if IS_ENABLED(CONFIG_DEBUG_FS)
         struct dentry *dbg_dir;
  #endif
  };
  
+struct nbd_device {
+       struct blk_mq_tag_set tag_set;
+
+       int index;
+       refcount_t config_refs;
+       refcount_t refs;
+       struct nbd_config *config;
+       struct mutex config_lock;
+       struct gendisk *disk;
+
+       struct list_head list;
+       struct task_struct *task_recv;
+       struct task_struct *task_setup;
+};
+
  struct nbd_cmd {
         struct nbd_device *nbd;
+       int index;
+       int cookie;
         struct completion send_complete;
+       int status;
  };
  
  #if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -98,18 +133,16 @@ static int part_shift;
  
  static int nbd_dev_dbg_init(struct nbd_device *nbd);
  static void nbd_dev_dbg_close(struct nbd_device *nbd);
-
+static void nbd_config_put(struct nbd_device *nbd);
+static void nbd_connect_reply(struct genl_info *info, int index);
+static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info);
+static void nbd_dead_link_work(struct work_struct *work);
  
  static inline struct device *nbd_to_dev(struct nbd_device *nbd)
  {
         return disk_to_dev(nbd->disk);
  }
  
-static bool nbd_is_connected(struct nbd_device *nbd)
-{
-       return !!nbd->task_recv;
-}
-
  static const char *nbdcmd_to_ascii(int cmd)
  {
         switch (cmd) {
@@ -122,43 +155,104 @@ static const char *nbdcmd_to_ascii(int cmd)
         return "invalid";
  }
  
-static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
+static ssize_t pid_show(struct device *dev,
+                       struct device_attribute *attr, char *buf)
  {
-       bd_set_size(bdev, 0);
-       set_capacity(nbd->disk, 0);
-       kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
+       struct gendisk *disk = dev_to_disk(dev);
+       struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
  
-       return 0;
+       return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
+}
+
+static struct device_attribute pid_attr = {
+       .attr = { .name = "pid", .mode = S_IRUGO},
+       .show = pid_show,
+};
+
+static void nbd_dev_remove(struct nbd_device *nbd)
+{
+       struct gendisk *disk = nbd->disk;
+       if (disk) {
+               del_gendisk(disk);
+               blk_cleanup_queue(disk->queue);
+               blk_mq_free_tag_set(&nbd->tag_set);
+               disk->private_data = NULL;
+               put_disk(disk);
+       }
+       kfree(nbd);
+}
+
+static void nbd_put(struct nbd_device *nbd)
+{
+       if (refcount_dec_and_mutex_lock(&nbd->refs,
+                                       &nbd_index_mutex)) {
+               idr_remove(&nbd_index_idr, nbd->index);
+               mutex_unlock(&nbd_index_mutex);
+               nbd_dev_remove(nbd);
+       }
+}
+
+static int nbd_disconnected(struct nbd_config *config)
+{
+       return test_bit(NBD_DISCONNECTED, &config->runtime_flags) ||
+               test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags);
+}
+
+static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
+                               int notify)
+{
+       if (!nsock->dead && notify && !nbd_disconnected(nbd->config)) {
+               struct link_dead_args *args;
+               args = kmalloc(sizeof(struct link_dead_args), GFP_NOIO);
+               if (args) {
+                       INIT_WORK(&args->work, nbd_dead_link_work);
+                       args->index = nbd->index;
+                       queue_work(system_wq, &args->work);
+               }
+       }
+       if (!nsock->dead) {
+               kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
+               atomic_dec(&nbd->config->live_connections);
+       }
+       nsock->dead = true;
+       nsock->pending = NULL;
+       nsock->sent = 0;
  }
  
-static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev)
+static void nbd_size_clear(struct nbd_device *nbd)
  {
-       blk_queue_logical_block_size(nbd->disk->queue, nbd->blksize);
-       blk_queue_physical_block_size(nbd->disk->queue, nbd->blksize);
-       bd_set_size(bdev, nbd->bytesize);
-       set_capacity(nbd->disk, nbd->bytesize >> 9);
+       if (nbd->config->bytesize) {
+               set_capacity(nbd->disk, 0);
+               kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
+       }
+}
+
+static void nbd_size_update(struct nbd_device *nbd)
+{
+       struct nbd_config *config = nbd->config;
+       blk_queue_logical_block_size(nbd->disk->queue, config->blksize);
+       blk_queue_physical_block_size(nbd->disk->queue, config->blksize);
+       set_capacity(nbd->disk, config->bytesize >> 9);
         kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
  }
  
-static void nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
-                       loff_t blocksize, loff_t nr_blocks)
+static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize,
+                        loff_t nr_blocks)
  {
-       nbd->blksize = blocksize;
-       nbd->bytesize = blocksize * nr_blocks;
-       if (nbd_is_connected(nbd))
-               nbd_size_update(nbd, bdev);
+       struct nbd_config *config = nbd->config;
+       config->blksize = blocksize;
+       config->bytesize = blocksize * nr_blocks;
+       nbd_size_update(nbd);
  }
  
-static void nbd_end_request(struct nbd_cmd *cmd)
+static void nbd_complete_rq(struct request *req)
  {
-       struct nbd_device *nbd = cmd->nbd;
-       struct request *req = blk_mq_rq_from_pdu(cmd);
-       int error = req->errors ? -EIO : 0;
+       struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
  
-       dev_dbg(nbd_to_dev(nbd), "request %p: %s\n", cmd,
-               error ? "failed" : "done");
+       dev_dbg(nbd_to_dev(cmd->nbd), "request %p: %s\n", cmd,
+               cmd->status ? "failed" : "done");
  
-       blk_mq_complete_request(req, error);
+       blk_mq_end_request(req, cmd->status);
  }
  
  /*
@@ -166,17 +260,18 @@ static void nbd_end_request(struct nbd_cmd *cmd)
   */
  static void sock_shutdown(struct nbd_device *nbd)
  {
+       struct nbd_config *config = nbd->config;
         int i;
  
-       if (nbd->num_connections == 0)
+       if (config->num_connections == 0)
                 return;
-       if (test_and_set_bit(NBD_DISCONNECTED, &nbd->runtime_flags))
+       if (test_and_set_bit(NBD_DISCONNECTED, &config->runtime_flags))
                 return;
  
-       for (i = 0; i < nbd->num_connections; i++) {
-               struct nbd_sock *nsock = nbd->socks[i];
+       for (i = 0; i < config->num_connections; i++) {
+               struct nbd_sock *nsock = config->socks[i];
                 mutex_lock(&nsock->tx_lock);
-               kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
+               nbd_mark_nsock_dead(nbd, nsock, 0);
                 mutex_unlock(&nsock->tx_lock);
         }
         dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n");
@@ -187,14 +282,58 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
  {
         struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
         struct nbd_device *nbd = cmd->nbd;
+       struct nbd_config *config;
  
-       dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
-       set_bit(NBD_TIMEDOUT, &nbd->runtime_flags);
-       req->errors++;
+       if (!refcount_inc_not_zero(&nbd->config_refs)) {
+               cmd->status = -EIO;
+               return BLK_EH_HANDLED;
+       }
  
-       mutex_lock(&nbd->config_lock);
+       /* If we are waiting on our dead timer then we could get timeout
+        * callbacks for our request.  For this we just want to reset the timer
+        * and let the queue side take care of everything.
+        */
+       if (!completion_done(&cmd->send_complete)) {
+               nbd_config_put(nbd);
+               return BLK_EH_RESET_TIMER;
+       }
+       config = nbd->config;
+
+       if (config->num_connections > 1) {
+               dev_err_ratelimited(nbd_to_dev(nbd),
+                                   "Connection timed out, retrying\n");
+               /*
+                * Hooray we have more connections, requeue this IO, the submit
+                * path will put it on a real connection.
+                */
+               if (config->socks && config->num_connections > 1) {
+                       if (cmd->index < config->num_connections) {
+                               struct nbd_sock *nsock =
+                                       config->socks[cmd->index];
+                               mutex_lock(&nsock->tx_lock);
+                               /* We can have multiple outstanding requests, so
+                                * we don't want to mark the nsock dead if we've
+                                * already reconnected with a new socket, so
+                                * only mark it dead if it's the same socket we
+                                * were sent out on.
+                                */
+                               if (cmd->cookie == nsock->cookie)
+                                       nbd_mark_nsock_dead(nbd, nsock, 1);
+                               mutex_unlock(&nsock->tx_lock);
+                       }
+                       blk_mq_requeue_request(req, true);
+                       nbd_config_put(nbd);
+                       return BLK_EH_NOT_HANDLED;
+               }
+       } else {
+               dev_err_ratelimited(nbd_to_dev(nbd),
+                                   "Connection timed out\n");
+       }
+       set_bit(NBD_TIMEDOUT, &config->runtime_flags);
+       cmd->status = -EIO;
         sock_shutdown(nbd);
-       mutex_unlock(&nbd->config_lock);
+       nbd_config_put(nbd);
+
         return BLK_EH_HANDLED;
  }
  
@@ -202,9 +341,10 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
   *  Send or receive packet.
   */
  static int sock_xmit(struct nbd_device *nbd, int index, int send,
-                    struct iov_iter *iter, int msg_flags)
+                    struct iov_iter *iter, int msg_flags, int *sent)
  {
-       struct socket *sock = nbd->socks[index]->sock;
+       struct nbd_config *config = nbd->config;
+       struct socket *sock = config->socks[index]->sock;
         int result;
         struct msghdr msg;
         unsigned long pflags = current->flags;
@@ -237,6 +377,8 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send,
                                 result = -EPIPE; /* short read */
                         break;
                 }
+               if (sent)
+                       *sent += result;
         } while (msg_data_left(&msg));
  
         tsk_restore_flags(current, pflags, PF_MEMALLOC);
@@ -248,6 +390,8 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send,
  static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
  {
         struct request *req = blk_mq_rq_from_pdu(cmd);
+       struct nbd_config *config = nbd->config;
+       struct nbd_sock *nsock = config->socks[index];
         int result;
         struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)};
         struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
@@ -256,6 +400,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
         struct bio *bio;
         u32 type;
         u32 tag = blk_mq_unique_tag(req);
+       int sent = nsock->sent, skip = 0;
  
         iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
  
@@ -277,12 +422,25 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
         }
  
         if (rq_data_dir(req) == WRITE &&
-           (nbd->flags & NBD_FLAG_READ_ONLY)) {
+           (config->flags & NBD_FLAG_READ_ONLY)) {
                 dev_err_ratelimited(disk_to_dev(nbd->disk),
                                     "Write on read-only\n");
                 return -EIO;
         }
  
+       /* We did a partial send previously, and we at least sent the whole
+        * request struct, so just go and send the rest of the pages in the
+        * request.
+        */
+       if (sent) {
+               if (sent >= sizeof(request)) {
+                       skip = sent - sizeof(request);
+                       goto send_pages;
+               }
+               iov_iter_advance(&from, sent);
+       }
+       cmd->index = index;
+       cmd->cookie = nsock->cookie;
         request.type = htonl(type);
         if (type != NBD_CMD_FLUSH) {
                 request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
@@ -294,15 +452,27 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
                 cmd, nbdcmd_to_ascii(type),
                 (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
         result = sock_xmit(nbd, index, 1, &from,
-                       (type == NBD_CMD_WRITE) ? MSG_MORE : 0);
+                       (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
         if (result <= 0) {
+               if (result == -ERESTARTSYS) {
+                       /* If we haven't sent anything we can just return BUSY,
+                        * however if we have sent something we need to make
+                        * sure we only allow this req to be sent until we are
+                        * completely done.
+                        */
+                       if (sent) {
+                               nsock->pending = req;
+                               nsock->sent = sent;
+                       }
+                       return BLK_MQ_RQ_QUEUE_BUSY;
+               }
                 dev_err_ratelimited(disk_to_dev(nbd->disk),
                         "Send control failed (result %d)\n", result);
-               return -EIO;
+               return -EAGAIN;
         }
-
+send_pages:
         if (type != NBD_CMD_WRITE)
-               return 0;
+               goto out;
  
         bio = req->bio;
         while (bio) {
@@ -318,12 +488,29 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
                                 cmd, bvec.bv_len);
                         iov_iter_bvec(&from, ITER_BVEC | WRITE,
                                       &bvec, 1, bvec.bv_len);
-                       result = sock_xmit(nbd, index, 1, &from, flags);
+                       if (skip) {
+                               if (skip >= iov_iter_count(&from)) {
+                                       skip -= iov_iter_count(&from);
+                                       continue;
+                               }
+                               iov_iter_advance(&from, skip);
+                               skip = 0;
+                       }
+                       result = sock_xmit(nbd, index, 1, &from, flags, &sent);
                         if (result <= 0) {
+                               if (result == -ERESTARTSYS) {
+                                       /* We've already sent the header, we
+                                        * have no choice but to set pending and
+                                        * return BUSY.
+                                        */
+                                       nsock->pending = req;
+                                       nsock->sent = sent;
+                                       return BLK_MQ_RQ_QUEUE_BUSY;
+                               }
                                 dev_err(disk_to_dev(nbd->disk),
                                         "Send data failed (result %d)\n",
                                         result);
-                               return -EIO;
+                               return -EAGAIN;
                         }
                         /*
                          * The completion might already have come in,
@@ -336,12 +523,16 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
                 }
                 bio = next;
         }
+out:
+       nsock->pending = NULL;
+       nsock->sent = 0;
         return 0;
  }
  
  /* NULL returned = something went wrong, inform userspace */
  static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
  {
+       struct nbd_config *config = nbd->config;
         int result;
         struct nbd_reply reply;
         struct nbd_cmd *cmd;
@@ -353,10 +544,9 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
  
         reply.magic = 0;
         iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply));
-       result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL);
+       result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
         if (result <= 0) {
-               if (!test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) &&
-                   !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
+               if (!nbd_disconnected(config))
                         dev_err(disk_to_dev(nbd->disk),
                                 "Receive control failed (result %d)\n", result);
                 return ERR_PTR(result);
@@ -383,7 +573,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
         if (ntohl(reply.error)) {
                 dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
                         ntohl(reply.error));
-               req->errors++;
+               cmd->status = -EIO;
                 return cmd;
         }
  
@@ -395,12 +585,23 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
                 rq_for_each_segment(bvec, req, iter) {
                         iov_iter_bvec(&to, ITER_BVEC | READ,
                                       &bvec, 1, bvec.bv_len);
-                       result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL);
+                       result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
                         if (result <= 0) {
                                 dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
                                         result);
-                               req->errors++;
-                               return cmd;
+                               /*
+                                * If we've disconnected or we only have 1
+                                * connection then we need to make sure we
+                                * complete this request, otherwise error out
+                                * and let the timeout stuff handle resubmitting
+                                * this request onto another connection.
+                                */
+                               if (nbd_disconnected(config) ||
+                                   config->num_connections <= 1) {
+                                       cmd->status = -EIO;
+                                       return cmd;
+                               }
+                               return ERR_PTR(-EIO);
                         }
                         dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
                                 cmd, bvec.bv_len);
@@ -412,54 +613,34 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
         return cmd;
  }
  
-static ssize_t pid_show(struct device *dev,
-                       struct device_attribute *attr, char *buf)
-{
-       struct gendisk *disk = dev_to_disk(dev);
-       struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
-
-       return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
-}
-
-static struct device_attribute pid_attr = {
-       .attr = { .name = "pid", .mode = S_IRUGO},
-       .show = pid_show,
-};
-
-struct recv_thread_args {
-       struct work_struct work;
-       struct nbd_device *nbd;
-       int index;
-};
-
  static void recv_work(struct work_struct *work)
  {
         struct recv_thread_args *args = container_of(work,
                                                      struct recv_thread_args,
                                                      work);
         struct nbd_device *nbd = args->nbd;
+       struct nbd_config *config = nbd->config;
         struct nbd_cmd *cmd;
         int ret = 0;
  
-       BUG_ON(nbd->magic != NBD_MAGIC);
         while (1) {
                 cmd = nbd_read_stat(nbd, args->index);
                 if (IS_ERR(cmd)) {
+                       struct nbd_sock *nsock = config->socks[args->index];
+
+                       mutex_lock(&nsock->tx_lock);
+                       nbd_mark_nsock_dead(nbd, nsock, 1);
+                       mutex_unlock(&nsock->tx_lock);
                         ret = PTR_ERR(cmd);
                         break;
                 }
  
-               nbd_end_request(cmd);
+               blk_mq_complete_request(blk_mq_rq_from_pdu(cmd));
         }
-
-       /*
-        * We got an error, shut everybody down if this wasn't the result of a
-        * disconnect request.
-        */
-       if (ret && !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
-               sock_shutdown(nbd);
-       atomic_dec(&nbd->recv_threads);
-       wake_up(&nbd->recv_wq);
+       atomic_dec(&config->recv_threads);
+       wake_up(&config->recv_wq);
+       nbd_config_put(nbd);
+       kfree(args);
  }
  
  static void nbd_clear_req(struct request *req, void *data, bool reserved)
@@ -469,68 +650,154 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved)
         if (!blk_mq_request_started(req))
                 return;
         cmd = blk_mq_rq_to_pdu(req);
-       req->errors++;
-       nbd_end_request(cmd);
+       cmd->status = -EIO;
+       blk_mq_complete_request(req);
  }
  
  static void nbd_clear_que(struct nbd_device *nbd)
  {
-       BUG_ON(nbd->magic != NBD_MAGIC);
-
+       blk_mq_stop_hw_queues(nbd->disk->queue);
         blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL);
+       blk_mq_start_hw_queues(nbd->disk->queue);
         dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
  }
  
+static int find_fallback(struct nbd_device *nbd, int index)
+{
+       struct nbd_config *config = nbd->config;
+       int new_index = -1;
+       struct nbd_sock *nsock = config->socks[index];
+       int fallback = nsock->fallback_index;
+
+       if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
+               return new_index;
  
-static void nbd_handle_cmd(struct nbd_cmd *cmd, int index)
+       if (config->num_connections <= 1) {
+               dev_err_ratelimited(disk_to_dev(nbd->disk),
+                                   "Attempted send on invalid socket\n");
+               return new_index;
+       }
+
+       if (fallback >= 0 && fallback < config->num_connections &&
+           !config->socks[fallback]->dead)
+               return fallback;
+
+       if (nsock->fallback_index < 0 ||
+           nsock->fallback_index >= config->num_connections ||
+           config->socks[nsock->fallback_index]->dead) {
+               int i;
+               for (i = 0; i < config->num_connections; i++) {
+                       if (i == index)
+                               continue;
+                       if (!config->socks[i]->dead) {
+                               new_index = i;
+                               break;
+                       }
+               }
+               nsock->fallback_index = new_index;
+               if (new_index < 0) {
+                       dev_err_ratelimited(disk_to_dev(nbd->disk),
+                                           "Dead connection, failed to find a fallback\n");
+                       return new_index;
+               }
+       }
+       new_index = nsock->fallback_index;
+       return new_index;
+}
+
+static int wait_for_reconnect(struct nbd_device *nbd)
+{
+       struct nbd_config *config = nbd->config;
+       if (!config->dead_conn_timeout)
+               return 0;
+       if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
+               return 0;
+       wait_event_interruptible_timeout(config->conn_wait,
+                                        atomic_read(&config->live_connections),
+                                        config->dead_conn_timeout);
+       return atomic_read(&config->live_connections);
+}
+
+static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
  {
         struct request *req = blk_mq_rq_from_pdu(cmd);
         struct nbd_device *nbd = cmd->nbd;
+       struct nbd_config *config;
         struct nbd_sock *nsock;
+       int ret;
  
-       if (index >= nbd->num_connections) {
+       if (!refcount_inc_not_zero(&nbd->config_refs)) {
                 dev_err_ratelimited(disk_to_dev(nbd->disk),
-                                   "Attempted send on invalid socket\n");
-               goto error_out;
+                                   "Socks array is empty\n");
+               return -EINVAL;
         }
+       config = nbd->config;
  
-       if (test_bit(NBD_DISCONNECTED, &nbd->runtime_flags)) {
+       if (index >= config->num_connections) {
                 dev_err_ratelimited(disk_to_dev(nbd->disk),
-                                   "Attempted send on closed socket\n");
-               goto error_out;
+                                   "Attempted send on invalid socket\n");
+               nbd_config_put(nbd);
+               return -EINVAL;
         }
-
-       req->errors = 0;
-
-       nsock = nbd->socks[index];
+       cmd->status = 0;
+again:
+       nsock = config->socks[index];
         mutex_lock(&nsock->tx_lock);
-       if (unlikely(!nsock->sock)) {
+       if (nsock->dead) {
+               int old_index = index;
+               index = find_fallback(nbd, index);
                 mutex_unlock(&nsock->tx_lock);
-               dev_err_ratelimited(disk_to_dev(nbd->disk),
-                                   "Attempted send on closed socket\n");
-               goto error_out;
+               if (index < 0) {
+                       if (wait_for_reconnect(nbd)) {
+                               index = old_index;
+                               goto again;
+                       }
+                       /* All the sockets should already be down at this point,
+                        * we just want to make sure that DISCONNECTED is set so
+                        * any requests that come in that were queued waiting
+                        * for the reconnect timer don't trigger the timer again
+                        * and instead just error out.
+                        */
+                       sock_shutdown(nbd);
+                       nbd_config_put(nbd);
+                       return -EIO;
+               }
+               goto again;
         }
  
-       if (nbd_send_cmd(nbd, cmd, index) != 0) {
+       /* Handle the case that we have a pending request that was partially
+        * transmitted that _has_ to be serviced first.  We need to call requeue
+        * here so that it gets put _after_ the request that is already on the
+        * dispatch list.
+        */
+       if (unlikely(nsock->pending && nsock->pending != req)) {
+               blk_mq_requeue_request(req, true);
+               ret = 0;
+               goto out;
+       }
+       /*
+        * Some failures are related to the link going down, so anything that
+        * returns EAGAIN can be retried on a different socket.
+        */
+       ret = nbd_send_cmd(nbd, cmd, index);
+       if (ret == -EAGAIN) {
                 dev_err_ratelimited(disk_to_dev(nbd->disk),
-                                   "Request send failed\n");
-               req->errors++;
-               nbd_end_request(cmd);
+                                   "Request send failed trying another connection\n");
+               nbd_mark_nsock_dead(nbd, nsock, 1);
+               mutex_unlock(&nsock->tx_lock);
+               goto again;
         }
-
+out:
         mutex_unlock(&nsock->tx_lock);
-
-       return;
-
-error_out:
-       req->errors++;
-       nbd_end_request(cmd);
+       nbd_config_put(nbd);
+       return ret;
  }
  
  static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
                         const struct blk_mq_queue_data *bd)
  {
         struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
+       int ret;
  
         /*
          * Since we look at the bio's to send the request over the network we
@@ -543,15 +810,26 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
          */
         init_completion(&cmd->send_complete);
         blk_mq_start_request(bd->rq);
-       nbd_handle_cmd(cmd, hctx->queue_num);
+
+       /* We can be called directly from the user space process, which means we
+        * could possibly have signals pending so our sendmsg will fail.  In
+        * this case we need to return that we are busy, otherwise error out as
+        * appropriate.
+        */
+       ret = nbd_handle_cmd(cmd, hctx->queue_num);
+       if (ret < 0)
+               ret = BLK_MQ_RQ_QUEUE_ERROR;
+       if (!ret)
+               ret = BLK_MQ_RQ_QUEUE_OK;
         complete(&cmd->send_complete);
  
-       return BLK_MQ_RQ_QUEUE_OK;
+       return ret;
  }
  
-static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev,
-                         unsigned long arg)
+static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
+                         bool netlink)
  {
+       struct nbd_config *config = nbd->config;
         struct socket *sock;
         struct nbd_sock **socks;
         struct nbd_sock *nsock;
@@ -561,62 +839,132 @@ static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev,
         if (!sock)
                 return err;
  
-       if (!nbd->task_setup)
+       if (!netlink && !nbd->task_setup &&
+           !test_bit(NBD_BOUND, &config->runtime_flags))
                 nbd->task_setup = current;
-       if (nbd->task_setup != current) {
+
+       if (!netlink &&
+           (nbd->task_setup != current ||
+            test_bit(NBD_BOUND, &config->runtime_flags))) {
                 dev_err(disk_to_dev(nbd->disk),
                         "Device being setup by another task");
-               return -EINVAL;
+               sockfd_put(sock);
+               return -EBUSY;
         }
  
-       socks = krealloc(nbd->socks, (nbd->num_connections + 1) *
+       socks = krealloc(config->socks, (config->num_connections + 1) *
                          sizeof(struct nbd_sock *), GFP_KERNEL);
-       if (!socks)
+       if (!socks) {
+               sockfd_put(sock);
                 return -ENOMEM;
+       }
         nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL);
-       if (!nsock)
+       if (!nsock) {
+               sockfd_put(sock);
                 return -ENOMEM;
+       }
  
-       nbd->socks = socks;
+       config->socks = socks;
  
+       nsock->fallback_index = -1;
+       nsock->dead = false;
         mutex_init(&nsock->tx_lock);
         nsock->sock = sock;
-       socks[nbd->num_connections++] = nsock;
+       nsock->pending = NULL;
+       nsock->sent = 0;
+       nsock->cookie = 0;
+       socks[config->num_connections++] = nsock;
+       atomic_inc(&config->live_connections);
  
-       if (max_part)
-               bdev->bd_invalidated = 1;
         return 0;
  }
  
+static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
+{
+       struct nbd_config *config = nbd->config;
+       struct socket *sock, *old;
+       struct recv_thread_args *args;
+       int i;
+       int err;
+
+       sock = sockfd_lookup(arg, &err);
+       if (!sock)
+               return err;
+
+       args = kzalloc(sizeof(*args), GFP_KERNEL);
+       if (!args) {
+               sockfd_put(sock);
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < config->num_connections; i++) {
+               struct nbd_sock *nsock = config->socks[i];
+
+               if (!nsock->dead)
+                       continue;
+
+               mutex_lock(&nsock->tx_lock);
+               if (!nsock->dead) {
+                       mutex_unlock(&nsock->tx_lock);
+                       continue;
+               }
+               sk_set_memalloc(sock->sk);
+               atomic_inc(&config->recv_threads);
+               refcount_inc(&nbd->config_refs);
+               old = nsock->sock;
+               nsock->fallback_index = -1;
+               nsock->sock = sock;
+               nsock->dead = false;
+               INIT_WORK(&args->work, recv_work);
+               args->index = i;
+               args->nbd = nbd;
+               nsock->cookie++;
+               mutex_unlock(&nsock->tx_lock);
+               sockfd_put(old);
+
+               /* We take the tx_lock in an error path in the recv_work, so we
+                * need to queue_work outside of the tx_lock.
+                */
+               queue_work(recv_workqueue, &args->work);
+
+               atomic_inc(&config->live_connections);
+               wake_up(&config->conn_wait);
+               return 0;
+       }
+       sockfd_put(sock);
+       kfree(args);
+       return -ENOSPC;
+}
+
  /* Reset all properties of an NBD device */
  static void nbd_reset(struct nbd_device *nbd)
  {
-       nbd->runtime_flags = 0;
-       nbd->blksize = 1024;
-       nbd->bytesize = 0;
-       set_capacity(nbd->disk, 0);
-       nbd->flags = 0;
+       nbd->config = NULL;
         nbd->tag_set.timeout = 0;
         queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
  }
  
  static void nbd_bdev_reset(struct block_device *bdev)
  {
-       set_device_ro(bdev, false);
-       bdev->bd_inode->i_size = 0;
+       if (bdev->bd_openers > 1)
+               return;
+       bd_set_size(bdev, 0);
         if (max_part > 0) {
                 blkdev_reread_part(bdev);
                 bdev->bd_invalidated = 1;
         }
  }
  
-static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev)
+static void nbd_parse_flags(struct nbd_device *nbd)
  {
-       if (nbd->flags & NBD_FLAG_READ_ONLY)
-               set_device_ro(bdev, true);
-       if (nbd->flags & NBD_FLAG_SEND_TRIM)
+       struct nbd_config *config = nbd->config;
+       if (config->flags & NBD_FLAG_READ_ONLY)
+               set_disk_ro(nbd->disk, true);
+       else
+               set_disk_ro(nbd->disk, false);
+       if (config->flags & NBD_FLAG_SEND_TRIM)
                 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
-       if (nbd->flags & NBD_FLAG_SEND_FLUSH)
+       if (config->flags & NBD_FLAG_SEND_FLUSH)
                 blk_queue_write_cache(nbd->disk->queue, true, false);
         else
                 blk_queue_write_cache(nbd->disk->queue, false, false);
@@ -624,6 +972,7 @@ static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev)
  
  static void send_disconnects(struct nbd_device *nbd)
  {
+       struct nbd_config *config = nbd->config;
         struct nbd_request request = {
                 .magic = htonl(NBD_REQUEST_MAGIC),
                 .type = htonl(NBD_CMD_DISC),
@@ -632,163 +981,184 @@ static void send_disconnects(struct nbd_device *nbd)
         struct iov_iter from;
         int i, ret;
  
-       for (i = 0; i < nbd->num_connections; i++) {
+       for (i = 0; i < config->num_connections; i++) {
                 iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
-               ret = sock_xmit(nbd, i, 1, &from, 0);
+               ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
                 if (ret <= 0)
                         dev_err(disk_to_dev(nbd->disk),
                                 "Send disconnect failed %d\n", ret);
         }
  }
  
-static int nbd_disconnect(struct nbd_device *nbd, struct block_device *bdev)
+static int nbd_disconnect(struct nbd_device *nbd)
  {
-       dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
-       if (!nbd->socks)
-               return -EINVAL;
-
-       mutex_unlock(&nbd->config_lock);
-       fsync_bdev(bdev);
-       mutex_lock(&nbd->config_lock);
-
-       /* Check again after getting mutex back.  */
-       if (!nbd->socks)
-               return -EINVAL;
+       struct nbd_config *config = nbd->config;
  
+       dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
         if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED,
-                             &nbd->runtime_flags))
+                             &config->runtime_flags))
                 send_disconnects(nbd);
         return 0;
  }
  
-static int nbd_clear_sock(struct nbd_device *nbd, struct block_device *bdev)
+static void nbd_clear_sock(struct nbd_device *nbd)
  {
         sock_shutdown(nbd);
         nbd_clear_que(nbd);
-       kill_bdev(bdev);
-       nbd_bdev_reset(bdev);
-       /*
-        * We want to give the run thread a chance to wait for everybody
-        * to clean up and then do it's own cleanup.
-        */
-       if (!test_bit(NBD_RUNNING, &nbd->runtime_flags) &&
-           nbd->num_connections) {
-               int i;
+       nbd->task_setup = NULL;
+}
  
-               for (i = 0; i < nbd->num_connections; i++) {
-                       sockfd_put(nbd->socks[i]->sock);
-                       kfree(nbd->socks[i]);
+static void nbd_config_put(struct nbd_device *nbd)
+{
+       if (refcount_dec_and_mutex_lock(&nbd->config_refs,
+                                       &nbd->config_lock)) {
+               struct nbd_config *config = nbd->config;
+               nbd_dev_dbg_close(nbd);
+               nbd_size_clear(nbd);
+               if (test_and_clear_bit(NBD_HAS_PID_FILE,
+                                      &config->runtime_flags))
+                       device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
+               nbd->task_recv = NULL;
+               nbd_clear_sock(nbd);
+               if (config->num_connections) {
+                       int i;
+                       for (i = 0; i < config->num_connections; i++) {
+                               sockfd_put(config->socks[i]->sock);
+                               kfree(config->socks[i]);
+                       }
+                       kfree(config->socks);
                 }
-               kfree(nbd->socks);
-               nbd->socks = NULL;
-               nbd->num_connections = 0;
-       }
-       nbd->task_setup = NULL;
+               nbd_reset(nbd);
  
-       return 0;
+               mutex_unlock(&nbd->config_lock);
+               nbd_put(nbd);
+               module_put(THIS_MODULE);
+       }
  }
  
-static int nbd_start_device(struct nbd_device *nbd, struct block_device *bdev)
+static int nbd_start_device(struct nbd_device *nbd)
  {
-       struct recv_thread_args *args;
-       int num_connections = nbd->num_connections;
+       struct nbd_config *config = nbd->config;
+       int num_connections = config->num_connections;
         int error = 0, i;
  
         if (nbd->task_recv)
                 return -EBUSY;
-       if (!nbd->socks)
+       if (!config->socks)
                 return -EINVAL;
         if (num_connections > 1 &&
-           !(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) {
+           !(config->flags & NBD_FLAG_CAN_MULTI_CONN)) {
                 dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n");
-               error = -EINVAL;
-               goto out_err;
+               return -EINVAL;
         }
  
-       set_bit(NBD_RUNNING, &nbd->runtime_flags);
-       blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections);
-       args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL);
-       if (!args) {
-               error = -ENOMEM;
-               goto out_err;
-       }
+       blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
         nbd->task_recv = current;
-       mutex_unlock(&nbd->config_lock);
  
-       nbd_parse_flags(nbd, bdev);
+       nbd_parse_flags(nbd);
  
         error = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
         if (error) {
                 dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
-               goto out_recv;
+               return error;
         }
-
-       nbd_size_update(nbd, bdev);
+       set_bit(NBD_HAS_PID_FILE, &config->runtime_flags);
  
         nbd_dev_dbg_init(nbd);
         for (i = 0; i < num_connections; i++) {
-               sk_set_memalloc(nbd->socks[i]->sock->sk);
-               atomic_inc(&nbd->recv_threads);
-               INIT_WORK(&args[i].work, recv_work);
-               args[i].nbd = nbd;
-               args[i].index = i;
-               queue_work(recv_workqueue, &args[i].work);
-       }
-       wait_event_interruptible(nbd->recv_wq,
-                                atomic_read(&nbd->recv_threads) == 0);
-       for (i = 0; i < num_connections; i++)
-               flush_work(&args[i].work);
-       nbd_dev_dbg_close(nbd);
-       nbd_size_clear(nbd, bdev);
-       device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
-out_recv:
-       mutex_lock(&nbd->config_lock);
-       nbd->task_recv = NULL;
-out_err:
-       clear_bit(NBD_RUNNING, &nbd->runtime_flags);
-       nbd_clear_sock(nbd, bdev);
+               struct recv_thread_args *args;
  
+               args = kzalloc(sizeof(*args), GFP_KERNEL);
+               if (!args) {
+                       sock_shutdown(nbd);
+                       return -ENOMEM;
+               }
+               sk_set_memalloc(config->socks[i]->sock->sk);
+               atomic_inc(&config->recv_threads);
+               refcount_inc(&nbd->config_refs);
+               INIT_WORK(&args->work, recv_work);
+               args->nbd = nbd;
+               args->index = i;
+               queue_work(recv_workqueue, &args->work);
+       }
+       return error;
+}
+
+static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev)
+{
+       struct nbd_config *config = nbd->config;
+       int ret;
+
+       ret = nbd_start_device(nbd);
+       if (ret)
+               return ret;
+
+       bd_set_size(bdev, config->bytesize);
+       if (max_part)
+               bdev->bd_invalidated = 1;
+       mutex_unlock(&nbd->config_lock);
+       ret = wait_event_interruptible(config->recv_wq,
+                                        atomic_read(&config->recv_threads) == 0);
+       if (ret)
+               sock_shutdown(nbd);
+       mutex_lock(&nbd->config_lock);
+       bd_set_size(bdev, 0);
         /* user requested, ignore socket errors */
-       if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
-               error = 0;
-       if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags))
-               error = -ETIMEDOUT;
+       if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags))
+               ret = 0;
+       if (test_bit(NBD_TIMEDOUT, &config->runtime_flags))
+               ret = -ETIMEDOUT;
+       return ret;
+}
  
-       nbd_reset(nbd);
-       return error;
+static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
+                                struct block_device *bdev)
+{
+       sock_shutdown(nbd);
+       kill_bdev(bdev);
+       nbd_bdev_reset(bdev);
+       if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+                              &nbd->config->runtime_flags))
+               nbd_config_put(nbd);
  }
  
  /* Must be called with config_lock held */
  static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
                        unsigned int cmd, unsigned long arg)
  {
+       struct nbd_config *config = nbd->config;
+
         switch (cmd) {
         case NBD_DISCONNECT:
-               return nbd_disconnect(nbd, bdev);
+               return nbd_disconnect(nbd);
         case NBD_CLEAR_SOCK:
-               return nbd_clear_sock(nbd, bdev);
+               nbd_clear_sock_ioctl(nbd, bdev);
+               return 0;
         case NBD_SET_SOCK:
-               return nbd_add_socket(nbd, bdev, arg);
+               return nbd_add_socket(nbd, arg, false);
         case NBD_SET_BLKSIZE:
-               nbd_size_set(nbd, bdev, arg,
-                            div_s64(nbd->bytesize, arg));
+               nbd_size_set(nbd, arg,
+                            div_s64(config->bytesize, arg));
                 return 0;
         case NBD_SET_SIZE:
-               nbd_size_set(nbd, bdev, nbd->blksize,
-                            div_s64(arg, nbd->blksize));
+               nbd_size_set(nbd, config->blksize,
+                            div_s64(arg, config->blksize));
                 return 0;
         case NBD_SET_SIZE_BLOCKS:
-               nbd_size_set(nbd, bdev, nbd->blksize, arg);
+               nbd_size_set(nbd, config->blksize, arg);
                 return 0;
         case NBD_SET_TIMEOUT:
-               nbd->tag_set.timeout = arg * HZ;
+               if (arg) {
+                       nbd->tag_set.timeout = arg * HZ;
+                       blk_queue_rq_timeout(nbd->disk->queue, arg * HZ);
+               }
                 return 0;
  
         case NBD_SET_FLAGS:
-               nbd->flags = arg;
+               config->flags = arg;
                 return 0;
         case NBD_DO_IT:
-               return nbd_start_device(nbd, bdev);
+               return nbd_start_device_ioctl(nbd, bdev);
         case NBD_CLEAR_QUE:
                 /*
                  * This is for compatibility only.  The queue is always cleared
@@ -809,23 +1179,92 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
                      unsigned int cmd, unsigned long arg)
  {
         struct nbd_device *nbd = bdev->bd_disk->private_data;
-       int error;
+       struct nbd_config *config = nbd->config;
+       int error = -EINVAL;
  
         if (!capable(CAP_SYS_ADMIN))
                 return -EPERM;
  
-       BUG_ON(nbd->magic != NBD_MAGIC);
-
         mutex_lock(&nbd->config_lock);
-       error = __nbd_ioctl(bdev, nbd, cmd, arg);
-       mutex_unlock(&nbd->config_lock);
  
+       /* Don't allow ioctl operations on an nbd device that was created with
+        * netlink, unless it's DISCONNECT or CLEAR_SOCK, which are fine.
+        */
+       if (!test_bit(NBD_BOUND, &config->runtime_flags) ||
+           (cmd == NBD_DISCONNECT || cmd == NBD_CLEAR_SOCK))
+               error = __nbd_ioctl(bdev, nbd, cmd, arg);
+       else
+               dev_err(nbd_to_dev(nbd), "Cannot use ioctl interface on a netlink controlled device.\n");
+       mutex_unlock(&nbd->config_lock);
         return error;
  }
  
+static struct nbd_config *nbd_alloc_config(void)
+{
+       struct nbd_config *config;
+
+       config = kzalloc(sizeof(struct nbd_config), GFP_NOFS);
+       if (!config)
+               return NULL;
+       atomic_set(&config->recv_threads, 0);
+       init_waitqueue_head(&config->recv_wq);
+       init_waitqueue_head(&config->conn_wait);
+       config->blksize = 1024;
+       atomic_set(&config->live_connections, 0);
+       try_module_get(THIS_MODULE);
+       return config;
+}
+
+static int nbd_open(struct block_device *bdev, fmode_t mode)
+{
+       struct nbd_device *nbd;
+       int ret = 0;
+
+       mutex_lock(&nbd_index_mutex);
+       nbd = bdev->bd_disk->private_data;
+       if (!nbd) {
+               ret = -ENXIO;
+               goto out;
+       }
+       if (!refcount_inc_not_zero(&nbd->refs)) {
+               ret = -ENXIO;
+               goto out;
+       }
+       if (!refcount_inc_not_zero(&nbd->config_refs)) {
+               struct nbd_config *config;
+
+               mutex_lock(&nbd->config_lock);
+               if (refcount_inc_not_zero(&nbd->config_refs)) {
+                       mutex_unlock(&nbd->config_lock);
+                       goto out;
+               }
+               config = nbd->config = nbd_alloc_config();
+               if (!config) {
+                       ret = -ENOMEM;
+                       mutex_unlock(&nbd->config_lock);
+                       goto out;
+               }
+               refcount_set(&nbd->config_refs, 1);
+               refcount_inc(&nbd->refs);
+               mutex_unlock(&nbd->config_lock);
+       }
+out:
+       mutex_unlock(&nbd_index_mutex);
+       return ret;
+}
+
+static void nbd_release(struct gendisk *disk, fmode_t mode)
+{
+       struct nbd_device *nbd = disk->private_data;
+       nbd_config_put(nbd);
+       nbd_put(nbd);
+}
+
  static const struct block_device_operations nbd_fops =
  {
         .owner =        THIS_MODULE,
+       .open =         nbd_open,
+       .release =      nbd_release,
         .ioctl =        nbd_ioctl,
         .compat_ioctl = nbd_ioctl,
  };
@@ -857,7 +1296,7 @@ static const struct file_operations nbd_dbg_tasks_ops = {
  static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
  {
         struct nbd_device *nbd = s->private;
-       u32 flags = nbd->flags;
+       u32 flags = nbd->config->flags;
  
         seq_printf(s, "Hex: 0x%08x\n\n", flags);
  
@@ -890,6 +1329,7 @@ static const struct file_operations nbd_dbg_flags_ops = {
  static int nbd_dev_dbg_init(struct nbd_device *nbd)
  {
         struct dentry *dir;
+       struct nbd_config *config = nbd->config;
  
         if (!nbd_dbg_dir)
                 return -EIO;
@@ -900,12 +1340,12 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd)
                         nbd_name(nbd));
                 return -EIO;
         }
-       nbd->dbg_dir = dir;
+       config->dbg_dir = dir;
  
         debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops);
-       debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize);
+       debugfs_create_u64("size_bytes", 0444, dir, &config->bytesize);
         debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout);
-       debugfs_create_u64("blocksize", 0444, dir, &nbd->blksize);
+       debugfs_create_u64("blocksize", 0444, dir, &config->blksize);
         debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops);
  
         return 0;
@@ -913,7 +1353,7 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd)
  
  static void nbd_dev_dbg_close(struct nbd_device *nbd)
  {
-       debugfs_remove_recursive(nbd->dbg_dir);
+       debugfs_remove_recursive(nbd->config->dbg_dir);
  }
  
  static int nbd_dbg_init(void)
@@ -965,25 +1405,13 @@ static int nbd_init_request(void *data, struct request *rq,
         return 0;
  }
  
-static struct blk_mq_ops nbd_mq_ops = {
+static const struct blk_mq_ops nbd_mq_ops = {
         .queue_rq       = nbd_queue_rq,
+       .complete       = nbd_complete_rq,
         .init_request   = nbd_init_request,
         .timeout        = nbd_xmit_timeout,
  };
  
-static void nbd_dev_remove(struct nbd_device *nbd)
-{
-       struct gendisk *disk = nbd->disk;
-       nbd->magic = 0;
-       if (disk) {
-               del_gendisk(disk);
-               blk_cleanup_queue(disk->queue);
-               blk_mq_free_tag_set(&nbd->tag_set);
-               put_disk(disk);
-       }
-       kfree(nbd);
-}
-
  static int nbd_dev_add(int index)
  {
         struct nbd_device *nbd;
@@ -1012,6 +1440,7 @@ static int nbd_dev_add(int index)
         if (err < 0)
                 goto out_free_disk;
  
+       nbd->index = index;
         nbd->disk = disk;
         nbd->tag_set.ops = &nbd_mq_ops;
         nbd->tag_set.nr_hw_queues = 1;
@@ -1040,20 +1469,23 @@ static int nbd_dev_add(int index)
         queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue);
         disk->queue->limits.discard_granularity = 512;
         blk_queue_max_discard_sectors(disk->queue, UINT_MAX);
-       disk->queue->limits.discard_zeroes_data = 0;
+       blk_queue_max_segment_size(disk->queue, UINT_MAX);
+       blk_queue_max_segments(disk->queue, USHRT_MAX);
         blk_queue_max_hw_sectors(disk->queue, 65536);
         disk->queue->limits.max_sectors = 256;
  
-       nbd->magic = NBD_MAGIC;
         mutex_init(&nbd->config_lock);
+       refcount_set(&nbd->config_refs, 0);
+       refcount_set(&nbd->refs, 1);
+       INIT_LIST_HEAD(&nbd->list);
         disk->major = NBD_MAJOR;
         disk->first_minor = index << part_shift;
         disk->fops = &nbd_fops;
         disk->private_data = nbd;
         sprintf(disk->disk_name, "nbd%d", index);
-       init_waitqueue_head(&nbd->recv_wq);
         nbd_reset(nbd);
         add_disk(disk);
+       nbd_total_devices++;
         return index;
  
  out_free_tags:
@@ -1068,10 +1500,535 @@ out:
         return err;
  }
  
-/*
- * And here should be modules and kernel interface 
- *  (Just smiley confuses emacs :-)
+static int find_free_cb(int id, void *ptr, void *data)
+{
+       struct nbd_device *nbd = ptr;
+       struct nbd_device **found = data;
+
+       if (!refcount_read(&nbd->config_refs)) {
+               *found = nbd;
+               return 1;
+       }
+       return 0;
+}
+
+/* Netlink interface. */
+static struct nla_policy nbd_attr_policy[NBD_ATTR_MAX + 1] = {
+       [NBD_ATTR_INDEX]                =       { .type = NLA_U32 },
+       [NBD_ATTR_SIZE_BYTES]           =       { .type = NLA_U64 },
+       [NBD_ATTR_BLOCK_SIZE_BYTES]     =       { .type = NLA_U64 },
+       [NBD_ATTR_TIMEOUT]              =       { .type = NLA_U64 },
+       [NBD_ATTR_SERVER_FLAGS]         =       { .type = NLA_U64 },
+       [NBD_ATTR_CLIENT_FLAGS]         =       { .type = NLA_U64 },
+       [NBD_ATTR_SOCKETS]              =       { .type = NLA_NESTED},
+       [NBD_ATTR_DEAD_CONN_TIMEOUT]    =       { .type = NLA_U64 },
+       [NBD_ATTR_DEVICE_LIST]          =       { .type = NLA_NESTED},
+};
+
+static struct nla_policy nbd_sock_policy[NBD_SOCK_MAX + 1] = {
+       [NBD_SOCK_FD]                   =       { .type = NLA_U32 },
+};
+
+/* We don't use this right now since we don't parse the incoming list, but we
+ * still want it here so userspace knows what to expect.
   */
+static struct nla_policy __attribute__((unused))
+nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = {
+       [NBD_DEVICE_INDEX]              =       { .type = NLA_U32 },
+       [NBD_DEVICE_CONNECTED]          =       { .type = NLA_U8 },
+};
+
+static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
+{
+       struct nbd_device *nbd = NULL;
+       struct nbd_config *config;
+       int index = -1;
+       int ret;
+       bool put_dev = false;
+
+       if (!netlink_capable(skb, CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (info->attrs[NBD_ATTR_INDEX])
+               index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
+       if (!info->attrs[NBD_ATTR_SOCKETS]) {
+               printk(KERN_ERR "nbd: must specify at least one socket\n");
+               return -EINVAL;
+       }
+       if (!info->attrs[NBD_ATTR_SIZE_BYTES]) {
+               printk(KERN_ERR "nbd: must specify a size in bytes for the device\n");
+               return -EINVAL;
+       }
+again:
+       mutex_lock(&nbd_index_mutex);
+       if (index == -1) {
+               ret = idr_for_each(&nbd_index_idr, &find_free_cb, &nbd);
+               if (ret == 0) {
+                       int new_index;
+                       new_index = nbd_dev_add(-1);
+                       if (new_index < 0) {
+                               mutex_unlock(&nbd_index_mutex);
+                               printk(KERN_ERR "nbd: failed to add new device\n");
+                               return ret;
+                       }
+                       nbd = idr_find(&nbd_index_idr, new_index);
+               }
+       } else {
+               nbd = idr_find(&nbd_index_idr, index);
+       }
+       if (!nbd) {
+               printk(KERN_ERR "nbd: couldn't find device at index %d\n",
+                      index);
+               mutex_unlock(&nbd_index_mutex);
+               return -EINVAL;
+       }
+       if (!refcount_inc_not_zero(&nbd->refs)) {
+               mutex_unlock(&nbd_index_mutex);
+               if (index == -1)
+                       goto again;
+               printk(KERN_ERR "nbd: device at index %d is going down\n",
+                      index);
+               return -EINVAL;
+       }
+       mutex_unlock(&nbd_index_mutex);
+
+       mutex_lock(&nbd->config_lock);
+       if (refcount_read(&nbd->config_refs)) {
+               mutex_unlock(&nbd->config_lock);
+               nbd_put(nbd);
+               if (index == -1)
+                       goto again;
+               printk(KERN_ERR "nbd: nbd%d already in use\n", index);
+               return -EBUSY;
+       }
+       if (WARN_ON(nbd->config)) {
+               mutex_unlock(&nbd->config_lock);
+               nbd_put(nbd);
+               return -EINVAL;
+       }
+       config = nbd->config = nbd_alloc_config();
+       if (!nbd->config) {
+               mutex_unlock(&nbd->config_lock);
+               nbd_put(nbd);
+               printk(KERN_ERR "nbd: couldn't allocate config\n");
+               return -ENOMEM;
+       }
+       refcount_set(&nbd->config_refs, 1);
+       set_bit(NBD_BOUND, &config->runtime_flags);
+
+       if (info->attrs[NBD_ATTR_SIZE_BYTES]) {
+               u64 bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]);
+               nbd_size_set(nbd, config->blksize,
+                            div64_u64(bytes, config->blksize));
+       }
+       if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) {
+               u64 bsize =
+                       nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]);
+               nbd_size_set(nbd, bsize, div64_u64(config->bytesize, bsize));
+       }
+       if (info->attrs[NBD_ATTR_TIMEOUT]) {
+               u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]);
+               nbd->tag_set.timeout = timeout * HZ;
+               blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ);
+       }
+       if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) {
+               config->dead_conn_timeout =
+                       nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]);
+               config->dead_conn_timeout *= HZ;
+       }
+       if (info->attrs[NBD_ATTR_SERVER_FLAGS])
+               config->flags =
+                       nla_get_u64(info->attrs[NBD_ATTR_SERVER_FLAGS]);
+       if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
+               u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
+               if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
+                       set_bit(NBD_DESTROY_ON_DISCONNECT,
+                               &config->runtime_flags);
+                       put_dev = true;
+               }
+       }
+
+       if (info->attrs[NBD_ATTR_SOCKETS]) {
+               struct nlattr *attr;
+               int rem, fd;
+
+               nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS],
+                                   rem) {
+                       struct nlattr *socks[NBD_SOCK_MAX+1];
+
+                       if (nla_type(attr) != NBD_SOCK_ITEM) {
+                               printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n");
+                               ret = -EINVAL;
+                               goto out;
+                       }
+                       ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr,
+                                              nbd_sock_policy);
+                       if (ret != 0) {
+                               printk(KERN_ERR "nbd: error processing sock list\n");
+                               ret = -EINVAL;
+                               goto out;
+                       }
+                       if (!socks[NBD_SOCK_FD])
+                               continue;
+                       fd = (int)nla_get_u32(socks[NBD_SOCK_FD]);
+                       ret = nbd_add_socket(nbd, fd, true);
+                       if (ret)
+                               goto out;
+               }
+       }
+       ret = nbd_start_device(nbd);
+out:
+       mutex_unlock(&nbd->config_lock);
+       if (!ret) {
+               set_bit(NBD_HAS_CONFIG_REF, &config->runtime_flags);
+               refcount_inc(&nbd->config_refs);
+               nbd_connect_reply(info, nbd->index);
+       }
+       nbd_config_put(nbd);
+       if (put_dev)
+               nbd_put(nbd);
+       return ret;
+}
+
+/*
+ * NBD_CMD_DISCONNECT handler: disconnect the device selected by
+ * NBD_ATTR_INDEX.
+ *
+ * Returns -EPERM when the sender lacks CAP_SYS_ADMIN, -EINVAL when the index
+ * attribute is missing or no usable device exists at that index, and 0
+ * otherwise, including when the device holds no config references.
+ */
+static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
+{
+       struct nbd_device *nbd;
+       int index;
+
+       if (!netlink_capable(skb, CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (!info->attrs[NBD_ATTR_INDEX]) {
+               printk(KERN_ERR "nbd: must specify an index to disconnect\n");
+               return -EINVAL;
+       }
+       index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
+       /* Look the device up and take a device ref under nbd_index_mutex. */
+       mutex_lock(&nbd_index_mutex);
+       nbd = idr_find(&nbd_index_idr, index);
+       if (!nbd) {
+               mutex_unlock(&nbd_index_mutex);
+               printk(KERN_ERR "nbd: couldn't find device at index %d\n",
+                      index);
+               return -EINVAL;
+       }
+       if (!refcount_inc_not_zero(&nbd->refs)) {
+               mutex_unlock(&nbd_index_mutex);
+               printk(KERN_ERR "nbd: device at index %d is going down\n",
+                      index);
+               return -EINVAL;
+       }
+       mutex_unlock(&nbd_index_mutex);
+       /* config_refs already zero: nothing to disconnect, report success. */
+       if (!refcount_inc_not_zero(&nbd->config_refs)) {
+               nbd_put(nbd);
+               return 0;
+       }
+       mutex_lock(&nbd->config_lock);
+       nbd_disconnect(nbd);
+       mutex_unlock(&nbd->config_lock);
+       /*
+        * If NBD_HAS_CONFIG_REF was set, clear it and drop one config
+        * reference for it, in addition to the reference taken above.
+        */
+       if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+                              &nbd->config->runtime_flags))
+               nbd_config_put(nbd);
+       nbd_config_put(nbd);
+       nbd_put(nbd);
+       return 0;
+}
+
+/*
+ * NBD_CMD_RECONFIGURE handler: update a bound device selected by
+ * NBD_ATTR_INDEX.
+ *
+ * Optional attributes applied, in order: NBD_ATTR_TIMEOUT,
+ * NBD_ATTR_DEAD_CONN_TIMEOUT (both multiplied by HZ), NBD_ATTR_CLIENT_FLAGS
+ * and NBD_ATTR_SOCKETS (each socket fd is handed to nbd_reconnect_socket()).
+ *
+ * Returns 0 on success, -EPERM when the sender lacks CAP_SYS_ADMIN, -EINVAL
+ * for a missing index, an unknown or unbound device, or a malformed socket
+ * list, or the error returned by nbd_reconnect_socket().
+ */
+static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
+{
+       struct nbd_device *nbd = NULL;
+       struct nbd_config *config;
+       int index;
+       /*
+        * Start from success: a request that only changes timeouts or flags
+        * never touches ret again and must not be reported as -EINVAL.
+        */
+       int ret = 0;
+       bool put_dev = false;
+
+       if (!netlink_capable(skb, CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (!info->attrs[NBD_ATTR_INDEX]) {
+               printk(KERN_ERR "nbd: must specify a device to reconfigure\n");
+               return -EINVAL;
+       }
+       index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
+       /* Look the device up and take a device ref under nbd_index_mutex. */
+       mutex_lock(&nbd_index_mutex);
+       nbd = idr_find(&nbd_index_idr, index);
+       if (!nbd) {
+               mutex_unlock(&nbd_index_mutex);
+               printk(KERN_ERR "nbd: couldn't find a device at index %d\n",
+                      index);
+               return -EINVAL;
+       }
+       if (!refcount_inc_not_zero(&nbd->refs)) {
+               mutex_unlock(&nbd_index_mutex);
+               printk(KERN_ERR "nbd: device at index %d is going down\n",
+                      index);
+               return -EINVAL;
+       }
+       mutex_unlock(&nbd_index_mutex);
+
+       if (!refcount_inc_not_zero(&nbd->config_refs)) {
+               dev_err(nbd_to_dev(nbd),
+                       "not configured, cannot reconfigure\n");
+               nbd_put(nbd);
+               return -EINVAL;
+       }
+
+       mutex_lock(&nbd->config_lock);
+       config = nbd->config;
+       if (!test_bit(NBD_BOUND, &config->runtime_flags) ||
+           !nbd->task_recv) {
+               dev_err(nbd_to_dev(nbd),
+                       "not configured, cannot reconfigure\n");
+               ret = -EINVAL;
+               goto out;
+       }
+
+       if (info->attrs[NBD_ATTR_TIMEOUT]) {
+               u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]);
+               nbd->tag_set.timeout = timeout * HZ;
+               blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ);
+       }
+       if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) {
+               config->dead_conn_timeout =
+                       nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]);
+               config->dead_conn_timeout *= HZ;
+       }
+       if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
+               u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
+               /*
+                * Keep the device refcount in step with the flag: newly
+                * setting it schedules an extra nbd_put() at the end,
+                * clearing it takes a device reference.
+                */
+               if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
+                       if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
+                                             &config->runtime_flags))
+                               put_dev = true;
+               } else {
+                       if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
+                                              &config->runtime_flags))
+                               refcount_inc(&nbd->refs);
+               }
+       }
+
+       if (info->attrs[NBD_ATTR_SOCKETS]) {
+               struct nlattr *attr;
+               int rem, fd;
+
+               nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS],
+                                   rem) {
+                       struct nlattr *socks[NBD_SOCK_MAX+1];
+
+                       if (nla_type(attr) != NBD_SOCK_ITEM) {
+                               printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n");
+                               ret = -EINVAL;
+                               goto out;
+                       }
+                       ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr,
+                                              nbd_sock_policy);
+                       if (ret != 0) {
+                               printk(KERN_ERR "nbd: error processing sock list\n");
+                               ret = -EINVAL;
+                               goto out;
+                       }
+                       /* Items without an fd are silently skipped. */
+                       if (!socks[NBD_SOCK_FD])
+                               continue;
+                       fd = (int)nla_get_u32(socks[NBD_SOCK_FD]);
+                       ret = nbd_reconnect_socket(nbd, fd);
+                       if (ret) {
+                               /*
+                                * -ENOSPC is not reported as an error, but
+                                * it still ends processing of the list.
+                                */
+                               if (ret == -ENOSPC)
+                                       ret = 0;
+                               goto out;
+                       }
+                       dev_info(nbd_to_dev(nbd), "reconnected socket\n");
+               }
+       }
+out:
+       mutex_unlock(&nbd->config_lock);
+       nbd_config_put(nbd);
+       nbd_put(nbd);
+       if (put_dev)
+               nbd_put(nbd);
+       return ret;
+}
+
+/*
+ * Generic netlink commands handled by the nbd family.  Every command is
+ * validated against nbd_attr_policy.
+ */
+static const struct genl_ops nbd_connect_genl_ops[] = {
+       {
+               .cmd    = NBD_CMD_CONNECT,
+               .policy = nbd_attr_policy,
+               .doit   = nbd_genl_connect,
+       },
+       {
+               .cmd    = NBD_CMD_DISCONNECT,
+               .policy = nbd_attr_policy,
+               .doit   = nbd_genl_disconnect,
+       },
+       {
+               .cmd    = NBD_CMD_RECONFIGURE,
+               .policy = nbd_attr_policy,
+               .doit   = nbd_genl_reconfigure,
+       },
+       {
+               .cmd    = NBD_CMD_STATUS,
+               .policy = nbd_attr_policy,
+               .doit   = nbd_genl_status,
+       },
+};
+
+/* The family's only multicast group; nbd_mcast_index() sends to group 0. */
+static const struct genl_multicast_group nbd_mcast_grps[] = {
+       { .name = NBD_GENL_MCAST_GROUP_NAME, },
+};
+
+/*
+ * The nbd generic netlink family: registered in nbd_init() and unregistered
+ * in nbd_cleanup().
+ */
+static struct genl_family nbd_genl_family __ro_after_init = {
+       .hdrsize        = 0,
+       .name           = NBD_GENL_FAMILY_NAME,
+       .version        = NBD_GENL_VERSION,
+       .module         = THIS_MODULE,
+       .ops            = nbd_connect_genl_ops,
+       .n_ops          = ARRAY_SIZE(nbd_connect_genl_ops),
+       .maxattr        = NBD_ATTR_MAX,
+       .mcgrps         = nbd_mcast_grps,
+       .n_mcgrps       = ARRAY_SIZE(nbd_mcast_grps),
+};
+
+/*
+ * Append one NBD_DEVICE_ITEM nest to @reply describing @nbd: its index and
+ * whether it currently has any config references.
+ *
+ * Returns 0 on success or -EMSGSIZE if any attribute could not be added.
+ */
+static int populate_nbd_status(struct nbd_device *nbd, struct sk_buff *reply)
+{
+       struct nlattr *item;
+       u8 connected;
+
+       /*
+        * config_refs is sampled without taking a reference, so the answer
+        * is slightly racy; that is acceptable for a status report.  Taking
+        * a ref is avoided because in the index == -1 case the matching put
+        * would happen under nbd_index_mutex, which could deadlock if the
+        * device is configured to remove itself once disconnected.
+        */
+       connected = refcount_read(&nbd->config_refs) ? 1 : 0;
+
+       item = nla_nest_start(reply, NBD_DEVICE_ITEM);
+       if (!item)
+               return -EMSGSIZE;
+
+       if (nla_put_u32(reply, NBD_DEVICE_INDEX, nbd->index) ||
+           nla_put_u8(reply, NBD_DEVICE_CONNECTED, connected))
+               return -EMSGSIZE;
+
+       nla_nest_end(reply, item);
+       return 0;
+}
+
+/* idr_for_each() callback: report the device @ptr into the reply skb @data. */
+static int status_cb(int id, void *ptr, void *data)
+{
+       struct sk_buff *reply = data;
+
+       return populate_nbd_status(ptr, reply);
+}
+
+/*
+ * NBD_CMD_STATUS handler: reply with an NBD_ATTR_DEVICE_LIST nest holding
+ * one NBD_DEVICE_ITEM per device.
+ *
+ * With NBD_ATTR_INDEX only that device is reported (an empty list if it does
+ * not exist); without it every device in nbd_index_idr is reported.
+ *
+ * Returns 0 on success, -ENOMEM if the reply could not be set up, -EMSGSIZE
+ * if the reply ran out of room, or the error from genlmsg_reply().
+ */
+static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info)
+{
+       struct nlattr *dev_list;
+       struct sk_buff *reply;
+       void *reply_head;
+       size_t msg_size;
+       int index = -1;
+       int ret = -ENOMEM;
+
+       if (info->attrs[NBD_ATTR_INDEX])
+               index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
+
+       mutex_lock(&nbd_index_mutex);
+
+       /* One NBD_DEVICE_ITEM nest: a u32 index plus a u8 connected flag. */
+       msg_size = nla_total_size(nla_total_size(sizeof(u32)) +
+                                 nla_total_size(sizeof(u8)));
+       msg_size *= (index == -1) ? nbd_total_devices : 1;
+       /* Account for the enclosing NBD_ATTR_DEVICE_LIST nest header too. */
+       msg_size = nla_total_size(msg_size);
+
+       reply = genlmsg_new(msg_size, GFP_KERNEL);
+       if (!reply)
+               goto out;
+       reply_head = genlmsg_put_reply(reply, info, &nbd_genl_family, 0,
+                                      NBD_CMD_STATUS);
+       if (!reply_head) {
+               nlmsg_free(reply);
+               goto out;
+       }
+
+       /* nla_nest_end() dereferences dev_list, so it must not be NULL. */
+       dev_list = nla_nest_start(reply, NBD_ATTR_DEVICE_LIST);
+       if (!dev_list) {
+               ret = -EMSGSIZE;
+               nlmsg_free(reply);
+               goto out;
+       }
+       if (index == -1) {
+               ret = idr_for_each(&nbd_index_idr, &status_cb, reply);
+               if (ret) {
+                       nlmsg_free(reply);
+                       goto out;
+               }
+       } else {
+               struct nbd_device *nbd;
+               nbd = idr_find(&nbd_index_idr, index);
+               if (nbd) {
+                       ret = populate_nbd_status(nbd, reply);
+                       if (ret) {
+                               nlmsg_free(reply);
+                               goto out;
+                       }
+               }
+       }
+       nla_nest_end(reply, dev_list);
+       genlmsg_end(reply, reply_head);
+       /* Report a failed send to the caller instead of claiming success. */
+       ret = genlmsg_reply(reply, info);
+out:
+       mutex_unlock(&nbd_index_mutex);
+       return ret;
+}
+
+/*
+ * Send an NBD_CMD_CONNECT reply carrying the device @index back to the
+ * requester described by @info.  Failures are silent: the message is
+ * dropped and nothing is reported to the caller.
+ */
+static void nbd_connect_reply(struct genl_info *info, int index)
+{
+       struct sk_buff *reply;
+       void *hdr;
+
+       reply = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL);
+       if (!reply)
+               return;
+
+       hdr = genlmsg_put_reply(reply, info, &nbd_genl_family, 0,
+                               NBD_CMD_CONNECT);
+       if (!hdr)
+               goto err_free;
+
+       if (nla_put_u32(reply, NBD_ATTR_INDEX, index))
+               goto err_free;
+
+       genlmsg_end(reply, hdr);
+       genlmsg_reply(reply, info);
+       return;
+
+err_free:
+       nlmsg_free(reply);
+}
+
+/*
+ * Multicast an NBD_CMD_LINK_DEAD message carrying the device @index to the
+ * family's multicast group 0.  Failures are silent: the message is dropped.
+ */
+static void nbd_mcast_index(int index)
+{
+       struct sk_buff *msg;
+       void *hdr;
+
+       msg = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL);
+       if (!msg)
+               return;
+
+       hdr = genlmsg_put(msg, 0, 0, &nbd_genl_family, 0,
+                         NBD_CMD_LINK_DEAD);
+       if (!hdr)
+               goto err_free;
+
+       if (nla_put_u32(msg, NBD_ATTR_INDEX, index))
+               goto err_free;
+
+       genlmsg_end(msg, hdr);
+       genlmsg_multicast(&nbd_genl_family, msg, 0, 0, GFP_KERNEL);
+       return;
+
+err_free:
+       nlmsg_free(msg);
+}
+
+/*
+ * Work handler: multicast the index stored in the enclosing link_dead_args
+ * and then free that structure.
+ */
+static void nbd_dead_link_work(struct work_struct *work)
+{
+       struct link_dead_args *dead;
+
+       dead = container_of(work, struct link_dead_args, work);
+       nbd_mcast_index(dead->index);
+       kfree(dead);
+}
  
  static int __init nbd_init(void)
  {
@@ -1114,6 +2071,11 @@ static int __init nbd_init(void)
                 return -EIO;
         }
  
+       if (genl_register_family(&nbd_genl_family)) {
+               unregister_blkdev(NBD_MAJOR, "nbd");
+               destroy_workqueue(recv_workqueue);
+               return -EINVAL;
+       }
         nbd_dbg_init();
  
         mutex_lock(&nbd_index_mutex);
@@ -1125,17 +2087,34 @@ static int __init nbd_init(void)
  
  static int nbd_exit_cb(int id, void *ptr, void *data)
  {
+       struct list_head *list = (struct list_head *)data;
         struct nbd_device *nbd = ptr;
-       nbd_dev_remove(nbd);
+
+       /* Only queue the device on the caller's list; nothing is freed here. */
+       list_add_tail(&nbd->list, list);
         return 0;
  }
  
  static void __exit nbd_cleanup(void)
  {
+       struct nbd_device *nbd;
+       LIST_HEAD(del_list);
+
         nbd_dbg_close();
  
-       idr_for_each(&nbd_index_idr, &nbd_exit_cb, NULL);
+       mutex_lock(&nbd_index_mutex);
+       idr_for_each(&nbd_index_idr, &nbd_exit_cb, &del_list);
+       mutex_unlock(&nbd_index_mutex);
+
+       while (!list_empty(&del_list)) {
+               nbd = list_first_entry(&del_list, struct nbd_device, list);
+               list_del_init(&nbd->list);
+               if (refcount_read(&nbd->refs) != 1)
+                       printk(KERN_ERR "nbd: possibly leaking a device\n");
+               nbd_put(nbd);
+       }
+
         idr_destroy(&nbd_index_idr);
+       genl_unregister_family(&nbd_genl_family);
         destroy_workqueue(recv_workqueue);
         unregister_blkdev(NBD_MAJOR, "nbd");
  }
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c

index 6f2e565bccc59e8f3610f1651a385de885443e4c..d946e1eeac8ef0dafbf3510f3aaa57925ea5f07c 100644 (file)
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -117,6 +117,10 @@ static bool use_lightnvm;
  module_param(use_lightnvm, bool, S_IRUGO);
  MODULE_PARM_DESC(use_lightnvm, "Register as a LightNVM device");
  
+static bool blocking;
+module_param(blocking, bool, S_IRUGO);
+MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
+
  static int irqmode = NULL_IRQ_SOFTIRQ;
  
  static int null_set_irqmode(const char *str, const struct kernel_param *kp)
@@ -277,7 +281,7 @@ static inline void null_handle_cmd(struct nullb_cmd *cmd)
         case NULL_IRQ_SOFTIRQ:
                 switch (queue_mode)  {
                 case NULL_Q_MQ:
-                       blk_mq_complete_request(cmd->rq, cmd->rq->errors);
+                       blk_mq_complete_request(cmd->rq);
                         break;
                 case NULL_Q_RQ:
                         blk_complete_request(cmd->rq);
@@ -357,6 +361,8 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
  {
         struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
  
+       might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
+
         if (irqmode == NULL_IRQ_TIMER) {
                 hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
                 cmd->timer.function = null_cmd_timer_expired;
@@ -392,7 +398,7 @@ static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
         return 0;
  }
  
-static struct blk_mq_ops null_mq_ops = {
+static const struct blk_mq_ops null_mq_ops = {
         .queue_rq       = null_queue_rq,
         .init_hctx      = null_init_hctx,
         .complete       = null_softirq_done_fn,
@@ -437,14 +443,7 @@ static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
         if (IS_ERR(rq))
                 return -ENOMEM;
  
-       rq->__sector = bio->bi_iter.bi_sector;
-       rq->ioprio = bio_prio(bio);
-
-       if (bio_has_data(bio))
-               rq->nr_phys_segments = bio_phys_segments(q, bio);
-
-       rq->__data_len = bio->bi_iter.bi_size;
-       rq->bio = rq->biotail = bio;
+       blk_init_request_from_bio(rq, bio);
  
         rq->end_io_data = rqd;
  
@@ -724,6 +723,9 @@ static int null_add_dev(void)
                 nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
                 nullb->tag_set.driver_data = nullb;
  
+               if (blocking)
+                       nullb->tag_set.flags |= BLK_MQ_F_BLOCKING;
+
                 rv = blk_mq_alloc_tag_set(&nullb->tag_set);
                 if (rv)
                         goto out_cleanup_queues;
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c

deleted file mode 100644 (file)

index 8127b82..0000000
--- a/drivers/block/osdblk.c
+++ /dev/null
@@ -1,693 +0,0 @@
-
-/*
-   osdblk.c -- Export a single SCSI OSD object as a Linux block device
-
-
-   Copyright 2009 Red Hat, Inc.
-
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; see the file COPYING.  If not, write to
-   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
-
-
-   Instructions for use
-   --------------------
-
-   1) Map a Linux block device to an existing OSD object.
-
-      In this example, we will use partition id 1234, object id 5678,
-      OSD device /dev/osd1.
-
-      $ echo "1234 5678 /dev/osd1" > /sys/class/osdblk/add
-
-
-   2) List all active blkdev<->object mappings.
-
-      In this example, we have performed step #1 twice, creating two blkdevs,
-      mapped to two separate OSD objects.
-
-      $ cat /sys/class/osdblk/list
-      0 174 1234 5678 /dev/osd1
-      1 179 1994 897123 /dev/osd0
-
-      The columns, in order, are:
-      - blkdev unique id
-      - blkdev assigned major
-      - OSD object partition id
-      - OSD object id
-      - OSD device
-
-
-   3) Remove an active blkdev<->object mapping.
-
-      In this example, we remove the mapping with blkdev unique id 1.
-
-      $ echo 1 > /sys/class/osdblk/remove
-
-
-   NOTE:  The actual creation and deletion of OSD objects is outside the scope
-   of this driver.
-
- */
-
-#include <linux/kernel.h>
-#include <linux/device.h>
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <scsi/osd_initiator.h>
-#include <scsi/osd_attributes.h>
-#include <scsi/osd_sec.h>
-#include <scsi/scsi_device.h>
-
-#define DRV_NAME "osdblk"
-#define PFX DRV_NAME ": "
-
-/* #define _OSDBLK_DEBUG */
-#ifdef _OSDBLK_DEBUG
-#define OSDBLK_DEBUG(fmt, a...) \
-       printk(KERN_NOTICE "osdblk @%s:%d: " fmt, __func__, __LINE__, ##a)
-#else
-#define OSDBLK_DEBUG(fmt, a...) \
-       do { if (0) printk(fmt, ##a); } while (0)
-#endif
-
-MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>");
-MODULE_DESCRIPTION("block device inside an OSD object osdblk.ko");
-MODULE_LICENSE("GPL");
-
-struct osdblk_device;
-
-enum {
-       OSDBLK_MINORS_PER_MAJOR = 256,          /* max minors per blkdev */
-       OSDBLK_MAX_REQ          = 32,           /* max parallel requests */
-       OSDBLK_OP_TIMEOUT       = 4 * 60,       /* sync OSD req timeout */
-};
-
-struct osdblk_request {
-       struct request          *rq;            /* blk layer request */
-       struct bio              *bio;           /* cloned bio */
-       struct osdblk_device    *osdev;         /* associated blkdev */
-};
-
-struct osdblk_device {
-       int                     id;             /* blkdev unique id */
-
-       int                     major;          /* blkdev assigned major */
-       struct gendisk          *disk;          /* blkdev's gendisk and rq */
-       struct request_queue    *q;
-
-       struct osd_dev          *osd;           /* associated OSD */
-
-       char                    name[32];       /* blkdev name, e.g. osdblk34 */
-
-       spinlock_t              lock;           /* queue lock */
-
-       struct osd_obj_id       obj;            /* OSD partition, obj id */
-       uint8_t                 obj_cred[OSD_CAP_LEN]; /* OSD cred */
-
-       struct osdblk_request   req[OSDBLK_MAX_REQ]; /* request table */
-
-       struct list_head        node;
-
-       char                    osd_path[0];    /* OSD device path */
-};
-
-static struct class *class_osdblk;             /* /sys/class/osdblk */
-static DEFINE_MUTEX(ctl_mutex);        /* Serialize open/close/setup/teardown */
-static LIST_HEAD(osdblkdev_list);
-
-static const struct block_device_operations osdblk_bd_ops = {
-       .owner          = THIS_MODULE,
-};
-
-static const struct osd_attr g_attr_logical_length = ATTR_DEF(
-       OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
-
-static void osdblk_make_credential(u8 cred_a[OSD_CAP_LEN],
-                                  const struct osd_obj_id *obj)
-{
-       osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
-}
-
-/* copied from exofs; move to libosd? */
-/*
- * Perform a synchronous OSD operation.  copied from exofs; move to libosd?
- */
-static int osd_sync_op(struct osd_request *or, int timeout, uint8_t *credential)
-{
-       int ret;
-
-       or->timeout = timeout;
-       ret = osd_finalize_request(or, 0, credential, NULL);
-       if (ret)
-               return ret;
-
-       ret = osd_execute_request(or);
-
-       /* osd_req_decode_sense(or, ret); */
-       return ret;
-}
-
-/*
- * Perform an asynchronous OSD operation.  copied from exofs; move to libosd?
- */
-static int osd_async_op(struct osd_request *or, osd_req_done_fn *async_done,
-                  void *caller_context, u8 *cred)
-{
-       int ret;
-
-       ret = osd_finalize_request(or, 0, cred, NULL);
-       if (ret)
-               return ret;
-
-       ret = osd_execute_request_async(or, async_done, caller_context);
-
-       return ret;
-}
-
-/* copied from exofs; move to libosd? */
-static int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr)
-{
-       struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
-       void *iter = NULL;
-       int nelem;
-
-       do {
-               nelem = 1;
-               osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter);
-               if ((cur_attr.attr_page == attr->attr_page) &&
-                   (cur_attr.attr_id == attr->attr_id)) {
-                       attr->len = cur_attr.len;
-                       attr->val_ptr = cur_attr.val_ptr;
-                       return 0;
-               }
-       } while (iter);
-
-       return -EIO;
-}
-
-static int osdblk_get_obj_size(struct osdblk_device *osdev, u64 *size_out)
-{
-       struct osd_request *or;
-       struct osd_attr attr;
-       int ret;
-
-       /* start request */
-       or = osd_start_request(osdev->osd, GFP_KERNEL);
-       if (!or)
-               return -ENOMEM;
-
-       /* create a get-attributes(length) request */
-       osd_req_get_attributes(or, &osdev->obj);
-
-       osd_req_add_get_attr_list(or, &g_attr_logical_length, 1);
-
-       /* execute op synchronously */
-       ret = osd_sync_op(or, OSDBLK_OP_TIMEOUT, osdev->obj_cred);
-       if (ret)
-               goto out;
-
-       /* extract length from returned attribute info */
-       attr = g_attr_logical_length;
-       ret = extract_attr_from_req(or, &attr);
-       if (ret)
-               goto out;
-
-       *size_out = get_unaligned_be64(attr.val_ptr);
-
-out:
-       osd_end_request(or);
-       return ret;
-
-}
-
-static void osdblk_osd_complete(struct osd_request *or, void *private)
-{
-       struct osdblk_request *orq = private;
-       struct osd_sense_info osi;
-       int ret = osd_req_decode_sense(or, &osi);
-
-       if (ret) {
-               ret = -EIO;
-               OSDBLK_DEBUG("osdblk_osd_complete with err=%d\n", ret);
-       }
-
-       /* complete OSD request */
-       osd_end_request(or);
-
-       /* complete request passed to osdblk by block layer */
-       __blk_end_request_all(orq->rq, ret);
-}
-
-static void bio_chain_put(struct bio *chain)
-{
-       struct bio *tmp;
-
-       while (chain) {
-               tmp = chain;
-               chain = chain->bi_next;
-
-               bio_put(tmp);
-       }
-}
-
-static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask)
-{
-       struct bio *tmp, *new_chain = NULL, *tail = NULL;
-
-       while (old_chain) {
-               tmp = bio_clone_kmalloc(old_chain, gfpmask);
-               if (!tmp)
-                       goto err_out;
-
-               tmp->bi_bdev = NULL;
-               gfpmask &= ~__GFP_DIRECT_RECLAIM;
-               tmp->bi_next = NULL;
-
-               if (!new_chain)
-                       new_chain = tail = tmp;
-               else {
-                       tail->bi_next = tmp;
-                       tail = tmp;
-               }
-
-               old_chain = old_chain->bi_next;
-       }
-
-       return new_chain;
-
-err_out:
-       OSDBLK_DEBUG("bio_chain_clone with err\n");
-       bio_chain_put(new_chain);
-       return NULL;
-}
-
-static void osdblk_rq_fn(struct request_queue *q)
-{
-       struct osdblk_device *osdev = q->queuedata;
-
-       while (1) {
-               struct request *rq;
-               struct osdblk_request *orq;
-               struct osd_request *or;
-               struct bio *bio;
-               bool do_write, do_flush;
-
-               /* peek at request from block layer */
-               rq = blk_fetch_request(q);
-               if (!rq)
-                       break;
-
-               /* deduce our operation (read, write, flush) */
-               /* I wish the block layer simplified cmd_type/cmd_flags/cmd[]
-                * into a clearly defined set of RPC commands:
-                * read, write, flush, scsi command, power mgmt req,
-                * driver-specific, etc.
-                */
-
-               do_flush = (req_op(rq) == REQ_OP_FLUSH);
-               do_write = (rq_data_dir(rq) == WRITE);
-
-               if (!do_flush) { /* osd_flush does not use a bio */
-                       /* a bio clone to be passed down to OSD request */
-                       bio = bio_chain_clone(rq->bio, GFP_ATOMIC);
-                       if (!bio)
-                               break;
-               } else
-                       bio = NULL;
-
-               /* alloc internal OSD request, for OSD command execution */
-               or = osd_start_request(osdev->osd, GFP_ATOMIC);
-               if (!or) {
-                       bio_chain_put(bio);
-                       OSDBLK_DEBUG("osd_start_request with err\n");
-                       break;
-               }
-
-               orq = &osdev->req[rq->tag];
-               orq->rq = rq;
-               orq->bio = bio;
-               orq->osdev = osdev;
-
-               /* init OSD command: flush, write or read */
-               if (do_flush)
-                       osd_req_flush_object(or, &osdev->obj,
-                                            OSD_CDB_FLUSH_ALL, 0, 0);
-               else if (do_write)
-                       osd_req_write(or, &osdev->obj, blk_rq_pos(rq) * 512ULL,
-                                     bio, blk_rq_bytes(rq));
-               else
-                       osd_req_read(or, &osdev->obj, blk_rq_pos(rq) * 512ULL,
-                                    bio, blk_rq_bytes(rq));
-
-               OSDBLK_DEBUG("%s 0x%x bytes at 0x%llx\n",
-                       do_flush ? "flush" : do_write ?
-                               "write" : "read", blk_rq_bytes(rq),
-                       blk_rq_pos(rq) * 512ULL);
-
-               /* begin OSD command execution */
-               if (osd_async_op(or, osdblk_osd_complete, orq,
-                                osdev->obj_cred)) {
-                       osd_end_request(or);
-                       blk_requeue_request(q, rq);
-                       bio_chain_put(bio);
-                       OSDBLK_DEBUG("osd_execute_request_async with err\n");
-                       break;
-               }
-
-               /* remove the special 'flush' marker, now that the command
-                * is executing
-                */
-               rq->special = NULL;
-       }
-}
-
-static void osdblk_free_disk(struct osdblk_device *osdev)
-{
-       struct gendisk *disk = osdev->disk;
-
-       if (!disk)
-               return;
-
-       if (disk->flags & GENHD_FL_UP)
-               del_gendisk(disk);
-       if (disk->queue)
-               blk_cleanup_queue(disk->queue);
-       put_disk(disk);
-}
-
-static int osdblk_init_disk(struct osdblk_device *osdev)
-{
-       struct gendisk *disk;
-       struct request_queue *q;
-       int rc;
-       u64 obj_size = 0;
-
-       /* contact OSD, request size info about the object being mapped */
-       rc = osdblk_get_obj_size(osdev, &obj_size);
-       if (rc)
-               return rc;
-
-       /* create gendisk info */
-       disk = alloc_disk(OSDBLK_MINORS_PER_MAJOR);
-       if (!disk)
-               return -ENOMEM;
-
-       sprintf(disk->disk_name, DRV_NAME "%d", osdev->id);
-       disk->major = osdev->major;
-       disk->first_minor = 0;
-       disk->fops = &osdblk_bd_ops;
-       disk->private_data = osdev;
-
-       /* init rq */
-       q = blk_init_queue(osdblk_rq_fn, &osdev->lock);
-       if (!q) {
-               put_disk(disk);
-               return -ENOMEM;
-       }
-
-       /* switch queue to TCQ mode; allocate tag map */
-       rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL, BLK_TAG_ALLOC_FIFO);
-       if (rc) {
-               blk_cleanup_queue(q);
-               put_disk(disk);
-               return rc;
-       }
-
-       /* Set our limits to the lower device limits, because osdblk cannot
-        * sleep when allocating a lower-request and therefore cannot be
-        * bouncing.
-        */
-       blk_queue_stack_limits(q, osd_request_queue(osdev->osd));
-
-       blk_queue_prep_rq(q, blk_queue_start_tag);
-       blk_queue_write_cache(q, true, false);
-
-       disk->queue = q;
-
-       q->queuedata = osdev;
-
-       osdev->disk = disk;
-       osdev->q = q;
-
-       /* finally, announce the disk to the world */
-       set_capacity(disk, obj_size / 512ULL);
-       add_disk(disk);
-
-       printk(KERN_INFO "%s: Added of size 0x%llx\n",
-               disk->disk_name, (unsigned long long)obj_size);
-
-       return 0;
-}
-
-/********************************************************************
- * /sys/class/osdblk/
- *                   add       map OSD object to blkdev
- *                   remove    unmap OSD object
- *                   list      show mappings
- *******************************************************************/
-
-static void class_osdblk_release(struct class *cls)
-{
-       kfree(cls);
-}
-
-static ssize_t class_osdblk_list(struct class *c,
-                               struct class_attribute *attr,
-                               char *data)
-{
-       int n = 0;
-       struct list_head *tmp;
-
-       mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-
-       list_for_each(tmp, &osdblkdev_list) {
-               struct osdblk_device *osdev;
-
-               osdev = list_entry(tmp, struct osdblk_device, node);
-
-               n += sprintf(data+n, "%d %d %llu %llu %s\n",
-                       osdev->id,
-                       osdev->major,
-                       osdev->obj.partition,
-                       osdev->obj.id,
-                       osdev->osd_path);
-       }
-
-       mutex_unlock(&ctl_mutex);
-       return n;
-}
-
-static ssize_t class_osdblk_add(struct class *c,
-                               struct class_attribute *attr,
-                               const char *buf, size_t count)
-{
-       struct osdblk_device *osdev;
-       ssize_t rc;
-       int irc, new_id = 0;
-       struct list_head *tmp;
-
-       if (!try_module_get(THIS_MODULE))
-               return -ENODEV;
-
-       /* new osdblk_device object */
-       osdev = kzalloc(sizeof(*osdev) + strlen(buf) + 1, GFP_KERNEL);
-       if (!osdev) {
-               rc = -ENOMEM;
-               goto err_out_mod;
-       }
-
-       /* static osdblk_device initialization */
-       spin_lock_init(&osdev->lock);
-       INIT_LIST_HEAD(&osdev->node);
-
-       /* generate unique id: find highest unique id, add one */
-
-       mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-
-       list_for_each(tmp, &osdblkdev_list) {
-               struct osdblk_device *osdev;
-
-               osdev = list_entry(tmp, struct osdblk_device, node);
-               if (osdev->id > new_id)
-                       new_id = osdev->id + 1;
-       }
-
-       osdev->id = new_id;
-
-       /* add to global list */
-       list_add_tail(&osdev->node, &osdblkdev_list);
-
-       mutex_unlock(&ctl_mutex);
-
-       /* parse add command */
-       if (sscanf(buf, "%llu %llu %s", &osdev->obj.partition, &osdev->obj.id,
-                  osdev->osd_path) != 3) {
-               rc = -EINVAL;
-               goto err_out_slot;
-       }
-
-       /* initialize rest of new object */
-       sprintf(osdev->name, DRV_NAME "%d", osdev->id);
-
-       /* contact requested OSD */
-       osdev->osd = osduld_path_lookup(osdev->osd_path);
-       if (IS_ERR(osdev->osd)) {
-               rc = PTR_ERR(osdev->osd);
-               goto err_out_slot;
-       }
-
-       /* build OSD credential */
-       osdblk_make_credential(osdev->obj_cred, &osdev->obj);
-
-       /* register our block device */
-       irc = register_blkdev(0, osdev->name);
-       if (irc < 0) {
-               rc = irc;
-               goto err_out_osd;
-       }
-
-       osdev->major = irc;
-
-       /* set up and announce blkdev mapping */
-       rc = osdblk_init_disk(osdev);
-       if (rc)
-               goto err_out_blkdev;
-
-       return count;
-
-err_out_blkdev:
-       unregister_blkdev(osdev->major, osdev->name);
-err_out_osd:
-       osduld_put_device(osdev->osd);
-err_out_slot:
-       mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-       list_del_init(&osdev->node);
-       mutex_unlock(&ctl_mutex);
-
-       kfree(osdev);
-err_out_mod:
-       OSDBLK_DEBUG("Error adding device %s\n", buf);
-       module_put(THIS_MODULE);
-       return rc;
-}
-
-static ssize_t class_osdblk_remove(struct class *c,
-                                       struct class_attribute *attr,
-                                       const char *buf,
-                                       size_t count)
-{
-       struct osdblk_device *osdev = NULL;
-       int target_id, rc;
-       unsigned long ul;
-       struct list_head *tmp;
-
-       rc = kstrtoul(buf, 10, &ul);
-       if (rc)
-               return rc;
-
-       /* convert to int; abort if we lost anything in the conversion */
-       target_id = (int) ul;
-       if (target_id != ul)
-               return -EINVAL;
-
-       /* remove object from list immediately */
-       mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-
-       list_for_each(tmp, &osdblkdev_list) {
-               osdev = list_entry(tmp, struct osdblk_device, node);
-               if (osdev->id == target_id) {
-                       list_del_init(&osdev->node);
-                       break;
-               }
-               osdev = NULL;
-       }
-
-       mutex_unlock(&ctl_mutex);
-
-       if (!osdev)
-               return -ENOENT;
-
-       /* clean up and free blkdev and associated OSD connection */
-       osdblk_free_disk(osdev);
-       unregister_blkdev(osdev->major, osdev->name);
-       osduld_put_device(osdev->osd);
-       kfree(osdev);
-
-       /* release module ref */
-       module_put(THIS_MODULE);
-
-       return count;
-}
-
-static struct class_attribute class_osdblk_attrs[] = {
-       __ATTR(add,     0200, NULL, class_osdblk_add),
-       __ATTR(remove,  0200, NULL, class_osdblk_remove),
-       __ATTR(list,    0444, class_osdblk_list, NULL),
-       __ATTR_NULL
-};
-
-static int osdblk_sysfs_init(void)
-{
-       int ret = 0;
-
-       /*
-        * create control files in sysfs
-        * /sys/class/osdblk/...
-        */
-       class_osdblk = kzalloc(sizeof(*class_osdblk), GFP_KERNEL);
-       if (!class_osdblk)
-               return -ENOMEM;
-
-       class_osdblk->name = DRV_NAME;
-       class_osdblk->owner = THIS_MODULE;
-       class_osdblk->class_release = class_osdblk_release;
-       class_osdblk->class_attrs = class_osdblk_attrs;
-
-       ret = class_register(class_osdblk);
-       if (ret) {
-               kfree(class_osdblk);
-               class_osdblk = NULL;
-               printk(PFX "failed to create class osdblk\n");
-               return ret;
-       }
-
-       return 0;
-}
-
-static void osdblk_sysfs_cleanup(void)
-{
-       if (class_osdblk)
-               class_destroy(class_osdblk);
-       class_osdblk = NULL;
-}
-
-static int __init osdblk_init(void)
-{
-       int rc;
-
-       rc = osdblk_sysfs_init();
-       if (rc)
-               return rc;
-
-       return 0;
-}
-
-static void __exit osdblk_exit(void)
-{
-       osdblk_sysfs_cleanup();
-}
-
-module_init(osdblk_init);
-module_exit(osdblk_exit);
-
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c

index 10aed84244f51854305ff7e0c59277731ab826f2..b1267ef34d5a7d918a5be1ff6d50194f96f78f33 100644 (file)
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -50,7 +50,7 @@
                          the slower the port i/o.  In some cases, setting
                          this to zero will speed up the device. (default -1)
                          
-            major       You may use this parameter to overide the
+            major       You may use this parameter to override the
                          default major number (46) that this driver
                          will use.  Be sure to change the device
                          name as well.
@@ -300,6 +300,11 @@ static void pcd_init_units(void)
                 struct gendisk *disk = alloc_disk(1);
                 if (!disk)
                         continue;
+               disk->queue = blk_init_queue(do_pcd_request, &pcd_lock);
+               if (!disk->queue) {
+                       put_disk(disk);
+                       continue;
+               }
                 cd->disk = disk;
                 cd->pi = &cd->pia;
                 cd->present = 0;
@@ -735,18 +740,36 @@ static int pcd_detect(void)
  }
  
  /* I/O request processing */
-static struct request_queue *pcd_queue;
+static int pcd_queue;
+
+static int set_next_request(void)
+{
+       struct pcd_unit *cd;
+       struct request_queue *q;
+       int old_pos = pcd_queue;
+
+       do {
+               cd = &pcd[pcd_queue];
+               q = cd->present ? cd->disk->queue : NULL;
+               if (++pcd_queue == PCD_UNITS)
+                       pcd_queue = 0;
+               if (q) {
+                       pcd_req = blk_fetch_request(q);
+                       if (pcd_req)
+                               break;
+               }
+       } while (pcd_queue != old_pos);
+
+       return pcd_req != NULL;
+}
  
-static void do_pcd_request(struct request_queue * q)
+static void pcd_request(void)
  {
         if (pcd_busy)
                 return;
         while (1) {
-               if (!pcd_req) {
-                       pcd_req = blk_fetch_request(q);
-                       if (!pcd_req)
-                               return;
-               }
+               if (!pcd_req && !set_next_request())
+                       return;
  
                 if (rq_data_dir(pcd_req) == READ) {
                         struct pcd_unit *cd = pcd_req->rq_disk->private_data;
@@ -766,6 +789,11 @@ static void do_pcd_request(struct request_queue * q)
         }
  }
  
+static void do_pcd_request(struct request_queue *q)
+{
+       pcd_request();
+}
+
  static inline void next_request(int err)
  {
         unsigned long saved_flags;
@@ -774,7 +802,7 @@ static inline void next_request(int err)
         if (!__blk_end_request_cur(pcd_req, err))
                 pcd_req = NULL;
         pcd_busy = 0;
-       do_pcd_request(pcd_queue);
+       pcd_request();
         spin_unlock_irqrestore(&pcd_lock, saved_flags);
  }
  
@@ -849,7 +877,7 @@ static void do_pcd_read_drq(void)
  
         do_pcd_read();
         spin_lock_irqsave(&pcd_lock, saved_flags);
-       do_pcd_request(pcd_queue);
+       pcd_request();
         spin_unlock_irqrestore(&pcd_lock, saved_flags);
  }
  
@@ -957,19 +985,10 @@ static int __init pcd_init(void)
                 return -EBUSY;
         }
  
-       pcd_queue = blk_init_queue(do_pcd_request, &pcd_lock);
-       if (!pcd_queue) {
-               unregister_blkdev(major, name);
-               for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++)
-                       put_disk(cd->disk);
-               return -ENOMEM;
-       }
-
         for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
                 if (cd->present) {
                         register_cdrom(&cd->info);
                         cd->disk->private_data = cd;
-                       cd->disk->queue = pcd_queue;
                         add_disk(cd->disk);
                 }
         }
@@ -988,9 +1007,9 @@ static void __exit pcd_exit(void)
                         pi_release(cd->pi);
                         unregister_cdrom(&cd->info);
                 }
+               blk_cleanup_queue(cd->disk->queue);
                 put_disk(cd->disk);
         }
-       blk_cleanup_queue(pcd_queue);
         unregister_blkdev(major, name);
         pi_unregister_driver(par_drv);
  }
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c

index 644ba0888bd41bb5e54f4ab58345b6af9519e0c2..7d2402f9097892332a43ba632c88084f52b53efa 100644 (file)
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -61,7 +61,7 @@
                          first drive found.
                         
  
-            major       You may use this parameter to overide the
+            major       You may use this parameter to override the
                          default major number (45) that this driver
                          will use.  Be sure to change the device
                          name as well.
@@ -381,12 +381,33 @@ static enum action do_pd_write_start(void);
  static enum action do_pd_read_drq(void);
  static enum action do_pd_write_done(void);
  
-static struct request_queue *pd_queue;
+static int pd_queue;
  static int pd_claimed;
  
  static struct pd_unit *pd_current; /* current request's drive */
  static PIA *pi_current; /* current request's PIA */
  
+static int set_next_request(void)
+{
+       struct gendisk *disk;
+       struct request_queue *q;
+       int old_pos = pd_queue;
+
+       do {
+               disk = pd[pd_queue].gd;
+               q = disk ? disk->queue : NULL;
+               if (++pd_queue == PD_UNITS)
+                       pd_queue = 0;
+               if (q) {
+                       pd_req = blk_fetch_request(q);
+                       if (pd_req)
+                               break;
+               }
+       } while (pd_queue != old_pos);
+
+       return pd_req != NULL;
+}
+
  static void run_fsm(void)
  {
         while (1) {
@@ -418,8 +439,7 @@ static void run_fsm(void)
                                 spin_lock_irqsave(&pd_lock, saved_flags);
                                 if (!__blk_end_request_cur(pd_req,
                                                 res == Ok ? 0 : -EIO)) {
-                                       pd_req = blk_fetch_request(pd_queue);
-                                       if (!pd_req)
+                                       if (!set_next_request())
                                                 stop = 1;
                                 }
                                 spin_unlock_irqrestore(&pd_lock, saved_flags);
@@ -719,18 +739,15 @@ static int pd_special_command(struct pd_unit *disk,
                       enum action (*func)(struct pd_unit *disk))
  {
         struct request *rq;
-       int err = 0;
  
         rq = blk_get_request(disk->gd->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
         if (IS_ERR(rq))
                 return PTR_ERR(rq);
  
         rq->special = func;
-
-       err = blk_execute_rq(disk->gd->queue, disk->gd, rq, 0);
-
+       blk_execute_rq(disk->gd->queue, disk->gd, rq, 0);
         blk_put_request(rq);
-       return err;
+       return 0;
  }
  
  /* kernel glue structures */
@@ -839,7 +856,13 @@ static void pd_probe_drive(struct pd_unit *disk)
         p->first_minor = (disk - pd) << PD_BITS;
         disk->gd = p;
         p->private_data = disk;
-       p->queue = pd_queue;
+       p->queue = blk_init_queue(do_pd_request, &pd_lock);
+       if (!p->queue) {
+               disk->gd = NULL;
+               put_disk(p);
+               return;
+       }
+       blk_queue_max_hw_sectors(p->queue, cluster);
  
         if (disk->drive == -1) {
                 for (disk->drive = 0; disk->drive <= 1; disk->drive++)
@@ -919,26 +942,18 @@ static int __init pd_init(void)
         if (disable)
                 goto out1;
  
-       pd_queue = blk_init_queue(do_pd_request, &pd_lock);
-       if (!pd_queue)
-               goto out1;
-
-       blk_queue_max_hw_sectors(pd_queue, cluster);
-
         if (register_blkdev(major, name))
-               goto out2;
+               goto out1;
  
         printk("%s: %s version %s, major %d, cluster %d, nice %d\n",
                name, name, PD_VERSION, major, cluster, nice);
         if (!pd_detect())
-               goto out3;
+               goto out2;
  
         return 0;
  
-out3:
-       unregister_blkdev(major, name);
  out2:
-       blk_cleanup_queue(pd_queue);
+       unregister_blkdev(major, name);
  out1:
         return -ENODEV;
  }
@@ -953,11 +968,11 @@ static void __exit pd_exit(void)
                 if (p) {
                         disk->gd = NULL;
                         del_gendisk(p);
+                       blk_cleanup_queue(p->queue);
                         put_disk(p);
                         pi_release(disk->pi);
                 }
         }
-       blk_cleanup_queue(pd_queue);
  }
  
  MODULE_LICENSE("GPL");
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c

index ed93e8badf5684d513ef78a8c03f74ccc4531ecd..f24ca7315ddc91e24e2cfa6ca62c7f3a5578a296 100644 (file)
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -59,7 +59,7 @@
                          the slower the port i/o.  In some cases, setting
                          this to zero will speed up the device. (default -1)
  
-           major       You may use this parameter to overide the
+           major       You may use this parameter to override the
                         default major number (47) that this driver
                         will use.  Be sure to change the device
                         name as well.
@@ -287,6 +287,12 @@ static void __init pf_init_units(void)
                 struct gendisk *disk = alloc_disk(1);
                 if (!disk)
                         continue;
+               disk->queue = blk_init_queue(do_pf_request, &pf_spin_lock);
+               if (!disk->queue) {
+                       put_disk(disk);
+                       return;
+               }
+               blk_queue_max_segments(disk->queue, cluster);
                 pf->disk = disk;
                 pf->pi = &pf->pia;
                 pf->media_status = PF_NM;
@@ -772,7 +778,28 @@ static int pf_ready(void)
         return (((status_reg(pf_current) & (STAT_BUSY | pf_mask)) == pf_mask));
  }
  
-static struct request_queue *pf_queue;
+static int pf_queue;
+
+static int set_next_request(void)
+{
+       struct pf_unit *pf;
+       struct request_queue *q;
+       int old_pos = pf_queue;
+
+       do {
+               pf = &units[pf_queue];
+               q = pf->present ? pf->disk->queue : NULL;
+               if (++pf_queue == PF_UNITS)
+                       pf_queue = 0;
+               if (q) {
+                       pf_req = blk_fetch_request(q);
+                       if (pf_req)
+                               break;
+               }
+       } while (pf_queue != old_pos);
+
+       return pf_req != NULL;
+}
  
  static void pf_end_request(int err)
  {
@@ -780,16 +807,13 @@ static void pf_end_request(int err)
                 pf_req = NULL;
  }
  
-static void do_pf_request(struct request_queue * q)
+static void pf_request(void)
  {
         if (pf_busy)
                 return;
  repeat:
-       if (!pf_req) {
-               pf_req = blk_fetch_request(q);
-               if (!pf_req)
-                       return;
-       }
+       if (!pf_req && !set_next_request())
+               return;
  
         pf_current = pf_req->rq_disk->private_data;
         pf_block = blk_rq_pos(pf_req);
@@ -817,6 +841,11 @@ repeat:
         }
  }
  
+static void do_pf_request(struct request_queue *q)
+{
+       pf_request();
+}
+
  static int pf_next_buf(void)
  {
         unsigned long saved_flags;
@@ -846,7 +875,7 @@ static inline void next_request(int err)
         spin_lock_irqsave(&pf_spin_lock, saved_flags);
         pf_end_request(err);
         pf_busy = 0;
-       do_pf_request(pf_queue);
+       pf_request();
         spin_unlock_irqrestore(&pf_spin_lock, saved_flags);
  }
  
@@ -972,15 +1001,6 @@ static int __init pf_init(void)
                         put_disk(pf->disk);
                 return -EBUSY;
         }
-       pf_queue = blk_init_queue(do_pf_request, &pf_spin_lock);
-       if (!pf_queue) {
-               unregister_blkdev(major, name);
-               for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++)
-                       put_disk(pf->disk);
-               return -ENOMEM;
-       }
-
-       blk_queue_max_segments(pf_queue, cluster);
  
         for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
                 struct gendisk *disk = pf->disk;
@@ -988,7 +1008,6 @@ static int __init pf_init(void)
                 if (!pf->present)
                         continue;
                 disk->private_data = pf;
-               disk->queue = pf_queue;
                 add_disk(disk);
         }
         return 0;
@@ -1003,10 +1022,10 @@ static void __exit pf_exit(void)
                 if (!pf->present)
                         continue;
                 del_gendisk(pf->disk);
+               blk_cleanup_queue(pf->disk->queue);
                 put_disk(pf->disk);
                 pi_release(pf->pi);
         }
-       blk_cleanup_queue(pf_queue);
  }
  
  MODULE_LICENSE("GPL");
diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c

index 5db955fe3a949018e353ebaa1b98a4a1f17b86ef..3b5882bfb7364e33ab3f7b8355219ee2c977c4fa 100644 (file)
--- a/drivers/block/paride/pg.c
+++ b/drivers/block/paride/pg.c
@@ -84,7 +84,7 @@
                         the slower the port i/o.  In some cases, setting
                         this to zero will speed up the device. (default -1)
  
-           major       You may use this parameter to overide the
+           major       You may use this parameter to override the
                         default major number (97) that this driver
                         will use.  Be sure to change the device
                         name as well.
diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c

index 61fc6824299ac13c762e84dde6cae8baf8411e37..e815312a00add6b96651f2a956dc84d14d90adc7 100644 (file)
--- a/drivers/block/paride/pt.c
+++ b/drivers/block/paride/pt.c
@@ -61,7 +61,7 @@
                          the slower the port i/o.  In some cases, setting
                          this to zero will speed up the device. (default -1)
  
-           major       You may use this parameter to overide the
+           major       You may use this parameter to override the
                         default major number (96) that this driver
                         will use.  Be sure to change the device
                         name as well.
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c

index 66d846ba85a9774c226dc92a85cc1c19b8c1ca9d..205b865ebeb9f123b12beb8d1a8c5179bbad7bd9 100644 (file)
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -724,7 +724,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
                 rq->rq_flags |= RQF_QUIET;
  
         blk_execute_rq(rq->q, pd->bdev->bd_disk, rq, 0);
-       if (rq->errors)
+       if (scsi_req(rq)->result)
                 ret = -EIO;
  out:
         blk_put_request(rq);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c

index 4d680772379828423d8605b1cae8c5da271ec5b8..089ac4179919d2b3b678dc23c373b457842580d3 100644 (file)
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -120,10 +120,11 @@ static int atomic_dec_return_safe(atomic_t *v)
  
  /* Feature bits */
  
-#define RBD_FEATURE_LAYERING   (1<<0)
-#define RBD_FEATURE_STRIPINGV2 (1<<1)
-#define RBD_FEATURE_EXCLUSIVE_LOCK (1<<2)
-#define RBD_FEATURE_DATA_POOL (1<<7)
+#define RBD_FEATURE_LAYERING           (1ULL<<0)
+#define RBD_FEATURE_STRIPINGV2         (1ULL<<1)
+#define RBD_FEATURE_EXCLUSIVE_LOCK     (1ULL<<2)
+#define RBD_FEATURE_DATA_POOL          (1ULL<<7)
+
  #define RBD_FEATURES_ALL       (RBD_FEATURE_LAYERING |         \
                                  RBD_FEATURE_STRIPINGV2 |       \
                                  RBD_FEATURE_EXCLUSIVE_LOCK |   \
@@ -499,16 +500,23 @@ static bool rbd_is_lock_owner(struct rbd_device *rbd_dev)
         return is_lock_owner;
  }
  
+static ssize_t rbd_supported_features_show(struct bus_type *bus, char *buf)
+{
+       return sprintf(buf, "0x%llx\n", RBD_FEATURES_SUPPORTED);
+}
+
  static BUS_ATTR(add, S_IWUSR, NULL, rbd_add);
  static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove);
  static BUS_ATTR(add_single_major, S_IWUSR, NULL, rbd_add_single_major);
  static BUS_ATTR(remove_single_major, S_IWUSR, NULL, rbd_remove_single_major);
+static BUS_ATTR(supported_features, S_IRUGO, rbd_supported_features_show, NULL);
  
  static struct attribute *rbd_bus_attrs[] = {
         &bus_attr_add.attr,
         &bus_attr_remove.attr,
         &bus_attr_add_single_major.attr,
         &bus_attr_remove_single_major.attr,
+       &bus_attr_supported_features.attr,
         NULL,
  };
  
@@ -4309,7 +4317,7 @@ static int rbd_init_request(void *data, struct request *rq,
         return 0;
  }
  
-static struct blk_mq_ops rbd_mq_ops = {
+static const struct blk_mq_ops rbd_mq_ops = {
         .queue_rq       = rbd_queue_rq,
         .init_request   = rbd_init_request,
  };
@@ -4372,7 +4380,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
         q->limits.discard_granularity = segment_size;
         q->limits.discard_alignment = segment_size;
         blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE);
-       q->limits.discard_zeroes_data = 1;
  
         if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
                 q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES;
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c

index f81d70b39d1096da4ea5338cdcb5c8aa22defcb4..9c566364ac9c3c5890d466a26c0d7daf72eeb38c 100644 (file)
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -300,7 +300,6 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
                                                 RSXX_HW_BLK_SIZE >> 9);
                 card->queue->limits.discard_granularity = RSXX_HW_BLK_SIZE;
                 card->queue->limits.discard_alignment   = RSXX_HW_BLK_SIZE;
-               card->queue->limits.discard_zeroes_data = 1;
         }
  
         card->queue->queuedata = card;
diff --git a/drivers/block/swim.c b/drivers/block/swim.c

index b5afd495d482e8971be1cc431bb7b0ea8b54cae2..3064be6cf3755a4017cbd2a30cda023977250cbf 100644 (file)
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -211,7 +211,7 @@ enum head {
  struct swim_priv {
         struct swim __iomem *base;
         spinlock_t lock;
-       struct request_queue *queue;
+       int fdc_queue;
         int floppy_count;
         struct floppy_state unit[FD_MAX_UNIT];
  };
@@ -525,12 +525,33 @@ static int floppy_read_sectors(struct floppy_state *fs,
         return 0;
  }
  
-static void redo_fd_request(struct request_queue *q)
+static struct request *swim_next_request(struct swim_priv *swd)
  {
+       struct request_queue *q;
+       struct request *rq;
+       int old_pos = swd->fdc_queue;
+
+       do {
+               q = swd->unit[swd->fdc_queue].disk->queue;
+               if (++swd->fdc_queue == swd->floppy_count)
+                       swd->fdc_queue = 0;
+               if (q) {
+                       rq = blk_fetch_request(q);
+                       if (rq)
+                               return rq;
+               }
+       } while (swd->fdc_queue != old_pos);
+
+       return NULL;
+}
+
+static void do_fd_request(struct request_queue *q)
+{
+       struct swim_priv *swd = q->queuedata;
         struct request *req;
         struct floppy_state *fs;
  
-       req = blk_fetch_request(q);
+       req = swim_next_request(swd);
         while (req) {
                 int err = -EIO;
  
@@ -554,15 +575,10 @@ static void redo_fd_request(struct request_queue *q)
                 }
         done:
                 if (!__blk_end_request_cur(req, err))
-                       req = blk_fetch_request(q);
+                       req = swim_next_request(swd);
         }
  }
  
-static void do_fd_request(struct request_queue *q)
-{
-       redo_fd_request(q);
-}
-
  static struct floppy_struct floppy_type[4] = {
         {    0,  0, 0,  0, 0, 0x00, 0x00, 0x00, 0x00, NULL }, /* no testing   */
         {  720,  9, 1, 80, 0, 0x2A, 0x02, 0xDF, 0x50, NULL }, /* 360KB SS 3.5"*/
@@ -833,22 +849,25 @@ static int swim_floppy_init(struct swim_priv *swd)
                 return -EBUSY;
         }
  
+       spin_lock_init(&swd->lock);
+
         for (drive = 0; drive < swd->floppy_count; drive++) {
                 swd->unit[drive].disk = alloc_disk(1);
                 if (swd->unit[drive].disk == NULL) {
                         err = -ENOMEM;
                         goto exit_put_disks;
                 }
+               swd->unit[drive].disk->queue = blk_init_queue(do_fd_request,
+                                                             &swd->lock);
+               if (!swd->unit[drive].disk->queue) {
+                       err = -ENOMEM;
+                       put_disk(swd->unit[drive].disk);
+                       goto exit_put_disks;
+               }
+               swd->unit[drive].disk->queue->queuedata = swd;
                 swd->unit[drive].swd = swd;
         }
  
-       spin_lock_init(&swd->lock);
-       swd->queue = blk_init_queue(do_fd_request, &swd->lock);
-       if (!swd->queue) {
-               err = -ENOMEM;
-               goto exit_put_disks;
-       }
-
         for (drive = 0; drive < swd->floppy_count; drive++) {
                 swd->unit[drive].disk->flags = GENHD_FL_REMOVABLE;
                 swd->unit[drive].disk->major = FLOPPY_MAJOR;
@@ -856,7 +875,6 @@ static int swim_floppy_init(struct swim_priv *swd)
                 sprintf(swd->unit[drive].disk->disk_name, "fd%d", drive);
                 swd->unit[drive].disk->fops = &floppy_fops;
                 swd->unit[drive].disk->private_data = &swd->unit[drive];
-               swd->unit[drive].disk->queue = swd->queue;
                 set_capacity(swd->unit[drive].disk, 2880);
                 add_disk(swd->unit[drive].disk);
         }
@@ -943,13 +961,12 @@ static int swim_remove(struct platform_device *dev)
  
         for (drive = 0; drive < swd->floppy_count; drive++) {
                 del_gendisk(swd->unit[drive].disk);
+               blk_cleanup_queue(swd->unit[drive].disk->queue);
                 put_disk(swd->unit[drive].disk);
         }
  
         unregister_blkdev(FLOPPY_MAJOR, "fd");
  
-       blk_cleanup_queue(swd->queue);
-
         /* eject floppies */
  
         for (drive = 0; drive < swd->floppy_count; drive++)
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c

index 61b3ffa4f45897aef1e4f2b33220f16e1a19e4b5..ba4809c9bdbadfccfb3bd28b7e3c9825ce0e39f1 100644 (file)
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -343,8 +343,8 @@ static void start_request(struct floppy_state *fs)
                           req->rq_disk->disk_name, req->cmd,
                           (long)blk_rq_pos(req), blk_rq_sectors(req),
                           bio_data(req->bio));
-               swim3_dbg("           errors=%d current_nr_sectors=%u\n",
-                         req->errors, blk_rq_cur_sectors(req));
+               swim3_dbg("           current_nr_sectors=%u\n",
+                         blk_rq_cur_sectors(req));
  #endif
  
                 if (blk_rq_pos(req) >= fs->total_secs) {
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c

index 1d4c9f8bc1e16e012aba5d98a54b3e0f55e40565..f94614257462c33d058bcadd22347c69060fdd40 100644 (file)
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -111,7 +111,7 @@ static int virtblk_add_req_scsi(struct virtqueue *vq, struct virtblk_req *vbr,
         return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
  }
  
-static inline void virtblk_scsi_reques_done(struct request *req)
+static inline void virtblk_scsi_request_done(struct request *req)
  {
         struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
         struct virtio_blk *vblk = req->q->queuedata;
@@ -119,7 +119,7 @@ static inline void virtblk_scsi_reques_done(struct request *req)
  
         sreq->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual);
         sreq->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len);
-       req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors);
+       sreq->result = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors);
  }
  
  static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
@@ -144,7 +144,7 @@ static inline int virtblk_add_req_scsi(struct virtqueue *vq,
  {
         return -EIO;
  }
-static inline void virtblk_scsi_reques_done(struct request *req)
+static inline void virtblk_scsi_request_done(struct request *req)
  {
  }
  #define virtblk_ioctl  NULL
@@ -175,19 +175,15 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
  static inline void virtblk_request_done(struct request *req)
  {
         struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
-       int error = virtblk_result(vbr);
  
         switch (req_op(req)) {
         case REQ_OP_SCSI_IN:
         case REQ_OP_SCSI_OUT:
-               virtblk_scsi_reques_done(req);
-               break;
-       case REQ_OP_DRV_IN:
-               req->errors = (error != 0);
+               virtblk_scsi_request_done(req);
                 break;
         }
  
-       blk_mq_end_request(req, error);
+       blk_mq_end_request(req, virtblk_result(vbr));
  }
  
  static void virtblk_done(struct virtqueue *vq)
@@ -205,7 +201,7 @@ static void virtblk_done(struct virtqueue *vq)
                 while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
                         struct request *req = blk_mq_rq_from_pdu(vbr);
  
-                       blk_mq_complete_request(req, req->errors);
+                       blk_mq_complete_request(req);
                         req_done = true;
                 }
                 if (unlikely(virtqueue_is_broken(vq)))
@@ -310,7 +306,8 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
         if (err)
                 goto out;
  
-       err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
+       blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
+       err = virtblk_result(blk_mq_rq_to_pdu(req));
  out:
         blk_put_request(req);
         return err;
@@ -597,7 +594,7 @@ static int virtblk_map_queues(struct blk_mq_tag_set *set)
         return blk_mq_virtio_map_queues(set, vblk->vdev, 0);
  }
  
-static struct blk_mq_ops virtio_mq_ops = {
+static const struct blk_mq_ops virtio_mq_ops = {
         .queue_rq       = virtio_queue_rq,
         .complete       = virtblk_request_done,
         .init_request   = virtblk_init_request,
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c

index 5067a0a952cb2161aec7da6168adfcb874aa02bc..39459631667cc248a8d569bf13ed6e67273848ae 100644 (file)
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -115,6 +115,15 @@ struct split_bio {
         atomic_t pending;
  };
  
+struct blkif_req {
+       int     error;
+};
+
+static inline struct blkif_req *blkif_req(struct request *rq)
+{
+       return blk_mq_rq_to_pdu(rq);
+}
+
  static DEFINE_MUTEX(blkfront_mutex);
  static const struct block_device_operations xlvbd_block_fops;
  
@@ -907,8 +916,14 @@ out_busy:
         return BLK_MQ_RQ_QUEUE_BUSY;
  }
  
-static struct blk_mq_ops blkfront_mq_ops = {
+static void blkif_complete_rq(struct request *rq)
+{
+       blk_mq_end_request(rq, blkif_req(rq)->error);
+}
+
+static const struct blk_mq_ops blkfront_mq_ops = {
         .queue_rq = blkif_queue_rq,
+       .complete = blkif_complete_rq,
  };
  
  static void blkif_set_queue_limits(struct blkfront_info *info)
@@ -969,7 +984,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
                 info->tag_set.queue_depth = BLK_RING_SIZE(info);
         info->tag_set.numa_node = NUMA_NO_NODE;
         info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
-       info->tag_set.cmd_size = 0;
+       info->tag_set.cmd_size = sizeof(struct blkif_req);
         info->tag_set.driver_data = info;
  
         if (blk_mq_alloc_tag_set(&info->tag_set))
@@ -1543,7 +1558,6 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
         unsigned long flags;
         struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
         struct blkfront_info *info = rinfo->dev_info;
-       int error;
  
         if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
                 return IRQ_HANDLED;
@@ -1587,37 +1601,36 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
                         continue;
                 }
  
-               error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
+               blkif_req(req)->error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
                 switch (bret->operation) {
                 case BLKIF_OP_DISCARD:
                         if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
                                 struct request_queue *rq = info->rq;
                                 printk(KERN_WARNING "blkfront: %s: %s op failed\n",
                                            info->gd->disk_name, op_name(bret->operation));
-                               error = -EOPNOTSUPP;
+                               blkif_req(req)->error = -EOPNOTSUPP;
                                 info->feature_discard = 0;
                                 info->feature_secdiscard = 0;
                                 queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
                                 queue_flag_clear(QUEUE_FLAG_SECERASE, rq);
                         }
-                       blk_mq_complete_request(req, error);
                         break;
                 case BLKIF_OP_FLUSH_DISKCACHE:
                 case BLKIF_OP_WRITE_BARRIER:
                         if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
                                 printk(KERN_WARNING "blkfront: %s: %s op failed\n",
                                        info->gd->disk_name, op_name(bret->operation));
-                               error = -EOPNOTSUPP;
+                               blkif_req(req)->error = -EOPNOTSUPP;
                         }
                         if (unlikely(bret->status == BLKIF_RSP_ERROR &&
                                      rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
                                 printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
                                        info->gd->disk_name, op_name(bret->operation));
-                               error = -EOPNOTSUPP;
+                               blkif_req(req)->error = -EOPNOTSUPP;
                         }
-                       if (unlikely(error)) {
-                               if (error == -EOPNOTSUPP)
-                                       error = 0;
+                       if (unlikely(blkif_req(req)->error)) {
+                               if (blkif_req(req)->error == -EOPNOTSUPP)
+                                       blkif_req(req)->error = 0;
                                 info->feature_fua = 0;
                                 info->feature_flush = 0;
                                 xlvbd_flush(info);
@@ -1629,11 +1642,12 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
                                 dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
                                         "request: %x\n", bret->status);
  
-                       blk_mq_complete_request(req, error);
                         break;
                 default:
                         BUG();
                 }
+
+               blk_mq_complete_request(req);
         }
  
         rinfo->ring.rsp_cons = i;
@@ -2345,6 +2359,7 @@ static void blkfront_connect(struct blkfront_info *info)
         unsigned long sector_size;
         unsigned int physical_sector_size;
         unsigned int binfo;
+       char *envp[] = { "RESIZE=1", NULL };
         int err, i;
  
         switch (info->connected) {
@@ -2361,6 +2376,8 @@ static void blkfront_connect(struct blkfront_info *info)
                        sectors);
                 set_capacity(info->gd, sectors);
                 revalidate_disk(info->gd);
+               kobject_uevent_env(&disk_to_dev(info->gd)->kobj,
+                                  KOBJ_CHANGE, envp);
  
                 return;
         case BLKIF_STATE_SUSPENDED:
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c

index e27d89a36c34170d1c894b60f43ab3903a5fbf70..6fac5fedd6107b8b86bd1cef612c0f34899c5d8e 100644 (file)
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -523,7 +523,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
  
         cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
         if (size == PAGE_SIZE) {
-               copy_page(mem, cmem);
+               memcpy(mem, cmem, PAGE_SIZE);
         } else {
                 struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
  
@@ -717,7 +717,7 @@ compress_again:
  
         if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
                 src = kmap_atomic(page);
-               copy_page(cmem, src);
+               memcpy(cmem, src, PAGE_SIZE);
                 kunmap_atomic(src);
         } else {
                 memcpy(cmem, src, clen);
@@ -829,10 +829,14 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
         offset = (bio->bi_iter.bi_sector &
                   (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
  
-       if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
+       switch (bio_op(bio)) {
+       case REQ_OP_DISCARD:
+       case REQ_OP_WRITE_ZEROES:
                 zram_bio_discard(zram, index, offset, bio);
                 bio_endio(bio);
                 return;
+       default:
+               break;
         }
  
         bio_for_each_segment(bvec, bio, iter) {
@@ -928,7 +932,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
         }
  
         index = sector >> SECTORS_PER_PAGE_SHIFT;
-       offset = sector & (SECTORS_PER_PAGE - 1) << SECTOR_SHIFT;
+       offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
  
         bv.bv_page = page;
         bv.bv_len = PAGE_SIZE;
@@ -1189,7 +1193,11 @@ static int zram_add(void)
         blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
         blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
         zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
+       zram->disk->queue->limits.max_sectors = SECTORS_PER_PAGE;
+       zram->disk->queue->limits.chunk_sectors = 0;
         blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
+       queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
+
         /*
          * zram_bio_discard() will clear all logical blocks if logical block
          * size is identical with physical block size(PAGE_SIZE). But if it is
@@ -1199,10 +1207,7 @@ static int zram_add(void)
          * zeroed.
          */
         if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
-               zram->disk->queue->limits.discard_zeroes_data = 1;
-       else
-               zram->disk->queue->limits.discard_zeroes_data = 0;
-       queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
+               blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
  
         add_disk(zram->disk);
  
diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig

index c2c14a12713b56038c8c21deae2212550d24422b..08e054507d0bcd7fc59f5d14d0927ba6ca35291f 100644 (file)
--- a/drivers/bluetooth/Kconfig
+++ b/drivers/bluetooth/Kconfig
@@ -344,7 +344,8 @@ config BT_WILINK
  
  config BT_QCOMSMD
         tristate "Qualcomm SMD based HCI support"
-       depends on (QCOM_SMD && QCOM_WCNSS_CTRL) || COMPILE_TEST
+       depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n)
+       depends on QCOM_WCNSS_CTRL || (COMPILE_TEST && QCOM_WCNSS_CTRL=n)
         select BT_QCA
         help
           Qualcomm SMD based HCI driver.
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c

index 87739649eac21a6c3688c5c60956280746babd70..76c952fd9ab9056250341da04d08c6b012e343e2 100644 (file)
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2218,7 +2218,8 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
                 rq->timeout = 60 * HZ;
                 bio = rq->bio;
  
-               if (blk_execute_rq(q, cdi->disk, rq, 0)) {
+               blk_execute_rq(q, cdi->disk, rq, 0);
+               if (scsi_req(rq)->result) {
                         struct request_sense *s = req->sense;
                         ret = -EIO;
                         cdi->last_sense = s->sense_key;
diff --git a/drivers/char/hw_random/amd-rng.c b/drivers/char/hw_random/amd-rng.c

index 4a99ac756f0815a890665f1433b1038a98069be9..9959c762da2f8ec1f5cb0fde48021598f3deecfd 100644 (file)
--- a/drivers/char/hw_random/amd-rng.c
+++ b/drivers/char/hw_random/amd-rng.c
@@ -55,6 +55,7 @@ MODULE_DEVICE_TABLE(pci, pci_tbl);
  struct amd768_priv {
         void __iomem *iobase;
         struct pci_dev *pcidev;
+       u32 pmbase;
  };
  
  static int amd_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
@@ -148,33 +149,58 @@ found:
         if (pmbase == 0)
                 return -EIO;
  
-       priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+       priv = kzalloc(sizeof(*priv), GFP_KERNEL);
         if (!priv)
                 return -ENOMEM;
  
-       if (!devm_request_region(&pdev->dev, pmbase + PMBASE_OFFSET,
-                               PMBASE_SIZE, DRV_NAME)) {
+       if (!request_region(pmbase + PMBASE_OFFSET, PMBASE_SIZE, DRV_NAME)) {
                 dev_err(&pdev->dev, DRV_NAME " region 0x%x already in use!\n",
                         pmbase + 0xF0);
-               return -EBUSY;
+               err = -EBUSY;
+               goto out;
         }
  
-       priv->iobase = devm_ioport_map(&pdev->dev, pmbase + PMBASE_OFFSET,
-                       PMBASE_SIZE);
+       priv->iobase = ioport_map(pmbase + PMBASE_OFFSET, PMBASE_SIZE);
         if (!priv->iobase) {
                 pr_err(DRV_NAME "Cannot map ioport\n");
-               return -ENOMEM;
+               err = -EINVAL;
+               goto err_iomap;
         }
  
         amd_rng.priv = (unsigned long)priv;
+       priv->pmbase = pmbase;
         priv->pcidev = pdev;
  
         pr_info(DRV_NAME " detected\n");
-       return devm_hwrng_register(&pdev->dev, &amd_rng);
+       err = hwrng_register(&amd_rng);
+       if (err) {
+               pr_err(DRV_NAME " registering failed (%d)\n", err);
+               goto err_hwrng;
+       }
+       return 0;
+
+err_hwrng:
+       ioport_unmap(priv->iobase);
+err_iomap:
+       release_region(pmbase + PMBASE_OFFSET, PMBASE_SIZE);
+out:
+       kfree(priv);
+       return err;
  }
  
  static void __exit mod_exit(void)
  {
+       struct amd768_priv *priv;
+
+       priv = (struct amd768_priv *)amd_rng.priv;
+
+       hwrng_unregister(&amd_rng);
+
+       ioport_unmap(priv->iobase);
+
+       release_region(priv->pmbase + PMBASE_OFFSET, PMBASE_SIZE);
+
+       kfree(priv);
  }
  
  module_init(mod_init);
diff --git a/drivers/char/hw_random/geode-rng.c b/drivers/char/hw_random/geode-rng.c

index e7a2459420291b6b931b285fe2b9be22cdd3e897..e1d421a36a138d6a5e30b16bf62208846b5372ab 100644 (file)
--- a/drivers/char/hw_random/geode-rng.c
+++ b/drivers/char/hw_random/geode-rng.c
@@ -31,6 +31,9 @@
  #include <linux/module.h>
  #include <linux/pci.h>
  
+
+#define PFX    KBUILD_MODNAME ": "
+
  #define GEODE_RNG_DATA_REG   0x50
  #define GEODE_RNG_STATUS_REG 0x54
  
@@ -82,6 +85,7 @@ static struct hwrng geode_rng = {
  
  static int __init mod_init(void)
  {
+       int err = -ENODEV;
         struct pci_dev *pdev = NULL;
         const struct pci_device_id *ent;
         void __iomem *mem;
@@ -89,27 +93,43 @@ static int __init mod_init(void)
  
         for_each_pci_dev(pdev) {
                 ent = pci_match_id(pci_tbl, pdev);
-               if (ent) {
-                       rng_base = pci_resource_start(pdev, 0);
-                       if (rng_base == 0)
-                               return -ENODEV;
-
-                       mem = devm_ioremap(&pdev->dev, rng_base, 0x58);
-                       if (!mem)
-                               return -ENOMEM;
-                       geode_rng.priv = (unsigned long)mem;
-
-                       pr_info("AMD Geode RNG detected\n");
-                       return devm_hwrng_register(&pdev->dev, &geode_rng);
-               }
+               if (ent)
+                       goto found;
         }
-
         /* Device not found. */
-       return -ENODEV;
+       goto out;
+
+found:
+       rng_base = pci_resource_start(pdev, 0);
+       if (rng_base == 0)
+               goto out;
+       err = -ENOMEM;
+       mem = ioremap(rng_base, 0x58);
+       if (!mem)
+               goto out;
+       geode_rng.priv = (unsigned long)mem;
+
+       pr_info("AMD Geode RNG detected\n");
+       err = hwrng_register(&geode_rng);
+       if (err) {
+               pr_err(PFX "RNG registering failed (%d)\n",
+                      err);
+               goto err_unmap;
+       }
+out:
+       return err;
+
+err_unmap:
+       iounmap(mem);
+       goto out;
  }
  
  static void __exit mod_exit(void)
  {
+       void __iomem *mem = (void __iomem *)geode_rng.priv;
+
+       hwrng_unregister(&geode_rng);
+       iounmap(mem);
  }
  
  module_init(mod_init);
diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c

index 3ad86fdf954e96a71b16f436f2b22bd02e6cbd8f..b1ad12552b566a6892a7de411e9cd0c65cf25933 100644 (file)
--- a/drivers/char/hw_random/omap-rng.c
+++ b/drivers/char/hw_random/omap-rng.c
@@ -397,9 +397,8 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv,
                                 irq, err);
                         return err;
                 }
-               omap_rng_write(priv, RNG_INTMASK_REG, RNG_SHUTDOWN_OFLO_MASK);
  
-               priv->clk = of_clk_get(pdev->dev.of_node, 0);
+               priv->clk = devm_clk_get(&pdev->dev, NULL);
                 if (IS_ERR(priv->clk) && PTR_ERR(priv->clk) == -EPROBE_DEFER)
                         return -EPROBE_DEFER;
                 if (!IS_ERR(priv->clk)) {
@@ -408,6 +407,19 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv,
                                 dev_err(&pdev->dev, "unable to enable the clk, "
                                                     "err = %d\n", err);
                 }
+
+               /*
+                * On OMAP4, enabling the shutdown_oflo interrupt is
+                * done in the interrupt mask register. There is no
+                * such register on EIP76, and it's enabled by the
+                * same bit in the control register
+                */
+               if (priv->pdata->regs[RNG_INTMASK_REG])
+                       omap_rng_write(priv, RNG_INTMASK_REG,
+                                      RNG_SHUTDOWN_OFLO_MASK);
+               else
+                       omap_rng_write(priv, RNG_CONTROL_REG,
+                                      RNG_SHUTDOWN_OFLO_MASK);
         }
         return 0;
  }
diff --git a/drivers/char/ipmi/bt-bmc.c b/drivers/char/ipmi/bt-bmc.c

index d6f5d9eb102dd5dce22179cc4cc8290d4bbce927..70d434bc1cbf450e70e43e610a3c2535c97db860 100644 (file)
--- a/drivers/char/ipmi/bt-bmc.c
+++ b/drivers/char/ipmi/bt-bmc.c
@@ -523,6 +523,7 @@ static int bt_bmc_remove(struct platform_device *pdev)
  
  static const struct of_device_id bt_bmc_match[] = {
         { .compatible = "aspeed,ast2400-ibt-bmc" },
+       { .compatible = "aspeed,ast2500-ibt-bmc" },
         { },
  };
  
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c

index 2a7c425ddfa73aef490b7e0fd2b081ebe0ea5926..b2b618f066e02b85c185ba3a17cd1a4ca9e8eaf8 100644 (file)
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -1954,7 +1954,9 @@ static int hotmod_handler(const char *val, struct kernel_param *kp)
                                 kfree(info);
                                 goto out;
                         }
+                       mutex_lock(&smi_infos_lock);
                         rv = try_smi_init(info);
+                       mutex_unlock(&smi_infos_lock);
                         if (rv) {
                                 cleanup_one_si(info);
                                 goto out;
@@ -2042,8 +2044,10 @@ static int hardcode_find_bmc(void)
                 info->slave_addr = slave_addrs[i];
  
                 if (!add_smi(info)) {
+                       mutex_lock(&smi_infos_lock);
                         if (try_smi_init(info))
                                 cleanup_one_si(info);
+                       mutex_unlock(&smi_infos_lock);
                         ret = 0;
                 } else {
                         kfree(info);
@@ -3492,6 +3496,11 @@ out_err:
         return rv;
  }
  
+/*
+ * Try to start up an interface.  Must be called with smi_infos_lock
+ * held, primarily to keep smi_num consistent; we only want to do these
+ * one at a time.
+ */
  static int try_smi_init(struct smi_info *new_smi)
  {
         int rv = 0;
@@ -3524,9 +3533,12 @@ static int try_smi_init(struct smi_info *new_smi)
                 goto out_err;
         }
  
+       new_smi->intf_num = smi_num;
+
         /* Do this early so it's available for logs. */
         if (!new_smi->dev) {
-               init_name = kasprintf(GFP_KERNEL, "ipmi_si.%d", 0);
+               init_name = kasprintf(GFP_KERNEL, "ipmi_si.%d",
+                                     new_smi->intf_num);
  
                 /*
                  * If we don't already have a device from something
@@ -3593,8 +3605,6 @@ static int try_smi_init(struct smi_info *new_smi)
  
         new_smi->interrupt_disabled = true;
         atomic_set(&new_smi->need_watch, 0);
-       new_smi->intf_num = smi_num;
-       smi_num++;
  
         rv = try_enable_event_buffer(new_smi);
         if (rv == 0)
@@ -3661,6 +3671,9 @@ static int try_smi_init(struct smi_info *new_smi)
                 goto out_err_stop_timer;
         }
  
+       /* Don't increment till we know we have succeeded. */
+       smi_num++;
+
         dev_info(new_smi->dev, "IPMI %s interface initialized\n",
                  si_to_str[new_smi->si_type]);
  
diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c

index cca6e5bc1cea3c01831b66223f09b22fe7cf4c74..0b22a9be5029155265c0d648e14f1833a5ba4ada 100644 (file)
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -891,6 +891,7 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result,
                  * for details on the intricacies of this.
                  */
                 int left;
+               unsigned char *data_to_send;
  
                 ssif_inc_stat(ssif_info, sent_messages_parts);
  
@@ -899,6 +900,7 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result,
                         left = 32;
                 /* Length byte. */
                 ssif_info->multi_data[ssif_info->multi_pos] = left;
+               data_to_send = ssif_info->multi_data + ssif_info->multi_pos;
                 ssif_info->multi_pos += left;
                 if (left < 32)
                         /*
@@ -912,7 +914,7 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result,
                 rv = ssif_i2c_send(ssif_info, msg_written_handler,
                                   I2C_SMBUS_WRITE,
                                   SSIF_IPMI_MULTI_PART_REQUEST_MIDDLE,
-                                 ssif_info->multi_data + ssif_info->multi_pos,
+                                 data_to_send,
                                   I2C_SMBUS_BLOCK_DATA);
                 if (rv < 0) {
                         /* request failed, just return the error. */
@@ -1642,9 +1644,8 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)
  
         spin_lock_init(&ssif_info->lock);
         ssif_info->ssif_state = SSIF_NORMAL;
-       init_timer(&ssif_info->retry_timer);
-       ssif_info->retry_timer.data = (unsigned long) ssif_info;
-       ssif_info->retry_timer.function = retry_timeout;
+       setup_timer(&ssif_info->retry_timer, retry_timeout,
+                   (unsigned long)ssif_info);
  
         for (i = 0; i < SSIF_NUM_STATS; i++)
                 atomic_set(&ssif_info->stats[i], 0);
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c

index 5ca24d9b101b922abdd3a884c203b84675481f2d..d165af8abe36c7118f3ae602fec14d0f7230b965 100644 (file)
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -516,7 +516,7 @@ static void panic_halt_ipmi_heartbeat(void)
         msg.cmd = IPMI_WDOG_RESET_TIMER;
         msg.data = NULL;
         msg.data_len = 0;
-       atomic_add(2, &panic_done_count);
+       atomic_add(1, &panic_done_count);
         rv = ipmi_request_supply_msgs(watchdog_user,
                                       (struct ipmi_addr *) &addr,
                                       0,
@@ -526,7 +526,7 @@ static void panic_halt_ipmi_heartbeat(void)
                                       &panic_halt_heartbeat_recv_msg,
                                       1);
         if (rv)
-               atomic_sub(2, &panic_done_count);
+               atomic_sub(1, &panic_done_count);
  }
  
  static struct ipmi_smi_msg panic_halt_smi_msg = {
@@ -550,12 +550,12 @@ static void panic_halt_ipmi_set_timeout(void)
         /* Wait for the messages to be free. */
         while (atomic_read(&panic_done_count) != 0)
                 ipmi_poll_interface(watchdog_user);
-       atomic_add(2, &panic_done_count);
+       atomic_add(1, &panic_done_count);
         rv = i_ipmi_set_timeout(&panic_halt_smi_msg,
                                 &panic_halt_recv_msg,
                                 &send_heartbeat_now);
         if (rv) {
-               atomic_sub(2, &panic_done_count);
+               atomic_sub(1, &panic_done_count);
                 printk(KERN_WARNING PFX
                        "Unable to extend the watchdog timeout.");
         } else {
diff --git a/drivers/char/mem.c b/drivers/char/mem.c

index 6d9cc2d39d22306fd68f30bac6f4a60e6cfa5a87..7e4a9d1296bb7fb666f6b37ced2757a8585b7d75 100644 (file)
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -60,6 +60,10 @@ static inline int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
  #endif
  
  #ifdef CONFIG_STRICT_DEVMEM
+static inline int page_is_allowed(unsigned long pfn)
+{
+       return devmem_is_allowed(pfn);
+}
  static inline int range_is_allowed(unsigned long pfn, unsigned long size)
  {
         u64 from = ((u64)pfn) << PAGE_SHIFT;
@@ -75,6 +79,10 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
         return 1;
  }
  #else
+static inline int page_is_allowed(unsigned long pfn)
+{
+       return 1;
+}
  static inline int range_is_allowed(unsigned long pfn, unsigned long size)
  {
         return 1;
@@ -122,23 +130,31 @@ static ssize_t read_mem(struct file *file, char __user *buf,
  
         while (count > 0) {
                 unsigned long remaining;
+               int allowed;
  
                 sz = size_inside_page(p, count);
  
-               if (!range_is_allowed(p >> PAGE_SHIFT, count))
+               allowed = page_is_allowed(p >> PAGE_SHIFT);
+               if (!allowed)
                         return -EPERM;
+               if (allowed == 2) {
+                       /* Show zeros for restricted memory. */
+                       remaining = clear_user(buf, sz);
+               } else {
+                       /*
+                        * On ia64 if a page has been mapped somewhere as
+                        * uncached, then it must also be accessed uncached
+                        * by the kernel or data corruption may occur.
+                        */
+                       ptr = xlate_dev_mem_ptr(p);
+                       if (!ptr)
+                               return -EFAULT;
  
-               /*
-                * On ia64 if a page has been mapped somewhere as uncached, then
-                * it must also be accessed uncached by the kernel or data
-                * corruption may occur.
-                */
-               ptr = xlate_dev_mem_ptr(p);
-               if (!ptr)
-                       return -EFAULT;
+                       remaining = copy_to_user(buf, ptr, sz);
+
+                       unxlate_dev_mem_ptr(p, ptr);
+               }
  
-               remaining = copy_to_user(buf, ptr, sz);
-               unxlate_dev_mem_ptr(p, ptr);
                 if (remaining)
                         return -EFAULT;
  
@@ -181,30 +197,36 @@ static ssize_t write_mem(struct file *file, const char __user *buf,
  #endif
  
         while (count > 0) {
+               int allowed;
+
                 sz = size_inside_page(p, count);
  
-               if (!range_is_allowed(p >> PAGE_SHIFT, sz))
+               allowed = page_is_allowed(p >> PAGE_SHIFT);
+               if (!allowed)
                         return -EPERM;
  
-               /*
-                * On ia64 if a page has been mapped somewhere as uncached, then
-                * it must also be accessed uncached by the kernel or data
-                * corruption may occur.
-                */
-               ptr = xlate_dev_mem_ptr(p);
-               if (!ptr) {
-                       if (written)
-                               break;
-                       return -EFAULT;
-               }
+               /* Skip actual writing when a page is marked as restricted. */
+               if (allowed == 1) {
+                       /*
+                        * On ia64 if a page has been mapped somewhere as
+                        * uncached, then it must also be accessed uncached
+                        * by the kernel or data corruption may occur.
+                        */
+                       ptr = xlate_dev_mem_ptr(p);
+                       if (!ptr) {
+                               if (written)
+                                       break;
+                               return -EFAULT;
+                       }
  
-               copied = copy_from_user(ptr, buf, sz);
-               unxlate_dev_mem_ptr(p, ptr);
-               if (copied) {
-                       written += sz - copied;
-                       if (written)
-                               break;
-                       return -EFAULT;
+                       copied = copy_from_user(ptr, buf, sz);
+                       unxlate_dev_mem_ptr(p, ptr);
+                       if (copied) {
+                               written += sz - copied;
+                               if (written)
+                                       break;
+                               return -EFAULT;
+                       }
                 }
  
                 buf += sz;
diff --git a/drivers/char/nwbutton.c b/drivers/char/nwbutton.c

index a5b1eb276c0bf97c9959d72171cf1738ac0503b6..e6d0d271c58c83073e3acb71c788867d4edfc846 100644 (file)
--- a/drivers/char/nwbutton.c
+++ b/drivers/char/nwbutton.c
@@ -6,7 +6,7 @@
  
  #include <linux/module.h>
  #include <linux/kernel.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
  #include <linux/interrupt.h>
  #include <linux/time.h>
  #include <linux/timer.h>
diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c

index 2a558c706581b21864d45ac701213beb301e8899..3e73bcdf9e658d378963bee83bfeda2e76659c40 100644 (file)
--- a/drivers/char/ppdev.c
+++ b/drivers/char/ppdev.c
@@ -84,11 +84,14 @@ struct pp_struct {
         struct ieee1284_info state;
         struct ieee1284_info saved_state;
         long default_inactivity;
+       int index;
  };
  
  /* should we use PARDEVICE_MAX here? */
  static struct device *devices[PARPORT_MAX];
  
+static DEFINE_IDA(ida_index);
+
  /* pp_struct.flags bitfields */
  #define PP_CLAIMED    (1<<0)
  #define PP_EXCL       (1<<1)
@@ -290,7 +293,7 @@ static int register_device(int minor, struct pp_struct *pp)
         struct pardevice *pdev = NULL;
         char *name;
         struct pardev_cb ppdev_cb;
-       int rc = 0;
+       int rc = 0, index;
  
         name = kasprintf(GFP_KERNEL, CHRDEV "%x", minor);
         if (name == NULL)
@@ -303,20 +306,23 @@ static int register_device(int minor, struct pp_struct *pp)
                 goto err;
         }
  
+       index = ida_simple_get(&ida_index, 0, 0, GFP_KERNEL);
         memset(&ppdev_cb, 0, sizeof(ppdev_cb));
         ppdev_cb.irq_func = pp_irq;
         ppdev_cb.flags = (pp->flags & PP_EXCL) ? PARPORT_FLAG_EXCL : 0;
         ppdev_cb.private = pp;
-       pdev = parport_register_dev_model(port, name, &ppdev_cb, minor);
+       pdev = parport_register_dev_model(port, name, &ppdev_cb, index);
         parport_put_port(port);
  
         if (!pdev) {
                 pr_warn("%s: failed to register device!\n", name);
                 rc = -ENXIO;
+               ida_simple_remove(&ida_index, index);
                 goto err;
         }
  
         pp->pdev = pdev;
+       pp->index = index;
         dev_dbg(&pdev->dev, "registered pardevice\n");
  err:
         kfree(name);
@@ -755,6 +761,7 @@ static int pp_release(struct inode *inode, struct file *file)
  
         if (pp->pdev) {
                 parport_unregister_device(pp->pdev);
+               ida_simple_remove(&ida_index, pp->index);
                 pp->pdev = NULL;
                 pr_debug(CHRDEV "%x: unregistered pardevice\n", minor);
         }
diff --git a/drivers/char/random.c b/drivers/char/random.c

index 1ef26403bcc83f6a0c26f20d67b74e7fe6331635..0ab0249189072befe3cee1b8696052727f360540 100644 (file)
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -312,13 +312,6 @@ static int random_read_wakeup_bits = 64;
   */
  static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS;
  
-/*
- * The minimum number of seconds between urandom pool reseeding.  We
- * do this to limit the amount of entropy that can be drained from the
- * input pool even if there are heavy demands on /dev/urandom.
- */
-static int random_min_urandom_seed = 60;
-
  /*
   * Originally, we used a primitive polynomial of degree .poolwords
   * over GF(2).  The taps for various sizes are defined below.  They
@@ -409,7 +402,6 @@ static struct poolinfo {
   */
  static DECLARE_WAIT_QUEUE_HEAD(random_read_wait);
  static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
-static DECLARE_WAIT_QUEUE_HEAD(urandom_init_wait);
  static struct fasync_struct *fasync;
  
  static DEFINE_SPINLOCK(random_ready_list_lock);
@@ -467,7 +459,6 @@ struct entropy_store {
         int entropy_count;
         int entropy_total;
         unsigned int initialized:1;
-       unsigned int limit:1;
         unsigned int last_data_init:1;
         __u8 last_data[EXTRACT_SIZE];
  };
@@ -485,7 +476,6 @@ static __u32 blocking_pool_data[OUTPUT_POOL_WORDS] __latent_entropy;
  static struct entropy_store input_pool = {
         .poolinfo = &poolinfo_table[0],
         .name = "input",
-       .limit = 1,
         .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
         .pool = input_pool_data
  };
@@ -493,7 +483,6 @@ static struct entropy_store input_pool = {
  static struct entropy_store blocking_pool = {
         .poolinfo = &poolinfo_table[1],
         .name = "blocking",
-       .limit = 1,
         .pull = &input_pool,
         .lock = __SPIN_LOCK_UNLOCKED(blocking_pool.lock),
         .pool = blocking_pool_data,
@@ -855,13 +844,6 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
         spin_unlock_irqrestore(&primary_crng.lock, flags);
  }
  
-static inline void maybe_reseed_primary_crng(void)
-{
-       if (crng_init > 2 &&
-           time_after(jiffies, primary_crng.init_time + CRNG_RESEED_INTERVAL))
-               crng_reseed(&primary_crng, &input_pool);
-}
-
  static inline void crng_wait_ready(void)
  {
         wait_event_interruptible(crng_init_wait, crng_ready());
@@ -1220,15 +1202,6 @@ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes)
             r->entropy_count > r->poolinfo->poolfracbits)
                 return;
  
-       if (r->limit == 0 && random_min_urandom_seed) {
-               unsigned long now = jiffies;
-
-               if (time_before(now,
-                               r->last_pulled + random_min_urandom_seed * HZ))
-                       return;
-               r->last_pulled = now;
-       }
-
         _xfer_secondary_pool(r, nbytes);
  }
  
@@ -1236,8 +1209,6 @@ static void _xfer_secondary_pool(struct entropy_store *r, size_t nbytes)
  {
         __u32   tmp[OUTPUT_POOL_WORDS];
  
-       /* For /dev/random's pool, always leave two wakeups' worth */
-       int rsvd_bytes = r->limit ? 0 : random_read_wakeup_bits / 4;
         int bytes = nbytes;
  
         /* pull at least as much as a wakeup */
@@ -1248,7 +1219,7 @@ static void _xfer_secondary_pool(struct entropy_store *r, size_t nbytes)
         trace_xfer_secondary_pool(r->name, bytes * 8, nbytes * 8,
                                   ENTROPY_BITS(r), ENTROPY_BITS(r->pull));
         bytes = extract_entropy(r->pull, tmp, bytes,
-                               random_read_wakeup_bits / 8, rsvd_bytes);
+                               random_read_wakeup_bits / 8, 0);
         mix_pool_bytes(r, tmp, bytes);
         credit_entropy_bits(r, bytes*8);
  }
@@ -1276,7 +1247,7 @@ static void push_to_pool(struct work_struct *work)
  static size_t account(struct entropy_store *r, size_t nbytes, int min,
                       int reserved)
  {
-       int entropy_count, orig;
+       int entropy_count, orig, have_bytes;
         size_t ibytes, nfrac;
  
         BUG_ON(r->entropy_count > r->poolinfo->poolfracbits);
@@ -1285,14 +1256,12 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min,
  retry:
         entropy_count = orig = ACCESS_ONCE(r->entropy_count);
         ibytes = nbytes;
-       /* If limited, never pull more than available */
-       if (r->limit) {
-               int have_bytes = entropy_count >> (ENTROPY_SHIFT + 3);
+       /* never pull more than available */
+       have_bytes = entropy_count >> (ENTROPY_SHIFT + 3);
  
-               if ((have_bytes -= reserved) < 0)
-                       have_bytes = 0;
-               ibytes = min_t(size_t, ibytes, have_bytes);
-       }
+       if ((have_bytes -= reserved) < 0)
+               have_bytes = 0;
+       ibytes = min_t(size_t, ibytes, have_bytes);
         if (ibytes < min)
                 ibytes = 0;
  
@@ -1912,6 +1881,7 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count,
  static int min_read_thresh = 8, min_write_thresh;
  static int max_read_thresh = OUTPUT_POOL_WORDS * 32;
  static int max_write_thresh = INPUT_POOL_WORDS * 32;
+static int random_min_urandom_seed = 60;
  static char sysctl_bootid[16];
  
  /*
@@ -2042,63 +2012,64 @@ struct ctl_table random_table[] = {
  };
  #endif         /* CONFIG_SYSCTL */
  
-static u32 random_int_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
-
-int random_int_secret_init(void)
-{
-       get_random_bytes(random_int_secret, sizeof(random_int_secret));
-       return 0;
-}
-
-static DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash)
-               __aligned(sizeof(unsigned long));
+struct batched_entropy {
+       union {
+               u64 entropy_u64[CHACHA20_BLOCK_SIZE / sizeof(u64)];
+               u32 entropy_u32[CHACHA20_BLOCK_SIZE / sizeof(u32)];
+       };
+       unsigned int position;
+};
  
  /*
- * Get a random word for internal kernel use only. Similar to urandom but
- * with the goal of minimal entropy pool depletion. As a result, the random
- * value is not cryptographically secure but for several uses the cost of
- * depleting entropy is too high
+ * Get a random word for internal kernel use only. The quality of the random
+ * number is either as good as RDRAND or as good as /dev/urandom, with the
+ * goal of being quite fast and not depleting entropy.
   */
-unsigned int get_random_int(void)
+static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64);
+u64 get_random_u64(void)
  {
-       __u32 *hash;
-       unsigned int ret;
+       u64 ret;
+       struct batched_entropy *batch;
  
-       if (arch_get_random_int(&ret))
+#if BITS_PER_LONG == 64
+       if (arch_get_random_long((unsigned long *)&ret))
                 return ret;
+#else
+       if (arch_get_random_long((unsigned long *)&ret) &&
+           arch_get_random_long((unsigned long *)&ret + 1))
+           return ret;
+#endif
  
-       hash = get_cpu_var(get_random_int_hash);
-
-       hash[0] += current->pid + jiffies + random_get_entropy();
-       md5_transform(hash, random_int_secret);
-       ret = hash[0];
-       put_cpu_var(get_random_int_hash);
-
+       batch = &get_cpu_var(batched_entropy_u64);
+       if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) {
+               extract_crng((u8 *)batch->entropy_u64);
+               batch->position = 0;
+       }
+       ret = batch->entropy_u64[batch->position++];
+       put_cpu_var(batched_entropy_u64);
         return ret;
  }
-EXPORT_SYMBOL(get_random_int);
+EXPORT_SYMBOL(get_random_u64);
  
-/*
- * Same as get_random_int(), but returns unsigned long.
- */
-unsigned long get_random_long(void)
+static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32);
+u32 get_random_u32(void)
  {
-       __u32 *hash;
-       unsigned long ret;
+       u32 ret;
+       struct batched_entropy *batch;
  
-       if (arch_get_random_long(&ret))
+       if (arch_get_random_int(&ret))
                 return ret;
  
-       hash = get_cpu_var(get_random_int_hash);
-
-       hash[0] += current->pid + jiffies + random_get_entropy();
-       md5_transform(hash, random_int_secret);
-       ret = *(unsigned long *)hash;
-       put_cpu_var(get_random_int_hash);
-
+       batch = &get_cpu_var(batched_entropy_u32);
+       if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) {
+               extract_crng((u8 *)batch->entropy_u32);
+               batch->position = 0;
+       }
+       ret = batch->entropy_u32[batch->position++];
+       put_cpu_var(batched_entropy_u32);
         return ret;
  }
-EXPORT_SYMBOL(get_random_long);
+EXPORT_SYMBOL(get_random_u32);
  
  /**
   * randomize_page - Generate a random, page aligned address
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c

index e9b7e0b3cabe60d3be3ab8a092159b137854d8ec..87fe111d0be6b03ec8157f0a688046d490d7e887 100644 (file)
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -2202,14 +2202,16 @@ static int virtcons_freeze(struct virtio_device *vdev)
  
         vdev->config->reset(vdev);
  
-       virtqueue_disable_cb(portdev->c_ivq);
+       if (use_multiport(portdev))
+               virtqueue_disable_cb(portdev->c_ivq);
         cancel_work_sync(&portdev->control_work);
         cancel_work_sync(&portdev->config_work);
         /*
          * Once more: if control_work_handler() was running, it would
          * enable the cb as the last step.
          */
-       virtqueue_disable_cb(portdev->c_ivq);
+       if (use_multiport(portdev))
+               virtqueue_disable_cb(portdev->c_ivq);
         remove_controlq_data(portdev);
  
         list_for_each_entry(port, &portdev->ports, list) {
diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c

index ab609a76706f7bb0258ce47dda61366a0602d5cc..cf9449b3dbd9742bd8a3559c9939af9e057d9b5f 100644 (file)
--- a/drivers/clk/clk-stm32f4.c
+++ b/drivers/clk/clk-stm32f4.c
@@ -429,6 +429,13 @@ static const struct clk_div_table pll_divp_table[] = {
         { 0, 2 }, { 1, 4 }, { 2, 6 }, { 3, 8 }, { 0 }
  };
  
+static const struct clk_div_table pll_divq_table[] = {
+       { 2, 2 }, { 3, 3 }, { 4, 4 }, { 5, 5 }, { 6, 6 }, { 7, 7 },
+       { 8, 8 }, { 9, 9 }, { 10, 10 }, { 11, 11 }, { 12, 12 }, { 13, 13 },
+       { 14, 14 }, { 15, 15 },
+       { 0 }
+};
+
  static const struct clk_div_table pll_divr_table[] = {
         { 2, 2 }, { 3, 3 }, { 4, 4 }, { 5, 5 }, { 6, 6 }, { 7, 7 }, { 0 }
  };
@@ -496,9 +503,9 @@ struct stm32f4_div_data {
  
  #define MAX_PLL_DIV 3
  static const struct stm32f4_div_data  div_data[MAX_PLL_DIV] = {
-       { 16, 2, 0,                     pll_divp_table  },
-       { 24, 4, CLK_DIVIDER_ONE_BASED, NULL            },
-       { 28, 3, 0,                     pll_divr_table  },
+       { 16, 2, 0, pll_divp_table },
+       { 24, 4, 0, pll_divq_table },
+       { 28, 3, 0, pll_divr_table },
  };
  
  struct stm32f4_pll_data {
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c

index 0fb39fe217d17ae1bb681912764b2506007f5f42..67201f67a14af7b07aec557308a7fb39d1432157 100644 (file)
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -2502,7 +2502,7 @@ struct clk *__clk_create_clk(struct clk_hw *hw, const char *dev_id,
  
         clk->core = hw->core;
         clk->dev_id = dev_id;
-       clk->con_id = con_id;
+       clk->con_id = kstrdup_const(con_id, GFP_KERNEL);
         clk->max_rate = ULONG_MAX;
  
         clk_prepare_lock();
@@ -2518,6 +2518,7 @@ void __clk_free_clk(struct clk *clk)
         hlist_del(&clk->clks_node);
         clk_prepare_unlock();
  
+       kfree_const(clk->con_id);
         kfree(clk);
  }
  
diff --git a/drivers/clk/rockchip/clk-rk3036.c b/drivers/clk/rockchip/clk-rk3036.c

index 924f560dcf80e8a5681fba4c670524b2b20b01ee..00d4150e33c37434c056ba27867db908d6117bdf 100644 (file)
--- a/drivers/clk/rockchip/clk-rk3036.c
+++ b/drivers/clk/rockchip/clk-rk3036.c
@@ -127,7 +127,7 @@ PNAME(mux_ddrphy_p)         = { "dpll_ddr", "gpll_ddr" };
  PNAME(mux_pll_src_3plls_p)     = { "apll", "dpll", "gpll" };
  PNAME(mux_timer_p)             = { "xin24m", "pclk_peri_src" };
  
-PNAME(mux_pll_src_apll_dpll_gpll_usb480m_p)    = { "apll", "dpll", "gpll" "usb480m" };
+PNAME(mux_pll_src_apll_dpll_gpll_usb480m_p)    = { "apll", "dpll", "gpll", "usb480m" };
  
  PNAME(mux_mmc_src_p)   = { "apll", "dpll", "gpll", "xin24m" };
  PNAME(mux_i2s_pre_p)   = { "i2s_src", "i2s_frac", "ext_i2s", "xin12m" };
@@ -450,6 +450,13 @@ static void __init rk3036_clk_init(struct device_node *np)
                 return;
         }
  
+       /*
+        * Make uart_pll_clk a child of the gpll, as all other sources are
+        * not that usable / stable.
+        */
+       writel_relaxed(HIWORD_UPDATE(0x2, 0x3, 10),
+                      reg_base + RK2928_CLKSEL_CON(13));
+
         ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS);
         if (IS_ERR(ctx)) {
                 pr_err("%s: rockchip clk init failed\n", __func__);
diff --git a/drivers/clk/sunxi-ng/Kconfig b/drivers/clk/sunxi-ng/Kconfig

index 695bbf9ef428f98f5348d100a6af2525f04afb59..a077ab6edffae759564362b85fd596887ea7e89a 100644 (file)
--- a/drivers/clk/sunxi-ng/Kconfig
+++ b/drivers/clk/sunxi-ng/Kconfig
@@ -1,6 +1,7 @@
  config SUNXI_CCU
         bool "Clock support for Allwinner SoCs"
         depends on ARCH_SUNXI || COMPILE_TEST
+       select RESET_CONTROLLER
         default ARCH_SUNXI
  
  if SUNXI_CCU
@@ -15,7 +16,7 @@ config SUNXI_CCU_FRAC
         bool
  
  config SUNXI_CCU_GATE
-       bool
+       def_bool y
  
  config SUNXI_CCU_MUX
         bool
@@ -80,6 +81,7 @@ config SUN6I_A31_CCU
         select SUNXI_CCU_DIV
         select SUNXI_CCU_NK
         select SUNXI_CCU_NKM
+       select SUNXI_CCU_NKMP
         select SUNXI_CCU_NM
         select SUNXI_CCU_MP
         select SUNXI_CCU_PHASE
@@ -134,6 +136,7 @@ config SUN8I_V3S_CCU
  config SUN9I_A80_CCU
         bool "Support for the Allwinner A80 CCU"
         select SUNXI_CCU_DIV
+       select SUNXI_CCU_MULT
         select SUNXI_CCU_GATE
         select SUNXI_CCU_NKMP
         select SUNXI_CCU_NM
diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c

index e3c084cc6da55e77f24bf058038bda710fd792d0..f54114c607df76edeb77c70e0f7c97656e67634c 100644 (file)
--- a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c
+++ b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c
@@ -566,7 +566,7 @@ static SUNXI_CCU_M_WITH_GATE(gpu_clk, "gpu", "pll-gpu",
                              0x1a0, 0, 3, BIT(31), CLK_SET_RATE_PARENT);
  
  /* Fixed Factor clocks */
-static CLK_FIXED_FACTOR(osc12M_clk, "osc12M", "osc24M", 1, 2, 0);
+static CLK_FIXED_FACTOR(osc12M_clk, "osc12M", "osc24M", 2, 1, 0);
  
  /* We hardcode the divider to 4 for now */
  static CLK_FIXED_FACTOR(pll_audio_clk, "pll-audio",
diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c

index 4c9a920ff4ab7c351d59333d131d039f5e36f40a..89e68d29bf456ab3d682f7d2ba7d35ad4a21bb58 100644 (file)
--- a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
+++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
@@ -608,7 +608,7 @@ static SUNXI_CCU_M_WITH_MUX_GATE(hdmi_clk, "hdmi", lcd_ch1_parents,
                                  0x150, 0, 4, 24, 2, BIT(31),
                                  CLK_SET_RATE_PARENT);
  
-static SUNXI_CCU_GATE(hdmi_ddc_clk, "hdmi-ddc", "osc24M", 0x150, BIT(31), 0);
+static SUNXI_CCU_GATE(hdmi_ddc_clk, "hdmi-ddc", "osc24M", 0x150, BIT(30), 0);
  
  static SUNXI_CCU_GATE(ps_clk, "ps", "lcd1-ch1", 0x140, BIT(31), 0);
  
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-a33.c b/drivers/clk/sunxi-ng/ccu-sun8i-a33.c

index a7b3c08ed0e232c0cf41ae419f4980bf1614590e..2c69b631967aea3ae81389d29b20e8014c3030e1 100644 (file)
--- a/drivers/clk/sunxi-ng/ccu-sun8i-a33.c
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-a33.c
@@ -752,6 +752,13 @@ static const struct sunxi_ccu_desc sun8i_a33_ccu_desc = {
         .num_resets     = ARRAY_SIZE(sun8i_a33_ccu_resets),
  };
  
+static struct ccu_pll_nb sun8i_a33_pll_cpu_nb = {
+       .common = &pll_cpux_clk.common,
+       /* copy from pll_cpux_clk */
+       .enable = BIT(31),
+       .lock   = BIT(28),
+};
+
  static struct ccu_mux_nb sun8i_a33_cpu_nb = {
         .common         = &cpux_clk.common,
         .cm             = &cpux_clk.mux,
@@ -783,6 +790,10 @@ static void __init sun8i_a33_ccu_setup(struct device_node *node)
  
         sunxi_ccu_probe(node, reg, &sun8i_a33_ccu_desc);
  
+       /* Gate then ungate PLL CPU after any rate changes */
+       ccu_pll_notifier_register(&sun8i_a33_pll_cpu_nb);
+
+       /* Reparent CPU during PLL CPU rate changes */
         ccu_mux_notifier_register(pll_cpux_clk.common.hw.clk,
                                   &sun8i_a33_cpu_nb);
  }
diff --git a/drivers/clk/sunxi-ng/ccu_common.c b/drivers/clk/sunxi-ng/ccu_common.c

index 8a47bafd78905bce849235d791f1469d448afcc9..9d8724715a4352ddd07a411945bc1cd58367053d 100644 (file)
--- a/drivers/clk/sunxi-ng/ccu_common.c
+++ b/drivers/clk/sunxi-ng/ccu_common.c
@@ -14,11 +14,13 @@
   * GNU General Public License for more details.
   */
  
+#include <linux/clk.h>
  #include <linux/clk-provider.h>
  #include <linux/iopoll.h>
  #include <linux/slab.h>
  
  #include "ccu_common.h"
+#include "ccu_gate.h"
  #include "ccu_reset.h"
  
  static DEFINE_SPINLOCK(ccu_lock);
@@ -39,6 +41,53 @@ void ccu_helper_wait_for_lock(struct ccu_common *common, u32 lock)
         WARN_ON(readl_relaxed_poll_timeout(addr, reg, reg & lock, 100, 70000));
  }
  
+/*
+ * This clock notifier is called when the frequency of a PLL clock is
+ * changed. In common PLL designs, changes to the dividers take effect
+ * almost immediately, while changes to the multipliers (implemented
+ * as dividers in the feedback loop) take a few cycles to work into
+ * the feedback loop for the PLL to stabilize.
+ *
+ * Sometimes when the PLL clock rate is changed, the decrease in the
+ * divider is too much for the decrease in the multiplier to catch up.
+ * The PLL clock rate will spike, and in some cases, might lock up
+ * completely.
+ *
+ * This notifier callback will gate and then ungate the clock,
+ * effectively resetting it, so it proceeds to work. Care must be
+ * taken to reparent consumers to other temporary clocks during the
+ * rate change, and that this notifier callback must be the first
+ * to be registered.
+ */
+static int ccu_pll_notifier_cb(struct notifier_block *nb,
+                              unsigned long event, void *data)
+{
+       struct ccu_pll_nb *pll = to_ccu_pll_nb(nb);
+       int ret = 0;
+
+       if (event != POST_RATE_CHANGE)
+               goto out;
+
+       ccu_gate_helper_disable(pll->common, pll->enable);
+
+       ret = ccu_gate_helper_enable(pll->common, pll->enable);
+       if (ret)
+               goto out;
+
+       ccu_helper_wait_for_lock(pll->common, pll->lock);
+
+out:
+       return notifier_from_errno(ret);
+}
+
+int ccu_pll_notifier_register(struct ccu_pll_nb *pll_nb)
+{
+       pll_nb->clk_nb.notifier_call = ccu_pll_notifier_cb;
+
+       return clk_notifier_register(pll_nb->common->hw.clk,
+                                    &pll_nb->clk_nb);
+}
+
  int sunxi_ccu_probe(struct device_node *node, void __iomem *reg,
                     const struct sunxi_ccu_desc *desc)
  {
diff --git a/drivers/clk/sunxi-ng/ccu_common.h b/drivers/clk/sunxi-ng/ccu_common.h

index 73d81dc58fc5ad91f8a293530aa89e2b22fcbdb3..d6fdd7a789aa746a72f939fb51c552004941fd37 100644 (file)
--- a/drivers/clk/sunxi-ng/ccu_common.h
+++ b/drivers/clk/sunxi-ng/ccu_common.h
@@ -83,6 +83,18 @@ struct sunxi_ccu_desc {
  
  void ccu_helper_wait_for_lock(struct ccu_common *common, u32 lock);
  
+struct ccu_pll_nb {
+       struct notifier_block   clk_nb;
+       struct ccu_common       *common;
+
+       u32     enable;
+       u32     lock;
+};
+
+#define to_ccu_pll_nb(_nb) container_of(_nb, struct ccu_pll_nb, clk_nb)
+
+int ccu_pll_notifier_register(struct ccu_pll_nb *pll_nb);
+
  int sunxi_ccu_probe(struct device_node *node, void __iomem *reg,
                     const struct sunxi_ccu_desc *desc);
  
diff --git a/drivers/clk/sunxi-ng/ccu_mp.c b/drivers/clk/sunxi-ng/ccu_mp.c

index 22c2ca7a2a221c1f25456e6e2548d381fbc23adf..b583f186a804df669e974f811e6bef91c4aaa877 100644 (file)
--- a/drivers/clk/sunxi-ng/ccu_mp.c
+++ b/drivers/clk/sunxi-ng/ccu_mp.c
@@ -85,6 +85,10 @@ static unsigned long ccu_mp_recalc_rate(struct clk_hw *hw,
         unsigned int m, p;
         u32 reg;
  
+       /* Adjust parent_rate according to pre-dividers */
+       ccu_mux_helper_adjust_parent_for_prediv(&cmp->common, &cmp->mux,
+                                               -1, &parent_rate);
+
         reg = readl(cmp->common.base + cmp->common.reg);
  
         m = reg >> cmp->m.shift;
@@ -117,6 +121,10 @@ static int ccu_mp_set_rate(struct clk_hw *hw, unsigned long rate,
         unsigned int m, p;
         u32 reg;
  
+       /* Adjust parent_rate according to pre-dividers */
+       ccu_mux_helper_adjust_parent_for_prediv(&cmp->common, &cmp->mux,
+                                               -1, &parent_rate);
+
         max_m = cmp->m.max ?: 1 << cmp->m.width;
         max_p = cmp->p.max ?: 1 << ((1 << cmp->p.width) - 1);
  
diff --git a/drivers/clk/sunxi-ng/ccu_nkmp.c b/drivers/clk/sunxi-ng/ccu_nkmp.c

index a2b40a0001577d2579aa563341d1d890797f4b7f..488055ed944f2b9dff8ca1baa5059ea22879328b 100644 (file)
--- a/drivers/clk/sunxi-ng/ccu_nkmp.c
+++ b/drivers/clk/sunxi-ng/ccu_nkmp.c
@@ -107,7 +107,7 @@ static unsigned long ccu_nkmp_recalc_rate(struct clk_hw *hw,
         p = reg >> nkmp->p.shift;
         p &= (1 << nkmp->p.width) - 1;
  
-       return parent_rate * n * k >> p / m;
+       return (parent_rate * n * k >> p) / m;
  }
  
  static long ccu_nkmp_round_rate(struct clk_hw *hw, unsigned long rate,
diff --git a/drivers/clocksource/clkevt-probe.c b/drivers/clocksource/clkevt-probe.c

index 8c30fec86094df926b8fa23bd162d44051894006..eb89b502acbdfdb5f343a34fde38afc1aa55683b 100644 (file)
--- a/drivers/clocksource/clkevt-probe.c
+++ b/drivers/clocksource/clkevt-probe.c
@@ -17,7 +17,7 @@
  
  #include <linux/init.h>
  #include <linux/of.h>
-#include <linux/clockchip.h>
+#include <linux/clockchips.h>
  
  extern struct of_device_id __clkevt_of_table[];
  
diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c

index 745844ee973e1deda08203725d9b9d1b8e412972..d4ca9962a7595a0206710a0dd4a95656f426ae8e 100644 (file)
--- a/drivers/clocksource/tcb_clksrc.c
+++ b/drivers/clocksource/tcb_clksrc.c
@@ -10,7 +10,6 @@
  #include <linux/io.h>
  #include <linux/platform_device.h>
  #include <linux/atmel_tc.h>
-#include <linux/sched_clock.h>
  
  
  /*
@@ -57,14 +56,9 @@ static u64 tc_get_cycles(struct clocksource *cs)
         return (upper << 16) | lower;
  }
  
-static u32 tc_get_cv32(void)
-{
-       return __raw_readl(tcaddr + ATMEL_TC_REG(0, CV));
-}
-
  static u64 tc_get_cycles32(struct clocksource *cs)
  {
-       return tc_get_cv32();
+       return __raw_readl(tcaddr + ATMEL_TC_REG(0, CV));
  }
  
  static struct clocksource clksrc = {
@@ -75,11 +69,6 @@ static struct clocksource clksrc = {
         .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
  };
  
-static u64 notrace tc_read_sched_clock(void)
-{
-       return tc_get_cv32();
-}
-
  #ifdef CONFIG_GENERIC_CLOCKEVENTS
  
  struct tc_clkevt_device {
@@ -350,9 +339,6 @@ static int __init tcb_clksrc_init(void)
                 clksrc.read = tc_get_cycles32;
                 /* setup ony channel 0 */
                 tcb_setup_single_chan(tc, best_divisor_idx);
-
-               /* register sched_clock on chips with single 32 bit counter */
-               sched_clock_register(tc_read_sched_clock, 32, divided_rate);
         } else {
                 /* tclib will give us three clocks no matter what the
                  * underlying platform supports.
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c

index a475432818642fee4547699011ba4cf5aa619f3a..0e3f6496524d92c7c1717d8d2259684952d7acb8 100644 (file)
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -680,9 +680,11 @@ static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
                                         char *buf)
  {
         unsigned int cur_freq = __cpufreq_get(policy);
-       if (!cur_freq)
-               return sprintf(buf, "<unknown>");
-       return sprintf(buf, "%u\n", cur_freq);
+
+       if (cur_freq)
+               return sprintf(buf, "%u\n", cur_freq);
+
+       return sprintf(buf, "<unknown>\n");
  }
  
  /**
@@ -916,11 +918,19 @@ static struct kobj_type ktype_cpufreq = {
         .release        = cpufreq_sysfs_release,
  };
  
-static int add_cpu_dev_symlink(struct cpufreq_policy *policy,
-                              struct device *dev)
+static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu)
  {
+       struct device *dev = get_cpu_device(cpu);
+
+       if (!dev)
+               return;
+
+       if (cpumask_test_and_set_cpu(cpu, policy->real_cpus))
+               return;
+
         dev_dbg(dev, "%s: Adding symlink\n", __func__);
-       return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
+       if (sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq"))
+               dev_err(dev, "cpufreq symlink creation failed\n");
  }
  
  static void remove_cpu_dev_symlink(struct cpufreq_policy *policy,
@@ -1178,10 +1188,13 @@ static int cpufreq_online(unsigned int cpu)
                 policy->user_policy.min = policy->min;
                 policy->user_policy.max = policy->max;
  
-               write_lock_irqsave(&cpufreq_driver_lock, flags);
-               for_each_cpu(j, policy->related_cpus)
+               for_each_cpu(j, policy->related_cpus) {
                         per_cpu(cpufreq_cpu_data, j) = policy;
-               write_unlock_irqrestore(&cpufreq_driver_lock, flags);
+                       add_cpu_dev_symlink(policy, j);
+               }
+       } else {
+               policy->min = policy->user_policy.min;
+               policy->max = policy->user_policy.max;
         }
  
         if (cpufreq_driver->get && !cpufreq_driver->setpolicy) {
@@ -1270,13 +1283,15 @@ out_exit_policy:
  
         if (cpufreq_driver->exit)
                 cpufreq_driver->exit(policy);
+
+       for_each_cpu(j, policy->real_cpus)
+               remove_cpu_dev_symlink(policy, get_cpu_device(j));
+
  out_free_policy:
         cpufreq_policy_free(policy);
         return ret;
  }
  
-static int cpufreq_offline(unsigned int cpu);
-
  /**
   * cpufreq_add_dev - the cpufreq interface for a CPU device.
   * @dev: CPU device.
@@ -1298,16 +1313,10 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
  
         /* Create sysfs link on CPU registration */
         policy = per_cpu(cpufreq_cpu_data, cpu);
-       if (!policy || cpumask_test_and_set_cpu(cpu, policy->real_cpus))
-               return 0;
+       if (policy)
+               add_cpu_dev_symlink(policy, cpu);
  
-       ret = add_cpu_dev_symlink(policy, dev);
-       if (ret) {
-               cpumask_clear_cpu(cpu, policy->real_cpus);
-               cpufreq_offline(cpu);
-       }
-
-       return ret;
+       return 0;
  }
  
  static int cpufreq_offline(unsigned int cpu)
@@ -2389,6 +2398,20 @@ EXPORT_SYMBOL_GPL(cpufreq_boost_enabled);
   *********************************************************************/
  static enum cpuhp_state hp_online;
  
+static int cpuhp_cpufreq_online(unsigned int cpu)
+{
+       cpufreq_online(cpu);
+
+       return 0;
+}
+
+static int cpuhp_cpufreq_offline(unsigned int cpu)
+{
+       cpufreq_offline(cpu);
+
+       return 0;
+}
+
  /**
   * cpufreq_register_driver - register a CPU Frequency driver
   * @driver_data: A struct cpufreq_driver containing the values#
@@ -2451,8 +2474,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
         }
  
         ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "cpufreq:online",
-                                       cpufreq_online,
-                                       cpufreq_offline);
+                                       cpuhp_cpufreq_online,
+                                       cpuhp_cpufreq_offline);
         if (ret < 0)
                 goto err_if_unreg;
         hp_online = ret;
@@ -2532,4 +2555,5 @@ static int __init cpufreq_core_init(void)
  
         return 0;
  }
+module_param(off, int, 0444);
  core_initcall(cpufreq_core_init);
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c

index b1fbaa30ae0415c330b9b1069e17900b99a48868..283491f742d3d78659696bd58c48fc5a3bd7a370 100644 (file)
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -84,6 +84,11 @@ static inline u64 div_ext_fp(u64 x, u64 y)
         return div64_u64(x << EXT_FRAC_BITS, y);
  }
  
+static inline int32_t percent_ext_fp(int percent)
+{
+       return div_ext_fp(percent, 100);
+}
+
  /**
   * struct sample -     Store performance sample
   * @core_avg_perf:     Ratio of APERF/MPERF which is the actual average
@@ -359,9 +364,7 @@ static bool driver_registered __read_mostly;
  static bool acpi_ppc;
  #endif
  
-static struct perf_limits performance_limits;
-static struct perf_limits powersave_limits;
-static struct perf_limits *limits;
+static struct perf_limits global;
  
  static void intel_pstate_init_limits(struct perf_limits *limits)
  {
@@ -372,13 +375,6 @@ static void intel_pstate_init_limits(struct perf_limits *limits)
         limits->max_sysfs_pct = 100;
  }
  
-static void intel_pstate_set_performance_limits(struct perf_limits *limits)
-{
-       intel_pstate_init_limits(limits);
-       limits->min_perf_pct = 100;
-       limits->min_perf = int_ext_tofp(1);
-}
-
  static DEFINE_MUTEX(intel_pstate_driver_lock);
  static DEFINE_MUTEX(intel_pstate_limits_lock);
  
@@ -501,7 +497,7 @@ static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
          * correct max turbo frequency based on the turbo state.
          * Also need to convert to MHz as _PSS freq is in MHz.
          */
-       if (!limits->turbo_disabled)
+       if (!global.turbo_disabled)
                 cpu->acpi_perf_data.states[0].core_frequency =
                                         policy->cpuinfo.max_freq / 1000;
         cpu->valid_pss_table = true;
@@ -620,7 +616,7 @@ static inline void update_turbo_state(void)
  
         cpu = all_cpu_data[0];
         rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
-       limits->turbo_disabled =
+       global.turbo_disabled =
                 (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
                  cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
  }
@@ -844,12 +840,11 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
  
  static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
  {
-       int min, hw_min, max, hw_max, cpu, range, adj_range;
-       struct perf_limits *perf_limits = limits;
+       int min, hw_min, max, hw_max, cpu;
+       struct perf_limits *perf_limits = &global;
         u64 value, cap;
  
         for_each_cpu(cpu, policy->cpus) {
-               int max_perf_pct, min_perf_pct;
                 struct cpudata *cpu_data = all_cpu_data[cpu];
                 s16 epp;
  
@@ -858,24 +853,22 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
  
                 rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
                 hw_min = HWP_LOWEST_PERF(cap);
-               if (limits->no_turbo)
+               if (global.no_turbo)
                         hw_max = HWP_GUARANTEED_PERF(cap);
                 else
                         hw_max = HWP_HIGHEST_PERF(cap);
-               range = hw_max - hw_min;
  
-               max_perf_pct = perf_limits->max_perf_pct;
-               min_perf_pct = perf_limits->min_perf_pct;
+               max = fp_ext_toint(hw_max * perf_limits->max_perf);
+               if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
+                       min = max;
+               else
+                       min = fp_ext_toint(hw_max * perf_limits->min_perf);
  
                 rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
-               adj_range = min_perf_pct * range / 100;
-               min = hw_min + adj_range;
+
                 value &= ~HWP_MIN_PERF(~0L);
                 value |= HWP_MIN_PERF(min);
  
-               adj_range = max_perf_pct * range / 100;
-               max = hw_min + adj_range;
-
                 value &= ~HWP_MAX_PERF(~0L);
                 value |= HWP_MAX_PERF(max);
  
@@ -979,6 +972,7 @@ static void intel_pstate_update_policies(void)
  static int pid_param_set(void *data, u64 val)
  {
         *(u32 *)data = val;
+       pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
         intel_pstate_reset_all_pid();
         return 0;
  }
@@ -1050,7 +1044,7 @@ static void intel_pstate_debug_hide_params(void)
         static ssize_t show_##file_name                                 \
         (struct kobject *kobj, struct attribute *attr, char *buf)       \
         {                                                               \
-               return sprintf(buf, "%u\n", limits->object);            \
+               return sprintf(buf, "%u\n", global.object);             \
         }
  
  static ssize_t intel_pstate_show_status(char *buf);
@@ -1141,10 +1135,10 @@ static ssize_t show_no_turbo(struct kobject *kobj,
         }
  
         update_turbo_state();
-       if (limits->turbo_disabled)
-               ret = sprintf(buf, "%u\n", limits->turbo_disabled);
+       if (global.turbo_disabled)
+               ret = sprintf(buf, "%u\n", global.turbo_disabled);
         else
-               ret = sprintf(buf, "%u\n", limits->no_turbo);
+               ret = sprintf(buf, "%u\n", global.no_turbo);
  
         mutex_unlock(&intel_pstate_driver_lock);
  
@@ -1171,14 +1165,14 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
         mutex_lock(&intel_pstate_limits_lock);
  
         update_turbo_state();
-       if (limits->turbo_disabled) {
+       if (global.turbo_disabled) {
                 pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
                 mutex_unlock(&intel_pstate_limits_lock);
                 mutex_unlock(&intel_pstate_driver_lock);
                 return -EPERM;
         }
  
-       limits->no_turbo = clamp_t(int, input, 0, 1);
+       global.no_turbo = clamp_t(int, input, 0, 1);
  
         mutex_unlock(&intel_pstate_limits_lock);
  
@@ -1208,14 +1202,11 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
  
         mutex_lock(&intel_pstate_limits_lock);
  
-       limits->max_sysfs_pct = clamp_t(int, input, 0 , 100);
-       limits->max_perf_pct = min(limits->max_policy_pct,
-                                  limits->max_sysfs_pct);
-       limits->max_perf_pct = max(limits->min_policy_pct,
-                                  limits->max_perf_pct);
-       limits->max_perf_pct = max(limits->min_perf_pct,
-                                  limits->max_perf_pct);
-       limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
+       global.max_sysfs_pct = clamp_t(int, input, 0 , 100);
+       global.max_perf_pct = min(global.max_policy_pct, global.max_sysfs_pct);
+       global.max_perf_pct = max(global.min_policy_pct, global.max_perf_pct);
+       global.max_perf_pct = max(global.min_perf_pct, global.max_perf_pct);
+       global.max_perf = percent_ext_fp(global.max_perf_pct);
  
         mutex_unlock(&intel_pstate_limits_lock);
  
@@ -1245,14 +1236,11 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
  
         mutex_lock(&intel_pstate_limits_lock);
  
-       limits->min_sysfs_pct = clamp_t(int, input, 0 , 100);
-       limits->min_perf_pct = max(limits->min_policy_pct,
-                                  limits->min_sysfs_pct);
-       limits->min_perf_pct = min(limits->max_policy_pct,
-                                  limits->min_perf_pct);
-       limits->min_perf_pct = min(limits->max_perf_pct,
-                                  limits->min_perf_pct);
-       limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
+       global.min_sysfs_pct = clamp_t(int, input, 0 , 100);
+       global.min_perf_pct = max(global.min_policy_pct, global.min_sysfs_pct);
+       global.min_perf_pct = min(global.max_policy_pct, global.min_perf_pct);
+       global.min_perf_pct = min(global.max_perf_pct, global.min_perf_pct);
+       global.min_perf = percent_ext_fp(global.min_perf_pct);
  
         mutex_unlock(&intel_pstate_limits_lock);
  
@@ -1377,7 +1365,7 @@ static u64 atom_get_val(struct cpudata *cpudata, int pstate)
         u32 vid;
  
         val = (u64)pstate << 8;
-       if (limits->no_turbo && !limits->turbo_disabled)
+       if (global.no_turbo && !global.turbo_disabled)
                 val |= (u64)1 << 32;
  
         vid_fp = cpudata->vid.min + mul_fp(
@@ -1547,7 +1535,7 @@ static u64 core_get_val(struct cpudata *cpudata, int pstate)
         u64 val;
  
         val = (u64)pstate << 8;
-       if (limits->no_turbo && !limits->turbo_disabled)
+       if (global.no_turbo && !global.turbo_disabled)
                 val |= (u64)1 << 32;
  
         return val;
@@ -1673,9 +1661,9 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
         int max_perf = cpu->pstate.turbo_pstate;
         int max_perf_adj;
         int min_perf;
-       struct perf_limits *perf_limits = limits;
+       struct perf_limits *perf_limits = &global;
  
-       if (limits->no_turbo || limits->turbo_disabled)
+       if (global.no_turbo || global.turbo_disabled)
                 max_perf = cpu->pstate.max_pstate;
  
         if (per_cpu_limits)
@@ -1810,7 +1798,7 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
  
         sample->busy_scaled = busy_frac * 100;
  
-       target = limits->no_turbo || limits->turbo_disabled ?
+       target = global.no_turbo || global.turbo_disabled ?
                         cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
         target += target >> 2;
         target = mul_fp(target, busy_frac);
@@ -1874,13 +1862,11 @@ static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate)
  
         intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
         pstate = clamp_t(int, pstate, min_perf, max_perf);
-       trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
         return pstate;
  }
  
  static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
  {
-       pstate = intel_pstate_prepare_request(cpu, pstate);
         if (pstate == cpu->pstate.current_pstate)
                 return;
  
@@ -1900,6 +1886,8 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
  
         update_turbo_state();
  
+       target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
+       trace_cpu_frequency(target_pstate * cpu->pstate.scaling, cpu->cpu);
         intel_pstate_update_pstate(cpu, target_pstate);
  
         sample = &cpu->sample;
@@ -2070,36 +2058,34 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu)
  static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
                                             struct perf_limits *limits)
  {
+       int32_t max_policy_perf, min_policy_perf;
  
-       limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
-                                             policy->cpuinfo.max_freq);
-       limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0, 100);
+       max_policy_perf = div_ext_fp(policy->max, policy->cpuinfo.max_freq);
+       max_policy_perf = clamp_t(int32_t, max_policy_perf, 0, int_ext_tofp(1));
         if (policy->max == policy->min) {
-               limits->min_policy_pct = limits->max_policy_pct;
+               min_policy_perf = max_policy_perf;
         } else {
-               limits->min_policy_pct = DIV_ROUND_UP(policy->min * 100,
-                                                     policy->cpuinfo.max_freq);
-               limits->min_policy_pct = clamp_t(int, limits->min_policy_pct,
-                                                0, 100);
+               min_policy_perf = div_ext_fp(policy->min,
+                                            policy->cpuinfo.max_freq);
+               min_policy_perf = clamp_t(int32_t, min_policy_perf,
+                                         0, max_policy_perf);
         }
  
-       /* Normalize user input to [min_policy_pct, max_policy_pct] */
-       limits->min_perf_pct = max(limits->min_policy_pct,
-                                  limits->min_sysfs_pct);
-       limits->min_perf_pct = min(limits->max_policy_pct,
-                                  limits->min_perf_pct);
-       limits->max_perf_pct = min(limits->max_policy_pct,
-                                  limits->max_sysfs_pct);
-       limits->max_perf_pct = max(limits->min_policy_pct,
-                                  limits->max_perf_pct);
-
-       /* Make sure min_perf_pct <= max_perf_pct */
-       limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
-
-       limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
-       limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
+       /* Normalize user input to [min_perf, max_perf] */
+       limits->min_perf = max(min_policy_perf,
+                              percent_ext_fp(limits->min_sysfs_pct));
+       limits->min_perf = min(limits->min_perf, max_policy_perf);
+       limits->max_perf = min(max_policy_perf,
+                              percent_ext_fp(limits->max_sysfs_pct));
+       limits->max_perf = max(min_policy_perf, limits->max_perf);
+
+       /* Make sure min_perf <= max_perf */
+       limits->min_perf = min(limits->min_perf, limits->max_perf);
+
         limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS);
         limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS);
+       limits->max_perf_pct = fp_ext_toint(limits->max_perf * 100);
+       limits->min_perf_pct = fp_ext_toint(limits->min_perf * 100);
  
         pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu,
                  limits->max_perf_pct, limits->min_perf_pct);
@@ -2108,7 +2094,7 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
  static int intel_pstate_set_policy(struct cpufreq_policy *policy)
  {
         struct cpudata *cpu;
-       struct perf_limits *perf_limits = NULL;
+       struct perf_limits *perf_limits = &global;
  
         if (!policy->cpuinfo.max_freq)
                 return -ENODEV;
@@ -2131,28 +2117,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
  
         mutex_lock(&intel_pstate_limits_lock);
  
-       if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
-               if (!perf_limits) {
-                       limits = &performance_limits;
-                       perf_limits = limits;
-               }
-               if (policy->max >= policy->cpuinfo.max_freq &&
-                   !limits->no_turbo) {
-                       pr_debug("set performance\n");
-                       intel_pstate_set_performance_limits(perf_limits);
-                       goto out;
-               }
-       } else {
-               pr_debug("set powersave\n");
-               if (!perf_limits) {
-                       limits = &powersave_limits;
-                       perf_limits = limits;
-               }
-
-       }
-
         intel_pstate_update_perf_limits(policy, perf_limits);
- out:
+
         if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
                 /*
                  * NOHZ_FULL CPUs need this as the governor callback may not
@@ -2174,16 +2140,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
  static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
  {
         struct cpudata *cpu = all_cpu_data[policy->cpu];
-       struct perf_limits *perf_limits;
-
-       if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
-               perf_limits = &performance_limits;
-       else
-               perf_limits = &powersave_limits;
  
         update_turbo_state();
-       policy->cpuinfo.max_freq = perf_limits->turbo_disabled ||
-                                       perf_limits->no_turbo ?
+       policy->cpuinfo.max_freq = global.turbo_disabled || global.no_turbo ?
                                         cpu->pstate.max_freq :
                                         cpu->pstate.turbo_freq;
  
@@ -2198,9 +2157,9 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
                 unsigned int max_freq, min_freq;
  
                 max_freq = policy->cpuinfo.max_freq *
-                                               limits->max_sysfs_pct / 100;
+                                       global.max_sysfs_pct / 100;
                 min_freq = policy->cpuinfo.max_freq *
-                                               limits->min_sysfs_pct / 100;
+                                       global.min_sysfs_pct / 100;
                 cpufreq_verify_within_limits(policy, min_freq, max_freq);
         }
  
@@ -2243,13 +2202,8 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
  
         cpu = all_cpu_data[policy->cpu];
  
-       /*
-        * We need sane value in the cpu->perf_limits, so inherit from global
-        * perf_limits limits, which are seeded with values based on the
-        * CONFIG_CPU_FREQ_DEFAULT_GOV_*, during boot up.
-        */
         if (per_cpu_limits)
-               memcpy(cpu->perf_limits, limits, sizeof(struct perf_limits));
+               intel_pstate_init_limits(cpu->perf_limits);
  
         policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
         policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
@@ -2257,7 +2211,7 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
         /* cpuinfo and default policy values */
         policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
         update_turbo_state();
-       policy->cpuinfo.max_freq = limits->turbo_disabled ?
+       policy->cpuinfo.max_freq = global.turbo_disabled ?
                         cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
         policy->cpuinfo.max_freq *= cpu->pstate.scaling;
  
@@ -2277,7 +2231,7 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
                 return ret;
  
         policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
-       if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100)
+       if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE))
                 policy->policy = CPUFREQ_POLICY_PERFORMANCE;
         else
                 policy->policy = CPUFREQ_POLICY_POWERSAVE;
@@ -2301,46 +2255,16 @@ static struct cpufreq_driver intel_pstate = {
  static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
  {
         struct cpudata *cpu = all_cpu_data[policy->cpu];
-       struct perf_limits *perf_limits = limits;
  
         update_turbo_state();
-       policy->cpuinfo.max_freq = limits->turbo_disabled ?
+       policy->cpuinfo.max_freq = global.no_turbo || global.turbo_disabled ?
                         cpu->pstate.max_freq : cpu->pstate.turbo_freq;
  
         cpufreq_verify_within_cpu_limits(policy);
  
-       if (per_cpu_limits)
-               perf_limits = cpu->perf_limits;
-
-       mutex_lock(&intel_pstate_limits_lock);
-
-       intel_pstate_update_perf_limits(policy, perf_limits);
-
-       mutex_unlock(&intel_pstate_limits_lock);
-
         return 0;
  }
  
-static unsigned int intel_cpufreq_turbo_update(struct cpudata *cpu,
-                                              struct cpufreq_policy *policy,
-                                              unsigned int target_freq)
-{
-       unsigned int max_freq;
-
-       update_turbo_state();
-
-       max_freq = limits->no_turbo || limits->turbo_disabled ?
-                       cpu->pstate.max_freq : cpu->pstate.turbo_freq;
-       policy->cpuinfo.max_freq = max_freq;
-       if (policy->max > max_freq)
-               policy->max = max_freq;
-
-       if (target_freq > max_freq)
-               target_freq = max_freq;
-
-       return target_freq;
-}
-
  static int intel_cpufreq_target(struct cpufreq_policy *policy,
                                 unsigned int target_freq,
                                 unsigned int relation)
@@ -2349,8 +2273,10 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy,
         struct cpufreq_freqs freqs;
         int target_pstate;
  
+       update_turbo_state();
+
         freqs.old = policy->cur;
-       freqs.new = intel_cpufreq_turbo_update(cpu, policy, target_freq);
+       freqs.new = target_freq;
  
         cpufreq_freq_transition_begin(policy, &freqs);
         switch (relation) {
@@ -2370,6 +2296,7 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy,
                 wrmsrl_on_cpu(policy->cpu, MSR_IA32_PERF_CTL,
                               pstate_funcs.get_val(cpu, target_pstate));
         }
+       freqs.new = target_pstate * cpu->pstate.scaling;
         cpufreq_freq_transition_end(policy, &freqs, false);
  
         return 0;
@@ -2381,10 +2308,12 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
         struct cpudata *cpu = all_cpu_data[policy->cpu];
         int target_pstate;
  
-       target_freq = intel_cpufreq_turbo_update(cpu, policy, target_freq);
+       update_turbo_state();
+
         target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);
+       target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
         intel_pstate_update_pstate(cpu, target_pstate);
-       return target_freq;
+       return target_pstate * cpu->pstate.scaling;
  }
  
  static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
@@ -2435,10 +2364,7 @@ static int intel_pstate_register_driver(void)
  {
         int ret;
  
-       intel_pstate_init_limits(&powersave_limits);
-       intel_pstate_set_performance_limits(&performance_limits);
-       limits = IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE) ?
-                       &performance_limits : &powersave_limits;
+       intel_pstate_init_limits(&global);
  
         ret = cpufreq_register_driver(intel_pstate_driver);
         if (ret) {
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c

index 370593006f5f76db29a0433912966cb49820346d..cda8f62d555b57700daded5a5214c8e88ffca4ee 100644 (file)
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -175,6 +175,24 @@ static int powernv_cpuidle_driver_init(void)
                 drv->state_count += 1;
         }
  
+       /*
+        * On the PowerNV platform cpu_present may be less than cpu_possible in
+        * cases when firmware detects the CPU, but it is not available to the
+        * OS.  If CONFIG_HOTPLUG_CPU=n, then such CPUs are not hotpluggable at
+        * run time and hence cpu_devices are not created for those CPUs by the
+        * generic topology_init().
+        *
+        * drv->cpumask defaults to cpu_possible_mask in
+        * __cpuidle_driver_init().  This breaks cpuidle on PowerNV where
+        * cpu_devices are not created for CPUs in cpu_possible_mask that
+        * cannot be hot-added later at run time.
+        *
+        * Trying cpuidle_register_device() on a CPU without a cpu_device is
+        * incorrect, so pass a correct CPU mask to the generic cpuidle driver.
+        */
+
+       drv->cpumask = (struct cpumask *)cpu_present_mask;
+
         return 0;
  }
  
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c

index c5adc8c9ac43afeffb0f7ca842f730df76ef8eb1..ae948b1da93a379b12d16aaf98be5df6ac762da4 100644 (file)
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -615,6 +615,18 @@ int cpuidle_add_sysfs(struct cpuidle_device *dev)
         struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu);
         int error;
  
+       /*
+        * Return if cpu_device is not set up for this CPU.
+        *
+        * This could happen if the arch did not set up cpu_device
+        * since this CPU is not in cpu_present mask and the
+        * driver did not send a correct CPU mask during registration.
+        * Without this check we would end up passing a bogus
+        * value for &cpu_dev->kobj in kobject_init_and_add().
+        */
+       if (!cpu_dev)
+               return -ENODEV;
+
         kdev = kzalloc(sizeof(*kdev), GFP_KERNEL);
         if (!kdev)
                 return -ENOMEM;
diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c

index 32100c4851dd4bd94b9b5b6814558812d8b13629..49cbdcba7883072decb2683857ebb8da856cd68c 100644 (file)
--- a/drivers/crypto/caam/caampkc.c
+++ b/drivers/crypto/caam/caampkc.c
@@ -506,7 +506,7 @@ static int caam_rsa_init_tfm(struct crypto_akcipher *tfm)
         ctx->dev = caam_jr_alloc();
  
         if (IS_ERR(ctx->dev)) {
-               dev_err(ctx->dev, "Job Ring Device allocation for transform failed\n");
+               pr_err("Job Ring Device allocation for transform failed\n");
                 return PTR_ERR(ctx->dev);
         }
  
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c

index fef39f9f41ee200c5ed7138edae76beaefdff3cb..5d7f73d60515effd80861603cb183595ea7fbb85 100644 (file)
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -281,7 +281,8 @@ static int deinstantiate_rng(struct device *ctrldev, int state_handle_mask)
                         /* Try to run it through DECO0 */
                         ret = run_descriptor_deco0(ctrldev, desc, &status);
  
-                       if (ret || status) {
+                       if (ret ||
+                           (status && status != JRSTA_SSRC_JUMP_HALT_CC)) {
                                 dev_err(ctrldev,
                                         "Failed to deinstantiate RNG4 SH%d\n",
                                         sh_idx);
@@ -301,15 +302,13 @@ static int caam_remove(struct platform_device *pdev)
         struct device *ctrldev;
         struct caam_drv_private *ctrlpriv;
         struct caam_ctrl __iomem *ctrl;
-       int ring;
  
         ctrldev = &pdev->dev;
         ctrlpriv = dev_get_drvdata(ctrldev);
         ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl;
  
-       /* Remove platform devices for JobRs */
-       for (ring = 0; ring < ctrlpriv->total_jobrs; ring++)
-               of_device_unregister(ctrlpriv->jrpdev[ring]);
+       /* Remove platform devices under the crypto node */
+       of_platform_depopulate(ctrldev);
  
         /* De-initialize RNG state handles initialized by this driver. */
         if (ctrlpriv->rng4_sh_init)
@@ -418,10 +417,21 @@ DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u32_ro, caam_debugfs_u32_get, NULL, "%llu\n");
  DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u64_ro, caam_debugfs_u64_get, NULL, "%llu\n");
  #endif
  
+static const struct of_device_id caam_match[] = {
+       {
+               .compatible = "fsl,sec-v4.0",
+       },
+       {
+               .compatible = "fsl,sec4.0",
+       },
+       {},
+};
+MODULE_DEVICE_TABLE(of, caam_match);
+
  /* Probe routine for CAAM top (controller) level */
  static int caam_probe(struct platform_device *pdev)
  {
-       int ret, ring, ridx, rspec, gen_sk, ent_delay = RTSDCTL_ENT_DLY_MIN;
+       int ret, ring, gen_sk, ent_delay = RTSDCTL_ENT_DLY_MIN;
         u64 caam_id;
         struct device *dev;
         struct device_node *nprop, *np;
@@ -597,47 +607,24 @@ static int caam_probe(struct platform_device *pdev)
                 goto iounmap_ctrl;
         }
  
-       /*
-        * Detect and enable JobRs
-        * First, find out how many ring spec'ed, allocate references
-        * for all, then go probe each one.
-        */
-       rspec = 0;
-       for_each_available_child_of_node(nprop, np)
-               if (of_device_is_compatible(np, "fsl,sec-v4.0-job-ring") ||
-                   of_device_is_compatible(np, "fsl,sec4.0-job-ring"))
-                       rspec++;
-
-       ctrlpriv->jrpdev = devm_kcalloc(&pdev->dev, rspec,
-                                       sizeof(*ctrlpriv->jrpdev), GFP_KERNEL);
-       if (ctrlpriv->jrpdev == NULL) {
-               ret = -ENOMEM;
+       ret = of_platform_populate(nprop, caam_match, NULL, dev);
+       if (ret) {
+               dev_err(dev, "JR platform devices creation error\n");
                 goto iounmap_ctrl;
         }
  
         ring = 0;
-       ridx = 0;
-       ctrlpriv->total_jobrs = 0;
         for_each_available_child_of_node(nprop, np)
                 if (of_device_is_compatible(np, "fsl,sec-v4.0-job-ring") ||
                     of_device_is_compatible(np, "fsl,sec4.0-job-ring")) {
-                       ctrlpriv->jrpdev[ring] =
-                               of_platform_device_create(np, NULL, dev);
-                       if (!ctrlpriv->jrpdev[ring]) {
-                               pr_warn("JR physical index %d: Platform device creation error\n",
-                                       ridx);
-                               ridx++;
-                               continue;
-                       }
                         ctrlpriv->jr[ring] = (struct caam_job_ring __iomem __force *)
                                              ((__force uint8_t *)ctrl +
-                                            (ridx + JR_BLOCK_NUMBER) *
+                                            (ring + JR_BLOCK_NUMBER) *
                                               BLOCK_OFFSET
                                              );
                         ctrlpriv->total_jobrs++;
                         ring++;
-                       ridx++;
-       }
+               }
  
         /* Check to see if QI present. If so, enable */
         ctrlpriv->qi_present =
@@ -847,17 +834,6 @@ disable_caam_ipg:
         return ret;
  }
  
-static struct of_device_id caam_match[] = {
-       {
-               .compatible = "fsl,sec-v4.0",
-       },
-       {
-               .compatible = "fsl,sec4.0",
-       },
-       {},
-};
-MODULE_DEVICE_TABLE(of, caam_match);
-
  static struct platform_driver caam_driver = {
         .driver = {
                 .name = "caam",
diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h

index e2bcacc1a921675cf30f70a40816e1306a8c3ef9..dbed8baeebe5d4765b741f562b99df206bc4b47b 100644 (file)
--- a/drivers/crypto/caam/intern.h
+++ b/drivers/crypto/caam/intern.h
@@ -66,7 +66,6 @@ struct caam_drv_private_jr {
  struct caam_drv_private {
  
         struct device *dev;
-       struct platform_device **jrpdev; /* Alloc'ed array per sub-device */
         struct platform_device *pdev;
  
         /* Physical-presence section */
diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c

index 41cc853f8569cfd4825806028331ed5d98f005c0..fc08b4ed69d936f2866d5c9fef19450b852f900c 100644 (file)
--- a/drivers/crypto/ccp/ccp-dev-v5.c
+++ b/drivers/crypto/ccp/ccp-dev-v5.c
@@ -1015,6 +1015,7 @@ const struct ccp_vdata ccpv5a = {
  
  const struct ccp_vdata ccpv5b = {
         .version = CCP_VERSION(5, 0),
+       .dma_chan_attr = DMA_PRIVATE,
         .setup = ccp5other_config,
         .perform = &ccp5_actions,
         .bar = 2,
diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c

index 511ab042b5e7939b008045129de0c9e268f2db46..92d1c6959f08b8943f513a9ed5fd8525c2d6b702 100644 (file)
--- a/drivers/crypto/ccp/ccp-dev.c
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -283,11 +283,14 @@ EXPORT_SYMBOL_GPL(ccp_version);
   */
  int ccp_enqueue_cmd(struct ccp_cmd *cmd)
  {
-       struct ccp_device *ccp = ccp_get_device();
+       struct ccp_device *ccp;
         unsigned long flags;
         unsigned int i;
         int ret;
  
+       /* Some commands might need to be sent to a specific device */
+       ccp = cmd->ccp ? cmd->ccp : ccp_get_device();
+
         if (!ccp)
                 return -ENODEV;
  
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h

index 2b5c01fade05a526d5e78241eab38ec16e790aa6..aa36f3f81860560a442687518f78a0e752a9c11b 100644 (file)
--- a/drivers/crypto/ccp/ccp-dev.h
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -179,6 +179,10 @@
  
  /* ------------------------ General CCP Defines ------------------------ */
  
+#define        CCP_DMA_DFLT                    0x0
+#define        CCP_DMA_PRIV                    0x1
+#define        CCP_DMA_PUB                     0x2
+
  #define CCP_DMAPOOL_MAX_SIZE           64
  #define CCP_DMAPOOL_ALIGN              BIT(5)
  
@@ -636,6 +640,7 @@ struct ccp_actions {
  /* Structure to hold CCP version-specific values */
  struct ccp_vdata {
         const unsigned int version;
+       const unsigned int dma_chan_attr;
         void (*setup)(struct ccp_device *);
         const struct ccp_actions *perform;
         const unsigned int bar;
diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c

index e5d9278f40197427e913993fe9249d405585fe87..e00be01fbf5a036fcd8a9f09234b581998b7b75f 100644 (file)
--- a/drivers/crypto/ccp/ccp-dmaengine.c
+++ b/drivers/crypto/ccp/ccp-dmaengine.c
@@ -10,6 +10,7 @@
   * published by the Free Software Foundation.
   */
  
+#include <linux/module.h>
  #include <linux/kernel.h>
  #include <linux/dmaengine.h>
  #include <linux/spinlock.h>
@@ -25,6 +26,37 @@
         (mask == 0) ? 64 : fls64(mask); \
  })
  
+/* The CCP as a DMA provider can be configured for public or private
+ * channels. Default is specified in the vdata for the device (PCI ID).
+ * This module parameter will override for all channels on all devices:
+ *   dma_chan_attr = 0x2 to force all channels public
+ *                 = 0x1 to force all channels private
+ *                 = 0x0 to defer to the vdata setting
+ *                 = any other value: warning, revert to 0x0
+ */
+static unsigned int dma_chan_attr = CCP_DMA_DFLT;
+module_param(dma_chan_attr, uint, 0444);
+MODULE_PARM_DESC(dma_chan_attr, "Set DMA channel visibility: 0 (default) = device defaults, 1 = make private, 2 = make public");
+
+unsigned int ccp_get_dma_chan_attr(struct ccp_device *ccp)
+{
+       switch (dma_chan_attr) {
+       case CCP_DMA_DFLT:
+               return ccp->vdata->dma_chan_attr;
+
+       case CCP_DMA_PRIV:
+               return DMA_PRIVATE;
+
+       case CCP_DMA_PUB:
+               return 0;
+
+       default:
+               dev_info_once(ccp->dev, "Invalid value for dma_chan_attr: %d\n",
+                             dma_chan_attr);
+               return ccp->vdata->dma_chan_attr;
+       }
+}
+
  static void ccp_free_cmd_resources(struct ccp_device *ccp,
                                    struct list_head *list)
  {
@@ -390,6 +422,7 @@ static struct ccp_dma_desc *ccp_create_desc(struct dma_chan *dma_chan,
                         goto err;
  
                 ccp_cmd = &cmd->ccp_cmd;
+               ccp_cmd->ccp = chan->ccp;
                 ccp_pt = &ccp_cmd->u.passthru_nomap;
                 ccp_cmd->flags = CCP_CMD_MAY_BACKLOG;
                 ccp_cmd->flags |= CCP_CMD_PASSTHRU_NO_DMA_MAP;
@@ -674,6 +707,15 @@ int ccp_dmaengine_register(struct ccp_device *ccp)
         dma_cap_set(DMA_SG, dma_dev->cap_mask);
         dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask);
  
+       /* The DMA channels for this device can be set to public or private,
+        * and overridden by the module parameter dma_chan_attr.
+        * Default: according to the value in vdata (dma_chan_attr=0)
+        * dma_chan_attr=0x1: all channels private (override vdata)
+        * dma_chan_attr=0x2: all channels public (override vdata)
+        */
+       if (ccp_get_dma_chan_attr(ccp) == DMA_PRIVATE)
+               dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask);
+
         INIT_LIST_HEAD(&dma_dev->channels);
         for (i = 0; i < ccp->cmd_q_count; i++) {
                 chan = ccp->ccp_dma_chan + i;
diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c

index dce1af0ce85ce8ec6dbd7184f02776cb173c41f0..1b9da3dc799b05dff2971e6a5415cc3ae3d4bd85 100644 (file)
--- a/drivers/crypto/s5p-sss.c
+++ b/drivers/crypto/s5p-sss.c
@@ -270,7 +270,7 @@ static void s5p_sg_copy_buf(void *buf, struct scatterlist *sg,
         scatterwalk_done(&walk, out, 0);
  }
  
-static void s5p_aes_complete(struct s5p_aes_dev *dev, int err)
+static void s5p_sg_done(struct s5p_aes_dev *dev)
  {
         if (dev->sg_dst_cpy) {
                 dev_dbg(dev->dev,
@@ -281,8 +281,11 @@ static void s5p_aes_complete(struct s5p_aes_dev *dev, int err)
         }
         s5p_free_sg_cpy(dev, &dev->sg_src_cpy);
         s5p_free_sg_cpy(dev, &dev->sg_dst_cpy);
+}
  
-       /* holding a lock outside */
+/* Calls the completion. Cannot be called with dev->lock held. */
+static void s5p_aes_complete(struct s5p_aes_dev *dev, int err)
+{
         dev->req->base.complete(&dev->req->base, err);
         dev->busy = false;
  }
@@ -368,51 +371,44 @@ exit:
  }
  
  /*
- * Returns true if new transmitting (output) data is ready and its
- * address+length have to be written to device (by calling
- * s5p_set_dma_outdata()). False otherwise.
+ * Returns -ERRNO on error (mapping of new data failed).
+ * On success returns:
+ *  - 0 if there is no more data,
+ *  - 1 if new transmitting (output) data is ready and its address+length
+ *     have to be written to device (by calling s5p_set_dma_outdata()).
   */
-static bool s5p_aes_tx(struct s5p_aes_dev *dev)
+static int s5p_aes_tx(struct s5p_aes_dev *dev)
  {
-       int err = 0;
-       bool ret = false;
+       int ret = 0;
  
         s5p_unset_outdata(dev);
  
         if (!sg_is_last(dev->sg_dst)) {
-               err = s5p_set_outdata(dev, sg_next(dev->sg_dst));
-               if (err)
-                       s5p_aes_complete(dev, err);
-               else
-                       ret = true;
-       } else {
-               s5p_aes_complete(dev, err);
-
-               dev->busy = true;
-               tasklet_schedule(&dev->tasklet);
+               ret = s5p_set_outdata(dev, sg_next(dev->sg_dst));
+               if (!ret)
+                       ret = 1;
         }
  
         return ret;
  }
  
  /*
- * Returns true if new receiving (input) data is ready and its
- * address+length have to be written to device (by calling
- * s5p_set_dma_indata()). False otherwise.
+ * Returns -ERRNO on error (mapping of new data failed).
+ * On success returns:
+ *  - 0 if there is no more data,
+ *  - 1 if new receiving (input) data is ready and its address+length
+ *     have to be written to device (by calling s5p_set_dma_indata()).
   */
-static bool s5p_aes_rx(struct s5p_aes_dev *dev)
+static int s5p_aes_rx(struct s5p_aes_dev *dev/*, bool *set_dma*/)
  {
-       int err;
-       bool ret = false;
+       int ret = 0;
  
         s5p_unset_indata(dev);
  
         if (!sg_is_last(dev->sg_src)) {
-               err = s5p_set_indata(dev, sg_next(dev->sg_src));
-               if (err)
-                       s5p_aes_complete(dev, err);
-               else
-                       ret = true;
+               ret = s5p_set_indata(dev, sg_next(dev->sg_src));
+               if (!ret)
+                       ret = 1;
         }
  
         return ret;
@@ -422,33 +418,73 @@ static irqreturn_t s5p_aes_interrupt(int irq, void *dev_id)
  {
         struct platform_device *pdev = dev_id;
         struct s5p_aes_dev *dev = platform_get_drvdata(pdev);
-       bool set_dma_tx = false;
-       bool set_dma_rx = false;
+       int err_dma_tx = 0;
+       int err_dma_rx = 0;
+       bool tx_end = false;
         unsigned long flags;
         uint32_t status;
+       int err;
  
         spin_lock_irqsave(&dev->lock, flags);
  
+       /*
+        * Handle rx or tx interrupt. If there is still data (scatterlist did not
+        * reach end), then map next scatterlist entry.
+        * In case of such mapping error, s5p_aes_complete() should be called.
+        *
+        * If there is no more data in tx scatter list, call s5p_aes_complete()
+        * and schedule new tasklet.
+        */
         status = SSS_READ(dev, FCINTSTAT);
         if (status & SSS_FCINTSTAT_BRDMAINT)
-               set_dma_rx = s5p_aes_rx(dev);
-       if (status & SSS_FCINTSTAT_BTDMAINT)
-               set_dma_tx = s5p_aes_tx(dev);
+               err_dma_rx = s5p_aes_rx(dev);
+
+       if (status & SSS_FCINTSTAT_BTDMAINT) {
+               if (sg_is_last(dev->sg_dst))
+                       tx_end = true;
+               err_dma_tx = s5p_aes_tx(dev);
+       }
  
         SSS_WRITE(dev, FCINTPEND, status);
  
-       /*
-        * Writing length of DMA block (either receiving or transmitting)
-        * will start the operation immediately, so this should be done
-        * at the end (even after clearing pending interrupts to not miss the
-        * interrupt).
-        */
-       if (set_dma_tx)
-               s5p_set_dma_outdata(dev, dev->sg_dst);
-       if (set_dma_rx)
-               s5p_set_dma_indata(dev, dev->sg_src);
+       if (err_dma_rx < 0) {
+               err = err_dma_rx;
+               goto error;
+       }
+       if (err_dma_tx < 0) {
+               err = err_dma_tx;
+               goto error;
+       }
+
+       if (tx_end) {
+               s5p_sg_done(dev);
+
+               spin_unlock_irqrestore(&dev->lock, flags);
+
+               s5p_aes_complete(dev, 0);
+               dev->busy = true;
+               tasklet_schedule(&dev->tasklet);
+       } else {
+               /*
+                * Writing length of DMA block (either receiving or
+                * transmitting) will start the operation immediately, so this
+                * should be done at the end (even after clearing pending
+                * interrupts to not miss the interrupt).
+                */
+               if (err_dma_tx == 1)
+                       s5p_set_dma_outdata(dev, dev->sg_dst);
+               if (err_dma_rx == 1)
+                       s5p_set_dma_indata(dev, dev->sg_src);
  
+               spin_unlock_irqrestore(&dev->lock, flags);
+       }
+
+       return IRQ_HANDLED;
+
+error:
+       s5p_sg_done(dev);
         spin_unlock_irqrestore(&dev->lock, flags);
+       s5p_aes_complete(dev, err);
  
         return IRQ_HANDLED;
  }
@@ -597,8 +633,9 @@ outdata_error:
         s5p_unset_indata(dev);
  
  indata_error:
-       s5p_aes_complete(dev, err);
+       s5p_sg_done(dev);
         spin_unlock_irqrestore(&dev->lock, flags);
+       s5p_aes_complete(dev, err);
  }
  
  static void s5p_tasklet_cb(unsigned long data)
@@ -805,8 +842,9 @@ static int s5p_aes_probe(struct platform_device *pdev)
                 dev_warn(dev, "feed control interrupt is not available.\n");
                 goto err_irq;
         }
-       err = devm_request_irq(dev, pdata->irq_fc, s5p_aes_interrupt,
-                              IRQF_SHARED, pdev->name, pdev);
+       err = devm_request_threaded_irq(dev, pdata->irq_fc, NULL,
+                                       s5p_aes_interrupt, IRQF_ONESHOT,
+                                       pdev->name, pdev);
         if (err < 0) {
                 dev_warn(dev, "feed control interrupt is not available.\n");
                 goto err_irq;
diff --git a/drivers/crypto/ux500/cryp/cryp.c b/drivers/crypto/ux500/cryp/cryp.c

index 43a0c8a26ab0c56c25b56f425d9ce8192780e54b..00a16ab601cb07d4b525a89a8ca6bf3a5393c94b 100644 (file)
--- a/drivers/crypto/ux500/cryp/cryp.c
+++ b/drivers/crypto/ux500/cryp/cryp.c
@@ -82,7 +82,7 @@ void cryp_activity(struct cryp_device_data *device_data,
  void cryp_flush_inoutfifo(struct cryp_device_data *device_data)
  {
         /*
-        * We always need to disble the hardware before trying to flush the
+        * We always need to disable the hardware before trying to flush the
          * FIFO. This is something that isn't written in the design
          * specification, but we have been informed by the hardware designers
          * that this must be done.
diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig

index 3e2ab3b14eea205f19e8b436291e5117cec9567d..9e95bf94eb13ff2e830905694e0ce4c045fc76b1 100644 (file)
--- a/drivers/dax/Kconfig
+++ b/drivers/dax/Kconfig
@@ -2,6 +2,7 @@ menuconfig DEV_DAX
         tristate "DAX: direct access to differentiated memory"
         default m if NVDIMM_DAX
         depends on TRANSPARENT_HUGEPAGE
+       select SRCU
         help
           Support raw access to differentiated (persistence, bandwidth,
           latency...) memory via an mmap(2) capable character
diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c

index 8d9829ff2a784de9490404a86a194e2304ed65c7..806f180c80d816b313319f960479a7ad2848c672 100644 (file)
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -25,6 +25,7 @@
  #include "dax.h"
  
  static dev_t dax_devt;
+DEFINE_STATIC_SRCU(dax_srcu);
  static struct class *dax_class;
  static DEFINE_IDA(dax_minor_ida);
  static int nr_dax = CONFIG_NR_DEV_DAX;
@@ -60,7 +61,7 @@ struct dax_region {
   * @region - parent region
   * @dev - device backing the character device
   * @cdev - core chardev data
- * @alive - !alive + rcu grace period == no new mappings can be established
+ * @alive - !alive + srcu grace period == no new mappings can be established
   * @id - child id in the region
   * @num_resources - number of physical address extents in this device
   * @res - array of physical address ranges
@@ -427,6 +428,7 @@ static int __dax_dev_pte_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
         int rc = VM_FAULT_SIGBUS;
         phys_addr_t phys;
         pfn_t pfn;
+       unsigned int fault_size = PAGE_SIZE;
  
         if (check_vma(dax_dev, vmf->vma, __func__))
                 return VM_FAULT_SIGBUS;
@@ -437,9 +439,12 @@ static int __dax_dev_pte_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
                 return VM_FAULT_SIGBUS;
         }
  
+       if (fault_size != dax_region->align)
+               return VM_FAULT_SIGBUS;
+
         phys = pgoff_to_phys(dax_dev, vmf->pgoff, PAGE_SIZE);
         if (phys == -1) {
-               dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
+               dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
                                 vmf->pgoff);
                 return VM_FAULT_SIGBUS;
         }
@@ -464,6 +469,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
         phys_addr_t phys;
         pgoff_t pgoff;
         pfn_t pfn;
+       unsigned int fault_size = PMD_SIZE;
  
         if (check_vma(dax_dev, vmf->vma, __func__))
                 return VM_FAULT_SIGBUS;
@@ -480,10 +486,20 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
                 return VM_FAULT_SIGBUS;
         }
  
+       if (fault_size < dax_region->align)
+               return VM_FAULT_SIGBUS;
+       else if (fault_size > dax_region->align)
+               return VM_FAULT_FALLBACK;
+
+       /* if we are outside of the VMA */
+       if (pmd_addr < vmf->vma->vm_start ||
+                       (pmd_addr + PMD_SIZE) > vmf->vma->vm_end)
+               return VM_FAULT_SIGBUS;
+
         pgoff = linear_page_index(vmf->vma, pmd_addr);
         phys = pgoff_to_phys(dax_dev, pgoff, PMD_SIZE);
         if (phys == -1) {
-               dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
+               dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
                                 pgoff);
                 return VM_FAULT_SIGBUS;
         }
@@ -503,6 +519,8 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
         phys_addr_t phys;
         pgoff_t pgoff;
         pfn_t pfn;
+       unsigned int fault_size = PUD_SIZE;
+
  
         if (check_vma(dax_dev, vmf->vma, __func__))
                 return VM_FAULT_SIGBUS;
@@ -519,10 +537,20 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
                 return VM_FAULT_SIGBUS;
         }
  
+       if (fault_size < dax_region->align)
+               return VM_FAULT_SIGBUS;
+       else if (fault_size > dax_region->align)
+               return VM_FAULT_FALLBACK;
+
+       /* if we are outside of the VMA */
+       if (pud_addr < vmf->vma->vm_start ||
+                       (pud_addr + PUD_SIZE) > vmf->vma->vm_end)
+               return VM_FAULT_SIGBUS;
+
         pgoff = linear_page_index(vmf->vma, pud_addr);
         phys = pgoff_to_phys(dax_dev, pgoff, PUD_SIZE);
         if (phys == -1) {
-               dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
+               dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
                                 pgoff);
                 return VM_FAULT_SIGBUS;
         }
@@ -542,7 +570,7 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
  static int dax_dev_huge_fault(struct vm_fault *vmf,
                 enum page_entry_size pe_size)
  {
-       int rc;
+       int rc, id;
         struct file *filp = vmf->vma->vm_file;
         struct dax_dev *dax_dev = filp->private_data;
  
@@ -551,7 +579,7 @@ static int dax_dev_huge_fault(struct vm_fault *vmf,
                         ? "write" : "read",
                         vmf->vma->vm_start, vmf->vma->vm_end);
  
-       rcu_read_lock();
+       id = srcu_read_lock(&dax_srcu);
         switch (pe_size) {
         case PE_SIZE_PTE:
                 rc = __dax_dev_pte_fault(dax_dev, vmf);
@@ -565,7 +593,7 @@ static int dax_dev_huge_fault(struct vm_fault *vmf,
         default:
                 return VM_FAULT_FALLBACK;
         }
-       rcu_read_unlock();
+       srcu_read_unlock(&dax_srcu, id);
  
         return rc;
  }
@@ -686,11 +714,11 @@ static void unregister_dax_dev(void *dev)
          * Note, rcu is not protecting the liveness of dax_dev, rcu is
          * ensuring that any fault handlers that might have seen
          * dax_dev->alive == true, have completed.  Any fault handlers
-        * that start after synchronize_rcu() has started will abort
+        * that start after synchronize_srcu() has started will abort
          * upon seeing dax_dev->alive == false.
          */
         dax_dev->alive = false;
-       synchronize_rcu();
+       synchronize_srcu(&dax_srcu);
         unmap_mapping_range(dax_dev->inode->i_mapping, 0, 0, 1);
         cdev_del(cdev);
         device_unregister(dev);
diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c

index e18dc596cf2447fa9ef7e41b62d9396e29043426..6204cc32d09c5096df8aec304c3c37b3bcb6be44 100644 (file)
--- a/drivers/dma/bcm2835-dma.c
+++ b/drivers/dma/bcm2835-dma.c
@@ -251,8 +251,11 @@ static void bcm2835_dma_create_cb_set_length(
          */
  
         /* have we filled in period_length yet? */
-       if (*total_len + control_block->length < period_len)
+       if (*total_len + control_block->length < period_len) {
+               /* update number of bytes in this period so far */
+               *total_len += control_block->length;
                 return;
+       }
  
         /* calculate the length that remains to reach period_length */
         control_block->length = period_len - *total_len;
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c

index 24e0221fd66d1ff58eead62ee9f4a865eb87da03..d9118ec23025417eb6732542d653ac989cee05e1 100644 (file)
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -1108,12 +1108,14 @@ static struct dmaengine_unmap_pool *__get_unmap_pool(int nr)
         switch (order) {
         case 0 ... 1:
                 return &unmap_pool[0];
+#if IS_ENABLED(CONFIG_DMA_ENGINE_RAID)
         case 2 ... 4:
                 return &unmap_pool[1];
         case 5 ... 7:
                 return &unmap_pool[2];
         case 8:
                 return &unmap_pool[3];
+#endif
         default:
                 BUG();
                 return NULL;
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig

index 82d85cce81f815b9b8bf70352e3bdeeece2d7d41..96afb2aeed18a43f44aa0d318f7d58aeb428f401 100644 (file)
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -10,26 +10,16 @@ config EDAC_SUPPORT
         bool
  
  menuconfig EDAC
-       bool "EDAC (Error Detection And Correction) reporting"
-       depends on HAS_IOMEM && EDAC_SUPPORT
+       tristate "EDAC (Error Detection And Correction) reporting"
+       depends on HAS_IOMEM && EDAC_SUPPORT && RAS
         help
-         EDAC is designed to report errors in the core system.
-         These are low-level errors that are reported in the CPU or
-         supporting chipset or other subsystems:
+         EDAC is a subsystem along with hardware-specific drivers designed to
+         report hardware errors. These are low-level errors that are reported
+         in the CPU or supporting chipset or other subsystems:
           memory errors, cache errors, PCI errors, thermal throttling, etc..
           If unsure, select 'Y'.
  
-         If this code is reporting problems on your system, please
-         see the EDAC project web pages for more information at:
-
-         <http://bluesmoke.sourceforge.net/>
-
-         and:
-
-         <http://buttersideup.com/edacwiki>
-
-         There is also a mailing list for the EDAC project, which can
-         be found via the sourceforge page.
+         The mailing list for the EDAC project is linux-edac@vger.kernel.org.
  
  if EDAC
  
@@ -43,6 +33,7 @@ config EDAC_LEGACY_SYSFS
  
  config EDAC_DEBUG
         bool "Debugging"
+       select DEBUG_FS
         help
           This turns on debugging information for the entire EDAC subsystem.
           You do so by inserting edac_module with "edac_debug_level=x." Valid
@@ -61,21 +52,9 @@ config EDAC_DECODE_MCE
           which occur really early upon boot, before the module infrastructure
           has been initialized.
  
-config EDAC_MM_EDAC
-       tristate "Main Memory EDAC (Error Detection And Correction) reporting"
-       select RAS
-       help
-         Some systems are able to detect and correct errors in main
-         memory.  EDAC can report statistics on memory error
-         detection and correction (EDAC - or commonly referred to ECC
-         errors).  EDAC will also try to decode where these errors
-         occurred so that a particular failing memory module can be
-         replaced.  If unsure, select 'Y'.
-
  config EDAC_GHES
         bool "Output ACPI APEI/GHES BIOS detected errors via EDAC"
-       depends on ACPI_APEI_GHES && (EDAC_MM_EDAC=y)
-       default y
+       depends on ACPI_APEI_GHES && (EDAC=y)
         help
           Not all machines support hardware-driven error report. Some of those
           provide a BIOS-driven error report mechanism via ACPI, using the
@@ -97,7 +76,7 @@ config EDAC_GHES
  
  config EDAC_AMD64
         tristate "AMD64 (Opteron, Athlon64)"
-       depends on EDAC_MM_EDAC && AMD_NB && EDAC_DECODE_MCE
+       depends on AMD_NB && EDAC_DECODE_MCE
         help
           Support for error detection and correction of DRAM ECC errors on
           the AMD64 families (>= K8) of memory controllers.
@@ -123,28 +102,28 @@ config EDAC_AMD64_ERROR_INJECTION
  
  config EDAC_AMD76X
         tristate "AMD 76x (760, 762, 768)"
-       depends on EDAC_MM_EDAC && PCI && X86_32
+       depends on PCI && X86_32
         help
           Support for error detection and correction on the AMD 76x
           series of chipsets used with the Athlon processor.
  
  config EDAC_E7XXX
         tristate "Intel e7xxx (e7205, e7500, e7501, e7505)"
-       depends on EDAC_MM_EDAC && PCI && X86_32
+       depends on PCI && X86_32
         help
           Support for error detection and correction on the Intel
           E7205, E7500, E7501 and E7505 server chipsets.
  
  config EDAC_E752X
         tristate "Intel e752x (e7520, e7525, e7320) and 3100"
-       depends on EDAC_MM_EDAC && PCI && X86
+       depends on PCI && X86
         help
           Support for error detection and correction on the Intel
           E7520, E7525, E7320 server chipsets.
  
  config EDAC_I82443BXGX
         tristate "Intel 82443BX/GX (440BX/GX)"
-       depends on EDAC_MM_EDAC && PCI && X86_32
+       depends on PCI && X86_32
         depends on BROKEN
         help
           Support for error detection and correction on the Intel
@@ -152,56 +131,56 @@ config EDAC_I82443BXGX
  
  config EDAC_I82875P
         tristate "Intel 82875p (D82875P, E7210)"
-       depends on EDAC_MM_EDAC && PCI && X86_32
+       depends on PCI && X86_32
         help
           Support for error detection and correction on the Intel
           DP82785P and E7210 server chipsets.
  
  config EDAC_I82975X
         tristate "Intel 82975x (D82975x)"
-       depends on EDAC_MM_EDAC && PCI && X86
+       depends on PCI && X86
         help
           Support for error detection and correction on the Intel
           DP82975x server chipsets.
  
  config EDAC_I3000
         tristate "Intel 3000/3010"
-       depends on EDAC_MM_EDAC && PCI && X86
+       depends on PCI && X86
         help
           Support for error detection and correction on the Intel
           3000 and 3010 server chipsets.
  
  config EDAC_I3200
         tristate "Intel 3200"
-       depends on EDAC_MM_EDAC && PCI && X86
+       depends on PCI && X86
         help
           Support for error detection and correction on the Intel
           3200 and 3210 server chipsets.
  
  config EDAC_IE31200
         tristate "Intel e312xx"
-       depends on EDAC_MM_EDAC && PCI && X86
+       depends on PCI && X86
         help
           Support for error detection and correction on the Intel
           E3-1200 based DRAM controllers.
  
  config EDAC_X38
         tristate "Intel X38"
-       depends on EDAC_MM_EDAC && PCI && X86
+       depends on PCI && X86
         help
           Support for error detection and correction on the Intel
           X38 server chipsets.
  
  config EDAC_I5400
         tristate "Intel 5400 (Seaburg) chipsets"
-       depends on EDAC_MM_EDAC && PCI && X86
+       depends on PCI && X86
         help
           Support for error detection and correction the Intel
           i5400 MCH chipset (Seaburg).
  
  config EDAC_I7CORE
         tristate "Intel i7 Core (Nehalem) processors"
-       depends on EDAC_MM_EDAC && PCI && X86 && X86_MCE_INTEL
+       depends on PCI && X86 && X86_MCE_INTEL
         help
           Support for error detection and correction the Intel
           i7 Core (Nehalem) Integrated Memory Controller that exists on
@@ -210,87 +189,93 @@ config EDAC_I7CORE
  
  config EDAC_I82860
         tristate "Intel 82860"
-       depends on EDAC_MM_EDAC && PCI && X86_32
+       depends on PCI && X86_32
         help
           Support for error detection and correction on the Intel
           82860 chipset.
  
  config EDAC_R82600
         tristate "Radisys 82600 embedded chipset"
-       depends on EDAC_MM_EDAC && PCI && X86_32
+       depends on PCI && X86_32
         help
           Support for error detection and correction on the Radisys
           82600 embedded chipset.
  
  config EDAC_I5000
         tristate "Intel Greencreek/Blackford chipset"
-       depends on EDAC_MM_EDAC && X86 && PCI
+       depends on X86 && PCI
         help
           Support for error detection and correction the Intel
           Greekcreek/Blackford chipsets.
  
  config EDAC_I5100
         tristate "Intel San Clemente MCH"
-       depends on EDAC_MM_EDAC && X86 && PCI
+       depends on X86 && PCI
         help
           Support for error detection and correction the Intel
           San Clemente MCH.
  
  config EDAC_I7300
         tristate "Intel Clarksboro MCH"
-       depends on EDAC_MM_EDAC && X86 && PCI
+       depends on X86 && PCI
         help
           Support for error detection and correction the Intel
           Clarksboro MCH (Intel 7300 chipset).
  
  config EDAC_SBRIDGE
         tristate "Intel Sandy-Bridge/Ivy-Bridge/Haswell Integrated MC"
-       depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL
-       depends on PCI_MMCONFIG
+       depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG
         help
           Support for error detection and correction the Intel
           Sandy Bridge, Ivy Bridge and Haswell Integrated Memory Controllers.
  
  config EDAC_SKX
         tristate "Intel Skylake server Integrated MC"
-       depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL
-       depends on PCI_MMCONFIG
+       depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG
         help
           Support for error detection and correction the Intel
           Skylake server Integrated Memory Controllers.
  
+config EDAC_PND2
+       tristate "Intel Pondicherry2"
+       depends on PCI && X86_64 && X86_MCE_INTEL
+       help
+         Support for error detection and correction on the Intel
+         Pondicherry2 Integrated Memory Controller. This SoC IP is
+         first used on the Apollo Lake platform and Denverton
+         micro-server but may appear on others in the future.
+
  config EDAC_MPC85XX
         tristate "Freescale MPC83xx / MPC85xx"
-       depends on EDAC_MM_EDAC && FSL_SOC
+       depends on FSL_SOC
         help
           Support for error detection and correction on the Freescale
           MPC8349, MPC8560, MPC8540, MPC8548, T4240
  
  config EDAC_LAYERSCAPE
         tristate "Freescale Layerscape DDR"
-       depends on EDAC_MM_EDAC && ARCH_LAYERSCAPE
+       depends on ARCH_LAYERSCAPE
         help
           Support for error detection and correction on Freescale memory
           controllers on Layerscape SoCs.
  
  config EDAC_MV64X60
         tristate "Marvell MV64x60"
-       depends on EDAC_MM_EDAC && MV64X60
+       depends on MV64X60
         help
           Support for error detection and correction on the Marvell
           MV64360 and MV64460 chipsets.
  
  config EDAC_PASEMI
         tristate "PA Semi PWRficient"
-       depends on EDAC_MM_EDAC && PCI
-       depends on PPC_PASEMI
+       depends on PPC_PASEMI && PCI
         help
           Support for error detection and correction on PA Semi
           PWRficient.
  
  config EDAC_CELL
         tristate "Cell Broadband Engine memory controller"
-       depends on EDAC_MM_EDAC && PPC_CELL_COMMON
+       depends on PPC_CELL_COMMON
         help
           Support for error detection and correction on the
           Cell Broadband Engine internal memory controller
@@ -298,7 +283,7 @@ config EDAC_CELL
  
  config EDAC_PPC4XX
         tristate "PPC4xx IBM DDR2 Memory Controller"
-       depends on EDAC_MM_EDAC && 4xx
+       depends on 4xx
         help
           This enables support for EDAC on the ECC memory used
           with the IBM DDR2 memory controller found in various
@@ -307,7 +292,7 @@ config EDAC_PPC4XX
  
  config EDAC_AMD8131
         tristate "AMD8131 HyperTransport PCI-X Tunnel"
-       depends on EDAC_MM_EDAC && PCI && PPC_MAPLE
+       depends on PCI && PPC_MAPLE
         help
           Support for error detection and correction on the
           AMD8131 HyperTransport PCI-X Tunnel chip.
@@ -316,7 +301,7 @@ config EDAC_AMD8131
  
  config EDAC_AMD8111
         tristate "AMD8111 HyperTransport I/O Hub"
-       depends on EDAC_MM_EDAC && PCI && PPC_MAPLE
+       depends on PCI && PPC_MAPLE
         help
           Support for error detection and correction on the
           AMD8111 HyperTransport I/O Hub chip.
@@ -325,7 +310,7 @@ config EDAC_AMD8111
  
  config EDAC_CPC925
         tristate "IBM CPC925 Memory Controller (PPC970FX)"
-       depends on EDAC_MM_EDAC && PPC64
+       depends on PPC64
         help
           Support for error detection and correction on the
           IBM CPC925 Bridge and Memory Controller, which is
@@ -334,7 +319,7 @@ config EDAC_CPC925
  
  config EDAC_TILE
         tristate "Tilera Memory Controller"
-       depends on EDAC_MM_EDAC && TILE
+       depends on TILE
         default y
         help
           Support for error detection and correction on the
@@ -342,49 +327,59 @@ config EDAC_TILE
  
  config EDAC_HIGHBANK_MC
         tristate "Highbank Memory Controller"
-       depends on EDAC_MM_EDAC && ARCH_HIGHBANK
+       depends on ARCH_HIGHBANK
         help
           Support for error detection and correction on the
           Calxeda Highbank memory controller.
  
  config EDAC_HIGHBANK_L2
         tristate "Highbank L2 Cache"
-       depends on EDAC_MM_EDAC && ARCH_HIGHBANK
+       depends on ARCH_HIGHBANK
         help
           Support for error detection and correction on the
           Calxeda Highbank memory controller.
  
  config EDAC_OCTEON_PC
         tristate "Cavium Octeon Primary Caches"
-       depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON
+       depends on CPU_CAVIUM_OCTEON
         help
           Support for error detection and correction on the primary caches of
           the cnMIPS cores of Cavium Octeon family SOCs.
  
  config EDAC_OCTEON_L2C
         tristate "Cavium Octeon Secondary Caches (L2C)"
-       depends on EDAC_MM_EDAC && CAVIUM_OCTEON_SOC
+       depends on CAVIUM_OCTEON_SOC
         help
           Support for error detection and correction on the
           Cavium Octeon family of SOCs.
  
  config EDAC_OCTEON_LMC
         tristate "Cavium Octeon DRAM Memory Controller (LMC)"
-       depends on EDAC_MM_EDAC && CAVIUM_OCTEON_SOC
+       depends on CAVIUM_OCTEON_SOC
         help
           Support for error detection and correction on the
           Cavium Octeon family of SOCs.
  
  config EDAC_OCTEON_PCI
         tristate "Cavium Octeon PCI Controller"
-       depends on EDAC_MM_EDAC && PCI && CAVIUM_OCTEON_SOC
+       depends on PCI && CAVIUM_OCTEON_SOC
         help
           Support for error detection and correction on the
           Cavium Octeon family of SOCs.
  
+config EDAC_THUNDERX
+       tristate "Cavium ThunderX EDAC"
+       depends on ARM64
+       depends on PCI
+       help
+         Support for error detection and correction on the
+         Cavium ThunderX memory controllers (LMC), Cache
+         Coherent Processor Interconnect (CCPI) and L2 cache
+         blocks (TAD, CBC, MCI).
+
  config EDAC_ALTERA
         bool "Altera SOCFPGA ECC"
-       depends on EDAC_MM_EDAC=y && ARCH_SOCFPGA
+       depends on EDAC=y && ARCH_SOCFPGA
         help
           Support for error detection and correction on the
           Altera SOCs. This must be selected for SDRAM ECC.
@@ -450,14 +445,14 @@ config EDAC_ALTERA_SDMMC
  
  config EDAC_SYNOPSYS
         tristate "Synopsys DDR Memory Controller"
-       depends on EDAC_MM_EDAC && ARCH_ZYNQ
+       depends on ARCH_ZYNQ
         help
           Support for error detection and correction on the Synopsys DDR
           memory controller.
  
  config EDAC_XGENE
         tristate "APM X-Gene SoC"
-       depends on EDAC_MM_EDAC && (ARM64 || COMPILE_TEST)
+       depends on (ARM64 || COMPILE_TEST)
         help
           Support for error detection and correction on the
           APM X-Gene family of SOCs.
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile

index 88e472e8b9a918e36e65b4385c9c412f92c7cea3..0fd9ffa632996b2820756033dc4894ff4778e207 100644 (file)
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -6,8 +6,7 @@
  # GNU General Public License.
  #
  
-obj-$(CONFIG_EDAC)                     := edac_stub.o
-obj-$(CONFIG_EDAC_MM_EDAC)             += edac_core.o
+obj-$(CONFIG_EDAC)                     := edac_core.o
  
  edac_core-y    := edac_mc.o edac_device.o edac_mc_sysfs.o
  edac_core-y    += edac_module.o edac_device_sysfs.o wq.o
@@ -32,6 +31,7 @@ obj-$(CONFIG_EDAC_I7300)              += i7300_edac.o
  obj-$(CONFIG_EDAC_I7CORE)              += i7core_edac.o
  obj-$(CONFIG_EDAC_SBRIDGE)             += sb_edac.o
  obj-$(CONFIG_EDAC_SKX)                 += skx_edac.o
+obj-$(CONFIG_EDAC_PND2)                        += pnd2_edac.o
  obj-$(CONFIG_EDAC_E7XXX)               += e7xxx_edac.o
  obj-$(CONFIG_EDAC_E752X)               += e752x_edac.o
  obj-$(CONFIG_EDAC_I82443BXGX)          += i82443bxgx_edac.o
@@ -66,13 +66,14 @@ obj-$(CONFIG_EDAC_AMD8131)          += amd8131_edac.o
  
  obj-$(CONFIG_EDAC_TILE)                        += tile_edac.o
  
-obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o
-obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o
+obj-$(CONFIG_EDAC_HIGHBANK_MC)         += highbank_mc_edac.o
+obj-$(CONFIG_EDAC_HIGHBANK_L2)         += highbank_l2_edac.o
  
  obj-$(CONFIG_EDAC_OCTEON_PC)           += octeon_edac-pc.o
  obj-$(CONFIG_EDAC_OCTEON_L2C)          += octeon_edac-l2c.o
  obj-$(CONFIG_EDAC_OCTEON_LMC)          += octeon_edac-lmc.o
  obj-$(CONFIG_EDAC_OCTEON_PCI)          += octeon_edac-pci.o
+obj-$(CONFIG_EDAC_THUNDERX)            += thunderx_edac.o
  
  obj-$(CONFIG_EDAC_ALTERA)              += altera_edac.o
  obj-$(CONFIG_EDAC_SYNOPSYS)            += synopsys_edac.o
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c

index c5a5b91f37f0b67499af386ed4e7031d4938ac94..7717b094fabb663ac4816d69a16fd3a9a973b4e1 100644 (file)
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -1023,13 +1023,23 @@ out:
         return ret;
  }
  
+static int socfpga_is_a10(void)
+{
+       return of_machine_is_compatible("altr,socfpga-arria10");
+}
+
  static int validate_parent_available(struct device_node *np);
  static const struct of_device_id altr_edac_a10_device_of_match[];
  static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat)
  {
         int irq;
-       struct device_node *child, *np = of_find_compatible_node(NULL, NULL,
-                                       "altr,socfpga-a10-ecc-manager");
+       struct device_node *child, *np;
+
+       if (!socfpga_is_a10())
+               return -ENODEV;
+
+       np = of_find_compatible_node(NULL, NULL,
+                                    "altr,socfpga-a10-ecc-manager");
         if (!np) {
                 edac_printk(KERN_ERR, EDAC_DEVICE, "ECC Manager not found\n");
                 return -ENODEV;
@@ -1545,8 +1555,12 @@ static const struct edac_device_prv_data a10_sdmmceccb_data = {
  static int __init socfpga_init_sdmmc_ecc(void)
  {
         int rc = -ENODEV;
-       struct device_node *child = of_find_compatible_node(NULL, NULL,
-                                               "altr,socfpga-sdmmc-ecc");
+       struct device_node *child;
+
+       if (!socfpga_is_a10())
+               return -ENODEV;
+
+       child = of_find_compatible_node(NULL, NULL, "altr,socfpga-sdmmc-ecc");
         if (!child) {
                 edac_printk(KERN_WARNING, EDAC_DEVICE, "SDMMC node not found\n");
                 return -ENODEV;
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c

index e5573c56b15e092df0eb0edf5d0d27490910f287..480072139b7aa00db3d27d9381c0e6eace04835e 100644 (file)
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -40,6 +40,11 @@
  #define edac_atomic_scrub(va, size) do { } while (0)
  #endif
  
+int edac_op_state = EDAC_OPSTATE_INVAL;
+EXPORT_SYMBOL_GPL(edac_op_state);
+
+static int edac_report = EDAC_REPORTING_ENABLED;
+
  /* lock to memory controller's control array */
  static DEFINE_MUTEX(mem_ctls_mutex);
  static LIST_HEAD(mc_devices);
@@ -52,6 +57,65 @@ static void const *edac_mc_owner;
  
  static struct bus_type mc_bus[EDAC_MAX_MCS];
  
+int edac_get_report_status(void)
+{
+       return edac_report;
+}
+EXPORT_SYMBOL_GPL(edac_get_report_status);
+
+void edac_set_report_status(int new)
+{
+       if (new == EDAC_REPORTING_ENABLED ||
+           new == EDAC_REPORTING_DISABLED ||
+           new == EDAC_REPORTING_FORCE)
+               edac_report = new;
+}
+EXPORT_SYMBOL_GPL(edac_set_report_status);
+
+static int edac_report_set(const char *str, const struct kernel_param *kp)
+{
+       if (!str)
+               return -EINVAL;
+
+       if (!strncmp(str, "on", 2))
+               edac_report = EDAC_REPORTING_ENABLED;
+       else if (!strncmp(str, "off", 3))
+               edac_report = EDAC_REPORTING_DISABLED;
+       else if (!strncmp(str, "force", 5))
+               edac_report = EDAC_REPORTING_FORCE;
+
+       return 0;
+}
+
+static int edac_report_get(char *buffer, const struct kernel_param *kp)
+{
+       int ret = 0;
+
+       switch (edac_report) {
+       case EDAC_REPORTING_ENABLED:
+               ret = sprintf(buffer, "on");
+               break;
+       case EDAC_REPORTING_DISABLED:
+               ret = sprintf(buffer, "off");
+               break;
+       case EDAC_REPORTING_FORCE:
+               ret = sprintf(buffer, "force");
+               break;
+       default:
+               ret = -EINVAL;
+               break;
+       }
+
+       return ret;
+}
+
+static const struct kernel_param_ops edac_report_ops = {
+       .set = edac_report_set,
+       .get = edac_report_get,
+};
+
+module_param_cb(edac_report, &edac_report_ops, &edac_report, 0644);
+
  unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
                                  unsigned len)
  {
@@ -504,22 +568,6 @@ struct mem_ctl_info *find_mci_by_dev(struct device *dev)
  }
  EXPORT_SYMBOL_GPL(find_mci_by_dev);
  
-/*
- * handler for EDAC to check if NMI type handler has asserted interrupt
- */
-static int edac_mc_assert_error_check_and_clear(void)
-{
-       int old_state;
-
-       if (edac_op_state == EDAC_OPSTATE_POLL)
-               return 1;
-
-       old_state = edac_err_assert;
-       edac_err_assert = 0;
-
-       return old_state;
-}
-
  /*
   * edac_mc_workq_function
   *     performs the operation scheduled by a workq request
@@ -536,7 +584,7 @@ static void edac_mc_workq_function(struct work_struct *work_req)
                 return;
         }
  
-       if (edac_mc_assert_error_check_and_clear())
+       if (edac_op_state == EDAC_OPSTATE_POLL)
                 mci->edac_check(mci);
  
         mutex_unlock(&mem_ctls_mutex);
@@ -601,7 +649,6 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci)
         }
  
         list_add_tail_rcu(&mci->link, insert_before);
-       atomic_inc(&edac_handlers);
         return 0;
  
  fail0:
@@ -619,7 +666,6 @@ fail1:
  
  static int del_mc_from_global_list(struct mem_ctl_info *mci)
  {
-       int handlers = atomic_dec_return(&edac_handlers);
         list_del_rcu(&mci->link);
  
         /* these are for safe removal of devices from global list while
@@ -628,7 +674,7 @@ static int del_mc_from_global_list(struct mem_ctl_info *mci)
         synchronize_rcu();
         INIT_LIST_HEAD(&mci->link);
  
-       return handlers;
+       return list_empty(&mc_devices);
  }
  
  struct mem_ctl_info *edac_mc_find(int idx)
@@ -763,7 +809,7 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
         /* mark MCI offline: */
         mci->op_state = OP_OFFLINE;
  
-       if (!del_mc_from_global_list(mci))
+       if (del_mc_from_global_list(mci))
                 edac_mc_owner = NULL;
  
         mutex_unlock(&mem_ctls_mutex);
@@ -1195,10 +1241,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
  
         /* Report the error via the trace interface */
         grain_bits = fls_long(e->grain) + 1;
-       trace_mc_event(type, e->msg, e->label, e->error_count,
-                      mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
-                      (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page,
-                      grain_bits, e->syndrome, e->other_detail);
+
+       if (IS_ENABLED(CONFIG_RAS))
+               trace_mc_event(type, e->msg, e->label, e->error_count,
+                              mci->mc_idx, e->top_layer, e->mid_layer,
+                              e->low_layer,
+                              (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page,
+                              grain_bits, e->syndrome, e->other_detail);
  
         edac_raw_mc_handle_error(type, mci, e);
  }
diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c

deleted file mode 100644 (file)

index 952e411..0000000
--- a/drivers/edac/edac_stub.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * common EDAC components that must be in kernel
- *
- * Author: Dave Jiang <djiang@mvista.com>
- *
- * 2007 (c) MontaVista Software, Inc.
- * 2010 (c) Advanced Micro Devices Inc.
- *         Borislav Petkov <bp@alien8.de>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- *
- */
-#include <linux/module.h>
-#include <linux/edac.h>
-#include <linux/atomic.h>
-#include <linux/device.h>
-
-int edac_op_state = EDAC_OPSTATE_INVAL;
-EXPORT_SYMBOL_GPL(edac_op_state);
-
-atomic_t edac_handlers = ATOMIC_INIT(0);
-EXPORT_SYMBOL_GPL(edac_handlers);
-
-int edac_err_assert = 0;
-EXPORT_SYMBOL_GPL(edac_err_assert);
-
-int edac_report_status = EDAC_REPORTING_ENABLED;
-EXPORT_SYMBOL_GPL(edac_report_status);
-
-static int __init edac_report_setup(char *str)
-{
-       if (!str)
-               return -EINVAL;
-
-       if (!strncmp(str, "on", 2))
-               set_edac_report_status(EDAC_REPORTING_ENABLED);
-       else if (!strncmp(str, "off", 3))
-               set_edac_report_status(EDAC_REPORTING_DISABLED);
-       else if (!strncmp(str, "force", 5))
-               set_edac_report_status(EDAC_REPORTING_FORCE);
-
-       return 0;
-}
-__setup("edac_report=", edac_report_setup);
-
-/*
- * called to determine if there is an EDAC driver interested in
- * knowing an event (such as NMI) occurred
- */
-int edac_handler_set(void)
-{
-       if (edac_op_state == EDAC_OPSTATE_POLL)
-               return 0;
-
-       return atomic_read(&edac_handlers);
-}
-EXPORT_SYMBOL_GPL(edac_handler_set);
-
-/*
- * handler for NMI type of interrupts to assert error
- */
-void edac_atomic_assert_error(void)
-{
-       edac_err_assert++;
-}
-EXPORT_SYMBOL_GPL(edac_atomic_assert_error);
diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c

index 1670d27bcac82d51cbb3de30d75e261100ddb83e..f683919981b06730090c2a11b98d45a5c0713944 100644 (file)
--- a/drivers/edac/i5000_edac.c
+++ b/drivers/edac/i5000_edac.c
@@ -1293,7 +1293,7 @@ static int i5000_init_csrows(struct mem_ctl_info *mci)
                         dimm->mtype = MEM_FB_DDR2;
  
                         /* ask what device type on this row */
-                       if (MTR_DRAM_WIDTH(mtr))
+                       if (MTR_DRAM_WIDTH(mtr) == 8)
                                 dimm->dtype = DEV_X8;
                         else
                                 dimm->dtype = DEV_X4;
diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c

index abf6ef22e220602f48559504ec18e77c5115de5b..37a9ba71da449bab30c12438325ecc419d9fa3e8 100644 (file)
--- a/drivers/edac/i5400_edac.c
+++ b/drivers/edac/i5400_edac.c
@@ -1207,13 +1207,14 @@ static int i5400_init_dimms(struct mem_ctl_info *mci)
  
                         dimm->nr_pages = size_mb << 8;
                         dimm->grain = 8;
-                       dimm->dtype = MTR_DRAM_WIDTH(mtr) ? DEV_X8 : DEV_X4;
+                       dimm->dtype = MTR_DRAM_WIDTH(mtr) == 8 ?
+                                     DEV_X8 : DEV_X4;
                         dimm->mtype = MEM_FB_DDR2;
                         /*
                          * The eccc mechanism is SDDC (aka SECC), with
                          * is similar to Chipkill.
                          */
-                       dimm->edac_mode = MTR_DRAM_WIDTH(mtr) ?
+                       dimm->edac_mode = MTR_DRAM_WIDTH(mtr) == 8 ?
                                           EDAC_S8ECD8ED : EDAC_S4ECD4ED;
                         ndimms++;
                 }
diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c

new file mode 100644 (file)

index 0000000..1cad5a9
--- /dev/null
+++ b/drivers/edac/pnd2_edac.c
@@ -0,0 +1,1546 @@
+/*
+ * Driver for Pondicherry2 memory controller.
+ *
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * [Derived from sb_edac.c]
+ *
+ * Translation of system physical addresses to DIMM addresses
+ * is a two stage process:
+ *
+ * First the Pondicherry 2 memory controller handles slice and channel interleaving
+ * in "sys2pmi()". This is (almost) completely common between platforms.
+ *
+ * Then a platform specific dunit (DIMM unit) completes the process to provide DIMM,
+ * rank, bank, row and column using the appropriate "dunit_ops" functions/parameters.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/edac.h>
+#include <linux/mmzone.h>
+#include <linux/smp.h>
+#include <linux/bitmap.h>
+#include <linux/math64.h>
+#include <linux/mod_devicetable.h>
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
+#include <asm/processor.h>
+#include <asm/mce.h>
+
+#include "edac_mc.h"
+#include "edac_module.h"
+#include "pnd2_edac.h"
+
+#define APL_NUM_CHANNELS       4
+#define DNV_NUM_CHANNELS       2
+#define DNV_MAX_DIMMS          2 /* Max DIMMs per channel */
+
+enum type {
+       APL,
+       DNV, /* All requests go to PMI CH0 on each slice (CH1 disabled) */
+};
+
+struct dram_addr {
+       int chan;
+       int dimm;
+       int rank;
+       int bank;
+       int row;
+       int col;
+};
+
+struct pnd2_pvt {
+       int dimm_geom[APL_NUM_CHANNELS];
+       u64 tolm, tohm;
+};
+
+/*
+ * System address space is divided into multiple regions with
+ * different interleave rules in each. The as0/as1 regions
+ * have no interleaving at all. The as2 region is interleaved
+ * between two channels. The mot region is magic and may overlap
+ * other regions, with its interleave rules taking precedence.
+ * Addresses not in any of these regions are interleaved across
+ * all four channels.
+ */
+static struct region {
+       u64     base;
+       u64     limit;
+       u8      enabled;
+} mot, as0, as1, as2;
+
+static struct dunit_ops {
+       char *name;
+       enum type type;
+       int pmiaddr_shift;
+       int pmiidx_shift;
+       int channels;
+       int dimms_per_channel;
+       int (*rd_reg)(int port, int off, int op, void *data, size_t sz, char *name);
+       int (*get_registers)(void);
+       int (*check_ecc)(void);
+       void (*mk_region)(char *name, struct region *rp, void *asym);
+       void (*get_dimm_config)(struct mem_ctl_info *mci);
+       int (*pmi2mem)(struct mem_ctl_info *mci, u64 pmiaddr, u32 pmiidx,
+                                  struct dram_addr *daddr, char *msg);
+} *ops;
+
+static struct mem_ctl_info *pnd2_mci;
+
+#define PND2_MSG_SIZE  256
+
+/* Debug macros */
+#define pnd2_printk(level, fmt, arg...)                        \
+       edac_printk(level, "pnd2", fmt, ##arg)
+
+#define pnd2_mc_printk(mci, level, fmt, arg...)        \
+       edac_mc_chipset_printk(mci, level, "pnd2", fmt, ##arg)
+
+#define MOT_CHAN_INTLV_BIT_1SLC_2CH 12
+#define MOT_CHAN_INTLV_BIT_2SLC_2CH 13
+#define SELECTOR_DISABLED (-1)
+#define _4GB (1ul << 32)
+
+#define PMI_ADDRESS_WIDTH      31
+#define PND_MAX_PHYS_BIT       39
+
+#define APL_ASYMSHIFT          28
+#define DNV_ASYMSHIFT          31
+#define CH_HASH_MASK_LSB       6
+#define SLICE_HASH_MASK_LSB    6
+#define MOT_SLC_INTLV_BIT      12
+#define LOG2_PMI_ADDR_GRANULARITY      5
+#define MOT_SHIFT      24
+
+#define GET_BITFIELD(v, lo, hi)        (((v) & GENMASK_ULL(hi, lo)) >> (lo))
+#define U64_LSHIFT(val, s)     ((u64)(val) << (s))
+
+#ifdef CONFIG_X86_INTEL_SBI_APL
+#include "linux/platform_data/sbi_apl.h"
+int sbi_send(int port, int off, int op, u32 *data)
+{
+       struct sbi_apl_message sbi_arg;
+       int ret, read = 0;
+
+       memset(&sbi_arg, 0, sizeof(sbi_arg));
+
+       if (op == 0 || op == 4 || op == 6)
+               read = 1;
+       else
+               sbi_arg.data = *data;
+
+       sbi_arg.opcode = op;
+       sbi_arg.port_address = port;
+       sbi_arg.register_offset = off;
+       ret = sbi_apl_commit(&sbi_arg);
+       if (ret || sbi_arg.status)
+               edac_dbg(2, "sbi_send status=%d ret=%d data=%x\n",
+                                sbi_arg.status, ret, sbi_arg.data);
+
+       if (ret == 0)
+               ret = sbi_arg.status;
+
+       if (ret == 0 && read)
+               *data = sbi_arg.data;
+
+       return ret;
+}
+#else
+int sbi_send(int port, int off, int op, u32 *data)
+{
+       return -EUNATCH;
+}
+#endif
+
+/*
+ * Read a 4- or 8-byte register on Apollo Lake through sbi_send().
+ *
+ * @port, @off, @op: forwarded to sbi_send(). For an 8-byte register the
+ *     upper dword is read from @off + 4 first, then the lower one from @off.
+ * @data: destination buffer of at least @sz bytes.
+ * @sz: register size in bytes; sizes other than 4 and 8 read nothing.
+ * @name: register name, used only in the debug output.
+ *
+ * Returns 0 on success, otherwise the first non-zero value returned by
+ * sbi_send().
+ */
+static int apl_rd_reg(int port, int off, int op, void *data, size_t sz, char *name)
+{
+       int     lo_ret, ret = 0;
+
+       edac_dbg(2, "Read %s port=%x off=%x op=%x\n", name, port, off, op);
+       switch (sz) {
+       case 8:
+               ret = sbi_send(port, off + 4, op, (u32 *)(data + 4));
+               /* fall through - the lower dword is read the same way as for sz == 4 */
+       case 4:
+               lo_ret = sbi_send(port, off, op, (u32 *)data);
+               /* Do not let the lower-dword status hide an upper-dword failure. */
+               if (!ret)
+                       ret = lo_ret;
+               pnd2_printk(KERN_DEBUG, "%s=%x%08x ret=%d\n", name,
+                                       sz == 8 ? *((u32 *)(data + 4)) : 0, *((u32 *)data), ret);
+               break;
+       }
+
+       return ret;
+}
+
+static u64 get_mem_ctrl_hub_base_addr(void)
+{
+       struct b_cr_mchbar_lo_pci lo;
+       struct b_cr_mchbar_hi_pci hi;
+       struct pci_dev *pdev;
+
+       pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x1980, NULL);
+       if (pdev) {
+               pci_read_config_dword(pdev, 0x48, (u32 *)&lo);
+               pci_read_config_dword(pdev, 0x4c, (u32 *)&hi);
+               pci_dev_put(pdev);
+       } else {
+               return 0;
+       }
+
+       if (!lo.enable) {
+               edac_dbg(2, "MMIO via memory controller hub base address is disabled!\n");
+               return 0;
+       }
+
+       return U64_LSHIFT(hi.base, 32) | U64_LSHIFT(lo.base, 15);
+}
+
+static u64 get_sideband_reg_base_addr(void)
+{
+       struct pci_dev *pdev;
+       u32 hi, lo;
+
+       pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x19dd, NULL);
+       if (pdev) {
+               pci_read_config_dword(pdev, 0x10, &lo);
+               pci_read_config_dword(pdev, 0x14, &hi);
+               pci_dev_put(pdev);
+               return (U64_LSHIFT(hi, 32) | U64_LSHIFT(lo, 0));
+       } else {
+               return 0xfd000000;
+       }
+}
+
+/*
+ * Read a register on Denverton.
+ *
+ * op == 4 reads one dword at @off from the PCI config space of Intel
+ * device 0x1980. Any other op is an MMIO read: op == 0 with port == 0x4c
+ * uses the memory controller hub base address, everything else uses the
+ * sideband register base plus (@port << 16).
+ *
+ * @data: destination buffer of at least @sz bytes.
+ * @sz: when 8, the MMIO path also reads the dword at @off + 4.
+ * @name: register name, used only in the debug output.
+ *
+ * Returns 0 on success, -ENODEV when the PCI device, the base address or
+ * the mapping is not available.
+ */
+static int dnv_rd_reg(int port, int off, int op, void *data, size_t sz, char *name)
+{
+       struct pci_dev *pdev;
+       void __iomem *base;
+       u64 addr;
+
+       if (op == 4) {
+               pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x1980, NULL);
+               if (!pdev)
+                       return -ENODEV;
+
+               pci_read_config_dword(pdev, off, data);
+               pci_dev_put(pdev);
+       } else {
+               /* MMIO via memory controller hub base address */
+               if (op == 0 && port == 0x4c) {
+                       addr = get_mem_ctrl_hub_base_addr();
+                       if (!addr)
+                               return -ENODEV;
+               } else {
+                       /* MMIO via sideband register base address */
+                       addr = get_sideband_reg_base_addr();
+                       if (!addr)
+                               return -ENODEV;
+                       addr += (port << 16);
+               }
+
+               base = ioremap((resource_size_t)addr, 0x10000);
+               if (!base)
+                       return -ENODEV;
+
+               /* ioremap() cookies must be accessed through the MMIO accessors. */
+               if (sz == 8)
+                       *(u32 *)(data + 4) = readl(base + off + 4);
+               *(u32 *)data = readl(base + off);
+
+               iounmap(base);
+       }
+
+       edac_dbg(2, "Read %s=%.8x_%.8x\n", name,
+                       (sz == 8) ? *(u32 *)(data + 4) : 0, *(u32 *)data);
+
+       return 0;
+}
+
+#define RD_REGP(regp, regname, port)   \
+       ops->rd_reg(port,                                       \
+               regname##_offset,                               \
+               regname##_r_opcode,                             \
+               regp, sizeof(struct regname),   \
+               #regname)
+
+#define RD_REG(regp, regname)                  \
+       ops->rd_reg(regname ## _port,           \
+               regname##_offset,                               \
+               regname##_r_opcode,                             \
+               regp, sizeof(struct regname),   \
+               #regname)
+
+static u64 top_lm, top_hm;
+static bool two_slices;
+static bool two_channels; /* Both PMI channels in one slice enabled */
+
+static u8 sym_chan_mask;
+static u8 asym_chan_mask;
+static u8 chan_mask;
+
+static int slice_selector = -1;
+static int chan_selector = -1;
+static u64 slice_hash_mask;
+static u64 chan_hash_mask;
+
+static void mk_region(char *name, struct region *rp, u64 base, u64 limit)
+{
+       rp->enabled = 1;
+       rp->base = base;
+       rp->limit = limit;
+       edac_dbg(2, "Region:%s [%llx, %llx]\n", name, base, limit);
+}
+
+static void mk_region_mask(char *name, struct region *rp, u64 base, u64 mask)
+{
+       if (mask == 0) {
+               pr_info(FW_BUG "MOT mask cannot be zero\n");
+               return;
+       }
+       if (mask != GENMASK_ULL(PND_MAX_PHYS_BIT, __ffs(mask))) {
+               pr_info(FW_BUG "MOT mask not power of two\n");
+               return;
+       }
+       if (base & ~mask) {
+               pr_info(FW_BUG "MOT region base/mask alignment error\n");
+               return;
+       }
+       rp->base = base;
+       rp->limit = (base | ~mask) & GENMASK_ULL(PND_MAX_PHYS_BIT, 0);
+       rp->enabled = 1;
+       edac_dbg(2, "Region:%s [%llx, %llx]\n", name, base, rp->limit);
+}
+
+static bool in_region(struct region *rp, u64 addr)
+{
+       if (!rp->enabled)
+               return false;
+
+       return rp->base <= addr && addr <= rp->limit;
+}
+
+static int gen_sym_mask(struct b_cr_slice_channel_hash *p)
+{
+       int mask = 0;
+
+       if (!p->slice_0_mem_disabled)
+               mask |= p->sym_slice0_channel_enabled;
+
+       if (!p->slice_1_disabled)
+               mask |= p->sym_slice1_channel_enabled << 2;
+
+       if (p->ch_1_disabled || p->enable_pmi_dual_data_mode)
+               mask &= 0x5;
+
+       return mask;
+}
+
+static int gen_asym_mask(struct b_cr_slice_channel_hash *p,
+                        struct b_cr_asym_mem_region0_mchbar *as0,
+                        struct b_cr_asym_mem_region1_mchbar *as1,
+                        struct b_cr_asym_2way_mem_region_mchbar *as2way)
+{
+       const int intlv[] = { 0x5, 0xA, 0x3, 0xC };
+       int mask = 0;
+
+       if (as2way->asym_2way_interleave_enable)
+               mask = intlv[as2way->asym_2way_intlv_mode];
+       if (as0->slice0_asym_enable)
+               mask |= (1 << as0->slice0_asym_channel_select);
+       if (as1->slice1_asym_enable)
+               mask |= (4 << as1->slice1_asym_channel_select);
+       if (p->slice_0_mem_disabled)
+               mask &= 0xc;
+       if (p->slice_1_disabled)
+               mask &= 0x3;
+       if (p->ch_1_disabled || p->enable_pmi_dual_data_mode)
+               mask &= 0x5;
+
+       return mask;
+}
+
+static struct b_cr_tolud_pci tolud;
+static struct b_cr_touud_lo_pci touud_lo;
+static struct b_cr_touud_hi_pci touud_hi;
+static struct b_cr_asym_mem_region0_mchbar asym0;
+static struct b_cr_asym_mem_region1_mchbar asym1;
+static struct b_cr_asym_2way_mem_region_mchbar asym_2way;
+static struct b_cr_mot_out_base_mchbar mot_base;
+static struct b_cr_mot_out_mask_mchbar mot_mask;
+static struct b_cr_slice_channel_hash chash;
+
+/* Apollo Lake dunit */
+/*
+ * Validated on board with just two DIMMs in the [0] and [2] positions
+ * in this array. Other port number matches documentation, but caution
+ * advised.
+ */
+static const int apl_dports[APL_NUM_CHANNELS] = { 0x18, 0x10, 0x11, 0x19 };
+static struct d_cr_drp0 drp0[APL_NUM_CHANNELS];
+
+/* Denverton dunit */
+static const int dnv_dports[DNV_NUM_CHANNELS] = { 0x10, 0x12 };
+static struct d_cr_dsch dsch;
+static struct d_cr_ecc_ctrl ecc_ctrl[DNV_NUM_CHANNELS];
+static struct d_cr_drp drp[DNV_NUM_CHANNELS];
+static struct d_cr_dmap dmap[DNV_NUM_CHANNELS];
+static struct d_cr_dmap1 dmap1[DNV_NUM_CHANNELS];
+static struct d_cr_dmap2 dmap2[DNV_NUM_CHANNELS];
+static struct d_cr_dmap3 dmap3[DNV_NUM_CHANNELS];
+static struct d_cr_dmap4 dmap4[DNV_NUM_CHANNELS];
+static struct d_cr_dmap5 dmap5[DNV_NUM_CHANNELS];
+
+static void apl_mk_region(char *name, struct region *rp, void *asym)
+{
+       struct b_cr_asym_mem_region0_mchbar *a = asym;
+
+       mk_region(name, rp,
+                         U64_LSHIFT(a->slice0_asym_base, APL_ASYMSHIFT),
+                         U64_LSHIFT(a->slice0_asym_limit, APL_ASYMSHIFT) +
+                         GENMASK_ULL(APL_ASYMSHIFT - 1, 0));
+}
+
+static void dnv_mk_region(char *name, struct region *rp, void *asym)
+{
+       struct b_cr_asym_mem_region_denverton *a = asym;
+
+       mk_region(name, rp,
+                         U64_LSHIFT(a->slice_asym_base, DNV_ASYMSHIFT),
+                         U64_LSHIFT(a->slice_asym_limit, DNV_ASYMSHIFT) +
+                         GENMASK_ULL(DNV_ASYMSHIFT - 1, 0));
+}
+
+static int apl_get_registers(void)
+{
+       int i;
+
+       if (RD_REG(&asym_2way, b_cr_asym_2way_mem_region_mchbar))
+               return -ENODEV;
+
+       for (i = 0; i < APL_NUM_CHANNELS; i++)
+               if (RD_REGP(&drp0[i], d_cr_drp0, apl_dports[i]))
+                       return -ENODEV;
+
+       return 0;
+}
+
+static int dnv_get_registers(void)
+{
+       int i;
+
+       if (RD_REG(&dsch, d_cr_dsch))
+               return -ENODEV;
+
+       for (i = 0; i < DNV_NUM_CHANNELS; i++)
+               if (RD_REGP(&ecc_ctrl[i], d_cr_ecc_ctrl, dnv_dports[i]) ||
+                       RD_REGP(&drp[i], d_cr_drp, dnv_dports[i]) ||
+                       RD_REGP(&dmap[i], d_cr_dmap, dnv_dports[i]) ||
+                       RD_REGP(&dmap1[i], d_cr_dmap1, dnv_dports[i]) ||
+                       RD_REGP(&dmap2[i], d_cr_dmap2, dnv_dports[i]) ||
+                       RD_REGP(&dmap3[i], d_cr_dmap3, dnv_dports[i]) ||
+                       RD_REGP(&dmap4[i], d_cr_dmap4, dnv_dports[i]) ||
+                       RD_REGP(&dmap5[i], d_cr_dmap5, dnv_dports[i]))
+                       return -ENODEV;
+
+       return 0;
+}
+
+/*
+ * Read all the h/w config registers once here (they don't
+ * change at run time. Figure out which address ranges have
+ * which interleave characteristics.
+ */
+static int get_registers(void)
+{
+       const int intlv[] = { 10, 11, 12, 12 };
+
+       if (RD_REG(&tolud, b_cr_tolud_pci) ||
+               RD_REG(&touud_lo, b_cr_touud_lo_pci) ||
+               RD_REG(&touud_hi, b_cr_touud_hi_pci) ||
+               RD_REG(&asym0, b_cr_asym_mem_region0_mchbar) ||
+               RD_REG(&asym1, b_cr_asym_mem_region1_mchbar) ||
+               RD_REG(&mot_base, b_cr_mot_out_base_mchbar) ||
+               RD_REG(&mot_mask, b_cr_mot_out_mask_mchbar) ||
+               RD_REG(&chash, b_cr_slice_channel_hash))
+               return -ENODEV;
+
+       if (ops->get_registers())
+               return -ENODEV;
+
+       if (ops->type == DNV) {
+               /* PMI channel idx (always 0) for asymmetric region */
+               asym0.slice0_asym_channel_select = 0;
+               asym1.slice1_asym_channel_select = 0;
+               /* PMI channel bitmap (always 1) for symmetric region */
+               chash.sym_slice0_channel_enabled = 0x1;
+               chash.sym_slice1_channel_enabled = 0x1;
+       }
+
+       if (asym0.slice0_asym_enable)
+               ops->mk_region("as0", &as0, &asym0);
+
+       if (asym1.slice1_asym_enable)
+               ops->mk_region("as1", &as1, &asym1);
+
+       if (asym_2way.asym_2way_interleave_enable) {
+               mk_region("as2way", &as2,
+                                 U64_LSHIFT(asym_2way.asym_2way_base, APL_ASYMSHIFT),
+                                 U64_LSHIFT(asym_2way.asym_2way_limit, APL_ASYMSHIFT) +
+                                 GENMASK_ULL(APL_ASYMSHIFT - 1, 0));
+       }
+
+       if (mot_base.imr_en) {
+               mk_region_mask("mot", &mot,
+                                          U64_LSHIFT(mot_base.mot_out_base, MOT_SHIFT),
+                                          U64_LSHIFT(mot_mask.mot_out_mask, MOT_SHIFT));
+       }
+
+       top_lm = U64_LSHIFT(tolud.tolud, 20);
+       top_hm = U64_LSHIFT(touud_hi.touud, 32) | U64_LSHIFT(touud_lo.touud, 20);
+
+       two_slices = !chash.slice_1_disabled &&
+                                !chash.slice_0_mem_disabled &&
+                                (chash.sym_slice0_channel_enabled != 0) &&
+                                (chash.sym_slice1_channel_enabled != 0);
+       two_channels = !chash.ch_1_disabled &&
+                                !chash.enable_pmi_dual_data_mode &&
+                                ((chash.sym_slice0_channel_enabled == 3) ||
+                                (chash.sym_slice1_channel_enabled == 3));
+
+       sym_chan_mask = gen_sym_mask(&chash);
+       asym_chan_mask = gen_asym_mask(&chash, &asym0, &asym1, &asym_2way);
+       chan_mask = sym_chan_mask | asym_chan_mask;
+
+       if (two_slices && !two_channels) {
+               if (chash.hvm_mode)
+                       slice_selector = 29;
+               else
+                       slice_selector = intlv[chash.interleave_mode];
+       } else if (!two_slices && two_channels) {
+               if (chash.hvm_mode)
+                       chan_selector = 29;
+               else
+                       chan_selector = intlv[chash.interleave_mode];
+       } else if (two_slices && two_channels) {
+               if (chash.hvm_mode) {
+                       slice_selector = 29;
+                       chan_selector = 30;
+               } else {
+                       slice_selector = intlv[chash.interleave_mode];
+                       chan_selector = intlv[chash.interleave_mode] + 1;
+               }
+       }
+
+       if (two_slices) {
+               if (!chash.hvm_mode)
+                       slice_hash_mask = chash.slice_hash_mask << SLICE_HASH_MASK_LSB;
+               if (!two_channels)
+                       slice_hash_mask |= BIT_ULL(slice_selector);
+       }
+
+       if (two_channels) {
+               if (!chash.hvm_mode)
+                       chan_hash_mask = chash.ch_hash_mask << CH_HASH_MASK_LSB;
+               if (!two_slices)
+                       chan_hash_mask |= BIT_ULL(chan_selector);
+       }
+
+       return 0;
+}
+
+/* Get a contiguous memory address (remove the MMIO gap) */
+static u64 remove_mmio_gap(u64 sys)
+{
+       return (sys < _4GB) ? sys : sys - (_4GB - top_lm);
+}
+
+/* Squeeze out one address bit, shift upper part down to fill gap */
+static void remove_addr_bit(u64 *addr, int bitidx)
+{
+       u64     mask;
+
+       if (bitidx == -1)
+               return;
+
+       mask = (1ull << bitidx) - 1;
+       *addr = ((*addr >> 1) & ~mask) | (*addr & mask);
+}
+
+/* XOR all the bits from addr specified in mask */
+static int hash_by_mask(u64 addr, u64 mask)
+{
+       u64 result = addr & mask;
+
+       result = (result >> 32) ^ result;
+       result = (result >> 16) ^ result;
+       result = (result >> 8) ^ result;
+       result = (result >> 4) ^ result;
+       result = (result >> 2) ^ result;
+       result = (result >> 1) ^ result;
+
+       return (int)result & 1;
+}
+
+/*
+ * First stage decode. Take the system address and figure out which
+ * second stage will deal with it based on interleave modes.
+ *
+ * @addr:    system address to decode
+ * @pmiidx:  output, index of the PMI channel that owns the address
+ * @pmiaddr: output, address with the MMIO gap and the interleave
+ *           selector bits removed
+ * @msg:     buffer of PND2_MSG_SIZE bytes; receives a description of
+ *           the problem when the address is rejected
+ *
+ * Returns 0 on success or -EINVAL when addr is not DRAM. The outputs are
+ * only written on success.
+ */
+static int sys2pmi(const u64 addr, u32 *pmiidx, u64 *pmiaddr, char *msg)
+{
+       u64 contig_addr, contig_base, contig_offset, contig_base_adj;
+       int mot_intlv_bit = two_slices ? MOT_CHAN_INTLV_BIT_2SLC_2CH :
+                                               MOT_CHAN_INTLV_BIT_1SLC_2CH;
+       /* Bits to squeeze out of the address at the end; disabled by default */
+       int slice_intlv_bit_rm = SELECTOR_DISABLED;
+       int chan_intlv_bit_rm = SELECTOR_DISABLED;
+       /* Determine if address is in the MOT region. */
+       bool mot_hit = in_region(&mot, addr);
+       /* Calculate the number of symmetric regions enabled. */
+       int sym_channels = hweight8(sym_chan_mask);
+
+       /*
+        * The amount we need to shift the asym base can be determined by the
+        * number of enabled symmetric channels.
+        * NOTE: This can only work because symmetric memory is not supposed
+        * to do a 3-way interleave.
+        */
+       int sym_chan_shift = sym_channels >> 1;
+
+       /* Give up if address is out of range, or in MMIO gap */
+       if (addr >= (1ul << PND_MAX_PHYS_BIT) ||
+          (addr >= top_lm && addr < _4GB) || addr >= top_hm) {
+               snprintf(msg, PND2_MSG_SIZE, "Error address 0x%llx is not DRAM", addr);
+               return -EINVAL;
+       }
+
+       /* Get a contiguous memory address (remove the MMIO gap) */
+       contig_addr = remove_mmio_gap(addr);
+
+       if (in_region(&as0, addr)) {
+               /* Asymmetric region 0: the channel comes straight from asym0 */
+               *pmiidx = asym0.slice0_asym_channel_select;
+
+               contig_base = remove_mmio_gap(as0.base);
+               contig_offset = contig_addr - contig_base;
+               /* The base only counts if this channel is also a symmetric one */
+               contig_base_adj = (contig_base >> sym_chan_shift) *
+                                                 ((chash.sym_slice0_channel_enabled >> (*pmiidx & 1)) & 1);
+               contig_addr = contig_offset + ((sym_channels > 0) ? contig_base_adj : 0ull);
+       } else if (in_region(&as1, addr)) {
+               /* Asymmetric region 1: same as above, channel index offset by 2 */
+               *pmiidx = 2u + asym1.slice1_asym_channel_select;
+
+               contig_base = remove_mmio_gap(as1.base);
+               contig_offset = contig_addr - contig_base;
+               contig_base_adj = (contig_base >> sym_chan_shift) *
+                                                 ((chash.sym_slice1_channel_enabled >> (*pmiidx & 1)) & 1);
+               contig_addr = contig_offset + ((sym_channels > 0) ? contig_base_adj : 0ull);
+       } else if (in_region(&as2, addr) && (asym_2way.asym_2way_intlv_mode == 0x3ul)) {
+               /* Asymmetric 2-way region: two channels interleaved */
+               bool channel1;
+
+               mot_intlv_bit = MOT_CHAN_INTLV_BIT_1SLC_2CH;
+               *pmiidx = (asym_2way.asym_2way_intlv_mode & 1) << 1;
+               channel1 = mot_hit ? ((bool)((addr >> mot_intlv_bit) & 1)) :
+                       hash_by_mask(contig_addr, chan_hash_mask);
+               *pmiidx |= (u32)channel1;
+
+               contig_base = remove_mmio_gap(as2.base);
+               chan_intlv_bit_rm = mot_hit ? mot_intlv_bit : chan_selector;
+               contig_offset = contig_addr - contig_base;
+               /* Drop the channel bit from the offset before re-basing it */
+               remove_addr_bit(&contig_offset, chan_intlv_bit_rm);
+               contig_addr = (contig_base >> sym_chan_shift) + contig_offset;
+       } else {
+               /* Otherwise we're in normal, boring symmetric mode. */
+               *pmiidx = 0u;
+
+               if (two_slices) {
+                       bool slice1;
+
+                       if (mot_hit) {
+                               slice_intlv_bit_rm = MOT_SLC_INTLV_BIT;
+                               slice1 = (addr >> MOT_SLC_INTLV_BIT) & 1;
+                       } else {
+                               slice_intlv_bit_rm = slice_selector;
+                               slice1 = hash_by_mask(addr, slice_hash_mask);
+                       }
+
+                       /* The slice is bit 1 of the PMI index */
+                       *pmiidx = (u32)slice1 << 1;
+               }
+
+               if (two_channels) {
+                       bool channel1;
+
+                       mot_intlv_bit = two_slices ? MOT_CHAN_INTLV_BIT_2SLC_2CH :
+                                                       MOT_CHAN_INTLV_BIT_1SLC_2CH;
+
+                       if (mot_hit) {
+                               chan_intlv_bit_rm = mot_intlv_bit;
+                               channel1 = (addr >> mot_intlv_bit) & 1;
+                       } else {
+                               chan_intlv_bit_rm = chan_selector;
+                               channel1 = hash_by_mask(contig_addr, chan_hash_mask);
+                       }
+
+                       /* The channel is bit 0 of the PMI index */
+                       *pmiidx |= (u32)channel1;
+               }
+       }
+
+       /* Remove the chan_selector bit first */
+       remove_addr_bit(&contig_addr, chan_intlv_bit_rm);
+       /* Remove the slice bit (we remove it second because it must be lower) */
+       remove_addr_bit(&contig_addr, slice_intlv_bit_rm);
+       *pmiaddr = contig_addr;
+
+       return 0;
+}
+
+/*
+ * Translate PMI address to memory (rank, row, bank, column)
+ *
+ * Each entry of a dimm_geometry bits[] table is built with one of the
+ * macros below: the bits above the low nibble say which field the PMI
+ * address bit belongs to and the low nibble is the bit position inside
+ * that field. apl_pmi2mem() splits the two with "& ~0xf" and "& 0xf".
+ */
+#define C(n) (0x10 | (n))      /* column */
+#define B(n) (0x20 | (n))      /* bank */
+#define R(n) (0x40 | (n))      /* row */
+#define RS   (0x80)                    /* rank */
+
+/* addrdec values (compared with the addrdec field of drp0[]) */
+#define AMAP_1KB       0
+#define AMAP_2KB       1
+#define AMAP_4KB       2
+#define AMAP_RSVD      3
+
+/* dden values (compared with the dden field of drp0[]) */
+#define DEN_4Gb                0
+#define DEN_8Gb                2
+
+/* dwid values (compared with the dwid field of drp0[]) */
+#define X8             0
+#define X16            1
+
+/*
+ * Table of the DIMM geometries this driver can decode. An entry is
+ * selected by matching addrdec, dden and dwid against the channel's
+ * drp0[] register (see apl_get_dimm_config()). bits[i] says which
+ * column/bank/row/rank bit PMI address bit i feeds; a zero entry marks
+ * the end of the used address bits.
+ */
+static struct dimm_geometry {
+       u8      addrdec;        /* one of the AMAP_* values */
+       u8      dden;           /* one of the DEN_* values */
+       u8      dwid;           /* X8 or X16 */
+       u8      rowbits, colbits;       /* used to compute the DIMM capacity */
+       u16     bits[PMI_ADDRESS_WIDTH];        /* C()/B()/R()/RS per PMI bit */
+} dimms[] = {
+       {
+               .addrdec = AMAP_1KB, .dden = DEN_4Gb, .dwid = X16,
+               .rowbits = 15, .colbits = 10,
+               .bits = {
+                       C(2),  C(3),  C(4),  C(5),  C(6),  B(0),  B(1),  B(2),  R(0),
+                       R(1),  R(2),  R(3),  R(4),  R(5),  R(6),  R(7),  R(8),  R(9),
+                       R(10), C(7),  C(8),  C(9),  R(11), RS,    R(12), R(13), R(14),
+                       0,     0,     0,     0
+               }
+       },
+       {
+               .addrdec = AMAP_1KB, .dden = DEN_4Gb, .dwid = X8,
+               .rowbits = 16, .colbits = 10,
+               .bits = {
+                       C(2),  C(3),  C(4),  C(5),  C(6),  B(0),  B(1),  B(2),  R(0),
+                       R(1),  R(2),  R(3),  R(4),  R(5),  R(6),  R(7),  R(8),  R(9),
+                       R(10), C(7),  C(8),  C(9),  R(11), RS,    R(12), R(13), R(14),
+                       R(15), 0,     0,     0
+               }
+       },
+       {
+               .addrdec = AMAP_1KB, .dden = DEN_8Gb, .dwid = X16,
+               .rowbits = 16, .colbits = 10,
+               .bits = {
+                       C(2),  C(3),  C(4),  C(5),  C(6),  B(0),  B(1),  B(2),  R(0),
+                       R(1),  R(2),  R(3),  R(4),  R(5),  R(6),  R(7),  R(8),  R(9),
+                       R(10), C(7),  C(8),  C(9),  R(11), RS,    R(12), R(13), R(14),
+                       R(15), 0,     0,     0
+               }
+       },
+       {
+               .addrdec = AMAP_1KB, .dden = DEN_8Gb, .dwid = X8,
+               .rowbits = 16, .colbits = 11,
+               .bits = {
+                       C(2),  C(3),  C(4),  C(5),  C(6),  B(0),  B(1),  B(2),  R(0),
+                       R(1),  R(2),  R(3),  R(4),  R(5),  R(6),  R(7),  R(8),  R(9),
+                       R(10), C(7),  C(8),  C(9),  R(11), RS,    C(11), R(12), R(13),
+                       R(14), R(15), 0,     0
+               }
+       },
+       {
+               .addrdec = AMAP_2KB, .dden = DEN_4Gb, .dwid = X16,
+               .rowbits = 15, .colbits = 10,
+               .bits = {
+                       C(2),  C(3),  C(4),  C(5),  C(6),  C(7),  B(0),  B(1),  B(2),
+                       R(0),  R(1),  R(2),  R(3),  R(4),  R(5),  R(6),  R(7),  R(8),
+                       R(9),  R(10), C(8),  C(9),  R(11), RS,    R(12), R(13), R(14),
+                       0,     0,     0,     0
+               }
+       },
+       {
+               .addrdec = AMAP_2KB, .dden = DEN_4Gb, .dwid = X8,
+               .rowbits = 16, .colbits = 10,
+               .bits = {
+                       C(2),  C(3),  C(4),  C(5),  C(6),  C(7),  B(0),  B(1),  B(2),
+                       R(0),  R(1),  R(2),  R(3),  R(4),  R(5),  R(6),  R(7),  R(8),
+                       R(9),  R(10), C(8),  C(9),  R(11), RS,    R(12), R(13), R(14),
+                       R(15), 0,     0,     0
+               }
+       },
+       {
+               .addrdec = AMAP_2KB, .dden = DEN_8Gb, .dwid = X16,
+               .rowbits = 16, .colbits = 10,
+               .bits = {
+                       C(2),  C(3),  C(4),  C(5),  C(6),  C(7),  B(0),  B(1),  B(2),
+                       R(0),  R(1),  R(2),  R(3),  R(4),  R(5),  R(6),  R(7),  R(8),
+                       R(9),  R(10), C(8),  C(9),  R(11), RS,    R(12), R(13), R(14),
+                       R(15), 0,     0,     0
+               }
+       },
+       {
+               .addrdec = AMAP_2KB, .dden = DEN_8Gb, .dwid = X8,
+               .rowbits = 16, .colbits = 11,
+               .bits = {
+                       C(2),  C(3),  C(4),  C(5),  C(6),  C(7),  B(0),  B(1),  B(2),
+                       R(0),  R(1),  R(2),  R(3),  R(4),  R(5),  R(6),  R(7),  R(8),
+                       R(9),  R(10), C(8),  C(9),  R(11), RS,    C(11), R(12), R(13),
+                       R(14), R(15), 0,     0
+               }
+       },
+       {
+               .addrdec = AMAP_4KB, .dden = DEN_4Gb, .dwid = X16,
+               .rowbits = 15, .colbits = 10,
+               .bits = {
+                       C(2),  C(3),  C(4),  C(5),  C(6),  C(7),  C(8),  B(0),  B(1),
+                       B(2),  R(0),  R(1),  R(2),  R(3),  R(4),  R(5),  R(6),  R(7),
+                       R(8),  R(9),  R(10), C(9),  R(11), RS,    R(12), R(13), R(14),
+                       0,     0,     0,     0
+               }
+       },
+       {
+               .addrdec = AMAP_4KB, .dden = DEN_4Gb, .dwid = X8,
+               .rowbits = 16, .colbits = 10,
+               .bits = {
+                       C(2),  C(3),  C(4),  C(5),  C(6),  C(7),  C(8),  B(0),  B(1),
+                       B(2),  R(0),  R(1),  R(2),  R(3),  R(4),  R(5),  R(6),  R(7),
+                       R(8),  R(9),  R(10), C(9),  R(11), RS,    R(12), R(13), R(14),
+                       R(15), 0,     0,     0
+               }
+       },
+       {
+               .addrdec = AMAP_4KB, .dden = DEN_8Gb, .dwid = X16,
+               .rowbits = 16, .colbits = 10,
+               .bits = {
+                       C(2),  C(3),  C(4),  C(5),  C(6),  C(7),  C(8),  B(0),  B(1),
+                       B(2),  R(0),  R(1),  R(2),  R(3),  R(4),  R(5),  R(6),  R(7),
+                       R(8),  R(9),  R(10), C(9),  R(11), RS,    R(12), R(13), R(14),
+                       R(15), 0,     0,     0
+               }
+       },
+       {
+               .addrdec = AMAP_4KB, .dden = DEN_8Gb, .dwid = X8,
+               .rowbits = 16, .colbits = 11,
+               .bits = {
+                       C(2),  C(3),  C(4),  C(5),  C(6),  C(7),  C(8),  B(0),  B(1),
+                       B(2),  R(0),  R(1),  R(2),  R(3),  R(4),  R(5),  R(6),  R(7),
+                       R(8),  R(9),  R(10), C(9),  R(11), RS,    C(11), R(12), R(13),
+                       R(14), R(15), 0,     0
+               }
+       }
+};
+
+/*
+ * Compute the hash contribution for bank bit "idx" (0, 1 or 2) of
+ * pmiaddr. The result is a single bit already placed at position idx;
+ * any other idx yields 0. "shft" is added to the address bit positions
+ * that take part in the hash (bit 22, used for idx 1, is not shifted).
+ */
+static int bank_hash(u64 pmiaddr, int idx, int shft)
+{
+       u64 parity;
+
+       if (idx == 0)
+               parity = (pmiaddr >> (12 + shft)) ^ (pmiaddr >> (9 + shft));
+       else if (idx == 1)
+               parity = (pmiaddr >> (10 + shft)) ^ (pmiaddr >> (8 + shft)) ^
+                        (pmiaddr >> 22);
+       else if (idx == 2)
+               parity = (pmiaddr >> (13 + shft)) ^ (pmiaddr >> (11 + shft));
+       else
+               return 0;
+
+       return (int)(parity & 1) << idx;
+}
+
+/* Rank hash: XOR of PMI address bits 16 and 10, returned as 0 or 1 */
+static int rank_hash(u64 pmiaddr)
+{
+       u64 bit16 = (pmiaddr >> 16) & 1;
+       u64 bit10 = (pmiaddr >> 10) & 1;
+
+       return (int)(bit16 ^ bit10);
+}
+
+/*
+ * Second stage decode. Compute rank, bank, row & column.
+ *
+ * Walks the PMI address one bit at a time and uses the bits[] table of
+ * the channel's DIMM geometry to decide which field each bit feeds.
+ *
+ * @mci:     memory controller; its private data holds the geometry
+ *           index recorded for each channel
+ * @pmiaddr: PMI address to decode
+ * @pmiidx:  channel index; selects the drp0[] entry and the geometry
+ * @daddr:   output, col/bank/row/rank are filled in and dimm is set to 0
+ * @msg:     buffer of PND2_MSG_SIZE bytes for an error description
+ *
+ * Returns 0 on success. Returns -EINVAL, leaving *daddr untouched, when
+ * the table is walked past its end or an address bit is set beyond the
+ * bits the geometry describes.
+ */
+static int apl_pmi2mem(struct mem_ctl_info *mci, u64 pmiaddr, u32 pmiidx,
+                      struct dram_addr *daddr, char *msg)
+{
+       struct d_cr_drp0 *cr_drp0 = &drp0[pmiidx];
+       struct pnd2_pvt *pvt = mci->pvt_info;
+       int g = pvt->dimm_geom[pmiidx];
+       struct dimm_geometry *d = &dimms[g];
+       int column = 0, bank = 0, row = 0, rank = 0;
+       /* skiprs becomes 1 once the rank select entry has been skipped */
+       int i, idx, type, skiprs = 0;
+
+       for (i = 0; i < PMI_ADDRESS_WIDTH; i++) {
+               int     bit = (pmiaddr >> i) & 1;
+
+               if (i + skiprs >= PMI_ADDRESS_WIDTH) {
+                       snprintf(msg, PND2_MSG_SIZE, "Bad dimm_geometry[] table\n");
+                       return -EINVAL;
+               }
+
+               /* Split the table entry into field type and bit index */
+               type = d->bits[i + skiprs] & ~0xf;
+               idx = d->bits[i + skiprs] & 0xf;
+
+               /*
+                * On single rank DIMMs ignore the rank select bit
+                * and shift remainder of "bits[]" down one place.
+                */
+               if (type == RS && (cr_drp0->rken0 + cr_drp0->rken1) == 1) {
+                       skiprs = 1;
+                       type = d->bits[i + skiprs] & ~0xf;
+                       idx = d->bits[i + skiprs] & 0xf;
+               }
+
+               /* type has its index masked off, so C(0)/B(0)/R(0) match any index */
+               switch (type) {
+               case C(0):
+                       column |= (bit << idx);
+                       break;
+               case B(0):
+                       bank |= (bit << idx);
+                       /* Bank address hashing is applied only when bahen is set */
+                       if (cr_drp0->bahen)
+                               bank ^= bank_hash(pmiaddr, idx, d->addrdec);
+                       break;
+               case R(0):
+                       row |= (bit << idx);
+                       break;
+               case RS:
+                       rank = bit;
+                       /* Rank hashing is applied only when rsien is set */
+                       if (cr_drp0->rsien)
+                               rank ^= rank_hash(pmiaddr);
+                       break;
+               default:
+                       /* End of the table: no address bit may be set from here on */
+                       if (bit) {
+                               snprintf(msg, PND2_MSG_SIZE, "Bad translation\n");
+                               return -EINVAL;
+                       }
+                       goto done;
+               }
+       }
+
+done:
+       daddr->col = column;
+       daddr->bank = bank;
+       daddr->row = row;
+       daddr->rank = rank;
+       daddr->dimm = 0;
+
+       return 0;
+}
+
+/* Pluck bit "in" from pmiaddr and return value shifted to bit "out" */
+#define dnv_get_bit(pmi, in, out) ((int)(((pmi) >> (in)) & 1u) << (out))
+
+/*
+ * Second stage decode driven by the dmap*[] registers: every rank,
+ * bank, row and column bit is plucked from the PMI address bit position
+ * named by the matching register field of channel pmiidx (plus a fixed
+ * offset of 6 or 13).
+ *
+ * Fills in daddr->rank, dimm, bank, row and col. mci and msg are not
+ * used here; the function always returns 0.
+ */
+static int dnv_pmi2mem(struct mem_ctl_info *mci, u64 pmiaddr, u32 pmiidx,
+                                          struct dram_addr *daddr, char *msg)
+{
+       /* Rank 0 or 1 */
+       daddr->rank = dnv_get_bit(pmiaddr, dmap[pmiidx].rs0 + 13, 0);
+       /* Rank 2 or 3 */
+       daddr->rank |= dnv_get_bit(pmiaddr, dmap[pmiidx].rs1 + 13, 1);
+
+       /*
+        * Normally ranks 0,1 are DIMM0, and 2,3 are DIMM1, but we
+        * flip them if DIMM1 is larger than DIMM0.
+        */
+       daddr->dimm = (daddr->rank >= 2) ^ drp[pmiidx].dimmflip;
+
+       /* Bank address bits 0-1, then bank group bit(s) above them */
+       daddr->bank = dnv_get_bit(pmiaddr, dmap[pmiidx].ba0 + 6, 0);
+       daddr->bank |= dnv_get_bit(pmiaddr, dmap[pmiidx].ba1 + 6, 1);
+       daddr->bank |= dnv_get_bit(pmiaddr, dmap[pmiidx].bg0 + 6, 2);
+       /* The fourth bank bit is only decoded when ddr4en is set */
+       if (dsch.ddr4en)
+               daddr->bank |= dnv_get_bit(pmiaddr, dmap[pmiidx].bg1 + 6, 3);
+       /* With bxor set, XOR selected row/column address bits into the bank */
+       if (dmap1[pmiidx].bxor) {
+               if (dsch.ddr4en) {
+                       daddr->bank ^= dnv_get_bit(pmiaddr, dmap3[pmiidx].row6 + 6, 0);
+                       daddr->bank ^= dnv_get_bit(pmiaddr, dmap3[pmiidx].row7 + 6, 1);
+                       if (dsch.chan_width == 0)
+                               /* 64/72 bit dram channel width */
+                               daddr->bank ^= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca3 + 6, 2);
+                       else
+                               /* 32/40 bit dram channel width */
+                               daddr->bank ^= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca4 + 6, 2);
+                       daddr->bank ^= dnv_get_bit(pmiaddr, dmap2[pmiidx].row2 + 6, 3);
+               } else {
+                       daddr->bank ^= dnv_get_bit(pmiaddr, dmap2[pmiidx].row2 + 6, 0);
+                       daddr->bank ^= dnv_get_bit(pmiaddr, dmap3[pmiidx].row6 + 6, 1);
+                       if (dsch.chan_width == 0)
+                               daddr->bank ^= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca3 + 6, 2);
+                       else
+                               daddr->bank ^= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca4 + 6, 2);
+               }
+       }
+
+       /* Row bits 0-13 are always decoded */
+       daddr->row = dnv_get_bit(pmiaddr, dmap2[pmiidx].row0 + 6, 0);
+       daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row1 + 6, 1);
+       daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row2 + 6, 2);
+       daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row3 + 6, 3);
+       daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row4 + 6, 4);
+       daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row5 + 6, 5);
+       daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row6 + 6, 6);
+       daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row7 + 6, 7);
+       daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row8 + 6, 8);
+       daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row9 + 6, 9);
+       daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row10 + 6, 10);
+       daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row11 + 6, 11);
+       daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row12 + 6, 12);
+       daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row13 + 6, 13);
+       /* Row bits 14-17 are skipped when their register field reads 31 */
+       if (dmap4[pmiidx].row14 != 31)
+               daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row14 + 6, 14);
+       if (dmap4[pmiidx].row15 != 31)
+               daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row15 + 6, 15);
+       if (dmap4[pmiidx].row16 != 31)
+               daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row16 + 6, 16);
+       if (dmap4[pmiidx].row17 != 31)
+               daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row17 + 6, 17);
+
+       /* Column bits 3-9; bits 0-2 are never set here */
+       daddr->col = dnv_get_bit(pmiaddr, dmap5[pmiidx].ca3 + 6, 3);
+       daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca4 + 6, 4);
+       daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca5 + 6, 5);
+       daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca6 + 6, 6);
+       daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca7 + 6, 7);
+       daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca8 + 6, 8);
+       daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca9 + 6, 9);
+       /* Column bit 11: only without ddr4en, and only if ca11 is not 0x3f */
+       if (!dsch.ddr4en && dmap1[pmiidx].ca11 != 0x3f)
+               daddr->col |= dnv_get_bit(pmiaddr, dmap1[pmiidx].ca11 + 13, 11);
+
+       return 0;
+}
+
+/*
+ * Check that channel "ch" can be handled: its dramtype must be 0 and ECC
+ * must be enabled. Returns 0 when usable; otherwise logs the reason and
+ * returns 1.
+ */
+static int check_channel(int ch)
+{
+       if (drp0[ch].dramtype == 0 && drp0[ch].eccen != 0)
+               return 0;
+
+       if (drp0[ch].dramtype != 0)
+               pnd2_printk(KERN_INFO, "Unsupported DIMM in channel %d\n", ch);
+       else
+               pnd2_printk(KERN_INFO, "ECC disabled on channel %d\n", ch);
+
+       return 1;
+}
+
+/*
+ * Run check_channel() on every channel present in chan_mask.
+ * Returns 0 if all of them pass, -EINVAL if at least one fails.
+ */
+static int apl_check_ecc_active(void)
+{
+       int ch, failures = 0;
+
+       /* Check dramtype and ECC mode for each present DIMM */
+       for (ch = 0; ch < APL_NUM_CHANNELS; ch++) {
+               if (!(chan_mask & BIT(ch)))
+                       continue;
+               failures += check_channel(ch);
+       }
+
+       if (failures)
+               return -EINVAL;
+       return 0;
+}
+
+/* Number of enabled ranks on a channel; non-zero means a DIMM is present */
+#define DIMMS_PRESENT(d) ((d)->rken0 + (d)->rken1 + (d)->rken2 + (d)->rken3)
+
+/*
+ * A populated channel must have ECC enabled. Returns 0 when channel "ch"
+ * is empty or has ECC on; otherwise logs a message and returns 1.
+ */
+static int check_unit(int ch)
+{
+       struct d_cr_drp *d = &drp[ch];
+
+       if (!DIMMS_PRESENT(d) || ecc_ctrl[ch].eccen)
+               return 0;
+
+       pnd2_printk(KERN_INFO, "ECC disabled on channel %d\n", ch);
+       return 1;
+}
+
+/*
+ * Run check_unit() on every channel.
+ * Returns 0 if all of them pass, -EINVAL if at least one fails.
+ */
+static int dnv_check_ecc_active(void)
+{
+       int ch, failures = 0;
+
+       for (ch = 0; ch < DNV_NUM_CHANNELS; ch++)
+               failures += check_unit(ch);
+
+       if (failures)
+               return -EINVAL;
+       return 0;
+}
+
+/*
+ * Decode system address "addr" all the way down to a DRAM location.
+ *
+ * Runs the first stage (sys2pmi) and then the second stage selected by
+ * ops->pmi2mem, filling *daddr. On failure the stage that failed has
+ * written an explanation into msg and its error code is returned;
+ * 0 is returned on success.
+ */
+static int get_memory_error_data(struct mem_ctl_info *mci, u64 addr,
+                                                                struct dram_addr *daddr, char *msg)
+{
+       u64     pmi_address;
+       u32     pmi_channel;
+       int     err;
+
+       err = sys2pmi(addr, &pmi_channel, &pmi_address, msg);
+       if (!err) {
+               pmi_address >>= ops->pmiaddr_shift;
+               /* pmi channel idx to dimm channel idx */
+               pmi_channel >>= ops->pmiidx_shift;
+               daddr->chan = pmi_channel;
+
+               err = ops->pmi2mem(mci, pmi_address, pmi_channel, daddr, msg);
+               if (!err)
+                       edac_dbg(0, "SysAddr=%llx PmiAddr=%llx Channel=%d DIMM=%d Rank=%d Bank=%d Row=%d Column=%d\n",
+                                addr, pmi_address, daddr->chan, daddr->dimm, daddr->rank, daddr->bank, daddr->row, daddr->col);
+       }
+
+       return err;
+}
+
+/*
+ * Decode one machine check record and report it to the EDAC core.
+ *
+ * @mci:   memory controller the error is reported against
+ * @m:     machine check record; status, mcgstatus and addr are used
+ * @daddr: output, DRAM location of the error; it is only filled in
+ *         when the address could be decoded
+ *
+ * Nothing is reported when the record carries no valid address. If the
+ * address cannot be decoded, the decoder's message is reported without
+ * location information.
+ */
+static void pnd2_mce_output_error(struct mem_ctl_info *mci, const struct mce *m,
+                                 struct dram_addr *daddr)
+{
+       enum hw_event_mc_err_type tp_event;
+       char *optype, msg[PND2_MSG_SIZE];
+       bool ripv = m->mcgstatus & MCG_STATUS_RIPV;
+       bool overflow = m->status & MCI_STATUS_OVER;
+       bool uc_err = m->status & MCI_STATUS_UC;
+       bool recov = m->status & MCI_STATUS_S;
+       u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
+       u32 mscod = GET_BITFIELD(m->status, 16, 31);
+       u32 errcode = GET_BITFIELD(m->status, 0, 15);
+       u32 optypenum = GET_BITFIELD(m->status, 4, 6);
+       int rc;
+
+       /*
+        * RIPV set means the saved restart IP is valid, so execution can
+        * resume after the machine check: such an uncorrected error is
+        * not fatal. Only an uncorrected error without a valid restart IP
+        * is reported as fatal.
+        */
+       tp_event = uc_err ? (ripv ? HW_EVENT_ERR_UNCORRECTED : HW_EVENT_ERR_FATAL) :
+                                                HW_EVENT_ERR_CORRECTED;
+
+       /*
+        * According to Table 15-9 of the Intel Architecture spec vol 3A,
+        * memory errors should fit in this mask:
+        *      000f 0000 1mmm cccc (binary)
+        * where:
+        *      f = Correction Report Filtering Bit. If 1, subsequent errors
+        *          won't be shown
+        *      mmm = error type
+        *      cccc = channel
+        * If the mask doesn't match, report an error to the parsing logic
+        */
+       if (!((errcode & 0xef80) == 0x80)) {
+               optype = "Can't parse: it is not a mem";
+       } else {
+               switch (optypenum) {
+               case 0:
+                       optype = "generic undef request error";
+                       break;
+               case 1:
+                       optype = "memory read error";
+                       break;
+               case 2:
+                       optype = "memory write error";
+                       break;
+               case 3:
+                       optype = "addr/cmd error";
+                       break;
+               case 4:
+                       optype = "memory scrubbing error";
+                       break;
+               default:
+                       optype = "reserved";
+                       break;
+               }
+       }
+
+       /* Only decode errors with a valid address (ADDRV) */
+       if (!(m->status & MCI_STATUS_ADDRV))
+               return;
+
+       /* On failure the decoder has left its explanation in msg */
+       rc = get_memory_error_data(mci, m->addr, daddr, msg);
+       if (rc)
+               goto address_error;
+
+       snprintf(msg, sizeof(msg),
+                "%s%s err_code:%04x:%04x channel:%d DIMM:%d rank:%d row:%d bank:%d col:%d",
+                overflow ? " OVERFLOW" : "", (uc_err && recov) ? " recoverable" : "", mscod,
+                errcode, daddr->chan, daddr->dimm, daddr->rank, daddr->row, daddr->bank, daddr->col);
+
+       edac_dbg(0, "%s\n", msg);
+
+       /* Call the helper to output message */
+       edac_mc_handle_error(tp_event, mci, core_err_cnt, m->addr >> PAGE_SHIFT,
+                                                m->addr & ~PAGE_MASK, 0, daddr->chan, daddr->dimm, -1, optype, msg);
+
+       return;
+
+address_error:
+       /* No location is known: report the decoder's message on its own */
+       edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0, -1, -1, -1, msg, "");
+}
+
+/*
+ * Fill in the EDAC dimm_info of every channel present in chan_mask.
+ *
+ * The channel's drp0[] register is matched against the dimms[] geometry
+ * table; the index of the match is remembered in the private data for
+ * later address decoding and used to compute the DIMM size. Channels
+ * without an allocated DIMM or with an unknown geometry are skipped.
+ */
+static void apl_get_dimm_config(struct mem_ctl_info *mci)
+{
+       struct pnd2_pvt *pvt = mci->pvt_info;
+       int     ch;
+
+       for (ch = 0; ch < APL_NUM_CHANNELS; ch++) {
+               struct dimm_info *dimm;
+               struct d_cr_drp0 *d;
+               u64     capacity;
+               int     g = 0;
+
+               if (!(chan_mask & BIT(ch)))
+                       continue;
+
+               dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, ch, 0, 0);
+               if (!dimm) {
+                       edac_dbg(0, "No allocated DIMM for channel %d\n", ch);
+                       continue;
+               }
+
+               /* Look for the geometry that matches this channel's register */
+               d = &drp0[ch];
+               while (g < ARRAY_SIZE(dimms) &&
+                      (dimms[g].addrdec != d->addrdec ||
+                       dimms[g].dden != d->dden ||
+                       dimms[g].dwid != d->dwid))
+                       g++;
+
+               if (g == ARRAY_SIZE(dimms)) {
+                       edac_dbg(0, "Channel %d: unrecognized DIMM\n", ch);
+                       continue;
+               }
+
+               pvt->dimm_geom[ch] = g;
+               capacity = (d->rken0 + d->rken1) * 8 * (1ul << dimms[g].rowbits) *
+                                  (1ul << dimms[g].colbits);
+               edac_dbg(0, "Channel %d: %lld MByte DIMM\n", ch, capacity >> (20 - 3));
+               dimm->nr_pages = MiB_TO_PAGES(capacity >> (20 - 3));
+               dimm->grain = 32;
+               if (d->dwid == 0)
+                       dimm->dtype = DEV_X8;
+               else
+                       dimm->dtype = DEV_X16;
+               dimm->mtype = MEM_DDR3;
+               dimm->edac_mode = EDAC_SECDED;
+               snprintf(dimm->label, sizeof(dimm->label), "Slice#%d_Chan#%d", ch / 2, ch % 2);
+       }
+}
+
+/*
+ * EDAC device type for each value of the dimmdwid0/dimmdwid1 register
+ * fields; used as a lookup table by dnv_get_dimm_config().
+ */
+static const int dnv_dtypes[] = {
+       DEV_X8, DEV_X4, DEV_X16, DEV_UNKNOWN
+};
+
+/*
+ * Fill in the EDAC dimm_info of every populated DIMM slot.
+ *
+ * The memory type and bank count follow dsch.ddr4en; the number of row
+ * and column bits is derived per channel from the dmap1[]/dmap4[]
+ * registers, where a field value of 31 (rows) or 0x3f (ca11) marks an
+ * address bit that is not in use.
+ */
+static void dnv_get_dimm_config(struct mem_ctl_info *mci)
+{
+       int     ranks_of_dimm[DNV_MAX_DIMMS];
+       int     memtype = dsch.ddr4en ? MEM_DDR4 : MEM_DDR3;
+       int     banks = dsch.ddr4en ? 16 : 8;
+       int     colbits = 10;
+       int     rowbits, ch, slot;
+       struct dimm_info *dimm;
+       struct d_cr_drp *d;
+       u64     capacity;
+
+       for (ch = 0; ch < DNV_NUM_CHANNELS; ch++) {
+               /* The first unused row bit gives the number of row bits */
+               if (dmap4[ch].row14 == 31)
+                       rowbits = 14;
+               else if (dmap4[ch].row15 == 31)
+                       rowbits = 15;
+               else if (dmap4[ch].row16 == 31)
+                       rowbits = 16;
+               else if (dmap4[ch].row17 == 31)
+                       rowbits = 17;
+               else
+                       rowbits = 18;
+
+               /* Only DDR3 can have more than 10 column bits */
+               if (memtype == MEM_DDR3)
+                       colbits = (dmap1[ch].ca11 != 0x3f) ? 12 : 10;
+
+               d = &drp[ch];
+               /* DIMM0 is present if rank0 and/or rank1 is enabled */
+               ranks_of_dimm[0] = d->rken0 + d->rken1;
+               /* DIMM1 is present if rank2 and/or rank3 is enabled */
+               ranks_of_dimm[1] = d->rken2 + d->rken3;
+
+               for (slot = 0; slot < DNV_MAX_DIMMS; slot++) {
+                       if (ranks_of_dimm[slot] == 0)
+                               continue;
+
+                       dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, ch, slot, 0);
+                       if (!dimm) {
+                               edac_dbg(0, "No allocated DIMM for channel %d DIMM %d\n", ch, slot);
+                               continue;
+                       }
+
+                       capacity = ranks_of_dimm[slot] * banks * (1ul << rowbits) * (1ul << colbits);
+                       edac_dbg(0, "Channel %d DIMM %d: %lld MByte DIMM\n", ch, slot, capacity >> (20 - 3));
+                       dimm->nr_pages = MiB_TO_PAGES(capacity >> (20 - 3));
+                       dimm->grain = 32;
+                       /*
+                        * NOTE(review): slot 0 reads dimmdwid1 and slot 1 reads
+                        * dimmdwid0, which looks swapped - confirm against the
+                        * register specification.
+                        */
+                       dimm->dtype = dnv_dtypes[slot ? d->dimmdwid0 : d->dimmdwid1];
+                       dimm->mtype = memtype;
+                       dimm->edac_mode = EDAC_SECDED;
+                       snprintf(dimm->label, sizeof(dimm->label), "Chan#%d_DIMM#%d", ch, slot);
+               }
+       }
+}
+
+/*
+ * Allocate a memory controller descriptor, describe its DIMMs and
+ * register it with the EDAC core.
+ *
+ * On success the new descriptor is stored in *ppmci and 0 is returned.
+ * Returns the negative result of ops->check_ecc() if that check fails,
+ * -ENOMEM if the allocation fails and -EINVAL if the EDAC core refuses
+ * the controller; *ppmci is not written in those cases.
+ */
+static int pnd2_register_mci(struct mem_ctl_info **ppmci)
+{
+       struct edac_mc_layer layers[2];
+       struct edac_mc_layer *chan_layer = &layers[0];
+       struct edac_mc_layer *slot_layer = &layers[1];
+       struct mem_ctl_info *mci;
+       struct pnd2_pvt *pvt;
+       int rc = ops->check_ecc();
+
+       if (rc < 0)
+               return rc;
+
+       /* Two layers: channels, then DIMM slots within a channel */
+       chan_layer->type = EDAC_MC_LAYER_CHANNEL;
+       chan_layer->size = ops->channels;
+       chan_layer->is_virt_csrow = false;
+       slot_layer->type = EDAC_MC_LAYER_SLOT;
+       slot_layer->size = ops->dimms_per_channel;
+       slot_layer->is_virt_csrow = true;
+
+       /* Allocate a new MC control structure */
+       mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
+       if (!mci)
+               return -ENOMEM;
+
+       pvt = mci->pvt_info;
+       memset(pvt, 0, sizeof(*pvt));
+
+       mci->mod_name = "pnd2_edac.c";
+       mci->dev_name = ops->name;
+       mci->ctl_name = "Pondicherry2";
+
+       /* Get dimm basic config and the memory layout */
+       ops->get_dimm_config(mci);
+
+       if (!edac_mc_add_mc(mci)) {
+               *ppmci = mci;
+               return 0;
+       }
+
+       edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
+       edac_mc_free(mci);
+       return -EINVAL;
+}
+
+/*
+ * Unregister the memory controller from the EDAC core and free it.
+ * Logs an error and does nothing if mci or its private data is missing.
+ */
+static void pnd2_unregister_mci(struct mem_ctl_info *mci)
+{
+       if (unlikely(!mci || !mci->pvt_info)) {
+               pnd2_printk(KERN_ERR, "Couldn't find mci handler\n");
+               return;
+       }
+
+       /*
+        * Remove MC sysfs nodes.
+        * NOTE(review): NULL is passed instead of a device pointer;
+        * pnd2_register_mci() never assigns mci->pdev, so this presumably
+        * relies on the lookup matching that unset pdev - confirm.
+        */
+       edac_mc_del_mc(NULL);
+       edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
+       edac_mc_free(mci);
+}
+
+/*
+ * Notifier callback registered with the core kernel mce code; it is
+ * called once for each logged error.
+ *
+ * Returns NOTIFY_DONE, leaving the record to other consumers, when EDAC
+ * reporting is disabled, no memory controller is registered or the
+ * record is not a memory error. Otherwise the record is logged and
+ * decoded and NOTIFY_STOP is returned.
+ */
+static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, void *data)
+{
+       struct mce *mce = data;
+       struct mem_ctl_info *mci;
+       struct dram_addr daddr;
+       const char *kind;
+
+       if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
+               return NOTIFY_DONE;
+
+       mci = pnd2_mci;
+       if (!mci)
+               return NOTIFY_DONE;
+
+       /*
+        * Errors from outside the memory controller are left to mcelog.
+        * A memory error has bit 7 = 1 and bits 8-11,13-15 = 0;
+        * bit 12 has a special meaning and is ignored here.
+        */
+       if ((mce->status & 0xefff) >> 7 != 1)
+               return NOTIFY_DONE;
+
+       kind = (mce->mcgstatus & MCG_STATUS_MCIP) ? "Exception" : "Event";
+
+       pnd2_mc_printk(mci, KERN_INFO, "HANDLING MCE MEMORY ERROR\n");
+       pnd2_mc_printk(mci, KERN_INFO, "CPU %u: Machine Check %s: %llx Bank %u: %llx\n",
+                                  mce->extcpu, kind, mce->mcgstatus, mce->bank, mce->status);
+       pnd2_mc_printk(mci, KERN_INFO, "TSC %llx ", mce->tsc);
+       pnd2_mc_printk(mci, KERN_INFO, "ADDR %llx ", mce->addr);
+       pnd2_mc_printk(mci, KERN_INFO, "MISC %llx ", mce->misc);
+       pnd2_mc_printk(mci, KERN_INFO, "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
+                                  mce->cpuvendor, mce->cpuid, mce->time, mce->socketid, mce->apicid);
+
+       pnd2_mce_output_error(mci, mce, &daddr);
+
+       /* Tell mcelog that the error was handled */
+       return NOTIFY_STOP;
+}
+
+/* Notifier block added to the MCE decode chain by pnd2_init() */
+static struct notifier_block pnd2_mce_dec = {
+       .notifier_call  = pnd2_mce_check_error,
+};
+
+#ifdef CONFIG_EDAC_DEBUG
+/*
+ * Write an address to this file to exercise the address decode
+ * logic in this driver.
+ */
+static u64 pnd2_fake_addr;
+/* Size of the buffer that holds the text of the last decode result */
+#define PND2_BLOB_SIZE 1024
+static char pnd2_result[PND2_BLOB_SIZE];
+/* debugfs directory that holds the two test files */
+static struct dentry *pnd2_test;
+/* Blob exposed through debugfs; size is updated after every decode */
+static struct debugfs_blob_wrapper pnd2_blob = {
+       .data = pnd2_result,
+       .size = 0
+};
+
+/*
+ * debugfs write handler: decode "val" as if it were the address of a
+ * memory read error and store the result as text in pnd2_blob.
+ *
+ * @data: points at the u64 that remembers the last address written
+ * @val:  system address to decode
+ *
+ * Always returns 0. When the address cannot be decoded the location
+ * fields in the result text are all zero.
+ */
+static int debugfs_u64_set(void *data, u64 val)
+{
+       struct dram_addr daddr;
+       struct mce m;
+
+       /*
+        * pnd2_mce_output_error() leaves daddr untouched when the decode
+        * fails, so start from zero rather than formatting stack garbage
+        * into the result blob. Zero the fake record as well so no field
+        * of it is ever read uninitialized.
+        */
+       memset(&daddr, 0, sizeof(daddr));
+       memset(&m, 0, sizeof(m));
+
+       *(u64 *)data = val;
+       m.mcgstatus = 0;
+       /* ADDRV + MemRd + Unknown channel */
+       m.status = MCI_STATUS_ADDRV + 0x9f;
+       m.addr = val;
+       pnd2_mce_output_error(pnd2_mci, &m, &daddr);
+       snprintf(pnd2_blob.data, PND2_BLOB_SIZE,
+                        "SysAddr=%llx Channel=%d DIMM=%d Rank=%d Bank=%d Row=%d Column=%d\n",
+                        m.addr, daddr.chan, daddr.dimm, daddr.rank, daddr.bank, daddr.row, daddr.col);
+       pnd2_blob.size = strlen(pnd2_blob.data);
+
+       return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
+
+/*
+ * Create the "pnd2_test" debugfs directory with a write-only file that
+ * takes the address to decode and a read-only blob that holds the
+ * result of the last decode.
+ */
+static void setup_pnd2_debug(void)
+{
+       pnd2_test = edac_debugfs_create_dir("pnd2_test");
+       edac_debugfs_create_file("pnd2_debug_addr", 0200, pnd2_test,
+                                                        &pnd2_fake_addr, &fops_u64_wo);
+       debugfs_create_blob("pnd2_debug_results", 0400, pnd2_test, &pnd2_blob);
+}
+
+/* Remove the debugfs test directory together with the files inside it */
+static void teardown_pnd2_debug(void)
+{
+       debugfs_remove_recursive(pnd2_test);
+}
+#else
+/* Without CONFIG_EDAC_DEBUG the debugfs test hooks do nothing */
+static void setup_pnd2_debug(void)     {}
+static void teardown_pnd2_debug(void)  {}
+#endif /* CONFIG_EDAC_DEBUG */
+
+
+/*
+ * Read the memory controller registers and, if that works, register the
+ * controller with EDAC (storing it in pnd2_mci).
+ * Returns 0 on success or the error code of the step that failed.
+ */
+static int pnd2_probe(void)
+{
+       int err;
+
+       edac_dbg(2, "\n");
+
+       err = get_registers();
+       if (!err)
+               err = pnd2_register_mci(&pnd2_mci);
+
+       return err;
+}
+
+/* Undo pnd2_probe(): unregister and free the controller in pnd2_mci */
+static void pnd2_remove(void)
+{
+       edac_dbg(0, "\n");
+       pnd2_unregister_mci(pnd2_mci);
+}
+
+/*
+ * Operations and parameters for the "pnd2/apl" flavour of the memory
+ * controller: one DIMM per channel, the PMI address is shifted right by
+ * LOG2_PMI_ADDR_GRANULARITY before the second stage decode and the PMI
+ * index is used as the channel number unchanged.
+ */
+static struct dunit_ops apl_ops = {
+               .name                   = "pnd2/apl",
+               .type                   = APL,
+               .pmiaddr_shift          = LOG2_PMI_ADDR_GRANULARITY,
+               .pmiidx_shift           = 0,
+               .channels               = APL_NUM_CHANNELS,
+               .dimms_per_channel      = 1,
+               .rd_reg                 = apl_rd_reg,
+               .get_registers          = apl_get_registers,
+               .check_ecc              = apl_check_ecc_active,
+               .mk_region              = apl_mk_region,
+               .get_dimm_config        = apl_get_dimm_config,
+               .pmi2mem                = apl_pmi2mem,
+};
+
+/*
+ * Operations and parameters for the "pnd2/dnv" flavour of the memory
+ * controller: two DIMMs per channel, the PMI address is used unshifted
+ * and the PMI index is shifted right by one to get the channel number.
+ */
+static struct dunit_ops dnv_ops = {
+               .name                   = "pnd2/dnv",
+               .type                   = DNV,
+               .pmiaddr_shift          = 0,
+               .pmiidx_shift           = 1,
+               .channels               = DNV_NUM_CHANNELS,
+               .dimms_per_channel      = 2,
+               .rd_reg                 = dnv_rd_reg,
+               .get_registers          = dnv_get_registers,
+               .check_ecc              = dnv_check_ecc_active,
+               .mk_region              = dnv_mk_region,
+               .get_dimm_config        = dnv_get_dimm_config,
+               .pmi2mem                = dnv_pmi2mem,
+};
+
+/*
+ * CPU models handled by this driver. The last member of each entry
+ * carries the dunit_ops to use for that model; pnd2_init() casts it
+ * back to a pointer.
+ */
+static const struct x86_cpu_id pnd2_cpuids[] = {
+       { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT, 0, (kernel_ulong_t)&apl_ops },
+       { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_DENVERTON, 0, (kernel_ulong_t)&dnv_ops },
+       { }
+};
+MODULE_DEVICE_TABLE(x86cpu, pnd2_cpuids);
+
+/*
+ * Module entry point. Picks the dunit_ops that match the running CPU,
+ * probes the memory controller and, on success, hooks the driver into
+ * the MCE decode chain and creates the debugfs test files.
+ *
+ * Returns 0 on success, -ENODEV if the CPU is not supported or no
+ * controller was registered, or the negative error from the probe.
+ */
+static int __init pnd2_init(void)
+{
+       const struct x86_cpu_id *id;
+       int rc;
+
+       edac_dbg(2, "\n");
+
+       id = x86_match_cpu(pnd2_cpuids);
+       if (!id)
+               return -ENODEV;
+
+       /* driver_data holds the address of apl_ops or dnv_ops */
+       ops = (struct dunit_ops *)id->driver_data;
+
+       /* Ensure that the OPSTATE is set correctly for POLL or NMI */
+       opstate_init();
+
+       rc = pnd2_probe();
+       if (rc < 0) {
+               pnd2_printk(KERN_ERR, "Failed to register device with error %d.\n", rc);
+               return rc;
+       }
+
+       if (!pnd2_mci)
+               return -ENODEV;
+
+       /* Only start receiving machine check records once the mci exists */
+       mce_register_decode_chain(&pnd2_mce_dec);
+       setup_pnd2_debug();
+
+       return 0;
+}
+
+/*
+ * Module exit point: undoes pnd2_init() in reverse order - debugfs
+ * files first, then the MCE decode chain hook, then the controller.
+ */
+static void __exit pnd2_exit(void)
+{
+       edac_dbg(2, "\n");
+       teardown_pnd2_debug();
+       mce_unregister_decode_chain(&pnd2_mce_dec);
+       pnd2_remove();
+}
+
+/* Module entry and exit points */
+module_init(pnd2_init);
+module_exit(pnd2_exit);
+
+/* Read-only module parameter selecting how errors are reported */
+module_param(edac_op_state, int, 0444);
+MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Tony Luck");
+MODULE_DESCRIPTION("MC Driver for Intel SoC using Pondicherry memory controller");
diff --git a/drivers/edac/pnd2_edac.h b/drivers/edac/pnd2_edac.h

new file mode 100644 (file)

index 0000000..61b6e79
--- /dev/null
+++ b/drivers/edac/pnd2_edac.h
@@ -0,0 +1,301 @@
+/*
+ * Register bitfield descriptions for Pondicherry2 memory controller.
+ *
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _PND2_REGS_H
+#define _PND2_REGS_H
+
+struct b_cr_touud_lo_pci {     /* 1 + 19 + 12 = 32 bits; low half paired with b_cr_touud_hi_pci */
+       u32     lock : 1;
+       u32     reserved_1 : 19;
+       u32     touud : 12;
+};
+
+#define b_cr_touud_lo_pci_port 0x4c    /* port/offset/r_opcode triple; its consumer is not in this header */
+#define b_cr_touud_lo_pci_offset 0xa8
+#define b_cr_touud_lo_pci_r_opcode 0x04
+
+struct b_cr_touud_hi_pci {     /* 7 + 25 = 32 bits; high half paired with b_cr_touud_lo_pci */
+       u32     touud : 7;
+       u32     reserved_0 : 25;
+};
+
+#define b_cr_touud_hi_pci_port 0x4c
+#define b_cr_touud_hi_pci_offset 0xac
+#define b_cr_touud_hi_pci_r_opcode 0x04
+
+struct b_cr_tolud_pci {        /* 1 + 19 + 12 = 32 bits, same shape as b_cr_touud_lo_pci */
+       u32     lock : 1;
+       u32     reserved_0 : 19;
+       u32     tolud : 12;
+};
+
+#define b_cr_tolud_pci_port 0x4c
+#define b_cr_tolud_pci_offset 0xbc
+#define b_cr_tolud_pci_r_opcode 0x04
+
+struct b_cr_mchbar_lo_pci {    /* 32 bits; pad names give bit ranges, so enable is bit 0 and base is bits 31:15 */
+       u32 enable : 1;
+       u32 pad_3_1 : 3;
+       u32 pad_14_4: 11;
+       u32 base: 17;
+};
+
+struct b_cr_mchbar_hi_pci {    /* 32 bits; base is bits 6:0, as the pad_31_7 name implies */
+       u32 base : 7;
+       u32 pad_31_7 : 25;
+};
+
+/* Symmetric region */
+struct b_cr_slice_channel_hash {       /* u64 bit-fields; the widths below total 64 bits */
+       u64     slice_1_disabled : 1;
+       u64     hvm_mode : 1;
+       u64     interleave_mode : 2;
+       u64     slice_0_mem_disabled : 1;
+       u64     reserved_0 : 1;
+       u64     slice_hash_mask : 14;
+       u64     reserved_1 : 11;
+       u64     enable_pmi_dual_data_mode : 1;
+       u64     ch_1_disabled : 1;
+       u64     reserved_2 : 1;
+       u64     sym_slice0_channel_enabled : 2;
+       u64     sym_slice1_channel_enabled : 2;
+       u64     ch_hash_mask : 14;
+       u64     reserved_3 : 11;
+       u64     lock : 1;
+};
+
+#define b_cr_slice_channel_hash_port 0x4c
+#define b_cr_slice_channel_hash_offset 0x4c58
+#define b_cr_slice_channel_hash_r_opcode 0x06
+
+struct b_cr_mot_out_base_mchbar {      /* 14 + 15 + 1 + 1 + 1 = 32 bits */
+       u32     reserved_0 : 14;
+       u32     mot_out_base : 15;
+       u32     reserved_1 : 1;
+       u32     tr_en : 1;
+       u32     imr_en : 1;
+};
+
+#define b_cr_mot_out_base_mchbar_port 0x4c
+#define b_cr_mot_out_base_mchbar_offset 0x6af0
+#define b_cr_mot_out_base_mchbar_r_opcode 0x00
+
+struct b_cr_mot_out_mask_mchbar {      /* 14 + 15 + 1 + 1 + 1 = 32 bits; mask field mirrors mot_out_base's position */
+       u32     reserved_0 : 14;
+       u32     mot_out_mask : 15;
+       u32     reserved_1 : 1;
+       u32     ia_iwb_en : 1;
+       u32     gt_iwb_en : 1;
+};
+
+#define b_cr_mot_out_mask_mchbar_port 0x4c
+#define b_cr_mot_out_mask_mchbar_offset 0x6af4
+#define b_cr_mot_out_mask_mchbar_r_opcode 0x00
+
+struct b_cr_asym_mem_region0_mchbar {  /* 32 bits; with pad_18_15 at bits 18:15, base is 14:4 and limit is 29:19 */
+       u32     pad : 4;
+       u32     slice0_asym_base : 11;
+       u32     pad_18_15 : 4;
+       u32     slice0_asym_limit : 11;
+       u32     slice0_asym_channel_select : 1;
+       u32     slice0_asym_enable : 1;
+};
+
+#define b_cr_asym_mem_region0_mchbar_port 0x4c
+#define b_cr_asym_mem_region0_mchbar_offset 0x6e40
+#define b_cr_asym_mem_region0_mchbar_r_opcode 0x00
+
+struct b_cr_asym_mem_region1_mchbar {  /* 32 bits; same field positions as b_cr_asym_mem_region0_mchbar, for slice 1 */
+       u32     pad : 4;
+       u32     slice1_asym_base : 11;
+       u32     pad_18_15 : 4;
+       u32     slice1_asym_limit : 11;
+       u32     slice1_asym_channel_select : 1;
+       u32     slice1_asym_enable : 1;
+};
+
+#define b_cr_asym_mem_region1_mchbar_port 0x4c
+#define b_cr_asym_mem_region1_mchbar_offset 0x6e44
+#define b_cr_asym_mem_region1_mchbar_r_opcode 0x00
+
+/* Some bit fields moved in above two structs on Denverton */
+struct b_cr_asym_mem_region_denverton {        /* 32 bits; base is 8 bits wide here and there is no channel_select field */
+       u32     pad : 4;
+       u32     slice_asym_base : 8;
+       u32     pad_19_12 : 8;
+       u32     slice_asym_limit : 8;
+       u32     pad_28_30 : 3;
+       u32     slice_asym_enable : 1;
+};
+
+struct b_cr_asym_2way_mem_region_mchbar {      /* 2 + 2 + 11 + 2 + 11 + 3 + 1 = 32 bits; enable is the top bit */
+       u32     pad : 2;
+       u32     asym_2way_intlv_mode : 2;
+       u32     asym_2way_base : 11;
+       u32     pad_16_15 : 2;
+       u32     asym_2way_limit : 11;
+       u32     pad_30_28 : 3;
+       u32     asym_2way_interleave_enable : 1;
+};
+
+#define b_cr_asym_2way_mem_region_mchbar_port 0x4c
+#define b_cr_asym_2way_mem_region_mchbar_offset 0x6e50
+#define b_cr_asym_2way_mem_region_mchbar_r_opcode 0x00
+
+/* Apollo Lake d-unit */
+
+struct d_cr_drp0 {             /* field widths total 32 bits; rsvd names carry their bit ranges */
+       u32     rken0 : 1;
+       u32     rken1 : 1;
+       u32     ddmen : 1;
+       u32     rsvd3 : 1;
+       u32     dwid : 2;
+       u32     dden : 3;
+       u32     rsvd13_9 : 5;
+       u32     rsien : 1;
+       u32     bahen : 1;
+       u32     rsvd18_16 : 3;
+       u32     caswizzle : 2;
+       u32     eccen : 1;
+       u32     dramtype : 3;
+       u32     blmode : 3;
+       u32     addrdec : 2;
+       u32     dramdevice_pr : 2;
+};
+
+#define d_cr_drp0_offset 0x1400        /* no _port constant is defined for this register in this header */
+#define d_cr_drp0_r_opcode 0x00
+
+/* Denverton d-unit */
+
+struct d_cr_dsch {             /* field widths total 32 bits; rsvd names carry their bit ranges */
+       u32     ch0en : 1;
+       u32     ch1en : 1;
+       u32     ddr4en : 1;
+       u32     coldwake : 1;
+       u32     newbypdis : 1;
+       u32     chan_width : 1;
+       u32     rsvd6_6 : 1;
+       u32     ooodis : 1;
+       u32     rsvd18_8 : 11;
+       u32     ic : 1;
+       u32     rsvd31_20 : 12;
+};
+
+#define d_cr_dsch_port 0x16
+#define d_cr_dsch_offset 0x0
+#define d_cr_dsch_r_opcode 0x0
+
+struct d_cr_ecc_ctrl {         /* 1 + 31 = 32 bits; eccen is bit 0 */
+       u32     eccen : 1;
+       u32     rsvd31_1 : 31;
+};
+
+#define d_cr_ecc_ctrl_offset 0x180
+#define d_cr_ecc_ctrl_r_opcode 0x0
+
+struct d_cr_drp {              /* field widths total 32 bits: four rank enables, then width/density pairs for DIMM 0 and 1 */
+       u32     rken0 : 1;
+       u32     rken1 : 1;
+       u32     rken2 : 1;
+       u32     rken3 : 1;
+       u32     dimmdwid0 : 2;
+       u32     dimmdden0 : 2;
+       u32     dimmdwid1 : 2;
+       u32     dimmdden1 : 2;
+       u32     rsvd15_12 : 4;
+       u32     dimmflip : 1;
+       u32     rsvd31_17 : 15;
+};
+
+#define d_cr_drp_offset 0x158
+#define d_cr_drp_r_opcode 0x0
+
+struct d_cr_dmap {             /* six 5-bit fields plus 2 reserved bits = 32 bits */
+       u32     ba0 : 5;
+       u32     ba1 : 5;
+       u32     bg0 : 5; /* if ddr3, ba2 = bg0 */
+       u32     bg1 : 5; /* if ddr3, ba3 = bg1 */
+       u32     rs0 : 5;
+       u32     rs1 : 5;
+       u32     rsvd : 2;
+};
+
+#define d_cr_dmap_offset 0x174
+#define d_cr_dmap_r_opcode 0x0
+
+struct d_cr_dmap1 {            /* 6 + 1 + 25 = 32 bits */
+       u32     ca11 : 6;
+       u32     bxor : 1;
+       u32     rsvd : 25;
+};
+
+#define d_cr_dmap1_offset 0xb4
+#define d_cr_dmap1_r_opcode 0x0
+
+struct d_cr_dmap2 {            /* row0..row5: six 5-bit fields plus 2 reserved bits = 32 bits */
+       u32     row0 : 5;
+       u32     row1 : 5;
+       u32     row2 : 5;
+       u32     row3 : 5;
+       u32     row4 : 5;
+       u32     row5 : 5;
+       u32     rsvd : 2;
+};
+
+#define d_cr_dmap2_offset 0x148
+#define d_cr_dmap2_r_opcode 0x0
+
+struct d_cr_dmap3 {            /* row6..row11: six 5-bit fields plus 2 reserved bits = 32 bits */
+       u32     row6 : 5;
+       u32     row7 : 5;
+       u32     row8 : 5;
+       u32     row9 : 5;
+       u32     row10 : 5;
+       u32     row11 : 5;
+       u32     rsvd : 2;
+};
+
+#define d_cr_dmap3_offset 0x14c
+#define d_cr_dmap3_r_opcode 0x0
+
+struct d_cr_dmap4 {            /* row12..row17: six 5-bit fields plus 2 reserved bits = 32 bits */
+       u32     row12 : 5;
+       u32     row13 : 5;
+       u32     row14 : 5;
+       u32     row15 : 5;
+       u32     row16 : 5;
+       u32     row17 : 5;
+       u32     rsvd : 2;
+};
+
+#define d_cr_dmap4_offset 0x150
+#define d_cr_dmap4_r_opcode 0x0
+
+struct d_cr_dmap5 {            /* ca3..ca9: seven 4-bit fields plus 4 reserved bits = 32 bits */
+       u32     ca3 : 4;
+       u32     ca4 : 4;
+       u32     ca5 : 4;
+       u32     ca6 : 4;
+       u32     ca7 : 4;
+       u32     ca8 : 4;
+       u32     ca9 : 4;
+       u32     rsvd : 4;
+};
+
+#define d_cr_dmap5_offset 0x154
+#define d_cr_dmap5_r_opcode 0x0
+
+#endif /* _PND2_REGS_H */
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c

index a65ea44e3b0bf66294c0c32effb774c9d3c23622..ea21cb651b3c0b2b23be44b8a03feaaeb27834f7 100644 (file)
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -3075,7 +3075,7 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
         struct sbridge_pvt *pvt;
         char *type;
  
-       if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
+       if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
                 return NOTIFY_DONE;
  
         mci = get_mci_for_node_id(mce->socketid);
@@ -3441,7 +3441,7 @@ static int __init sbridge_init(void)
  
         if (rc >= 0) {
                 mce_register_decode_chain(&sbridge_mce_dec);
-               if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
+               if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
                         sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n");
                 return 0;
         }
diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c

index 1159dba4671fef926afce7164cd278faf23c2e24..64bef6c9cfb4d3175838ec44cfda7a7aefc6a1a5 100644 (file)
--- a/drivers/edac/skx_edac.c
+++ b/drivers/edac/skx_edac.c
@@ -971,7 +971,7 @@ static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
         struct mem_ctl_info *mci;
         char *type;
  
-       if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
+       if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
                 return NOTIFY_DONE;
  
         /* ignore unless this is memory related with an address */
diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c

new file mode 100644 (file)

index 0000000..86d585c
--- /dev/null
+++ b/drivers/edac/thunderx_edac.c
@@ -0,0 +1,2174 @@
+/*
+ * Cavium ThunderX memory controller kernel module
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright Cavium, Inc. (C) 2015-2017. All rights reserved.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/edac.h>
+#include <linux/interrupt.h>
+#include <linux/string.h>
+#include <linux/stop_machine.h>
+#include <linux/delay.h>
+#include <linux/sizes.h>
+#include <linux/atomic.h>
+#include <linux/bitfield.h>
+#include <linux/circ_buf.h>
+
+#include <asm/page.h>
+
+#include "edac_module.h"
+
+#define phys_to_pfn(phys)      (PFN_DOWN(phys))
+
+#define THUNDERX_NODE          GENMASK(45, 44)
+
+enum {                         /* values for error_descr.type; 0 is reserved as the table terminator */
+       ERR_CORRECTED   = 1,
+       ERR_UNCORRECTED = 2,
+       ERR_UNKNOWN     = 3,    /* decode_register() prints any non-ERR_CORRECTED type as "Uncorrected" */
+};
+
+#define MAX_SYNDROME_REGS 4
+
+struct error_syndrome {                /* fixed array of MAX_SYNDROME_REGS 64-bit values; users are elsewhere in the file */
+       u64 reg[MAX_SYNDROME_REGS];
+};
+
+struct error_descr {           /* one row of a register-decode table walked by decode_register() */
+       int     type;           /* ERR_* class; 0 ends the table */
+       u64     mask;           /* register bits that select this row */
+       char    *descr;         /* text appended to the decoded message */
+};
+
+/*
+ * Append one line to @str for every entry of @descr whose mask intersects
+ * @reg.
+ *
+ * @str:   destination buffer; left NUL-terminated whenever @size is non-zero
+ * @size:  capacity of @str in bytes
+ * @descr: table ended by an entry with zero type, zero mask or NULL descr
+ * @reg:   register value to decode
+ *
+ * Each matching entry contributes "\n\t<Corrected|Uncorrected>, <descr>".
+ * Decoding stops as soon as the buffer is full.
+ */
+static void decode_register(char *str, size_t size,
+                          const struct error_descr *descr,
+                          const uint64_t reg)
+{
+       int ret;
+
+       if (!size)
+               return;
+
+       /*
+        * The buffer may arrive uninitialised (the LMC threaded ISR passes a
+        * fresh kmalloc() area), so hand back an empty string when no entry
+        * matches.
+        */
+       str[0] = '\0';
+
+       for (; descr->type && descr->mask && descr->descr; descr++) {
+               if (!(reg & descr->mask))
+                       continue;
+
+               ret = snprintf(str, size, "\n\t%s, %s",
+                              descr->type == ERR_CORRECTED ?
+                                "Corrected" : "Uncorrected",
+                              descr->descr);
+
+               /*
+                * snprintf() returns the length it wanted to write, which can
+                * exceed what fits. Advancing by that amount would push @str
+                * past the buffer and wrap the unsigned @size, so stop at the
+                * first truncated entry.
+                */
+               if (ret < 0 || (size_t)ret >= size)
+                       return;
+
+               str += ret;
+               size -= ret;
+       }
+}
+
+/*
+ * Return @width bits of @data starting at bit @pos (bit 0 is the LSB).
+ * @width must be smaller than the number of bits in unsigned long.
+ */
+static unsigned long get_bits(unsigned long data, int pos, int width)
+{
+       /* Build the mask in unsigned long so a width of 31 or more cannot overflow int. */
+       return (data >> pos) & ((1UL << width) - 1);
+}
+
+#define L2C_CTL                        0x87E080800000
+#define L2C_CTL_DISIDXALIAS    BIT(0)
+
+#define PCI_DEVICE_ID_THUNDER_LMC 0xa022
+
+#define LMC_FADR               0x20
+/*
+ * Field extractors for an LMC_FADR-format value (the threaded ISR also
+ * applies them to the LMC_SCRAM_FADR snapshot). The argument is
+ * parenthesised so that any expression can be passed safely.
+ */
+#define LMC_FADR_FDIMM(x)      (((x) >> 37) & 0x1)
+#define LMC_FADR_FBUNK(x)      (((x) >> 36) & 0x1)
+#define LMC_FADR_FBANK(x)      (((x) >> 32) & 0xf)
+#define LMC_FADR_FROW(x)       (((x) >> 14) & 0xffff)
+#define LMC_FADR_FCOL(x)       (((x) >> 0) & 0x1fff)
+
+#define LMC_NXM_FADR           0x28
+#define LMC_ECC_SYND           0x38
+
+#define LMC_ECC_PARITY_TEST    0x108
+
+#define LMC_INT_W1S            0x150
+
+#define LMC_INT_ENA_W1C                0x158
+#define LMC_INT_ENA_W1S                0x160
+
+#define LMC_CONFIG             0x188
+
+#define LMC_CONFIG_BG2         BIT(62)
+#define LMC_CONFIG_RANK_ENA    BIT(42)
+#define LMC_CONFIG_PBANK_LSB(x)        (((x) >> 5) & 0xF)
+#define LMC_CONFIG_ROW_LSB(x)  (((x) >> 2) & 0x7)
+
+#define LMC_CONTROL            0x190
+#define LMC_CONTROL_XOR_BANK   BIT(16)
+
+#define LMC_INT                        0x1F0
+
+#define LMC_INT_DDR_ERR                BIT(11)
+#define LMC_INT_DED_ERR                (0xFUL << 5)
+#define LMC_INT_SEC_ERR         (0xFUL << 1)
+#define LMC_INT_NXM_WR_MASK    BIT(0)
+
+#define LMC_DDR_PLL_CTL                0x258
+#define LMC_DDR_PLL_CTL_DDR4   BIT(29)
+
+#define LMC_FADR_SCRAMBLED     0x330
+
+#define LMC_INT_UE              (LMC_INT_DDR_ERR | LMC_INT_DED_ERR | \
+                                LMC_INT_NXM_WR_MASK)
+
+#define LMC_INT_CE             (LMC_INT_SEC_ERR)
+
+static const struct error_descr lmc_errors[] = {       /* decode table for LMC_INT bits; the all-zero entry ends it */
+       {
+               .type  = ERR_CORRECTED,
+               .mask  = LMC_INT_SEC_ERR,
+               .descr = "Single-bit ECC error",
+       },
+       {
+               .type  = ERR_UNCORRECTED,
+               .mask  = LMC_INT_DDR_ERR,
+               .descr = "DDR chip error",
+       },
+       {
+               .type  = ERR_UNCORRECTED,
+               .mask  = LMC_INT_DED_ERR,
+               .descr = "Double-bit ECC error",
+       },
+       {
+               .type = ERR_UNCORRECTED,
+               .mask = LMC_INT_NXM_WR_MASK,
+               .descr = "Non-existent memory write",
+       },
+       {0, 0, NULL},
+};
+
+#define LMC_INT_EN_DDR_ERROR_ALERT_ENA BIT(5)
+#define LMC_INT_EN_DLCRAM_DED_ERR      BIT(4)
+#define LMC_INT_EN_DLCRAM_SEC_ERR      BIT(3)
+#define LMC_INT_INTR_DED_ENA           BIT(2)
+#define LMC_INT_INTR_SEC_ENA           BIT(1)
+#define LMC_INT_INTR_NXM_WR_ENA                BIT(0)
+
+#define LMC_INT_ENA_ALL                        GENMASK(5, 0)
+
+/*
+ * LMC_DDR_PLL_CTL, LMC_DDR_PLL_CTL_DDR4 and LMC_CONTROL are already defined
+ * with the same values earlier in this file; only the new names live here.
+ */
+#define LMC_CONTROL_RDIMM      BIT(0)
+
+#define LMC_SCRAM_FADR         0x330
+
+#define LMC_CHAR_MASK0         0x228
+#define LMC_CHAR_MASK2         0x238
+
+#define RING_ENTRIES   8
+
+struct debugfs_entry {         /* static description of one debugfs file */
+       const char *name;       /* file name handed to edac_debugfs_create_file() */
+       umode_t mode;           /* permission bits for the file */
+       const struct file_operations fops;     /* embedded handlers, filled in by DEBUGFS_STRUCT() */
+};
+
+struct lmc_err_ctx {           /* register snapshot captured by thunderx_lmc_err_isr() */
+       u64 reg_int;            /* LMC_INT */
+       u64 reg_fadr;           /* LMC_FADR */
+       u64 reg_nxm_fadr;       /* LMC_NXM_FADR */
+       u64 reg_scram_fadr;     /* LMC_SCRAM_FADR */
+       u64 reg_ecc_synd;       /* LMC_ECC_SYND */
+};
+
+struct thunderx_lmc {          /* driver-private LMC state, reached through mci->pvt_info */
+       void __iomem *regs;     /* base for every readq()/writeq() register access */
+       struct pci_dev *pdev;
+       struct msix_entry msix_ent;
+
+       atomic_t ecc_int;       /* set to 1 by thunderx_lmc_err_isr(); polled by the inject_ecc handler */
+
+       u64 mask0;              /* written to LMC_CHAR_MASK0 by inject_ecc_fn() */
+       u64 mask2;              /* written to LMC_CHAR_MASK2 by inject_ecc_fn() */
+       u64 parity_test;        /* written to LMC_ECC_PARITY_TEST by inject_ecc_fn() */
+       u64 node;               /* passed to alloc_pages_node(); placed at bit 40 of decoded addresses */
+
+       int xbits;              /* xbits .. l2c_alias: address-layout parameters, most of them read by */
+       int bank_width;         /* thunderx_faddr_to_phys(); assigned elsewhere (presumably at probe */
+       int pbank_lsb;          /* time - confirm) */
+       int dimm_lsb;
+       int rank_lsb;
+       int bank_lsb;
+       int row_lsb;
+       int col_hi_lsb;
+
+       int xor_bank;
+       int l2c_alias;
+
+       struct page *mem;       /* page allocated for the duration of an ECC injection */
+
+       struct lmc_err_ctx err_ctx[RING_ENTRIES];      /* snapshot ring indexed through ring_pos() */
+       unsigned long ring_head;       /* free-running; advanced by thunderx_lmc_err_isr() */
+       unsigned long ring_tail;       /* free-running; advanced by thunderx_lmc_threaded_isr() */
+};
+
+/* Map a free-running ring counter onto a slot index; @size must be a power of two. */
+#define ring_pos(pos, size) ((pos) & ((size) - 1))
+/*
+ * Define a static struct debugfs_entry named debugfs_<_name>. The file name
+ * is the stringified _name; _write and _read become the file_operations
+ * handlers (the inject_int, inject_ecc and int_w1c users pass NULL for one).
+ */
+#define DEBUGFS_STRUCT(_name, _mode, _write, _read)                        \
+static struct debugfs_entry debugfs_##_name = {                                    \
+       .name = __stringify(_name),                                         \
+       .mode = VERIFY_OCTAL_PERMISSIONS(_mode),                            \
+       .fops = {                                                           \
+               .open = simple_open,                                        \
+               .write = _write,                                            \
+               .read  = _read,                                             \
+               .llseek = generic_file_llseek,                              \
+       },                                                                  \
+}
+
+#define DEBUGFS_FIELD_ATTR(_type, _field)                                  \
+static ssize_t thunderx_##_type##_##_field##_read(struct file *file,       \
+                                           char __user *data,              \
+                                           size_t count, loff_t *ppos)     \
+{                                                                          \
+       struct thunderx_##_type *pdata = file->private_data;                \
+       char buf[20];                                                       \
+                                                                           \
+       snprintf(buf, count, "0x%016llx", pdata->_field);                   \
+       return simple_read_from_buffer(data, count, ppos,                   \
+                                      buf, sizeof(buf));                   \
+}                                                                          \
+                                                                           \
+static ssize_t thunderx_##_type##_##_field##_write(struct file *file,      \
+                                            const char __user *data,       \
+                                            size_t count, loff_t *ppos)    \
+{                                                                          \
+       struct thunderx_##_type *pdata = file->private_data;                \
+       int res;                                                            \
+                                                                           \
+       res = kstrtoull_from_user(data, count, 0, &pdata->_field);          \
+                                                                           \
+       return res ? res : count;                                           \
+}                                                                          \
+                                                                           \
+DEBUGFS_STRUCT(_field, 0600,                                               \
+                  thunderx_##_type##_##_field##_write,                     \
+                  thunderx_##_type##_##_field##_read)                      \
+
+/*
+ * Generate debugfs read/write handlers plus the debugfs_entry for the
+ * 64-bit register at offset _reg from pdata->regs of struct
+ * thunderx_<_type>. Reads return readq() of the register formatted as
+ * "0x%016llx"; writes parse an integer with kstrtoull_from_user() and
+ * writeq() it to the register.
+ */
+#define DEBUGFS_REG_ATTR(_type, _name, _reg)                               \
+static ssize_t thunderx_##_type##_##_name##_read(struct file *file,        \
+                                          char __user *data,               \
+                                          size_t count, loff_t *ppos)      \
+{                                                                          \
+       struct thunderx_##_type *pdata = file->private_data;                \
+       /* zero-filled: the formatted text does not cover all of buf */     \
+       char buf[20] = { 0 };                                               \
+                                                                           \
+       snprintf(buf, sizeof(buf), "0x%016llx", readq(pdata->regs + _reg)); \
+       return simple_read_from_buffer(data, count, ppos,                   \
+                                      buf, sizeof(buf));                   \
+}                                                                          \
+                                                                           \
+static ssize_t thunderx_##_type##_##_name##_write(struct file *file,       \
+                                           const char __user *data,        \
+                                           size_t count, loff_t *ppos)     \
+{                                                                          \
+       struct thunderx_##_type *pdata = file->private_data;                \
+       u64 val;                                                            \
+       int res;                                                            \
+                                                                           \
+       res = kstrtoull_from_user(data, count, 0, &val);                    \
+                                                                           \
+       if (!res) {                                                         \
+               writeq(val, pdata->regs + _reg);                            \
+               res = count;                                                \
+       }                                                                   \
+                                                                           \
+       return res;                                                         \
+}                                                                          \
+                                                                           \
+DEBUGFS_STRUCT(_name, 0600,                                                \
+              thunderx_##_type##_##_name##_write,                          \
+              thunderx_##_type##_##_name##_read)
+
+#define LMC_DEBUGFS_ENT(_field)        DEBUGFS_FIELD_ATTR(lmc, _field)
+
+/*
+ * To get an ECC error injected, the following steps are needed:
+ * - Set up the ECC injection by writing the appropriate parameters:
+ *     echo <bit mask value> > /sys/kernel/debug/<device number>/mask0
+ *     echo <bit mask value> > /sys/kernel/debug/<device number>/mask2
+ *     echo 0x802 > /sys/kernel/debug/<device number>/parity_test
+ * - Do the actual injection:
+ *     echo 1 > /sys/kernel/debug/<device number>/inject_ecc
+ */
+/*
+ * debugfs write handler for "inject_int": parse an integer from user space
+ * and store it in the LMC_INT_W1S register.
+ *
+ * Returns @count on success or the kstrtoull_from_user() error code.
+ */
+static ssize_t thunderx_lmc_inject_int_write(struct file *file,
+                                            const char __user *data,
+                                            size_t count, loff_t *ppos)
+{
+       struct thunderx_lmc *lmc = file->private_data;
+       u64 bits;
+       int err = kstrtoull_from_user(data, count, 0, &bits);
+
+       if (err)
+               return err;
+
+       /* Trigger the interrupt */
+       writeq(bits, lmc->regs + LMC_INT_W1S);
+
+       return count;
+}
+
+/*
+ * debugfs read handler for "int_w1c": report the current value of the
+ * LMC_INT register formatted as "0x%016llx".
+ */
+static ssize_t thunderx_lmc_int_read(struct file *file,
+                                    char __user *data,
+                                    size_t count, loff_t *ppos)
+{
+       struct thunderx_lmc *lmc = file->private_data;
+       /*
+        * Zero-filled: the text needs 19 of the 20 bytes, yet all 20 are
+        * made available to user space below.
+        */
+       char buf[20] = { 0 };
+       u64 lmc_int = readq(lmc->regs + LMC_INT);
+
+       snprintf(buf, sizeof(buf), "0x%016llx", lmc_int);
+       return simple_read_from_buffer(data, count, ppos, buf, sizeof(buf));
+}
+
+#define TEST_PATTERN 0xa5
+/*
+ * stop_machine() callback used by thunderx_lmc_inject_ecc_write().
+ *
+ * Programs LMC_CHAR_MASK0/2 and LMC_ECC_PARITY_TEST from the values held in
+ * @arg (a struct thunderx_lmc), reads the three registers back, then walks
+ * the page in lmc->mem one cache line at a time in four passes of cache
+ * maintenance instructions (see the per-loop comments). Bits 10:8 of
+ * parity_test are replaced by a value derived from the page's physical
+ * address before the register is written.
+ *
+ * Always returns 0.
+ */
+static int inject_ecc_fn(void *arg)
+{
+       struct thunderx_lmc *lmc = arg;
+       uintptr_t addr, phys;
+       unsigned int cline_size = cache_line_size();
+       const unsigned int lines = PAGE_SIZE / cline_size;
+       unsigned int i, cl_idx;
+
+       addr = (uintptr_t)page_address(lmc->mem);
+       phys = (uintptr_t)page_to_phys(lmc->mem);
+
+       cl_idx = (phys & 0x7f) >> 4;
+       lmc->parity_test &= ~(7ULL << 8);
+       lmc->parity_test |= (cl_idx << 8);
+
+       writeq(lmc->mask0, lmc->regs + LMC_CHAR_MASK0);
+       writeq(lmc->mask2, lmc->regs + LMC_CHAR_MASK2);
+       writeq(lmc->parity_test, lmc->regs + LMC_ECC_PARITY_TEST);
+
+       readq(lmc->regs + LMC_CHAR_MASK0);
+       readq(lmc->regs + LMC_CHAR_MASK2);
+       readq(lmc->regs + LMC_ECC_PARITY_TEST);
+
+       for (i = 0; i < lines; i++) {
+               memset((void *)addr, TEST_PATTERN, cline_size); /* NOTE(review): fills the first line on
+                                                                * every pass, not addr + i * cline_size,
+                                                                * while the flush below walks the page -
+                                                                * confirm this is intended */
+               barrier();
+
+               /*
+                * Flush L1 cachelines to the PoC (L2).
+                * This will cause cacheline eviction to the L2.
+                */
+               asm volatile("dc civac, %0\n"
+                            "dsb sy\n"
+                            : : "r"(addr + i * cline_size));
+       }
+
+       for (i = 0; i < lines; i++) {
+               /*
+                * Flush L2 cachelines to the DRAM.
+                * This will cause cacheline eviction to the DRAM
+                * and ECC corruption according to the masks set.
+                */
+               __asm__ volatile("sys #0,c11,C1,#2, %0\n"
+                                : : "r"(phys + i * cline_size));
+       }
+
+       for (i = 0; i < lines; i++) {
+               /*
+                * Invalidate L2 cachelines.
+                * The subsequent load will cause cacheline fetch
+                * from the DRAM and an error interrupt
+                */
+               __asm__ volatile("sys #0,c11,C1,#1, %0"
+                                : : "r"(phys + i * cline_size));
+       }
+
+       for (i = 0; i < lines; i++) {
+               /*
+                * Invalidate L1 cachelines.
+                * The subsequent load will cause cacheline fetch
+                * from the L2 and/or DRAM
+                */
+               asm volatile("dc ivac, %0\n"
+                            "dsb sy\n"
+                            : : "r"(addr + i * cline_size));
+       }
+
+       return 0;
+}
+
+/*
+ * debugfs write handler for "inject_ecc".
+ *
+ * Allocates one page on the controller's node, then repeatedly runs
+ * inject_ecc_fn() under stop_machine() and reads the page back one cache
+ * line at a time until the interrupt handler sets lmc->ecc_int or the retry
+ * budget is exhausted. The data written by the user is not examined.
+ *
+ * Returns @count, or -ENOMEM if either allocation fails.
+ */
+static ssize_t thunderx_lmc_inject_ecc_write(struct file *file,
+                                            const char __user *data,
+                                            size_t count, loff_t *ppos)
+{
+       struct thunderx_lmc *lmc = file->private_data;
+       unsigned int cline_size = cache_line_size();
+       unsigned int offs, timeout = 100000;
+       void *addr;     /* ordinary kernel mapping from page_address(), not MMIO */
+       u8 *tmp;
+
+       /*
+        * Bounce buffer for the read-back. It lives on the heap because its
+        * size is only known at run time and a variable-length array would
+        * sit on the kernel stack.
+        */
+       tmp = kmalloc(cline_size, GFP_KERNEL);
+       if (!tmp)
+               return -ENOMEM;
+
+       atomic_set(&lmc->ecc_int, 0);
+
+       lmc->mem = alloc_pages_node(lmc->node, GFP_KERNEL, 0);
+       if (!lmc->mem) {
+               kfree(tmp);
+               return -ENOMEM;
+       }
+
+       addr = page_address(lmc->mem);
+
+       while (!atomic_read(&lmc->ecc_int) && timeout--) {
+               stop_machine(inject_ecc_fn, lmc, NULL);
+
+               for (offs = 0; offs < PAGE_SIZE; offs += cline_size) {
+                       /*
+                        * Do a load from the previously rigged location
+                        * This should generate an error interrupt.
+                        */
+                       memcpy(tmp, addr + offs, cline_size);
+                       asm volatile("dsb ld\n");
+               }
+       }
+
+       __free_pages(lmc->mem, 0);
+       kfree(tmp);
+
+       return count;
+}
+
+LMC_DEBUGFS_ENT(mask0);
+LMC_DEBUGFS_ENT(mask2);
+LMC_DEBUGFS_ENT(parity_test);
+
+DEBUGFS_STRUCT(inject_int, 0200, thunderx_lmc_inject_int_write, NULL);
+DEBUGFS_STRUCT(inject_ecc, 0200, thunderx_lmc_inject_ecc_write, NULL);
+DEBUGFS_STRUCT(int_w1c, 0400, NULL, thunderx_lmc_int_read);
+
+/*
+ * The LMC debugfs entries defined above, in the form taken by
+ * thunderx_create_debugfs_nodes(). File-local, hence static.
+ */
+static struct debugfs_entry *lmc_dfs_ents[] = {
+       &debugfs_mask0,
+       &debugfs_mask2,
+       &debugfs_parity_test,
+       &debugfs_inject_ecc,
+       &debugfs_inject_int,
+       &debugfs_int_w1c,
+};
+
+/*
+ * Create one debugfs file under @parent for each of the @num entries in
+ * @attrs, passing @data as the file's private data.
+ *
+ * Returns the number of files created (creation stops at the first
+ * failure), 0 when CONFIG_EDAC_DEBUG is disabled, or -ENOENT when @parent
+ * is NULL.
+ */
+static int thunderx_create_debugfs_nodes(struct dentry *parent,
+                                         struct debugfs_entry *attrs[],
+                                         void *data,
+                                         size_t num)
+{
+       int created = 0;
+
+       if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
+               return 0;
+
+       if (!parent)
+               return -ENOENT;
+
+       while (created < num) {
+               struct debugfs_entry *attr = attrs[created];
+
+               if (!edac_debugfs_create_file(attr->name, attr->mode,
+                                             parent, data, &attr->fops))
+                       break;
+
+               created++;
+       }
+
+       return created;
+}
+/*
+ * Rebuild a physical address from an LMC_FADR-format value.
+ *
+ * The node number is placed at bit 40; the DIMM, rank, row and upper column
+ * fields are shifted to the positions stored in @lmc; the bank is XORed with
+ * address bits when lmc->xor_bank is set; and the PCI function number (XORed
+ * with two address slices when lmc->l2c_alias is set) is inserted at bit 7.
+ */
+static phys_addr_t thunderx_faddr_to_phys(u64 faddr, struct thunderx_lmc *lmc)
+{
+       phys_addr_t addr = 0;
+       int bank, xbits;
+
+       addr |= lmc->node << 40;
+       addr |= LMC_FADR_FDIMM(faddr) << lmc->dimm_lsb;
+       addr |= LMC_FADR_FBUNK(faddr) << lmc->rank_lsb;
+       addr |= LMC_FADR_FROW(faddr) << lmc->row_lsb;
+       addr |= (LMC_FADR_FCOL(faddr) >> 4) << lmc->col_hi_lsb;
+
+       bank = LMC_FADR_FBANK(faddr) << lmc->bank_lsb;
+
+       if (lmc->xor_bank)
+               bank ^= get_bits(addr, 12 + lmc->xbits, lmc->bank_width);
+
+       addr |= bank << lmc->bank_lsb; /* NOTE(review): bank was already shifted by bank_lsb
+                                        * above and is a plain int - confirm the second
+                                        * shift is intended */
+
+       xbits = PCI_FUNC(lmc->pdev->devfn);
+
+       if (lmc->l2c_alias)
+               xbits ^= get_bits(addr, 20, lmc->xbits) ^
+                        get_bits(addr, 12, lmc->xbits);
+
+       addr |= xbits << 7;
+
+       return addr;
+}
+
+static unsigned int thunderx_get_num_lmcs(unsigned int node)
+{
+       unsigned int number = 0;
+       struct pci_dev *pdev = NULL;
+
+       do {
+               pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
+                                     PCI_DEVICE_ID_THUNDER_LMC,
+                                     pdev);
+               if (pdev) {
+#ifdef CONFIG_NUMA
+                       if (pdev->dev.numa_node == node)
+                               number++;
+#else
+                       number++;
+#endif
+               }
+       } while (pdev);
+
+       return number;
+}
+
+#define LMC_MESSAGE_SIZE       120
+#define LMC_OTHER_SIZE         (50 * ARRAY_SIZE(lmc_errors))
+/*
+ * Hard-IRQ half of the LMC error interrupt.
+ *
+ * Zeroes LMC_CHAR_MASK0/2 and writes 0x2 to LMC_ECC_PARITY_TEST, captures
+ * LMC_INT, LMC_FADR, LMC_NXM_FADR, LMC_SCRAM_FADR and LMC_ECC_SYND into the
+ * ring slot selected by ring_head, advances ring_head, sets lmc->ecc_int and
+ * writes the captured LMC_INT value back to LMC_INT.
+ *
+ * Always returns IRQ_WAKE_THREAD (presumably to run
+ * thunderx_lmc_threaded_isr(); the registration is outside this function).
+ * NOTE(review): nothing here checks whether the ring is full, so a slot not
+ * yet consumed can be overwritten - confirm this is acceptable.
+ */
+static irqreturn_t thunderx_lmc_err_isr(int irq, void *dev_id)
+{
+       struct mem_ctl_info *mci = dev_id;
+       struct thunderx_lmc *lmc = mci->pvt_info;
+
+       unsigned long head = ring_pos(lmc->ring_head, ARRAY_SIZE(lmc->err_ctx));
+       struct lmc_err_ctx *ctx = &lmc->err_ctx[head];
+
+       writeq(0, lmc->regs + LMC_CHAR_MASK0);
+       writeq(0, lmc->regs + LMC_CHAR_MASK2);
+       writeq(0x2, lmc->regs + LMC_ECC_PARITY_TEST);
+
+       ctx->reg_int = readq(lmc->regs + LMC_INT);
+       ctx->reg_fadr = readq(lmc->regs + LMC_FADR);
+       ctx->reg_nxm_fadr = readq(lmc->regs + LMC_NXM_FADR);
+       ctx->reg_scram_fadr = readq(lmc->regs + LMC_SCRAM_FADR);
+       ctx->reg_ecc_synd = readq(lmc->regs + LMC_ECC_SYND);
+
+       lmc->ring_head++;
+
+       atomic_set(&lmc->ecc_int, 1);
+
+       /* Clear the interrupt */
+       writeq(ctx->reg_int, lmc->regs + LMC_INT);
+
+       return IRQ_WAKE_THREAD;
+}
+/*
+ * Threaded half of the LMC error interrupt.
+ *
+ * Drains the snapshots queued by thunderx_lmc_err_isr(). For each one it
+ * logs the raw registers with dev_dbg(), formats the DIMM/rank/bank/row/col
+ * fields of reg_scram_fadr into a message, decodes reg_int through
+ * lmc_errors, and reports the event with edac_mc_handle_error(): as
+ * uncorrected when any LMC_INT_UE bit is set, otherwise as corrected when an
+ * LMC_INT_CE bit is set. The reported address is derived from reg_fadr.
+ *
+ * Returns IRQ_HANDLED, or IRQ_NONE when the message buffers cannot be
+ * allocated (queued snapshots then stay in the ring).
+ */
+static irqreturn_t thunderx_lmc_threaded_isr(int irq, void *dev_id)
+{
+       struct mem_ctl_info *mci = dev_id;
+       struct thunderx_lmc *lmc = mci->pvt_info;
+       phys_addr_t phys_addr;
+
+       unsigned long tail;
+       struct lmc_err_ctx *ctx;
+
+       irqreturn_t ret = IRQ_NONE;
+
+       char *msg;
+       char *other;
+
+       msg = kmalloc(LMC_MESSAGE_SIZE, GFP_KERNEL);
+       other =  kmalloc(LMC_OTHER_SIZE, GFP_KERNEL);
+
+       if (!msg || !other)
+               goto err_free;
+
+       while (CIRC_CNT(lmc->ring_head, lmc->ring_tail,
+               ARRAY_SIZE(lmc->err_ctx))) {
+               tail = ring_pos(lmc->ring_tail, ARRAY_SIZE(lmc->err_ctx));
+
+               ctx = &lmc->err_ctx[tail];
+
+               dev_dbg(&lmc->pdev->dev, "LMC_INT: %016llx\n",
+                       ctx->reg_int);
+               dev_dbg(&lmc->pdev->dev, "LMC_FADR: %016llx\n",
+                       ctx->reg_fadr);
+               dev_dbg(&lmc->pdev->dev, "LMC_NXM_FADR: %016llx\n",
+                       ctx->reg_nxm_fadr);
+               dev_dbg(&lmc->pdev->dev, "LMC_SCRAM_FADR: %016llx\n",
+                       ctx->reg_scram_fadr);
+               dev_dbg(&lmc->pdev->dev, "LMC_ECC_SYND: %016llx\n",
+                       ctx->reg_ecc_synd);
+
+               snprintf(msg, LMC_MESSAGE_SIZE,
+                        "DIMM %lld rank %lld bank %lld row %lld col %lld",
+                        LMC_FADR_FDIMM(ctx->reg_scram_fadr),
+                        LMC_FADR_FBUNK(ctx->reg_scram_fadr),
+                        LMC_FADR_FBANK(ctx->reg_scram_fadr),
+                        LMC_FADR_FROW(ctx->reg_scram_fadr),
+                        LMC_FADR_FCOL(ctx->reg_scram_fadr));
+
+               decode_register(other, LMC_OTHER_SIZE, lmc_errors,
+                               ctx->reg_int);
+
+               phys_addr = thunderx_faddr_to_phys(ctx->reg_fadr, lmc);
+
+               if (ctx->reg_int & LMC_INT_UE)
+                       edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+                                            phys_to_pfn(phys_addr),
+                                            offset_in_page(phys_addr),
+                                            0, -1, -1, -1, msg, other);
+               else if (ctx->reg_int & LMC_INT_CE)
+                       edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+                                            phys_to_pfn(phys_addr),
+                                            offset_in_page(phys_addr),
+                                            0, -1, -1, -1, msg, other);
+
+               lmc->ring_tail++;
+       }
+
+       ret = IRQ_HANDLED;
+
+err_free:
+       kfree(msg);
+       kfree(other);
+
+       return ret;
+}
+
+#ifdef CONFIG_PM
+/*
+ * Legacy PCI suspend hook for the LMC device.
+ *
+ * Saves the PCI state, disables the device and then moves it to the power
+ * state that pci_choose_state() picks for @state. Always returns 0.
+ */
+static int thunderx_lmc_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+       pci_power_t target;
+
+       pci_save_state(pdev);
+       pci_disable_device(pdev);
+
+       target = pci_choose_state(pdev, state);
+       pci_set_power_state(pdev, target);
+
+       return 0;
+}
+
+/*
+ * Legacy PCI resume hook for the LMC device.
+ *
+ * Puts the device back into D0, calls pci_enable_wake() with enable == 0
+ * for D0 and restores the PCI state saved by the suspend hook.
+ * Always returns 0.
+ */
+static int thunderx_lmc_resume(struct pci_dev *pdev)
+{
+       pci_set_power_state(pdev, PCI_D0);
+       pci_enable_wake(pdev, PCI_D0, 0);
+       pci_restore_state(pdev);
+
+       return 0;
+}
+#endif
+
+/* PCI IDs claimed by the LMC driver; the all-zero entry ends the table. */
+static const struct pci_device_id thunderx_lmc_pci_tbl[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_LMC) },
+       { 0, },
+};
+
+/*
+ * Build the EDAC memory-controller index for @pdev.
+ *
+ * The index is the PCI function number plus the device's NUMA node shifted
+ * left by three; a negative node (as returned by dev_to_node()) counts as 0.
+ */
+static inline int pci_dev_to_mc_idx(struct pci_dev *pdev)
+{
+       int node = dev_to_node(&pdev->dev);
+       int func = PCI_FUNC(pdev->devfn);
+       int node_part = max(node, 0) << 3;
+
+       return func + node_part;
+}
+
+/*
+ * Probe one ThunderX LMC PCI function.
+ *
+ * Enables the device, maps BAR 0, allocates an EDAC memory controller
+ * instance and fills it in, enables the single MSI-X vector and installs
+ * the hard/threaded interrupt handlers, derives the address decoding
+ * parameters from LMC_CONTROL, LMC_DDR_PLL_CTL, LMC_CONFIG and the
+ * L2C_CTL alias bit, registers the controller with the EDAC core and
+ * finally acknowledges pending LMC interrupts and enables them.
+ *
+ * Returns 0 on success or a negative errno. Once the EDAC structure has
+ * been allocated every failure path releases it again via err_free.
+ */
+static int thunderx_lmc_probe(struct pci_dev *pdev,
+                               const struct pci_device_id *id)
+{
+       struct thunderx_lmc *lmc;
+       struct edac_mc_layer layer;
+       struct mem_ctl_info *mci;
+       u64 lmc_control, lmc_ddr_pll_ctl, lmc_config;
+       int ret;
+       u64 lmc_int;
+       void __iomem *l2c_ioaddr;
+
+       layer.type = EDAC_MC_LAYER_SLOT;
+       layer.size = 2;
+       layer.is_virt_csrow = false;
+
+       ret = pcim_enable_device(pdev);
+       if (ret) {
+               dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
+               return ret;
+       }
+
+       ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_lmc");
+       if (ret) {
+               dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
+               return ret;
+       }
+
+       mci = edac_mc_alloc(pci_dev_to_mc_idx(pdev), 1, &layer,
+                           sizeof(struct thunderx_lmc));
+       if (!mci)
+               return -ENOMEM;
+
+       mci->pdev = &pdev->dev;
+       lmc = mci->pvt_info;
+
+       pci_set_drvdata(pdev, mci);
+
+       lmc->regs = pcim_iomap_table(pdev)[0];
+
+       lmc_control = readq(lmc->regs + LMC_CONTROL);
+       lmc_ddr_pll_ctl = readq(lmc->regs + LMC_DDR_PLL_CTL);
+       lmc_config = readq(lmc->regs + LMC_CONFIG);
+
+       /* Memory type: registered vs. unbuffered, DDR4 vs. DDR3. */
+       if (lmc_control & LMC_CONTROL_RDIMM) {
+               mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4,
+                                          lmc_ddr_pll_ctl) ?
+                               MEM_RDDR4 : MEM_RDDR3;
+       } else {
+               mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4,
+                                          lmc_ddr_pll_ctl) ?
+                               MEM_DDR4 : MEM_DDR3;
+       }
+
+       mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
+       mci->edac_cap = EDAC_FLAG_SECDED;
+
+       mci->mod_name = "thunderx-lmc";
+       mci->mod_ver = "1";
+       mci->ctl_name = "thunderx-lmc";
+       mci->dev_name = dev_name(&pdev->dev);
+       mci->scrub_mode = SCRUB_NONE;
+
+       lmc->pdev = pdev;
+       lmc->msix_ent.entry = 0;
+
+       lmc->ring_head = 0;
+       lmc->ring_tail = 0;
+
+       ret = pci_enable_msix_exact(pdev, &lmc->msix_ent, 1);
+       if (ret) {
+               dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
+               goto err_free;
+       }
+
+       ret = devm_request_threaded_irq(&pdev->dev, lmc->msix_ent.vector,
+                                       thunderx_lmc_err_isr,
+                                       thunderx_lmc_threaded_isr, 0,
+                                       "[EDAC] ThunderX LMC", mci);
+       if (ret) {
+               dev_err(&pdev->dev, "Cannot set ISR: %d\n", ret);
+               goto err_free;
+       }
+
+       lmc->node = FIELD_GET(THUNDERX_NODE, pci_resource_start(pdev, 0));
+
+       /* Bit positions used when turning a fault address into a physical one. */
+       lmc->xbits = thunderx_get_num_lmcs(lmc->node) >> 1;
+       lmc->bank_width = (FIELD_GET(LMC_DDR_PLL_CTL_DDR4, lmc_ddr_pll_ctl) &&
+                          FIELD_GET(LMC_CONFIG_BG2, lmc_config)) ? 4 : 3;
+
+       lmc->pbank_lsb = (lmc_config >> 5) & 0xf;
+       lmc->dimm_lsb  = 28 + lmc->pbank_lsb + lmc->xbits;
+       lmc->rank_lsb = lmc->dimm_lsb;
+       lmc->rank_lsb -= FIELD_GET(LMC_CONFIG_RANK_ENA, lmc_config) ? 1 : 0;
+       lmc->bank_lsb = 7 + lmc->xbits;
+       lmc->row_lsb = 14 + LMC_CONFIG_ROW_LSB(lmc_config) + lmc->xbits;
+
+       lmc->col_hi_lsb = lmc->bank_lsb + lmc->bank_width;
+
+       lmc->xor_bank = lmc_control & LMC_CONTROL_XOR_BANK;
+
+       l2c_ioaddr = ioremap(L2C_CTL | FIELD_PREP(THUNDERX_NODE, lmc->node),
+                            PAGE_SIZE);
+
+       if (!l2c_ioaddr) {
+               dev_err(&pdev->dev, "Cannot map L2C_CTL\n");
+               /*
+                * ret still holds 0 from the successful IRQ request above;
+                * without an explicit error code probe would report success
+                * after freeing mci.
+                */
+               ret = -ENOMEM;
+               goto err_free;
+       }
+
+       lmc->l2c_alias = !(readq(l2c_ioaddr) & L2C_CTL_DISIDXALIAS);
+
+       iounmap(l2c_ioaddr);
+
+       ret = edac_mc_add_mc(mci);
+       if (ret) {
+               dev_err(&pdev->dev, "Cannot add the MC: %d\n", ret);
+               goto err_free;
+       }
+
+       /* Write back whatever is pending in LMC_INT, then enable all sources. */
+       lmc_int = readq(lmc->regs + LMC_INT);
+       writeq(lmc_int, lmc->regs + LMC_INT);
+
+       writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1S);
+
+       if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
+               ret = thunderx_create_debugfs_nodes(mci->debugfs,
+                                                   lmc_dfs_ents,
+                                                   lmc,
+                                                   ARRAY_SIZE(lmc_dfs_ents));
+
+               /* Missing debugfs nodes are only warned about, not fatal. */
+               if (ret != ARRAY_SIZE(lmc_dfs_ents)) {
+                       dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
+                                ret, ret >= 0 ? " created" : "");
+               }
+       }
+
+       return 0;
+
+err_free:
+       pci_set_drvdata(pdev, NULL);
+       edac_mc_free(mci);
+
+       return ret;
+}
+
+/*
+ * Tear down an LMC instance: mask every LMC interrupt source through the
+ * W1C enable register, unregister the controller from the EDAC core and
+ * free the EDAC structure stored as driver data.
+ */
+static void thunderx_lmc_remove(struct pci_dev *pdev)
+{
+       struct mem_ctl_info *mci;
+       struct thunderx_lmc *priv;
+
+       mci = pci_get_drvdata(pdev);
+       priv = mci->pvt_info;
+
+       writeq(LMC_INT_ENA_ALL, priv->regs + LMC_INT_ENA_W1C);
+
+       edac_mc_del_mc(&pdev->dev);
+       edac_mc_free(mci);
+}
+
+MODULE_DEVICE_TABLE(pci, thunderx_lmc_pci_tbl);
+
+/*
+ * PCI driver glue for the LMC device. The legacy suspend/resume hooks are
+ * only wired up when CONFIG_PM is set, which is also the only configuration
+ * in which they are defined.
+ */
+static struct pci_driver thunderx_lmc_driver = {
+       .name     = "thunderx_lmc_edac",
+       .probe    = thunderx_lmc_probe,
+       .remove   = thunderx_lmc_remove,
+#ifdef CONFIG_PM
+       .suspend  = thunderx_lmc_suspend,
+       .resume   = thunderx_lmc_resume,
+#endif
+       .id_table = thunderx_lmc_pci_tbl,
+};
+
+/*---------------------- OCX driver ---------------------------------*/
+
+#define PCI_DEVICE_ID_THUNDER_OCX 0xa013
+
+/*
+ * MSI-X vectors: one per link plus one more; the probe routine attaches
+ * the "common" handlers to the last vector (index OCX_LINK_INTS).
+ */
+#define OCX_LINK_INTS          3
+#define OCX_INTS               (OCX_LINK_INTS + 1)
+/* Number of RX lanes and of per-lane statistics registers read here. */
+#define OCX_RX_LANES           24
+#define OCX_RX_LANE_STATS      15
+
+/* Offsets of the common interrupt status, set and enable registers. */
+#define OCX_COM_INT            0x100
+#define OCX_COM_INT_W1S                0x108
+#define OCX_COM_INT_ENA_W1S    0x110
+#define OCX_COM_INT_ENA_W1C    0x118
+
+/* OCX_COM_INT bits; the low 24 bits carry one flag per RX lane. */
+#define OCX_COM_IO_BADID               BIT(54)
+#define OCX_COM_MEM_BADID              BIT(53)
+#define OCX_COM_COPR_BADID             BIT(52)
+#define OCX_COM_WIN_REQ_BADID          BIT(51)
+#define OCX_COM_WIN_REQ_TOUT           BIT(50)
+#define OCX_COM_RX_LANE                        GENMASK(23, 0)
+
+/* OCX_COM_INT bits that the threaded handler reports as corrected errors. */
+#define OCX_COM_INT_CE                 (OCX_COM_IO_BADID      | \
+                                        OCX_COM_MEM_BADID     | \
+                                        OCX_COM_COPR_BADID    | \
+                                        OCX_COM_WIN_REQ_BADID | \
+                                        OCX_COM_WIN_REQ_TOUT)
+
+/*
+ * Descriptions for the OCX_COM_INT error bits, handed to decode_register()
+ * by the common threaded handler. The {0, 0, NULL} entry ends the table.
+ */
+static const struct error_descr ocx_com_errors[] = {
+       {
+               .type  = ERR_CORRECTED,
+               .mask  = OCX_COM_IO_BADID,
+               .descr = "Invalid IO transaction node ID",
+       },
+       {
+               .type  = ERR_CORRECTED,
+               .mask  = OCX_COM_MEM_BADID,
+               .descr = "Invalid memory transaction node ID",
+       },
+       {
+               .type  = ERR_CORRECTED,
+               .mask  = OCX_COM_COPR_BADID,
+               .descr = "Invalid coprocessor transaction node ID",
+       },
+       {
+               .type  = ERR_CORRECTED,
+               .mask  = OCX_COM_WIN_REQ_BADID,
+               .descr = "Invalid SLI transaction node ID",
+       },
+       {
+               .type  = ERR_CORRECTED,
+               .mask  = OCX_COM_WIN_REQ_TOUT,
+               .descr = "Window/core request timeout",
+       },
+       {0, 0, NULL},
+};
+
+/* Per-link interrupt registers; link x lives 8 bytes after link x - 1. */
+#define OCX_COM_LINKX_INT(x)           (0x120 + (x) * 8)
+#define OCX_COM_LINKX_INT_W1S(x)       (0x140 + (x) * 8)
+#define OCX_COM_LINKX_INT_ENA_W1S(x)   (0x160 + (x) * 8)
+#define OCX_COM_LINKX_INT_ENA_W1C(x)   (0x180 + (x) * 8)
+
+/* Bits of the per-link interrupt register. */
+#define OCX_COM_LINK_BAD_WORD                  BIT(13)
+#define OCX_COM_LINK_ALIGN_FAIL                        BIT(12)
+#define OCX_COM_LINK_ALIGN_DONE                        BIT(11)
+#define OCX_COM_LINK_UP                                BIT(10)
+#define OCX_COM_LINK_STOP                      BIT(9)
+#define OCX_COM_LINK_BLK_ERR                   BIT(8)
+#define OCX_COM_LINK_REINIT                    BIT(7)
+#define OCX_COM_LINK_LNK_DATA                  BIT(6)
+#define OCX_COM_LINK_RXFIFO_DBE                        BIT(5)
+#define OCX_COM_LINK_RXFIFO_SBE                        BIT(4)
+#define OCX_COM_LINK_TXFIFO_DBE                        BIT(3)
+#define OCX_COM_LINK_TXFIFO_SBE                        BIT(2)
+#define OCX_COM_LINK_REPLAY_DBE                        BIT(1)
+#define OCX_COM_LINK_REPLAY_SBE                        BIT(0)
+
+/*
+ * Descriptions for the per-link interrupt bits, handed to decode_register()
+ * by the link threaded handler. Corrected entries come first, uncorrected
+ * ones after; the {0, 0, NULL} entry ends the table. The table length also
+ * sizes OCX_OTHER_SIZE.
+ */
+static const struct error_descr ocx_com_link_errors[] = {
+       {
+               .type  = ERR_CORRECTED,
+               .mask  = OCX_COM_LINK_REPLAY_SBE,
+               .descr = "Replay buffer single-bit error",
+       },
+       {
+               .type  = ERR_CORRECTED,
+               .mask  = OCX_COM_LINK_TXFIFO_SBE,
+               .descr = "TX FIFO single-bit error",
+       },
+       {
+               .type  = ERR_CORRECTED,
+               .mask  = OCX_COM_LINK_RXFIFO_SBE,
+               .descr = "RX FIFO single-bit error",
+       },
+       {
+               .type  = ERR_CORRECTED,
+               .mask  = OCX_COM_LINK_BLK_ERR,
+               .descr = "Block code error",
+       },
+       {
+               .type  = ERR_CORRECTED,
+               .mask  = OCX_COM_LINK_ALIGN_FAIL,
+               .descr = "Link alignment failure",
+       },
+       {
+               .type  = ERR_CORRECTED,
+               .mask  = OCX_COM_LINK_BAD_WORD,
+               .descr = "Bad code word",
+       },
+       {
+               .type  = ERR_UNCORRECTED,
+               .mask  = OCX_COM_LINK_REPLAY_DBE,
+               .descr = "Replay buffer double-bit error",
+       },
+       {
+               .type  = ERR_UNCORRECTED,
+               .mask  = OCX_COM_LINK_TXFIFO_DBE,
+               .descr = "TX FIFO double-bit error",
+       },
+       {
+               .type  = ERR_UNCORRECTED,
+               .mask  = OCX_COM_LINK_RXFIFO_DBE,
+               .descr = "RX FIFO double-bit error",
+       },
+       {
+               .type  = ERR_UNCORRECTED,
+               .mask  = OCX_COM_LINK_STOP,
+               .descr = "Link stopped",
+       },
+       {0, 0, NULL},
+};
+
+/* Link interrupt bits the threaded handler reports as uncorrected errors. */
+#define OCX_COM_LINK_INT_UE       (OCX_COM_LINK_REPLAY_DBE | \
+                                  OCX_COM_LINK_TXFIFO_DBE | \
+                                  OCX_COM_LINK_RXFIFO_DBE | \
+                                  OCX_COM_LINK_STOP)
+
+/* Link interrupt bits reported as corrected errors when no UE bit is set. */
+#define OCX_COM_LINK_INT_CE       (OCX_COM_LINK_REPLAY_SBE | \
+                                  OCX_COM_LINK_TXFIFO_SBE | \
+                                  OCX_COM_LINK_RXFIFO_SBE | \
+                                  OCX_COM_LINK_BLK_ERR    | \
+                                  OCX_COM_LINK_ALIGN_FAIL | \
+                                  OCX_COM_LINK_BAD_WORD)
+
+/*
+ * Per-lane registers: lane x occupies a 0x100-byte window starting at
+ * 0x8000; statistics register y of a lane is 8 bytes after register y - 1.
+ */
+#define OCX_LNE_INT(x)                 (0x8018 + (x) * 0x100)
+#define OCX_LNE_INT_EN(x)              (0x8020 + (x) * 0x100)
+#define OCX_LNE_BAD_CNT(x)             (0x8028 + (x) * 0x100)
+#define OCX_LNE_CFG(x)                 (0x8000 + (x) * 0x100)
+#define OCX_LNE_STAT(x, y)             (0x8040 + (x) * 0x100 + (y) * 8)
+
+/* OCX_LNE_CFG bits. */
+#define OCX_LNE_CFG_RX_BDRY_LOCK_DIS           BIT(8)
+#define OCX_LNE_CFG_RX_STAT_WRAP_DIS           BIT(2)
+#define OCX_LNE_CFG_RX_STAT_RDCLR              BIT(1)
+#define OCX_LNE_CFG_RX_STAT_ENA                        BIT(0)
+
+
+/* OCX_LNE_INT bits. */
+#define OCX_LANE_BAD_64B67B                    BIT(8)
+#define OCX_LANE_DSKEW_FIFO_OVFL               BIT(5)
+#define OCX_LANE_SCRM_SYNC_LOSS                        BIT(4)
+#define OCX_LANE_UKWN_CNTL_WORD                        BIT(3)
+#define OCX_LANE_CRC32_ERR                     BIT(2)
+#define OCX_LANE_BDRY_SYNC_LOSS                        BIT(1)
+#define OCX_LANE_SERDES_LOCK_LOSS              BIT(0)
+
+/* Lane interrupt classification: no bit is treated as uncorrected. */
+#define OCX_COM_LANE_INT_UE       (0)
+#define OCX_COM_LANE_INT_CE       (OCX_LANE_SERDES_LOCK_LOSS | \
+                                  OCX_LANE_BDRY_SYNC_LOSS   | \
+                                  OCX_LANE_CRC32_ERR        | \
+                                  OCX_LANE_UKWN_CNTL_WORD   | \
+                                  OCX_LANE_SCRM_SYNC_LOSS   | \
+                                  OCX_LANE_DSKEW_FIFO_OVFL  | \
+                                  OCX_LANE_BAD_64B67B)
+
+/*
+ * Descriptions for the OCX_LNE_INT bits, handed to decode_register() for
+ * every lane flagged in OCX_COM_INT. All entries are corrected errors; the
+ * {0, 0, NULL} entry ends the table.
+ */
+static const struct error_descr ocx_lane_errors[] = {
+       {
+               .type  = ERR_CORRECTED,
+               .mask  = OCX_LANE_SERDES_LOCK_LOSS,
+               .descr = "RX SerDes lock lost",
+       },
+       {
+               .type  = ERR_CORRECTED,
+               .mask  = OCX_LANE_BDRY_SYNC_LOSS,
+               .descr = "RX word boundary lost",
+       },
+       {
+               .type  = ERR_CORRECTED,
+               .mask  = OCX_LANE_CRC32_ERR,
+               .descr = "CRC32 error",
+       },
+       {
+               .type  = ERR_CORRECTED,
+               .mask  = OCX_LANE_UKWN_CNTL_WORD,
+               .descr = "Unknown control word",
+       },
+       {
+               .type  = ERR_CORRECTED,
+               .mask  = OCX_LANE_SCRM_SYNC_LOSS,
+               .descr = "Scrambler synchronization lost",
+       },
+       {
+               .type  = ERR_CORRECTED,
+               .mask  = OCX_LANE_DSKEW_FIFO_OVFL,
+               .descr = "RX deskew FIFO overflow",
+       },
+       {
+               .type  = ERR_CORRECTED,
+               .mask  = OCX_LANE_BAD_64B67B,
+               .descr = "Bad 64B/67B codeword",
+       },
+       {0, 0, NULL},
+};
+
+/* Masks written to the lane, common and link interrupt enable registers. */
+#define OCX_LNE_INT_ENA_ALL            (GENMASK(9, 8) | GENMASK(6, 0))
+#define OCX_COM_INT_ENA_ALL            (GENMASK(54, 50) | GENMASK(23, 0))
+#define OCX_COM_LINKX_INT_ENA_ALL      (GENMASK(13, 12) | \
+                                        GENMASK(9, 7) | GENMASK(5, 0))
+
+/* Per-link TX/RX ECC control registers (exposed through debugfs below). */
+#define OCX_TLKX_ECC_CTL(x)            (0x10018 + (x) * 0x2000)
+#define OCX_RLKX_ECC_CTL(x)            (0x18018 + (x) * 0x2000)
+
+/*
+ * Register snapshot taken by the common hard-IRQ handler and consumed by
+ * its threaded counterpart: OCX_COM_INT plus, per RX lane, OCX_LNE_INT and
+ * lane statistics register 11.
+ */
+struct ocx_com_err_ctx {
+       u64 reg_com_int;
+       u64 reg_lane_int[OCX_RX_LANES];
+       u64 reg_lane_stat11[OCX_RX_LANES];
+};
+
+/*
+ * Snapshot taken by the link hard-IRQ handler: the link's interrupt
+ * register value and the link number (the MSI-X entry index).
+ */
+struct ocx_link_err_ctx {
+       u64 reg_com_link_int;
+       int link;
+};
+
+/*
+ * Per-device state of the OCX EDAC driver; lives in the private area of
+ * the edac_device_ctl_info allocated in probe.
+ */
+struct thunderx_ocx {
+       void __iomem *regs;             /* mapped BAR 0 */
+       int com_link;
+       struct pci_dev *pdev;
+       struct edac_device_ctl_info *edac_dev;
+
+       struct dentry *debugfs;         /* debugfs directory, removed in remove() */
+       struct msix_entry msix_ent[OCX_INTS];   /* also used as IRQ dev_id */
+
+       /* Rings filled by the hard-IRQ handlers, drained by the threaded ones. */
+       struct ocx_com_err_ctx com_err_ctx[RING_ENTRIES];
+       struct ocx_link_err_ctx link_err_ctx[RING_ENTRIES];
+
+       /* Free-running producer/consumer counters for com_err_ctx. */
+       unsigned long com_ring_head;
+       unsigned long com_ring_tail;
+
+       /* Free-running producer/consumer counters for link_err_ctx. */
+       unsigned long link_ring_head;
+       unsigned long link_ring_tail;
+};
+
+/*
+ * Sizes of the two scratch buffers the threaded handlers allocate: the
+ * full message, and the decode buffer (50 bytes per ocx_com_link_errors
+ * entry).
+ */
+#define OCX_MESSAGE_SIZE       SZ_1K
+#define OCX_OTHER_SIZE         (50 * ARRAY_SIZE(ocx_com_link_errors))
+
+/*
+ * Hard-IRQ half of the OCX common interrupt (the threaded half is
+ * thunderx_ocx_com_threaded_isr).
+ *
+ * Snapshots OCX_COM_INT and, for every RX lane, OCX_LNE_INT and statistics
+ * register 11 into the ring slot selected by com_ring_head, writes the
+ * captured interrupt values back to the registers they were read from
+ * (presumably a write-1-to-clear acknowledge - confirm against the hardware
+ * manual), advances the ring head and requests the threaded handler.
+ */
+static irqreturn_t thunderx_ocx_com_isr(int irq, void *irq_id)
+{
+       struct msix_entry *msix = irq_id;
+       struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
+                                               msix_ent[msix->entry]);
+       void __iomem *regs = ocx->regs;
+       struct ocx_com_err_ctx *snap;
+       unsigned long slot;
+       int i;
+
+       slot = ring_pos(ocx->com_ring_head, ARRAY_SIZE(ocx->com_err_ctx));
+       snap = &ocx->com_err_ctx[slot];
+
+       snap->reg_com_int = readq(regs + OCX_COM_INT);
+
+       for (i = 0; i < OCX_RX_LANES; i++) {
+               u64 lane_int = readq(regs + OCX_LNE_INT(i));
+
+               snap->reg_lane_int[i] = lane_int;
+               snap->reg_lane_stat11[i] = readq(regs + OCX_LNE_STAT(i, 11));
+
+               writeq(lane_int, regs + OCX_LNE_INT(i));
+       }
+
+       writeq(snap->reg_com_int, regs + OCX_COM_INT);
+
+       ocx->com_ring_head++;
+
+       return IRQ_WAKE_THREAD;
+}
+
+/*
+ * Threaded half of the OCX common interrupt.
+ *
+ * Drains the com_err_ctx ring filled by thunderx_ocx_com_isr. For each
+ * entry it builds a message with the raw OCX_COM_INT value, its decoded
+ * error names and, for every lane flagged in the low bits, that lane's
+ * interrupt/statistics values and decoded errors. Entries with a bit of
+ * OCX_COM_INT_CE set are reported to EDAC as corrected errors.
+ *
+ * Returns IRQ_HANDLED, or IRQ_NONE when the scratch buffers cannot be
+ * allocated (the ring is then left untouched).
+ */
+static irqreturn_t thunderx_ocx_com_threaded_isr(int irq, void *irq_id)
+{
+       struct msix_entry *msix = irq_id;
+       struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
+                                               msix_ent[msix->entry]);
+
+       irqreturn_t ret = IRQ_NONE;
+
+       unsigned long tail;
+       struct ocx_com_err_ctx *ctx;
+       int lane;
+       char *msg;
+       char *other;
+
+       msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
+       other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
+
+       if (!msg || !other)
+               goto err_free;
+
+       while (CIRC_CNT(ocx->com_ring_head, ocx->com_ring_tail,
+                       ARRAY_SIZE(ocx->com_err_ctx))) {
+               tail = ring_pos(ocx->com_ring_tail,
+                               ARRAY_SIZE(ocx->com_err_ctx));
+               ctx = &ocx->com_err_ctx[tail];
+
+               snprintf(msg, OCX_MESSAGE_SIZE, "%s: OCX_COM_INT: %016llx",
+                       ocx->edac_dev->ctl_name, ctx->reg_com_int);
+
+               decode_register(other, OCX_OTHER_SIZE,
+                               ocx_com_errors, ctx->reg_com_int);
+
+               /*
+                * strlcat() bounds the append by the size of msg. The
+                * previous strncat(msg, other, OCX_MESSAGE_SIZE) limited only
+                * the number of appended bytes and could run past the end of
+                * msg once several lanes had been added.
+                */
+               strlcat(msg, other, OCX_MESSAGE_SIZE);
+
+               for (lane = 0; lane < OCX_RX_LANES; lane++)
+                       if (ctx->reg_com_int & BIT(lane)) {
+                               snprintf(other, OCX_OTHER_SIZE,
+                                        "\n\tOCX_LNE_INT[%02d]: %016llx OCX_LNE_STAT11[%02d]: %016llx",
+                                        lane, ctx->reg_lane_int[lane],
+                                        lane, ctx->reg_lane_stat11[lane]);
+
+                               strlcat(msg, other, OCX_MESSAGE_SIZE);
+
+                               decode_register(other, OCX_OTHER_SIZE,
+                                               ocx_lane_errors,
+                                               ctx->reg_lane_int[lane]);
+                               strlcat(msg, other, OCX_MESSAGE_SIZE);
+                       }
+
+               if (ctx->reg_com_int & OCX_COM_INT_CE)
+                       edac_device_handle_ce(ocx->edac_dev, 0, 0, msg);
+
+               ocx->com_ring_tail++;
+       }
+
+       ret = IRQ_HANDLED;
+
+err_free:
+       kfree(other);
+       kfree(msg);
+
+       return ret;
+}
+
+/*
+ * Hard-IRQ half of a per-link OCX interrupt.
+ *
+ * Records the link number (the MSI-X entry index) and that link's
+ * interrupt register value in the ring slot selected by link_ring_head,
+ * writes the value back to the same register, advances the ring head and
+ * requests the threaded handler.
+ */
+static irqreturn_t thunderx_ocx_lnk_isr(int irq, void *irq_id)
+{
+       struct msix_entry *msix = irq_id;
+       struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
+                                               msix_ent[msix->entry]);
+       struct ocx_link_err_ctx *snap;
+       unsigned long slot;
+       void __iomem *link_int;
+
+       slot = ring_pos(ocx->link_ring_head, ARRAY_SIZE(ocx->link_err_ctx));
+       snap = &ocx->link_err_ctx[slot];
+
+       snap->link = msix->entry;
+       link_int = ocx->regs + OCX_COM_LINKX_INT(snap->link);
+
+       snap->reg_com_link_int = readq(link_int);
+       writeq(snap->reg_com_link_int, link_int);
+
+       ocx->link_ring_head++;
+
+       return IRQ_WAKE_THREAD;
+}
+
+/*
+ * Threaded half of the per-link OCX interrupts.
+ *
+ * Drains the link_err_ctx ring filled by thunderx_ocx_lnk_isr. For each
+ * entry it formats the raw link interrupt value together with its decoded
+ * error names and reports it to EDAC: as an uncorrected error when a bit
+ * of OCX_COM_LINK_INT_UE is set, otherwise as a corrected error when a bit
+ * of OCX_COM_LINK_INT_CE is set.
+ *
+ * Returns IRQ_HANDLED, or IRQ_NONE when the scratch buffers cannot be
+ * allocated (the ring is then left untouched).
+ */
+static irqreturn_t thunderx_ocx_lnk_threaded_isr(int irq, void *irq_id)
+{
+       struct msix_entry *msix = irq_id;
+       struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
+                                               msix_ent[msix->entry]);
+       irqreturn_t ret = IRQ_NONE;
+       unsigned long tail;
+       struct ocx_link_err_ctx *ctx;
+
+       char *msg;
+       char *other;
+
+       msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
+       other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
+
+       if (!msg || !other)
+               goto err_free;
+
+       while (CIRC_CNT(ocx->link_ring_head, ocx->link_ring_tail,
+                       ARRAY_SIZE(ocx->link_err_ctx))) {
+               /*
+                * Consume from the tail. Indexing with link_ring_head would
+                * pick the slot the producer fills next instead of the
+                * entry that was queued.
+                */
+               tail = ring_pos(ocx->link_ring_tail,
+                               ARRAY_SIZE(ocx->link_err_ctx));
+
+               ctx = &ocx->link_err_ctx[tail];
+
+               snprintf(msg, OCX_MESSAGE_SIZE,
+                        "%s: OCX_COM_LINK_INT[%d]: %016llx",
+                        ocx->edac_dev->ctl_name,
+                        ctx->link, ctx->reg_com_link_int);
+
+               decode_register(other, OCX_OTHER_SIZE,
+                               ocx_com_link_errors, ctx->reg_com_link_int);
+
+               /* Bound the append by the size of msg, not by a byte count. */
+               strlcat(msg, other, OCX_MESSAGE_SIZE);
+
+               if (ctx->reg_com_link_int & OCX_COM_LINK_INT_UE)
+                       edac_device_handle_ue(ocx->edac_dev, 0, 0, msg);
+               else if (ctx->reg_com_link_int & OCX_COM_LINK_INT_CE)
+                       edac_device_handle_ce(ocx->edac_dev, 0, 0, msg);
+
+               ocx->link_ring_tail++;
+       }
+
+       ret = IRQ_HANDLED;
+err_free:
+       kfree(other);
+       kfree(msg);
+
+       return ret;
+}
+
+/*
+ * Declare one debugfs register attribute for the OCX device. Each use
+ * below yields an object named debugfs_<_name>, which ocx_dfs_ents then
+ * collects.
+ */
+#define OCX_DEBUGFS_ATTR(_name, _reg)  DEBUGFS_REG_ATTR(ocx, _name, _reg)
+
+/* TX link ECC control, links 0-2. */
+OCX_DEBUGFS_ATTR(tlk0_ecc_ctl, OCX_TLKX_ECC_CTL(0));
+OCX_DEBUGFS_ATTR(tlk1_ecc_ctl, OCX_TLKX_ECC_CTL(1));
+OCX_DEBUGFS_ATTR(tlk2_ecc_ctl, OCX_TLKX_ECC_CTL(2));
+
+/* RX link ECC control, links 0-2. */
+OCX_DEBUGFS_ATTR(rlk0_ecc_ctl, OCX_RLKX_ECC_CTL(0));
+OCX_DEBUGFS_ATTR(rlk1_ecc_ctl, OCX_RLKX_ECC_CTL(1));
+OCX_DEBUGFS_ATTR(rlk2_ecc_ctl, OCX_RLKX_ECC_CTL(2));
+
+/* Per-link interrupt W1S registers, links 0-2. */
+OCX_DEBUGFS_ATTR(com_link0_int, OCX_COM_LINKX_INT_W1S(0));
+OCX_DEBUGFS_ATTR(com_link1_int, OCX_COM_LINKX_INT_W1S(1));
+OCX_DEBUGFS_ATTR(com_link2_int, OCX_COM_LINKX_INT_W1S(2));
+
+/* Per-lane OCX_LNE_BAD_CNT registers, lanes 0-23. */
+OCX_DEBUGFS_ATTR(lne00_badcnt, OCX_LNE_BAD_CNT(0));
+OCX_DEBUGFS_ATTR(lne01_badcnt, OCX_LNE_BAD_CNT(1));
+OCX_DEBUGFS_ATTR(lne02_badcnt, OCX_LNE_BAD_CNT(2));
+OCX_DEBUGFS_ATTR(lne03_badcnt, OCX_LNE_BAD_CNT(3));
+OCX_DEBUGFS_ATTR(lne04_badcnt, OCX_LNE_BAD_CNT(4));
+OCX_DEBUGFS_ATTR(lne05_badcnt, OCX_LNE_BAD_CNT(5));
+OCX_DEBUGFS_ATTR(lne06_badcnt, OCX_LNE_BAD_CNT(6));
+OCX_DEBUGFS_ATTR(lne07_badcnt, OCX_LNE_BAD_CNT(7));
+
+OCX_DEBUGFS_ATTR(lne08_badcnt, OCX_LNE_BAD_CNT(8));
+OCX_DEBUGFS_ATTR(lne09_badcnt, OCX_LNE_BAD_CNT(9));
+OCX_DEBUGFS_ATTR(lne10_badcnt, OCX_LNE_BAD_CNT(10));
+OCX_DEBUGFS_ATTR(lne11_badcnt, OCX_LNE_BAD_CNT(11));
+OCX_DEBUGFS_ATTR(lne12_badcnt, OCX_LNE_BAD_CNT(12));
+OCX_DEBUGFS_ATTR(lne13_badcnt, OCX_LNE_BAD_CNT(13));
+OCX_DEBUGFS_ATTR(lne14_badcnt, OCX_LNE_BAD_CNT(14));
+OCX_DEBUGFS_ATTR(lne15_badcnt, OCX_LNE_BAD_CNT(15));
+
+OCX_DEBUGFS_ATTR(lne16_badcnt, OCX_LNE_BAD_CNT(16));
+OCX_DEBUGFS_ATTR(lne17_badcnt, OCX_LNE_BAD_CNT(17));
+OCX_DEBUGFS_ATTR(lne18_badcnt, OCX_LNE_BAD_CNT(18));
+OCX_DEBUGFS_ATTR(lne19_badcnt, OCX_LNE_BAD_CNT(19));
+OCX_DEBUGFS_ATTR(lne20_badcnt, OCX_LNE_BAD_CNT(20));
+OCX_DEBUGFS_ATTR(lne21_badcnt, OCX_LNE_BAD_CNT(21));
+OCX_DEBUGFS_ATTR(lne22_badcnt, OCX_LNE_BAD_CNT(22));
+OCX_DEBUGFS_ATTR(lne23_badcnt, OCX_LNE_BAD_CNT(23));
+
+/* Common interrupt W1S register. */
+OCX_DEBUGFS_ATTR(com_int, OCX_COM_INT_W1S);
+
+/*
+ * All OCX debugfs attributes declared above; probe passes this array and
+ * its length to thunderx_create_debugfs_nodes() when CONFIG_EDAC_DEBUG is
+ * enabled.
+ * NOTE(review): the array has external linkage; it looks file-local and
+ * could probably be static - confirm there is no user outside this file.
+ */
+struct debugfs_entry *ocx_dfs_ents[] = {
+       &debugfs_tlk0_ecc_ctl,
+       &debugfs_tlk1_ecc_ctl,
+       &debugfs_tlk2_ecc_ctl,
+
+       &debugfs_rlk0_ecc_ctl,
+       &debugfs_rlk1_ecc_ctl,
+       &debugfs_rlk2_ecc_ctl,
+
+       &debugfs_com_link0_int,
+       &debugfs_com_link1_int,
+       &debugfs_com_link2_int,
+
+       &debugfs_lne00_badcnt,
+       &debugfs_lne01_badcnt,
+       &debugfs_lne02_badcnt,
+       &debugfs_lne03_badcnt,
+       &debugfs_lne04_badcnt,
+       &debugfs_lne05_badcnt,
+       &debugfs_lne06_badcnt,
+       &debugfs_lne07_badcnt,
+       &debugfs_lne08_badcnt,
+       &debugfs_lne09_badcnt,
+       &debugfs_lne10_badcnt,
+       &debugfs_lne11_badcnt,
+       &debugfs_lne12_badcnt,
+       &debugfs_lne13_badcnt,
+       &debugfs_lne14_badcnt,
+       &debugfs_lne15_badcnt,
+       &debugfs_lne16_badcnt,
+       &debugfs_lne17_badcnt,
+       &debugfs_lne18_badcnt,
+       &debugfs_lne19_badcnt,
+       &debugfs_lne20_badcnt,
+       &debugfs_lne21_badcnt,
+       &debugfs_lne22_badcnt,
+       &debugfs_lne23_badcnt,
+
+       &debugfs_com_int,
+};
+
+/* PCI IDs claimed by the OCX driver; the all-zero entry ends the table. */
+static const struct pci_device_id thunderx_ocx_pci_tbl[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_OCX) },
+       { 0, },
+};
+
+/*
+ * Reset the RX statistics of every OCX lane.
+ *
+ * For each lane, sets OCX_LNE_CFG_RX_STAT_RDCLR and clears
+ * OCX_LNE_CFG_RX_STAT_ENA in OCX_LNE_CFG, then reads every statistics
+ * register once and discards the value (with RDCLR set the read itself is
+ * presumably what clears the counter - confirm against the hardware manual).
+ */
+static void thunderx_ocx_clearstats(struct thunderx_ocx *ocx)
+{
+       int lane, stat;
+       /*
+        * readq()/writeq() operate on 64-bit values; holding the register in
+        * an int would truncate it and sign-extend on the write-back.
+        */
+       u64 cfg;
+
+       for (lane = 0; lane < OCX_RX_LANES; lane++) {
+               cfg = readq(ocx->regs + OCX_LNE_CFG(lane));
+               cfg |= OCX_LNE_CFG_RX_STAT_RDCLR;
+               cfg &= ~OCX_LNE_CFG_RX_STAT_ENA;
+               writeq(cfg, ocx->regs + OCX_LNE_CFG(lane));
+
+               for (stat = 0; stat < OCX_RX_LANE_STATS; stat++)
+                       readq(ocx->regs + OCX_LNE_STAT(lane, stat));
+       }
+}
+
+/*
+ * Probe the ThunderX OCX PCI device.
+ *
+ * Enables the device, maps BAR 0, allocates an EDAC device whose private
+ * area holds struct thunderx_ocx, enables OCX_INTS MSI-X vectors and
+ * installs the link handlers on the first OCX_LINK_INTS of them and the
+ * common handlers on the last one, registers the EDAC device, optionally
+ * creates the debugfs nodes, clears the lane statistics and finally
+ * acknowledges and enables the lane, link and common interrupts.
+ *
+ * Returns 0 on success or a negative errno; the EDAC device is freed on
+ * every failure after its allocation.
+ */
+static int thunderx_ocx_probe(struct pci_dev *pdev,
+                             const struct pci_device_id *id)
+{
+       struct thunderx_ocx *ocx;
+       struct edac_device_ctl_info *edac_dev;
+       char name[32];
+       int idx;
+       int i;
+       int ret;
+       u64 reg;
+
+       ret = pcim_enable_device(pdev);
+       if (ret) {
+               dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
+               return ret;
+       }
+
+       ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_ocx");
+       if (ret) {
+               dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
+               return ret;
+       }
+
+       idx = edac_device_alloc_index();
+       snprintf(name, sizeof(name), "OCX%d", idx);
+       edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_ocx),
+                                             name, 1, "CCPI", 1,
+                                             0, NULL, 0, idx);
+       if (!edac_dev) {
+               /* Set the code first so the message does not print a stale 0. */
+               ret = -ENOMEM;
+               dev_err(&pdev->dev, "Cannot allocate EDAC device: %d\n", ret);
+               return ret;
+       }
+       ocx = edac_dev->pvt_info;
+       ocx->edac_dev = edac_dev;
+       ocx->com_ring_head = 0;
+       ocx->com_ring_tail = 0;
+       ocx->link_ring_head = 0;
+       ocx->link_ring_tail = 0;
+
+       ocx->regs = pcim_iomap_table(pdev)[0];
+       if (!ocx->regs) {
+               ret = -ENODEV;
+               dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
+               goto err_free;
+       }
+
+       ocx->pdev = pdev;
+
+       for (i = 0; i < OCX_INTS; i++) {
+               ocx->msix_ent[i].entry = i;
+               ocx->msix_ent[i].vector = 0;
+       }
+
+       ret = pci_enable_msix_exact(pdev, ocx->msix_ent, OCX_INTS);
+       if (ret) {
+               dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
+               goto err_free;
+       }
+
+       /*
+        * Vectors 0..OCX_LINK_INTS-1 are the per-link interrupts; the one
+        * after them is the common interrupt. The msix_entry itself is the
+        * dev_id so the handlers can find both the device and the link.
+        */
+       for (i = 0; i < OCX_INTS; i++) {
+               ret = devm_request_threaded_irq(&pdev->dev,
+                                               ocx->msix_ent[i].vector,
+                                               (i == OCX_LINK_INTS) ?
+                                                thunderx_ocx_com_isr :
+                                                thunderx_ocx_lnk_isr,
+                                               (i == OCX_LINK_INTS) ?
+                                                thunderx_ocx_com_threaded_isr :
+                                                thunderx_ocx_lnk_threaded_isr,
+                                               0, "[EDAC] ThunderX OCX",
+                                               &ocx->msix_ent[i]);
+               if (ret) {
+                       dev_err(&pdev->dev, "Cannot set ISR: %d\n", ret);
+                       goto err_free;
+               }
+       }
+
+       edac_dev->dev = &pdev->dev;
+       edac_dev->dev_name = dev_name(&pdev->dev);
+       edac_dev->mod_name = "thunderx-ocx";
+       edac_dev->ctl_name = "thunderx-ocx";
+
+       ret = edac_device_add_device(edac_dev);
+       if (ret) {
+               dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret);
+               goto err_free;
+       }
+
+       if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
+               ocx->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name);
+
+               ret = thunderx_create_debugfs_nodes(ocx->debugfs,
+                                                   ocx_dfs_ents,
+                                                   ocx,
+                                                   ARRAY_SIZE(ocx_dfs_ents));
+               /* Missing debugfs nodes are only warned about, not fatal. */
+               if (ret != ARRAY_SIZE(ocx_dfs_ents)) {
+                       dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
+                                ret, ret >= 0 ? " created" : "");
+               }
+       }
+
+       pci_set_drvdata(pdev, edac_dev);
+
+       thunderx_ocx_clearstats(ocx);
+
+       for (i = 0; i < OCX_RX_LANES; i++) {
+               writeq(OCX_LNE_INT_ENA_ALL,
+                      ocx->regs + OCX_LNE_INT_EN(i));
+
+               reg = readq(ocx->regs + OCX_LNE_INT(i));
+               writeq(reg, ocx->regs + OCX_LNE_INT(i));
+
+       }
+
+       for (i = 0; i < OCX_LINK_INTS; i++) {
+               reg = readq(ocx->regs + OCX_COM_LINKX_INT(i));
+               writeq(reg, ocx->regs + OCX_COM_LINKX_INT(i));
+
+               writeq(OCX_COM_LINKX_INT_ENA_ALL,
+                      ocx->regs + OCX_COM_LINKX_INT_ENA_W1S(i));
+       }
+
+       reg = readq(ocx->regs + OCX_COM_INT);
+       writeq(reg, ocx->regs + OCX_COM_INT);
+
+       writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1S);
+
+       return 0;
+err_free:
+       edac_device_free_ctl_info(edac_dev);
+
+       return ret;
+}
+
+/*
+ * Tear down the OCX device: mask the common interrupt and every per-link
+ * interrupt through their W1C enable registers, remove the debugfs
+ * directory, unregister the EDAC device and free it.
+ */
+static void thunderx_ocx_remove(struct pci_dev *pdev)
+{
+       struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev);
+       struct thunderx_ocx *ocx = edac_dev->pvt_info;
+       int i;
+
+       writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1C);
+
+       /*
+        * Only OCX_LINK_INTS link enable registers exist and were enabled
+        * in probe; iterating to OCX_INTS would write past the last one.
+        */
+       for (i = 0; i < OCX_LINK_INTS; i++) {
+               writeq(OCX_COM_LINKX_INT_ENA_ALL,
+                      ocx->regs + OCX_COM_LINKX_INT_ENA_W1C(i));
+       }
+
+       edac_debugfs_remove_recursive(ocx->debugfs);
+
+       edac_device_del_device(&pdev->dev);
+       edac_device_free_ctl_info(edac_dev);
+}
+
+MODULE_DEVICE_TABLE(pci, thunderx_ocx_pci_tbl);
+
+/* PCI driver glue for the OCX device; no power-management hooks are set. */
+static struct pci_driver thunderx_ocx_driver = {
+       .name     = "thunderx_ocx_edac",
+       .probe    = thunderx_ocx_probe,
+       .remove   = thunderx_ocx_remove,
+       .id_table = thunderx_ocx_pci_tbl,
+};
+
+/*---------------------- L2C driver ---------------------------------*/
+
+/* PCI device IDs of the three L2C sub-blocks (TAD, CBC, MCI). */
+#define PCI_DEVICE_ID_THUNDER_L2C_TAD 0xa02e
+#define PCI_DEVICE_ID_THUNDER_L2C_CBC 0xa02f
+#define PCI_DEVICE_ID_THUNDER_L2C_MCI 0xa030
+
+/* L2C TAD interrupt status registers. */
+#define L2C_TAD_INT_W1C                0x40000
+#define L2C_TAD_INT_W1S                0x40008
+
+/* L2C TAD interrupt enable registers. */
+#define L2C_TAD_INT_ENA_W1C    0x40020
+#define L2C_TAD_INT_ENA_W1S    0x40028
+
+
+/* L2C TAD interrupt bits. */
+#define L2C_TAD_INT_L2DDBE      BIT(1)
+#define L2C_TAD_INT_SBFSBE      BIT(2)
+#define L2C_TAD_INT_SBFDBE      BIT(3)
+#define L2C_TAD_INT_FBFSBE      BIT(4)
+#define L2C_TAD_INT_FBFDBE      BIT(5)
+#define L2C_TAD_INT_TAGDBE      BIT(9)
+#define L2C_TAD_INT_RDDISLMC    BIT(15)
+#define L2C_TAD_INT_WRDISLMC    BIT(16)
+#define L2C_TAD_INT_LFBTO       BIT(17)
+#define L2C_TAD_INT_GSYNCTO     BIT(18)
+#define L2C_TAD_INT_RTGSBE      BIT(32)
+#define L2C_TAD_INT_RTGDBE      BIT(33)
+#define L2C_TAD_INT_RDDISOCI    BIT(34)
+#define L2C_TAD_INT_WRDISOCI    BIT(35)
+
+/* The L2D/SBF/FBF single- and double-bit error bits grouped together. */
+#define L2C_TAD_INT_ECC                (L2C_TAD_INT_L2DDBE | \
+                                L2C_TAD_INT_SBFSBE | L2C_TAD_INT_SBFDBE | \
+                                L2C_TAD_INT_FBFSBE | L2C_TAD_INT_FBFDBE)
+
+/* TAD bits classified as corrected errors. */
+#define L2C_TAD_INT_CE          (L2C_TAD_INT_SBFSBE | \
+                                L2C_TAD_INT_FBFSBE)
+
+/* TAD bits classified as uncorrected errors. */
+#define L2C_TAD_INT_UE          (L2C_TAD_INT_L2DDBE | \
+                                L2C_TAD_INT_SBFDBE | \
+                                L2C_TAD_INT_FBFDBE | \
+                                L2C_TAD_INT_TAGDBE | \
+                                L2C_TAD_INT_RTGDBE | \
+                                L2C_TAD_INT_WRDISOCI | \
+                                L2C_TAD_INT_RDDISOCI | \
+                                L2C_TAD_INT_WRDISLMC | \
+                                L2C_TAD_INT_RDDISLMC | \
+                                L2C_TAD_INT_LFBTO    | \
+                                L2C_TAD_INT_GSYNCTO)
+
+/*
+ * Human-readable descriptions of the TAD interrupt bits. The threaded
+ * handler passes this table to decode_register() for TAD devices. The list
+ * is terminated by an all-zero entry.
+ */
+static const struct error_descr l2_tad_errors[] = {
+       {
+               .type  = ERR_CORRECTED,
+               .mask  = L2C_TAD_INT_SBFSBE,
+               .descr = "SBF single-bit error",
+       },
+       {
+               .type  = ERR_CORRECTED,
+               .mask  = L2C_TAD_INT_FBFSBE,
+               .descr = "FBF single-bit error",
+       },
+       {
+               .type  = ERR_UNCORRECTED,
+               .mask  = L2C_TAD_INT_L2DDBE,
+               .descr = "L2D double-bit error",
+       },
+       {
+               .type  = ERR_UNCORRECTED,
+               .mask  = L2C_TAD_INT_SBFDBE,
+               .descr = "SBF double-bit error",
+       },
+       {
+               .type  = ERR_UNCORRECTED,
+               .mask  = L2C_TAD_INT_FBFDBE,
+               .descr = "FBF double-bit error",
+       },
+       {
+               .type  = ERR_UNCORRECTED,
+               .mask  = L2C_TAD_INT_TAGDBE,
+               .descr = "TAG double-bit error",
+       },
+       {
+               .type  = ERR_UNCORRECTED,
+               .mask  = L2C_TAD_INT_RTGDBE,
+               .descr = "RTG double-bit error",
+       },
+       {
+               .type  = ERR_UNCORRECTED,
+               .mask  = L2C_TAD_INT_WRDISOCI,
+               .descr = "Write to a disabled CCPI",
+       },
+       {
+               .type  = ERR_UNCORRECTED,
+               .mask  = L2C_TAD_INT_RDDISOCI,
+               .descr = "Read from a disabled CCPI",
+       },
+       {
+               .type  = ERR_UNCORRECTED,
+               .mask  = L2C_TAD_INT_WRDISLMC,
+               .descr = "Write to a disabled LMC",
+       },
+       {
+               .type  = ERR_UNCORRECTED,
+               .mask  = L2C_TAD_INT_RDDISLMC,
+               .descr = "Read from a disabled LMC",
+       },
+       {
+               .type  = ERR_UNCORRECTED,
+               .mask  = L2C_TAD_INT_LFBTO,
+               .descr = "LFB entry timeout",
+       },
+       {
+               .type  = ERR_UNCORRECTED,
+               .mask  = L2C_TAD_INT_GSYNCTO,
+               .descr = "Global sync CCPI timeout",
+       },
+       {0, 0, NULL},
+};
+
+/* Interrupt classes used by the TAD hard-IRQ handler and the enable mask. */
+#define L2C_TAD_INT_TAG                (L2C_TAD_INT_TAGDBE)
+
+#define L2C_TAD_INT_RTG                (L2C_TAD_INT_RTGDBE)
+
+#define L2C_TAD_INT_DISLMC     (L2C_TAD_INT_WRDISLMC | L2C_TAD_INT_RDDISLMC)
+
+#define L2C_TAD_INT_DISOCI     (L2C_TAD_INT_WRDISOCI | L2C_TAD_INT_RDDISOCI)
+
+/*
+ * Mask written to L2C_TAD_INT_ENA_W1S in probe and to L2C_TAD_INT_ENA_W1C
+ * in remove.
+ * NOTE(review): L2C_TAD_INT_GSYNCTO is part of L2C_TAD_INT_UE and of
+ * l2_tad_errors but is not included here - confirm this is intentional.
+ */
+#define L2C_TAD_INT_ENA_ALL    (L2C_TAD_INT_ECC | L2C_TAD_INT_TAG | \
+                                L2C_TAD_INT_RTG | \
+                                L2C_TAD_INT_DISLMC | L2C_TAD_INT_DISOCI | \
+                                L2C_TAD_INT_LFBTO)
+
+/*
+ * Further TAD register offsets. TIMEOUT, ERR, TQD_ERR and TTG_ERR are read
+ * by the TAD hard-IRQ handler as "extended" error information.
+ */
+#define L2C_TAD_TIMETWO                0x50000
+#define L2C_TAD_TIMEOUT                0x50100
+#define L2C_TAD_ERR            0x60000
+#define L2C_TAD_TQD_ERR                0x60100
+#define L2C_TAD_TTG_ERR                0x60200
+
+
+/*
+ * CBC interrupt register offset; the CBC hard-IRQ handler reads the pending
+ * bits from it and writes the same value back.
+ */
+#define L2C_CBC_INT_W1C                0x60000
+
+/* Individual CBC interrupt bits and the classes built from them. */
+#define L2C_CBC_INT_RSDSBE      BIT(0)
+#define L2C_CBC_INT_RSDDBE      BIT(1)
+
+#define L2C_CBC_INT_RSD                 (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_RSDDBE)
+
+#define L2C_CBC_INT_MIBSBE      BIT(4)
+#define L2C_CBC_INT_MIBDBE      BIT(5)
+
+#define L2C_CBC_INT_MIB                 (L2C_CBC_INT_MIBSBE | L2C_CBC_INT_MIBDBE)
+
+#define L2C_CBC_INT_IORDDISOCI  BIT(6)
+#define L2C_CBC_INT_IOWRDISOCI  BIT(7)
+
+#define L2C_CBC_INT_IODISOCI    (L2C_CBC_INT_IORDDISOCI | \
+                                 L2C_CBC_INT_IOWRDISOCI)
+
+/*
+ * Masks the threaded handler uses to choose between the corrected (CE) and
+ * uncorrected (UE) EDAC report; UE is tested first.
+ */
+#define L2C_CBC_INT_CE          (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_MIBSBE)
+#define L2C_CBC_INT_UE          (L2C_CBC_INT_RSDDBE | L2C_CBC_INT_MIBDBE)
+
+/*
+ * Human-readable descriptions of the CBC interrupt bits. The threaded
+ * handler passes this table to decode_register() for CBC devices. The list
+ * is terminated by an all-zero entry.
+ */
+static const struct error_descr l2_cbc_errors[] = {
+       {
+               .type  = ERR_CORRECTED,
+               .mask  = L2C_CBC_INT_RSDSBE,
+               .descr = "RSD single-bit error",
+       },
+       {
+               .type  = ERR_CORRECTED,
+               .mask  = L2C_CBC_INT_MIBSBE,
+               .descr = "MIB single-bit error",
+       },
+       {
+               .type  = ERR_UNCORRECTED,
+               .mask  = L2C_CBC_INT_RSDDBE,
+               .descr = "RSD double-bit error",
+       },
+       {
+               .type  = ERR_UNCORRECTED,
+               .mask  = L2C_CBC_INT_MIBDBE,
+               .descr = "MIB double-bit error",
+       },
+       {
+               .type  = ERR_UNCORRECTED,
+               .mask  = L2C_CBC_INT_IORDDISOCI,
+               .descr = "Read from a disabled CCPI",
+       },
+       {
+               .type  = ERR_UNCORRECTED,
+               .mask  = L2C_CBC_INT_IOWRDISOCI,
+               .descr = "Write to a disabled CCPI",
+       },
+       {0, 0, NULL},
+};
+
+/* CBC: _W1S is exposed through debugfs, _ENA_W1C is written in remove. */
+#define L2C_CBC_INT_W1S                0x60008
+#define L2C_CBC_INT_ENA_W1C    0x60020
+
+/* Mask written to the CBC enable registers in probe and remove. */
+#define L2C_CBC_INT_ENA_ALL     (L2C_CBC_INT_RSD | L2C_CBC_INT_MIB | \
+                                 L2C_CBC_INT_IODISOCI)
+
+/* CBC interrupt enable register written in probe. */
+#define L2C_CBC_INT_ENA_W1S    0x60028
+
+/*
+ * CBC error-information register offsets. RSDERR, MIBERR and IODISOCIERR
+ * are read by the CBC hard-IRQ handler.
+ */
+#define L2C_CBC_IODISOCIERR    0x80008
+#define L2C_CBC_IOCERR         0x80010
+#define L2C_CBC_RSDERR         0x80018
+#define L2C_CBC_MIBERR         0x80020
+
+
+/*
+ * MCI interrupt register offset; the MCI hard-IRQ handler reads the pending
+ * bits from it and writes the same value back.
+ */
+#define L2C_MCI_INT_W1C                0x0
+
+/* MCI interrupt bits (see l2_mci_errors). */
+#define L2C_MCI_INT_VBFSBE      BIT(0)
+#define L2C_MCI_INT_VBFDBE      BIT(1)
+
+/*
+ * Human-readable descriptions of the MCI interrupt bits. The threaded
+ * handler passes this table to decode_register() for MCI devices. The list
+ * is terminated by an all-zero entry.
+ */
+static const struct error_descr l2_mci_errors[] = {
+       {
+               .type  = ERR_CORRECTED,
+               .mask  = L2C_MCI_INT_VBFSBE,
+               .descr = "VBF single-bit error",
+       },
+       {
+               .type  = ERR_UNCORRECTED,
+               .mask  = L2C_MCI_INT_VBFDBE,
+               .descr = "VBF double-bit error",
+       },
+       {0, 0, NULL},
+};
+
+/* MCI: _W1S is exposed through debugfs, _ENA_W1C is written in remove. */
+#define L2C_MCI_INT_W1S                0x8
+#define L2C_MCI_INT_ENA_W1C    0x20
+
+/* Mask written to the MCI enable registers in probe and remove. */
+#define L2C_MCI_INT_ENA_ALL     (L2C_MCI_INT_VBFSBE | L2C_MCI_INT_VBFDBE)
+
+/* MCI interrupt enable register written in probe. */
+#define L2C_MCI_INT_ENA_W1S    0x28
+
+/* Extended error register captured by the MCI hard-IRQ handler. */
+#define L2C_MCI_ERR            0x10000
+
+/*
+ * Buffer limits used by the threaded handler: L2C_MESSAGE_SIZE bounds the
+ * full report, L2C_OTHER_SIZE bounds the decode_register() output and is
+ * derived from l2_tad_errors, the longest of the three L2C tables.
+ */
+#define L2C_MESSAGE_SIZE       SZ_1K
+#define L2C_OTHER_SIZE         (50 * ARRAY_SIZE(l2_tad_errors))
+
+/*
+ * One captured L2C error, filled in by a hard-IRQ handler and consumed by
+ * the threaded handler.
+ */
+struct l2c_err_ctx {
+       char *reg_ext_name;     /* label printed next to reg_ext */
+       u64  reg_int;           /* value read from the block's INT register */
+       u64  reg_ext;           /* value of the extended error register */
+};
+
+/*
+ * Per-device state of an L2C TAD/CBC/MCI block; lives in the pvt_info area
+ * of the EDAC control structure allocated in thunderx_l2c_probe().
+ */
+struct thunderx_l2c {
+       void __iomem *regs;     /* entry 0 of pcim_iomap_table() */
+       struct pci_dev *pdev;
+       struct edac_device_ctl_info *edac_dev;
+
+       struct dentry *debugfs;
+
+       int index;
+
+       /* The IRQ handlers recover this structure from &msix_ent. */
+       struct msix_entry msix_ent;
+
+       /*
+        * Ring of captured errors: hard-IRQ handlers fill the slot selected
+        * by ring_head and increment it; the threaded handler increments
+        * ring_tail as it reports entries.
+        */
+       struct l2c_err_ctx err_ctx[RING_ENTRIES];
+       unsigned long ring_head;
+       unsigned long ring_tail;
+};
+
+/*
+ * Hard-IRQ handler for an L2C TAD device.
+ *
+ * Stores the value of the TAD interrupt register in the ring slot selected
+ * by ring_head, together with the extended error register that belongs to
+ * the first matching interrupt class. The captured bits are then written
+ * back to L2C_TAD_INT_W1C, ring_head is advanced and the threaded handler
+ * is woken.
+ */
+static irqreturn_t thunderx_l2c_tad_isr(int irq, void *irq_id)
+{
+       struct msix_entry *msix = irq_id;
+       struct thunderx_l2c *tad;
+       struct l2c_err_ctx *slot;
+       u64 pending;
+
+       tad = container_of(msix, struct thunderx_l2c, msix_ent);
+       slot = &tad->err_ctx[ring_pos(tad->ring_head,
+                                     ARRAY_SIZE(tad->err_ctx))];
+
+       pending = readq(tad->regs + L2C_TAD_INT_W1C);
+       slot->reg_int = pending;
+
+       /*
+        * NOTE(review): if none of the classes below is set (e.g. only an
+        * RTG or DISLMC bit), reg_ext_name and reg_ext keep whatever the
+        * slot held before.
+        */
+       if (pending & L2C_TAD_INT_ECC) {
+               slot->reg_ext_name = "TQD_ERR";
+               slot->reg_ext = readq(tad->regs + L2C_TAD_TQD_ERR);
+       } else if (pending & L2C_TAD_INT_TAG) {
+               slot->reg_ext_name = "TTG_ERR";
+               slot->reg_ext = readq(tad->regs + L2C_TAD_TTG_ERR);
+       } else if (pending & L2C_TAD_INT_LFBTO) {
+               slot->reg_ext_name = "TIMEOUT";
+               slot->reg_ext = readq(tad->regs + L2C_TAD_TIMEOUT);
+       } else if (pending & L2C_TAD_INT_DISOCI) {
+               slot->reg_ext_name = "ERR";
+               slot->reg_ext = readq(tad->regs + L2C_TAD_ERR);
+       }
+
+       writeq(pending, tad->regs + L2C_TAD_INT_W1C);
+
+       tad->ring_head++;
+
+       return IRQ_WAKE_THREAD;
+}
+
+/*
+ * Hard-IRQ handler for an L2C CBC device.
+ *
+ * Stores the value of the CBC interrupt register in the ring slot selected
+ * by ring_head, together with the extended error register (RSDERR, MIBERR
+ * or IODISOCIERR) of the first matching interrupt class. The captured bits
+ * are written back to L2C_CBC_INT_W1C, ring_head is advanced and the
+ * threaded handler is woken.
+ */
+static irqreturn_t thunderx_l2c_cbc_isr(int irq, void *irq_id)
+{
+       struct msix_entry *msix = irq_id;
+       struct thunderx_l2c *cbc;
+       struct l2c_err_ctx *slot;
+       u64 pending;
+
+       cbc = container_of(msix, struct thunderx_l2c, msix_ent);
+       slot = &cbc->err_ctx[ring_pos(cbc->ring_head,
+                                     ARRAY_SIZE(cbc->err_ctx))];
+
+       pending = readq(cbc->regs + L2C_CBC_INT_W1C);
+       slot->reg_int = pending;
+
+       if (pending & L2C_CBC_INT_RSD) {
+               slot->reg_ext_name = "RSDERR";
+               slot->reg_ext = readq(cbc->regs + L2C_CBC_RSDERR);
+       } else if (pending & L2C_CBC_INT_MIB) {
+               slot->reg_ext_name = "MIBERR";
+               slot->reg_ext = readq(cbc->regs + L2C_CBC_MIBERR);
+       } else if (pending & L2C_CBC_INT_IODISOCI) {
+               slot->reg_ext_name = "IODISOCIERR";
+               slot->reg_ext = readq(cbc->regs + L2C_CBC_IODISOCIERR);
+       }
+
+       writeq(pending, cbc->regs + L2C_CBC_INT_W1C);
+
+       cbc->ring_head++;
+
+       return IRQ_WAKE_THREAD;
+}
+
+/*
+ * Hard-IRQ handler for an L2C MCI device.
+ *
+ * Stores the MCI interrupt register and L2C_MCI_ERR in the ring slot
+ * selected by ring_head, writes the captured interrupt bits back to
+ * L2C_MCI_INT_W1C, advances ring_head and wakes the threaded handler.
+ */
+static irqreturn_t thunderx_l2c_mci_isr(int irq, void *irq_id)
+{
+       struct msix_entry *msix = irq_id;
+       struct thunderx_l2c *mci;
+       struct l2c_err_ctx *slot;
+
+       mci = container_of(msix, struct thunderx_l2c, msix_ent);
+       slot = &mci->err_ctx[ring_pos(mci->ring_head,
+                                     ARRAY_SIZE(mci->err_ctx))];
+
+       slot->reg_int = readq(mci->regs + L2C_MCI_INT_W1C);
+       slot->reg_ext = readq(mci->regs + L2C_MCI_ERR);
+
+       writeq(slot->reg_int, mci->regs + L2C_MCI_INT_W1C);
+
+       /* MCI has a single extended error register. */
+       slot->reg_ext_name = "ERR";
+
+       mci->ring_head++;
+
+       return IRQ_WAKE_THREAD;
+}
+
+/*
+ * Threaded half of the L2C interrupt, shared by TAD, CBC and MCI devices.
+ *
+ * Drains the error ring filled by the hard-IRQ handlers: each pending entry
+ * is formatted into a message (raw register values followed by the decoded
+ * bit descriptions) and reported through EDAC as uncorrected or corrected,
+ * depending on which mask the captured interrupt bits match.
+ *
+ * Returns IRQ_HANDLED once the ring has been drained, or IRQ_NONE if the
+ * message buffers cannot be allocated or the device ID is not an L2C one.
+ * The temporary buffers are released on every path.
+ */
+static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id)
+{
+       struct msix_entry *msix = irq_id;
+       struct thunderx_l2c *l2c = container_of(msix, struct thunderx_l2c,
+                                               msix_ent);
+
+       unsigned long tail;
+       struct l2c_err_ctx *ctx;
+       irqreturn_t ret = IRQ_NONE;
+
+       u64 mask_ue, mask_ce;
+       const struct error_descr *l2_errors;
+       char *reg_int_name;
+
+       char *msg;
+       char *other;
+
+       /* Allocate with the same limits that are used to fill the buffers. */
+       msg = kmalloc(L2C_MESSAGE_SIZE, GFP_KERNEL);
+       other = kmalloc(L2C_OTHER_SIZE, GFP_KERNEL);
+
+       if (!msg || !other)
+               goto err_free;
+
+       switch (l2c->pdev->device) {
+       case PCI_DEVICE_ID_THUNDER_L2C_TAD:
+               reg_int_name = "L2C_TAD_INT";
+               mask_ue = L2C_TAD_INT_UE;
+               mask_ce = L2C_TAD_INT_CE;
+               l2_errors = l2_tad_errors;
+               break;
+       case PCI_DEVICE_ID_THUNDER_L2C_CBC:
+               reg_int_name = "L2C_CBC_INT";
+               mask_ue = L2C_CBC_INT_UE;
+               mask_ce = L2C_CBC_INT_CE;
+               l2_errors = l2_cbc_errors;
+               break;
+       case PCI_DEVICE_ID_THUNDER_L2C_MCI:
+               reg_int_name = "L2C_MCI_INT";
+               mask_ue = L2C_MCI_INT_VBFDBE;
+               mask_ce = L2C_MCI_INT_VBFSBE;
+               l2_errors = l2_mci_errors;
+               break;
+       default:
+               dev_err(&l2c->pdev->dev, "Unsupported device: %04x\n",
+                       l2c->pdev->device);
+               /* Leave through the common exit so the buffers are freed. */
+               goto err_free;
+       }
+
+       while (CIRC_CNT(l2c->ring_head, l2c->ring_tail,
+                       ARRAY_SIZE(l2c->err_ctx))) {
+               /* Look up the entry at the current tail on every pass. */
+               tail = ring_pos(l2c->ring_tail, ARRAY_SIZE(l2c->err_ctx));
+               ctx = &l2c->err_ctx[tail];
+
+               snprintf(msg, L2C_MESSAGE_SIZE,
+                        "%s: %s: %016llx, %s: %016llx",
+                        l2c->edac_dev->ctl_name, reg_int_name, ctx->reg_int,
+                        ctx->reg_ext_name, ctx->reg_ext);
+
+               decode_register(other, L2C_OTHER_SIZE, l2_errors, ctx->reg_int);
+
+               /*
+                * strncat() bounds the number of appended characters, not
+                * the destination size, so pass the space that is left.
+                */
+               strncat(msg, other, L2C_MESSAGE_SIZE - strlen(msg) - 1);
+
+               if (ctx->reg_int & mask_ue)
+                       edac_device_handle_ue(l2c->edac_dev, 0, 0, msg);
+               else if (ctx->reg_int & mask_ce)
+                       edac_device_handle_ce(l2c->edac_dev, 0, 0, msg);
+
+               l2c->ring_tail++;
+       }
+
+       ret = IRQ_HANDLED;
+
+err_free:
+       kfree(other);
+       kfree(msg);
+
+       return ret;
+}
+
+/*
+ * debugfs attributes of the L2C devices: one entry per device type, each
+ * bound to that type's INT_W1S register offset. The per-type arrays are
+ * selected in thunderx_l2c_probe() and handed to
+ * thunderx_create_debugfs_nodes().
+ */
+#define L2C_DEBUGFS_ATTR(_name, _reg)  DEBUGFS_REG_ATTR(l2c, _name, _reg)
+
+L2C_DEBUGFS_ATTR(tad_int, L2C_TAD_INT_W1S);
+
+struct debugfs_entry *l2c_tad_dfs_ents[] = {
+       &debugfs_tad_int,
+};
+
+L2C_DEBUGFS_ATTR(cbc_int, L2C_CBC_INT_W1S);
+
+struct debugfs_entry *l2c_cbc_dfs_ents[] = {
+       &debugfs_cbc_int,
+};
+
+L2C_DEBUGFS_ATTR(mci_int, L2C_MCI_INT_W1S);
+
+struct debugfs_entry *l2c_mci_dfs_ents[] = {
+       &debugfs_mci_int,
+};
+
+/* PCI IDs bound by thunderx_l2c_driver: the TAD, CBC and MCI devices. */
+static const struct pci_device_id thunderx_l2c_pci_tbl[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_TAD), },
+       { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_CBC), },
+       { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_MCI), },
+       { 0, },
+};
+
+/*
+ * PCI probe callback shared by the L2C TAD, CBC and MCI devices.
+ *
+ * Enables the device and maps BAR 0, selects the per-type hard-IRQ handler,
+ * debugfs attributes, name format and interrupt-enable register/mask, then
+ * allocates the EDAC control structure, sets up the single MSI-X vector
+ * with a threaded handler, registers the EDAC device, creates the debugfs
+ * nodes (only when CONFIG_EDAC_DEBUG is enabled) and finally enables the
+ * device interrupts.
+ *
+ * Returns 0 on success or a negative errno. A shortfall in debugfs nodes is
+ * only warned about and does not fail the probe.
+ */
+static int thunderx_l2c_probe(struct pci_dev *pdev,
+                             const struct pci_device_id *id)
+{
+       struct thunderx_l2c *l2c;
+       struct edac_device_ctl_info *edac_dev;
+       struct debugfs_entry **l2c_devattr;
+       size_t dfs_entries;
+       irqreturn_t (*thunderx_l2c_isr)(int, void *) = NULL;
+       char name[32];
+       const char *fmt;
+       u64 reg_en_offs, reg_en_mask;
+       int idx;
+       int ret;
+
+       ret = pcim_enable_device(pdev);
+       if (ret) {
+               dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
+               return ret;
+       }
+
+       ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_l2c");
+       if (ret) {
+               dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
+               return ret;
+       }
+
+       switch (pdev->device) {
+       case PCI_DEVICE_ID_THUNDER_L2C_TAD:
+               thunderx_l2c_isr = thunderx_l2c_tad_isr;
+               l2c_devattr = l2c_tad_dfs_ents;
+               dfs_entries = ARRAY_SIZE(l2c_tad_dfs_ents);
+               fmt = "L2C-TAD%d";
+               reg_en_offs = L2C_TAD_INT_ENA_W1S;
+               reg_en_mask = L2C_TAD_INT_ENA_ALL;
+               break;
+       case PCI_DEVICE_ID_THUNDER_L2C_CBC:
+               thunderx_l2c_isr = thunderx_l2c_cbc_isr;
+               l2c_devattr = l2c_cbc_dfs_ents;
+               dfs_entries = ARRAY_SIZE(l2c_cbc_dfs_ents);
+               fmt = "L2C-CBC%d";
+               reg_en_offs = L2C_CBC_INT_ENA_W1S;
+               reg_en_mask = L2C_CBC_INT_ENA_ALL;
+               break;
+       case PCI_DEVICE_ID_THUNDER_L2C_MCI:
+               thunderx_l2c_isr = thunderx_l2c_mci_isr;
+               l2c_devattr = l2c_mci_dfs_ents;
+               dfs_entries = ARRAY_SIZE(l2c_mci_dfs_ents);
+               fmt = "L2C-MCI%d";
+               reg_en_offs = L2C_MCI_INT_ENA_W1S;
+               reg_en_mask = L2C_MCI_INT_ENA_ALL;
+               break;
+       default:
+               /* Not reachable for IDs listed in thunderx_l2c_pci_tbl. */
+               dev_err(&pdev->dev, "Unsupported PCI device: %04x\n",
+                       pdev->device);
+               return -EINVAL;
+       }
+
+       idx = edac_device_alloc_index();
+       snprintf(name, sizeof(name), fmt, idx);
+
+       edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_l2c),
+                                             name, 1, "L2C", 1, 0,
+                                             NULL, 0, idx);
+       if (!edac_dev) {
+               dev_err(&pdev->dev, "Cannot allocate EDAC device\n");
+               return -ENOMEM;
+       }
+
+       l2c = edac_dev->pvt_info;
+       l2c->edac_dev = edac_dev;
+
+       l2c->regs = pcim_iomap_table(pdev)[0];
+       if (!l2c->regs) {
+               dev_err(&pdev->dev, "Cannot map PCI resources\n");
+               ret = -ENODEV;
+               goto err_free;
+       }
+
+       l2c->pdev = pdev;
+
+       l2c->ring_head = 0;
+       l2c->ring_tail = 0;
+
+       l2c->msix_ent.entry = 0;
+       l2c->msix_ent.vector = 0;
+
+       ret = pci_enable_msix_exact(pdev, &l2c->msix_ent, 1);
+       if (ret) {
+               dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
+               goto err_free;
+       }
+
+       /* The handlers recover the device state from &l2c->msix_ent. */
+       ret = devm_request_threaded_irq(&pdev->dev, l2c->msix_ent.vector,
+                                       thunderx_l2c_isr,
+                                       thunderx_l2c_threaded_isr,
+                                       0, "[EDAC] ThunderX L2C",
+                                       &l2c->msix_ent);
+       if (ret)
+               goto err_free;
+
+       edac_dev->dev = &pdev->dev;
+       edac_dev->dev_name = dev_name(&pdev->dev);
+       edac_dev->mod_name = "thunderx-l2c";
+       edac_dev->ctl_name = "thunderx-l2c";
+
+       ret = edac_device_add_device(edac_dev);
+       if (ret) {
+               dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret);
+               goto err_free;
+       }
+
+       if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
+               l2c->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name);
+
+               /*
+                * Keep the result: the check below compares it with the
+                * number of entries that were requested.
+                */
+               ret = thunderx_create_debugfs_nodes(l2c->debugfs, l2c_devattr,
+                                                   l2c, dfs_entries);
+
+               if (ret != dfs_entries) {
+                       dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
+                                ret, ret >= 0 ? " created" : "");
+               }
+       }
+
+       pci_set_drvdata(pdev, edac_dev);
+
+       /* Enable the device interrupts only once everything is in place. */
+       writeq(reg_en_mask, l2c->regs + reg_en_offs);
+
+       return 0;
+
+err_free:
+       edac_device_free_ctl_info(edac_dev);
+
+       return ret;
+}
+
+/*
+ * PCI remove callback shared by the L2C TAD, CBC and MCI devices.
+ *
+ * Writes the per-type "enable all" mask to the matching INT_ENA_W1C
+ * register (nothing is written for an unrecognised device ID), removes the
+ * debugfs tree, then unregisters and frees the EDAC control structure
+ * stored as driver data.
+ */
+static void thunderx_l2c_remove(struct pci_dev *pdev)
+{
+       struct edac_device_ctl_info *edac = pci_get_drvdata(pdev);
+       struct thunderx_l2c *l2c = edac->pvt_info;
+
+       if (pdev->device == PCI_DEVICE_ID_THUNDER_L2C_TAD)
+               writeq(L2C_TAD_INT_ENA_ALL, l2c->regs + L2C_TAD_INT_ENA_W1C);
+       else if (pdev->device == PCI_DEVICE_ID_THUNDER_L2C_CBC)
+               writeq(L2C_CBC_INT_ENA_ALL, l2c->regs + L2C_CBC_INT_ENA_W1C);
+       else if (pdev->device == PCI_DEVICE_ID_THUNDER_L2C_MCI)
+               writeq(L2C_MCI_INT_ENA_ALL, l2c->regs + L2C_MCI_INT_ENA_W1C);
+
+       edac_debugfs_remove_recursive(l2c->debugfs);
+
+       edac_device_del_device(&pdev->dev);
+       edac_device_free_ctl_info(edac);
+}
+
+MODULE_DEVICE_TABLE(pci, thunderx_l2c_pci_tbl);
+
+/*
+ * PCI driver description for the L2C EDAC devices; registered and
+ * unregistered by thunderx_edac_init() / thunderx_edac_exit().
+ */
+static struct pci_driver thunderx_l2c_driver = {
+       .name     = "thunderx_l2c_edac",
+       .probe    = thunderx_l2c_probe,
+       .remove   = thunderx_l2c_remove,
+       .id_table = thunderx_l2c_pci_tbl,
+};
+
+/*
+ * Module init: registers the LMC, OCX and L2C PCI drivers in that order.
+ * If a registration fails, the drivers registered before it are
+ * unregistered in reverse order and the error is returned.
+ */
+static int __init thunderx_edac_init(void)
+{
+       int err;
+
+       err = pci_register_driver(&thunderx_lmc_driver);
+       if (err)
+               return err;
+
+       err = pci_register_driver(&thunderx_ocx_driver);
+       if (err) {
+               pci_unregister_driver(&thunderx_lmc_driver);
+               return err;
+       }
+
+       err = pci_register_driver(&thunderx_l2c_driver);
+       if (err) {
+               pci_unregister_driver(&thunderx_ocx_driver);
+               pci_unregister_driver(&thunderx_lmc_driver);
+               return err;
+       }
+
+       return err;
+}
+
+/* Module exit: unregisters the three PCI drivers in reverse init order. */
+static void __exit thunderx_edac_exit(void)
+{
+       pci_unregister_driver(&thunderx_l2c_driver);
+       pci_unregister_driver(&thunderx_ocx_driver);
+       pci_unregister_driver(&thunderx_lmc_driver);
+}
+
+module_init(thunderx_edac_init);
+module_exit(thunderx_edac_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Cavium, Inc.");
+MODULE_DESCRIPTION("EDAC Driver for Cavium ThunderX");
diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c

index 6c270d9d304a8eafb7464f576fdff119cb4cf27a..669246056812e8d361040fb4ce6f098574460768 100644 (file)
--- a/drivers/edac/xgene_edac.c
+++ b/drivers/edac/xgene_edac.c
@@ -1596,7 +1596,7 @@ static void xgene_edac_pa_report(struct edac_device_ctl_info *edac_dev)
         reg = readl(ctx->dev_csr + IOBPATRANSERRINTSTS);
         if (!reg)
                 goto chk_iob_axi0;
-       dev_err(edac_dev->dev, "IOB procesing agent (PA) transaction error\n");
+       dev_err(edac_dev->dev, "IOB processing agent (PA) transaction error\n");
         if (reg & IOBPA_RDATA_CORRUPT_MASK)
                 dev_err(edac_dev->dev, "IOB PA read data RAM error\n");
         if (reg & IOBPA_M_RDATA_CORRUPT_MASK)
diff --git a/drivers/extcon/Kconfig b/drivers/extcon/Kconfig

index 96bbae579c0b01cfdc3798e8fdb03948cb391280..fc09c76248b41630c8e849db0d8bf6826fb0e9aa 100644 (file)
--- a/drivers/extcon/Kconfig
+++ b/drivers/extcon/Kconfig
@@ -44,7 +44,7 @@ config EXTCON_GPIO
  
  config EXTCON_INTEL_INT3496
         tristate "Intel INT3496 ACPI device extcon driver"
-       depends on GPIOLIB && ACPI
+       depends on GPIOLIB && ACPI && (X86 || COMPILE_TEST)
         help
           Say Y here to enable extcon support for USB OTG ports controlled by
           an Intel INT3496 ACPI device.
diff --git a/drivers/extcon/devres.c b/drivers/extcon/devres.c

index b40eb18059273715b9ab5e7b30edfaf3ab448743..186fd735eb2866ba087ac155bbcb845de6159bc9 100644 (file)
--- a/drivers/extcon/devres.c
+++ b/drivers/extcon/devres.c
@@ -50,6 +50,13 @@ static void devm_extcon_dev_notifier_unreg(struct device *dev, void *res)
         extcon_unregister_notifier(this->edev, this->id, this->nb);
  }
  
+/*
+ * devres release callback: unregisters the all-connector notifier that is
+ * recorded in @res.
+ */
+static void devm_extcon_dev_notifier_all_unreg(struct device *dev, void *res)
+{
+       struct extcon_dev_notifier_devres *devres = res;
+
+       extcon_unregister_notifier_all(devres->edev, devres->nb);
+}
+
  /**
   * devm_extcon_dev_allocate - Allocate managed extcon device
   * @dev:               device owning the extcon device being created
@@ -214,3 +221,57 @@ void devm_extcon_unregister_notifier(struct device *dev,
                                devm_extcon_dev_match, edev));
  }
  EXPORT_SYMBOL(devm_extcon_unregister_notifier);
+
+/**
+ * devm_extcon_register_notifier_all()
+ *             - Resource-managed extcon_register_notifier_all()
+ * @dev:       the device that owns the managed registration.
+ * @edev:      the extcon device that has the external connector.
+ * @nb:        a notifier block to be registered.
+ *
+ * Registers @nb with extcon_register_notifier_all() and records the
+ * registration as a device resource of @dev. The resource's release
+ * callback calls extcon_unregister_notifier_all().
+ *
+ * Returns 0 on success, -ENOMEM if the resource cannot be allocated, or
+ * the error returned by extcon_register_notifier_all().
+ */
+int devm_extcon_register_notifier_all(struct device *dev, struct extcon_dev *edev,
+                               struct notifier_block *nb)
+{
+       struct extcon_dev_notifier_devres *devres;
+       int err;
+
+       devres = devres_alloc(devm_extcon_dev_notifier_all_unreg,
+                             sizeof(*devres), GFP_KERNEL);
+       if (!devres)
+               return -ENOMEM;
+
+       err = extcon_register_notifier_all(edev, nb);
+       if (!err) {
+               devres->edev = edev;
+               devres->nb = nb;
+               devres_add(dev, devres);
+
+               return 0;
+       }
+
+       /* Registration failed: drop the resource that was never added. */
+       devres_free(devres);
+
+       return err;
+}
+EXPORT_SYMBOL(devm_extcon_register_notifier_all);
+
+/**
+ * devm_extcon_unregister_notifier_all()
+ *             - Resource-managed extcon_unregister_notifier_all()
+ * @dev:       the device that owns the managed registration.
+ * @edev:      the extcon device that has the external connector.
+ * @nb:        a notifier block to be unregistered.
+ *
+ * Releases the matching device resource of @dev; the lookup is keyed on
+ * @edev. Warns if devres_release() returns non-zero.
+ */
+void devm_extcon_unregister_notifier_all(struct device *dev,
+                               struct extcon_dev *edev,
+                               struct notifier_block *nb)
+{
+       int rc;
+
+       rc = devres_release(dev, devm_extcon_dev_notifier_all_unreg,
+                           devm_extcon_dev_match, edev);
+       WARN_ON(rc);
+}
+EXPORT_SYMBOL(devm_extcon_unregister_notifier_all);
diff --git a/drivers/extcon/extcon-intel-int3496.c b/drivers/extcon/extcon-intel-int3496.c

index a3131b036de6810281854730c429a17117e6832d..9d17984bbbd49a810ac749f06013c282a006c46f 100644 (file)
--- a/drivers/extcon/extcon-intel-int3496.c
+++ b/drivers/extcon/extcon-intel-int3496.c
@@ -45,6 +45,17 @@ static const unsigned int int3496_cable[] = {
         EXTCON_NONE,
  };
  
+/*
+ * ACPI GPIO parameters for the three INT3496 lines, each initialised with
+ * the corresponding INT3496_GPIO_* constant.
+ */
+static const struct acpi_gpio_params id_gpios = { INT3496_GPIO_USB_ID, 0, false };
+static const struct acpi_gpio_params vbus_gpios = { INT3496_GPIO_VBUS_EN, 0, false };
+static const struct acpi_gpio_params mux_gpios = { INT3496_GPIO_USB_MUX, 0, false };
+
+/*
+ * Name-to-parameters table handed to acpi_dev_add_driver_gpios() in
+ * int3496_probe(); the probe then requests the lines as "id", "vbus" and
+ * "mux". Terminated by an empty entry.
+ */
+static const struct acpi_gpio_mapping acpi_int3496_default_gpios[] = {
+       { "id-gpios", &id_gpios, 1 },
+       { "vbus-gpios", &vbus_gpios, 1 },
+       { "mux-gpios", &mux_gpios, 1 },
+       { },
+};
+
  static void int3496_do_usb_id(struct work_struct *work)
  {
         struct int3496_data *data =
@@ -83,6 +94,13 @@ static int int3496_probe(struct platform_device *pdev)
         struct int3496_data *data;
         int ret;
  
+       ret = acpi_dev_add_driver_gpios(ACPI_COMPANION(dev),
+                                       acpi_int3496_default_gpios);
+       if (ret) {
+               dev_err(dev, "can't add GPIO ACPI mapping\n");
+               return ret;
+       }
+
         data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
         if (!data)
                 return -ENOMEM;
@@ -90,30 +108,27 @@ static int int3496_probe(struct platform_device *pdev)
         data->dev = dev;
         INIT_DELAYED_WORK(&data->work, int3496_do_usb_id);
  
-       data->gpio_usb_id = devm_gpiod_get_index(dev, "id",
-                                               INT3496_GPIO_USB_ID,
-                                               GPIOD_IN);
+       data->gpio_usb_id = devm_gpiod_get(dev, "id", GPIOD_IN);
         if (IS_ERR(data->gpio_usb_id)) {
                 ret = PTR_ERR(data->gpio_usb_id);
                 dev_err(dev, "can't request USB ID GPIO: %d\n", ret);
                 return ret;
+       } else if (gpiod_get_direction(data->gpio_usb_id) != GPIOF_DIR_IN) {
+               dev_warn(dev, FW_BUG "USB ID GPIO not in input mode, fixing\n");
+               gpiod_direction_input(data->gpio_usb_id);
         }
  
         data->usb_id_irq = gpiod_to_irq(data->gpio_usb_id);
-       if (data->usb_id_irq <= 0) {
+       if (data->usb_id_irq < 0) {
                 dev_err(dev, "can't get USB ID IRQ: %d\n", data->usb_id_irq);
-               return -EINVAL;
+               return data->usb_id_irq;
         }
  
-       data->gpio_vbus_en = devm_gpiod_get_index(dev, "vbus en",
-                                                INT3496_GPIO_VBUS_EN,
-                                                GPIOD_ASIS);
+       data->gpio_vbus_en = devm_gpiod_get(dev, "vbus", GPIOD_ASIS);
         if (IS_ERR(data->gpio_vbus_en))
                 dev_info(dev, "can't request VBUS EN GPIO\n");
  
-       data->gpio_usb_mux = devm_gpiod_get_index(dev, "usb mux",
-                                                INT3496_GPIO_USB_MUX,
-                                                GPIOD_ASIS);
+       data->gpio_usb_mux = devm_gpiod_get(dev, "mux", GPIOD_ASIS);
         if (IS_ERR(data->gpio_usb_mux))
                 dev_info(dev, "can't request USB MUX GPIO\n");
  
@@ -154,6 +169,8 @@ static int int3496_remove(struct platform_device *pdev)
         devm_free_irq(&pdev->dev, data->usb_id_irq, data);
         cancel_delayed_work_sync(&data->work);
  
+       acpi_dev_remove_driver_gpios(ACPI_COMPANION(&pdev->dev));
+
         return 0;
  }
  
diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c

index 09ac5e70c2f38706909d27a8369d357178d989bd..e7750545469f8f86ec0c787e0da0ec835e9a36e0 100644 (file)
--- a/drivers/extcon/extcon.c
+++ b/drivers/extcon/extcon.c
@@ -448,8 +448,19 @@ int extcon_sync(struct extcon_dev *edev, unsigned int id)
         spin_lock_irqsave(&edev->lock, flags);
  
         state = !!(edev->state & BIT(index));
+
+       /*
+        * Call functions in a raw notifier chain for the specific one
+        * external connector.
+        */
         raw_notifier_call_chain(&edev->nh[index], state, edev);
  
+       /*
+        * Call functions in a raw notifier chain for the all supported
+        * external connectors.
+        */
+       raw_notifier_call_chain(&edev->nh_all, state, edev);
+
         /* This could be in interrupt handler */
         prop_buf = (char *)get_zeroed_page(GFP_ATOMIC);
         if (!prop_buf) {
@@ -954,6 +965,59 @@ int extcon_unregister_notifier(struct extcon_dev *edev, unsigned int id,
  }
  EXPORT_SYMBOL_GPL(extcon_unregister_notifier);
  
+/**
+ * extcon_register_notifier_all() - Register a notifier block for all connectors
+ * @edev:      the extcon device that has the external connector.
+ * @nb:        a notifier block to be registered.
+ *
+ * Adds @nb to the notifier chain that extcon_sync() calls for every
+ * connector of @edev. The callback receives the connector state as its
+ * second argument (val) and the edev pointer as its third.
+ *
+ * Returns -EINVAL if @edev or @nb is NULL, otherwise the result of
+ * raw_notifier_chain_register().
+ */
+int extcon_register_notifier_all(struct extcon_dev *edev,
+                               struct notifier_block *nb)
+{
+       unsigned long irqflags;
+       int rc = -EINVAL;
+
+       if (edev && nb) {
+               spin_lock_irqsave(&edev->lock, irqflags);
+               rc = raw_notifier_chain_register(&edev->nh_all, nb);
+               spin_unlock_irqrestore(&edev->lock, irqflags);
+       }
+
+       return rc;
+}
+EXPORT_SYMBOL_GPL(extcon_register_notifier_all);
+
+/**
+ * extcon_unregister_notifier_all() - Unregister a notifier block from extcon.
+ * @edev:      the extcon device that has the external connector.
+ * @nb:        a notifier block to be unregistered.
+ *
+ * Removes @nb from the all-connector notifier chain of @edev.
+ *
+ * Returns -EINVAL if @edev or @nb is NULL, otherwise the result of
+ * raw_notifier_chain_unregister().
+ */
+int extcon_unregister_notifier_all(struct extcon_dev *edev,
+                               struct notifier_block *nb)
+{
+       unsigned long irqflags;
+       int rc = -EINVAL;
+
+       if (edev && nb) {
+               spin_lock_irqsave(&edev->lock, irqflags);
+               rc = raw_notifier_chain_unregister(&edev->nh_all, nb);
+               spin_unlock_irqrestore(&edev->lock, irqflags);
+       }
+
+       return rc;
+}
+EXPORT_SYMBOL_GPL(extcon_unregister_notifier_all);
+
  static struct attribute *extcon_attrs[] = {
         &dev_attr_state.attr,
         &dev_attr_name.attr,
@@ -1212,6 +1276,8 @@ int extcon_dev_register(struct extcon_dev *edev)
         for (index = 0; index < edev->max_supported; index++)
                 RAW_INIT_NOTIFIER_HEAD(&edev->nh[index]);
  
+       RAW_INIT_NOTIFIER_HEAD(&edev->nh_all);
+
         dev_set_drvdata(&edev->dev, edev);
         edev->state = 0;
  
diff --git a/drivers/extcon/extcon.h b/drivers/extcon/extcon.h

index 993ddccafe113af072151ebbdc0b16571c63f793..dddddcfa05873ed9ad8e77779515c509f2499d8e 100644 (file)
--- a/drivers/extcon/extcon.h
+++ b/drivers/extcon/extcon.h
@@ -21,6 +21,8 @@
   * @dev:               Device of this extcon.
   * @state:             Attach/detach state of this extcon. Do not provide at
   *                     register-time.
+ * @nh_all:            Notifier for the state change events for all supported
+ *                     external connectors from this extcon.
   * @nh:                        Notifier for the state change events from this extcon
   * @entry:             To support list of extcon devices so that users can
   *                     search for extcon devices based on the extcon name.
@@ -43,6 +45,7 @@ struct extcon_dev {
  
         /* Internal data. Please do not set. */
         struct device dev;
+       struct raw_notifier_head nh_all;
         struct raw_notifier_head *nh;
         struct list_head entry;
         int max_supported;
diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c

index 349dc3e1e52e0a67bea26d971aa7d7e2764caabe..974c5a31a00598e0bcdb1742967cc7cca691a7cb 100644 (file)
--- a/drivers/firmware/efi/arm-runtime.c
+++ b/drivers/firmware/efi/arm-runtime.c
@@ -65,6 +65,7 @@ static bool __init efi_virtmap_init(void)
         bool systab_found;
  
         efi_mm.pgd = pgd_alloc(&efi_mm);
+       mm_init_cpumask(&efi_mm);
         init_new_context(NULL, &efi_mm);
  
         systab_found = false;
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c

index e7d404059b7316a5c5668f609ceb5957fdcdd97d..b372aad3b449c39a85daa7d8df1a417741a83a73 100644 (file)
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -389,7 +389,6 @@ int __init efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md)
                         return 0;
                 }
         }
-       pr_err_once("requested map not found.\n");
         return -ENOENT;
  }
  
diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c

index 08b026864d4e7d5f00b76cbd95b7398c6db943f8..8554d7aec31c640b6e845ec6502fb2d538408fae 100644 (file)
--- a/drivers/firmware/efi/esrt.c
+++ b/drivers/firmware/efi/esrt.c
@@ -254,7 +254,7 @@ void __init efi_esrt_init(void)
  
         rc = efi_mem_desc_lookup(efi.esrt, &md);
         if (rc < 0) {
-               pr_err("ESRT header is not in the memory map.\n");
+               pr_warn("ESRT header is not in the memory map.\n");
                 return;
         }
  
diff --git a/drivers/firmware/efi/libstub/gop.c b/drivers/firmware/efi/libstub/gop.c

index 932742e4cf23147e304d6df2a1c8f37c8f89d296..24c461dea7afb146a509e097b581aa2fdaede132 100644 (file)
--- a/drivers/firmware/efi/libstub/gop.c
+++ b/drivers/firmware/efi/libstub/gop.c
@@ -149,7 +149,8 @@ setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si,
  
                 status = __gop_query32(sys_table_arg, gop32, &info, &size,
                                        &current_fb_base);
-               if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
+               if (status == EFI_SUCCESS && (!first_gop || conout_found) &&
+                   info->pixel_format != PIXEL_BLT_ONLY) {
                         /*
                          * Systems that use the UEFI Console Splitter may
                          * provide multiple GOP devices, not all of which are
@@ -266,7 +267,8 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si,
  
                 status = __gop_query64(sys_table_arg, gop64, &info, &size,
                                        &current_fb_base);
-               if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
+               if (status == EFI_SUCCESS && (!first_gop || conout_found) &&
+                   info->pixel_format != PIXEL_BLT_ONLY) {
                         /*
                          * Systems that use the UEFI Console Splitter may
                          * provide multiple GOP devices, not all of which are
diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c

index 6def402bf5691f504292b88ff994b9a472fa2bc5..5da36e56b36a1cc29c971934559e5852bed0e7bc 100644 (file)
--- a/drivers/firmware/efi/libstub/secureboot.c
+++ b/drivers/firmware/efi/libstub/secureboot.c
@@ -45,6 +45,8 @@ enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table_arg)
         size = sizeof(secboot);
         status = get_efi_var(efi_SecureBoot_name, &efi_variable_guid,
                              NULL, &size, &secboot);
+       if (status == EFI_NOT_FOUND)
+               return efi_secureboot_mode_disabled;
         if (status != EFI_SUCCESS)
                 goto out_efi_err;
  
@@ -78,7 +80,5 @@ secure_boot_enabled:
  
  out_efi_err:
         pr_efi_err(sys_table_arg, "Could not determine UEFI Secure Boot status.\n");
-       if (status == EFI_NOT_FOUND)
-               return efi_secureboot_mode_disabled;
         return efi_secureboot_mode_unknown;
  }
diff --git a/drivers/gpio/gpio-altera-a10sr.c b/drivers/gpio/gpio-altera-a10sr.c

index 9e1a138fed53372a56dd1b7d2982ec198f46b3f1..16a8951b2beda389368c858848beb2edaf06949f 100644 (file)
--- a/drivers/gpio/gpio-altera-a10sr.c
+++ b/drivers/gpio/gpio-altera-a10sr.c
@@ -96,7 +96,7 @@ static int altr_a10sr_gpio_probe(struct platform_device *pdev)
         gpio->regmap = a10sr->regmap;
  
         gpio->gp = altr_a10sr_gc;
-
+       gpio->gp.parent = pdev->dev.parent;
         gpio->gp.of_node = pdev->dev.of_node;
  
         ret = devm_gpiochip_add_data(&pdev->dev, &gpio->gp, gpio);
diff --git a/drivers/gpio/gpio-altera.c b/drivers/gpio/gpio-altera.c

index 5bddbd507ca9f105aa18cfe5f43b673b676d551d..3fe6a21e05a5718d8769bf2dd505cb5968f41207 100644 (file)
--- a/drivers/gpio/gpio-altera.c
+++ b/drivers/gpio/gpio-altera.c
@@ -90,21 +90,18 @@ static int altera_gpio_irq_set_type(struct irq_data *d,
  
         altera_gc = gpiochip_get_data(irq_data_get_irq_chip_data(d));
  
-       if (type == IRQ_TYPE_NONE)
+       if (type == IRQ_TYPE_NONE) {
+               irq_set_handler_locked(d, handle_bad_irq);
                 return 0;
-       if (type == IRQ_TYPE_LEVEL_HIGH &&
-               altera_gc->interrupt_trigger == IRQ_TYPE_LEVEL_HIGH)
-               return 0;
-       if (type == IRQ_TYPE_EDGE_RISING &&
-               altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_RISING)
-               return 0;
-       if (type == IRQ_TYPE_EDGE_FALLING &&
-               altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_FALLING)
-               return 0;
-       if (type == IRQ_TYPE_EDGE_BOTH &&
-               altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_BOTH)
+       }
+       if (type == altera_gc->interrupt_trigger) {
+               if (type == IRQ_TYPE_LEVEL_HIGH)
+                       irq_set_handler_locked(d, handle_level_irq);
+               else
+                       irq_set_handler_locked(d, handle_simple_irq);
                 return 0;
-
+       }
+       irq_set_handler_locked(d, handle_bad_irq);
         return -EINVAL;
  }
  
@@ -230,7 +227,6 @@ static void altera_gpio_irq_edge_handler(struct irq_desc *desc)
         chained_irq_exit(chip, desc);
  }
  
-
  static void altera_gpio_irq_leveL_high_handler(struct irq_desc *desc)
  {
         struct altera_gpio_chip *altera_gc;
@@ -310,7 +306,7 @@ static int altera_gpio_probe(struct platform_device *pdev)
         altera_gc->interrupt_trigger = reg;
  
         ret = gpiochip_irqchip_add(&altera_gc->mmchip.gc, &altera_irq_chip, 0,
-               handle_simple_irq, IRQ_TYPE_NONE);
+               handle_bad_irq, IRQ_TYPE_NONE);
  
         if (ret) {
                 dev_err(&pdev->dev, "could not add irqchip\n");
diff --git a/drivers/gpio/gpio-mcp23s08.c b/drivers/gpio/gpio-mcp23s08.c

index bdb692345428ccc99c8f22bd3b460f25b41e3156..2a57d024481db8c354badd976843f83a365c72a9 100644 (file)
--- a/drivers/gpio/gpio-mcp23s08.c
+++ b/drivers/gpio/gpio-mcp23s08.c
@@ -270,8 +270,10 @@ mcp23s08_direction_output(struct gpio_chip *chip, unsigned offset, int value)
  static irqreturn_t mcp23s08_irq(int irq, void *data)
  {
         struct mcp23s08 *mcp = data;
-       int intcap, intf, i;
+       int intcap, intf, i, gpio, gpio_orig, intcap_mask;
         unsigned int child_irq;
+       bool intf_set, intcap_changed, gpio_bit_changed,
+               defval_changed, gpio_set;
  
         mutex_lock(&mcp->lock);
         if (mcp_read(mcp, MCP_INTF, &intf) < 0) {
@@ -287,14 +289,67 @@ static irqreturn_t mcp23s08_irq(int irq, void *data)
         }
  
         mcp->cache[MCP_INTCAP] = intcap;
+
+       /* This clears the interrupt(configurable on S18) */
+       if (mcp_read(mcp, MCP_GPIO, &gpio) < 0) {
+               mutex_unlock(&mcp->lock);
+               return IRQ_HANDLED;
+       }
+       gpio_orig = mcp->cache[MCP_GPIO];
+       mcp->cache[MCP_GPIO] = gpio;
         mutex_unlock(&mcp->lock);
  
+       if (mcp->cache[MCP_INTF] == 0) {
+               /* There is no interrupt pending */
+               return IRQ_HANDLED;
+       }
+
+       dev_dbg(mcp->chip.parent,
+               "intcap 0x%04X intf 0x%04X gpio_orig 0x%04X gpio 0x%04X\n",
+               intcap, intf, gpio_orig, gpio);
  
         for (i = 0; i < mcp->chip.ngpio; i++) {
-               if ((BIT(i) & mcp->cache[MCP_INTF]) &&
-                   ((BIT(i) & intcap & mcp->irq_rise) ||
-                    (mcp->irq_fall & ~intcap & BIT(i)) ||
-                    (BIT(i) & mcp->cache[MCP_INTCON]))) {
+               /* We must check all of the inputs on the chip,
+                * otherwise we may not notice a change on >=2 pins.
+                *
+                * On at least the mcp23s17, INTCAP is only updated
+                * one byte at a time(INTCAPA and INTCAPB are
+                * not written to at the same time - only on a per-bank
+                * basis).
+                *
+                * INTF only contains the single bit that caused the
+                * interrupt per-bank.  On the mcp23s17, there is
+                * INTFA and INTFB.  If two pins are changed on the A
+                * side at the same time, INTF will only have one bit
+                * set.  If one pin on the A side and one pin on the B
+                * side are changed at the same time, INTF will have
+                * two bits set.  Thus, INTF can't be the only check
+                * to see if the input has changed.
+                */
+
+               intf_set = BIT(i) & mcp->cache[MCP_INTF];
+               if (i < 8 && intf_set)
+                       intcap_mask = 0x00FF;
+               else if (i >= 8 && intf_set)
+                       intcap_mask = 0xFF00;
+               else
+                       intcap_mask = 0x00;
+
+               intcap_changed = (intcap_mask &
+                       (BIT(i) & mcp->cache[MCP_INTCAP])) !=
+                       (intcap_mask & (BIT(i) & gpio_orig));
+               gpio_set = BIT(i) & mcp->cache[MCP_GPIO];
+               gpio_bit_changed = (BIT(i) & gpio_orig) !=
+                       (BIT(i) & mcp->cache[MCP_GPIO]);
+               defval_changed = (BIT(i) & mcp->cache[MCP_INTCON]) &&
+                       ((BIT(i) & mcp->cache[MCP_GPIO]) !=
+                       (BIT(i) & mcp->cache[MCP_DEFVAL]));
+
+               if (((gpio_bit_changed || intcap_changed) &&
+                       (BIT(i) & mcp->irq_rise) && gpio_set) ||
+                   ((gpio_bit_changed || intcap_changed) &&
+                       (BIT(i) & mcp->irq_fall) && !gpio_set) ||
+                   defval_changed) {
                         child_irq = irq_find_mapping(mcp->chip.irqdomain, i);
                         handle_nested_irq(child_irq);
                 }
diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c

index 06dac72cb69c0c1c6e9005c748a613985dea111b..d993386892138757b67be09b4df8a822e39e4017 100644 (file)
--- a/drivers/gpio/gpio-mockup.c
+++ b/drivers/gpio/gpio-mockup.c
@@ -197,7 +197,7 @@ static ssize_t gpio_mockup_event_write(struct file *file,
         struct seq_file *sfile;
         struct gpio_desc *desc;
         struct gpio_chip *gc;
-       int status, val;
+       int val;
         char buf;
  
         sfile = file->private_data;
@@ -206,9 +206,8 @@ static ssize_t gpio_mockup_event_write(struct file *file,
         chip = priv->chip;
         gc = &chip->gc;
  
-       status = copy_from_user(&buf, usr_buf, 1);
-       if (status)
-               return status;
+       if (copy_from_user(&buf, usr_buf, 1))
+               return -EFAULT;
  
         if (buf == '0')
                 val = 0;
diff --git a/drivers/gpio/gpio-xgene.c b/drivers/gpio/gpio-xgene.c

index 40a8881c2ce882bc1eef7eb59fff492afa6f378b..f1c6ec17b90a8352ecaf2e350aa8309a317925d8 100644 (file)
--- a/drivers/gpio/gpio-xgene.c
+++ b/drivers/gpio/gpio-xgene.c
@@ -42,9 +42,7 @@ struct xgene_gpio {
         struct gpio_chip        chip;
         void __iomem            *base;
         spinlock_t              lock;
-#ifdef CONFIG_PM
         u32                     set_dr_val[XGENE_MAX_GPIO_BANKS];
-#endif
  };
  
  static int xgene_gpio_get(struct gpio_chip *gc, unsigned int offset)
@@ -138,8 +136,7 @@ static int xgene_gpio_dir_out(struct gpio_chip *gc,
         return 0;
  }
  
-#ifdef CONFIG_PM
-static int xgene_gpio_suspend(struct device *dev)
+static __maybe_unused int xgene_gpio_suspend(struct device *dev)
  {
         struct xgene_gpio *gpio = dev_get_drvdata(dev);
         unsigned long bank_offset;
@@ -152,7 +149,7 @@ static int xgene_gpio_suspend(struct device *dev)
         return 0;
  }
  
-static int xgene_gpio_resume(struct device *dev)
+static __maybe_unused int xgene_gpio_resume(struct device *dev)
  {
         struct xgene_gpio *gpio = dev_get_drvdata(dev);
         unsigned long bank_offset;
@@ -166,10 +163,6 @@ static int xgene_gpio_resume(struct device *dev)
  }
  
  static SIMPLE_DEV_PM_OPS(xgene_gpio_pm, xgene_gpio_suspend, xgene_gpio_resume);
-#define XGENE_GPIO_PM_OPS      (&xgene_gpio_pm)
-#else
-#define XGENE_GPIO_PM_OPS      NULL
-#endif
  
  static int xgene_gpio_probe(struct platform_device *pdev)
  {
@@ -241,7 +234,7 @@ static struct platform_driver xgene_gpio_driver = {
                 .name = "xgene-gpio",
                 .of_match_table = xgene_gpio_of_match,
                 .acpi_match_table = ACPI_PTR(xgene_gpio_acpi_match),
-               .pm     = XGENE_GPIO_PM_OPS,
+               .pm     = &xgene_gpio_pm,
         },
         .probe = xgene_gpio_probe,
  };
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c

index 9b37a3692b3feed18578c39f4acc22ba03182e8c..2bd683e2be022dd2c9a2b5c6cd7285d4c68e69eb 100644 (file)
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -266,6 +266,9 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares,
                 goto fail_free_event;
         }
  
+       if (agpio->wake_capable == ACPI_WAKE_CAPABLE)
+               enable_irq_wake(irq);
+
         list_add_tail(&event->node, &acpi_gpio->events);
         return AE_OK;
  
@@ -339,6 +342,9 @@ void acpi_gpiochip_free_interrupts(struct gpio_chip *chip)
         list_for_each_entry_safe_reverse(event, ep, &acpi_gpio->events, node) {
                 struct gpio_desc *desc;
  
+               if (irqd_is_wakeup_set(irq_get_irq_data(event->irq)))
+                       disable_irq_wake(event->irq);
+
                 free_irq(event->irq, event);
                 desc = event->desc;
                 if (WARN_ON(IS_ERR(desc)))
@@ -571,8 +577,10 @@ struct gpio_desc *acpi_find_gpio(struct device *dev,
                 }
  
                 desc = acpi_get_gpiod_by_index(adev, propname, idx, &info);
-               if (!IS_ERR(desc) || (PTR_ERR(desc) == -EPROBE_DEFER))
+               if (!IS_ERR(desc))
                         break;
+               if (PTR_ERR(desc) == -EPROBE_DEFER)
+                       return ERR_CAST(desc);
         }
  
         /* Then from plain _CRS GPIOs */
diff --git a/drivers/gpu/drm/amd/acp/Makefile b/drivers/gpu/drm/amd/acp/Makefile

index 8363cb57915b0b726c704b8be37805ecef2a18ee..8a08e81ee90d579774ca96bc70853093ba623f09 100644 (file)
--- a/drivers/gpu/drm/amd/acp/Makefile
+++ b/drivers/gpu/drm/amd/acp/Makefile
@@ -3,6 +3,4 @@
  # of AMDSOC/AMDGPU drm driver.
  # It provides the HW control for ACP related functionalities.
  
-subdir-ccflags-y += -I$(AMDACPPATH)/ -I$(AMDACPPATH)/include
-
  AMD_ACP_FILES := $(AMDACPPATH)/acp_hw.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index d2d0f60ff36d1f2fd4a80ef8b43d2d3d9737e1f9..99424cb8020bdf914b5627bffce01155ba8f6b73 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -240,6 +240,8 @@ free_partial_kdata:
         for (; i >= 0; i--)
                 drm_free_large(p->chunks[i].kdata);
         kfree(p->chunks);
+       p->chunks = NULL;
+       p->nchunks = 0;
  put_ctx:
         amdgpu_ctx_put(p->ctx);
  free_chunk:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 6abb238b25c97e8acc9f76887fa4b042e6025c1c..de0cf3315484c997877b30a65a15e7e1af3b87a4 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -475,7 +475,7 @@ static int amdgpu_wb_init(struct amdgpu_device *adev)
         int r;
  
         if (adev->wb.wb_obj == NULL) {
-               r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * 4,
+               r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t),
                                             PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
                                             &adev->wb.wb_obj, &adev->wb.gpu_addr,
                                             (void **)&adev->wb.wb);
@@ -488,7 +488,7 @@ static int amdgpu_wb_init(struct amdgpu_device *adev)
                 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
  
                 /* clear wb memory */
-               memset((char *)adev->wb.wb, 0, AMDGPU_GPU_PAGE_SIZE);
+               memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t));
         }
  
         return 0;
@@ -2094,8 +2094,11 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
         }
  
         r = amdgpu_late_init(adev);
-       if (r)
+       if (r) {
+               if (fbcon)
+                       console_unlock();
                 return r;
+       }
  
         /* pin cursors */
         list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
@@ -2587,7 +2590,7 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
                 use_bank = 0;
         }
  
-       *pos &= 0x3FFFF;
+       *pos &= (1UL << 22) - 1;
  
         if (use_bank) {
                 if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
@@ -2663,7 +2666,7 @@ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
                 use_bank = 0;
         }
  
-       *pos &= 0x3FFFF;
+       *pos &= (1UL << 22) - 1;
  
         if (use_bank) {
                 if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index 75fc376ba735874e3598d72a06e17f32eb6ec0da..b76cd699eb0d7357d79d68f9da4fd9bd02f3e132 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -59,9 +59,10 @@
   * - 3.7.0 - Add support for VCE clock list packet
   * - 3.8.0 - Add support raster config init in the kernel
   * - 3.9.0 - Add support for memory query info about VRAM and GTT.
+ * - 3.10.0 - Add support for new fences ioctl, new gem ioctl flags
   */
  #define KMS_DRIVER_MAJOR       3
-#define KMS_DRIVER_MINOR       9
+#define KMS_DRIVER_MINOR       10
  #define KMS_DRIVER_PATCHLEVEL  0
  
  int amdgpu_vram_limit = 0;
@@ -420,6 +421,7 @@ static const struct pci_device_id pciidlist[] = {
         {0x1002, 0x6985, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
         {0x1002, 0x6986, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
         {0x1002, 0x6987, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+       {0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
         {0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
  
         {0, 0, 0}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

index 51d759463384602ef0c3ca90b9667b6de6e0a865..106cf83c2e6b46aa711b7e82381e22b8dd449aa7 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -202,6 +202,27 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
         bool kernel = false;
         int r;
  
+       /* reject invalid gem flags */
+       if (args->in.domain_flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+                                     AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
+                                     AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+                                     AMDGPU_GEM_CREATE_VRAM_CLEARED|
+                                     AMDGPU_GEM_CREATE_SHADOW |
+                                     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
+               r = -EINVAL;
+               goto error_unlock;
+       }
+       /* reject invalid gem domains */
+       if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU |
+                                AMDGPU_GEM_DOMAIN_GTT |
+                                AMDGPU_GEM_DOMAIN_VRAM |
+                                AMDGPU_GEM_DOMAIN_GDS |
+                                AMDGPU_GEM_DOMAIN_GWS |
+                                AMDGPU_GEM_DOMAIN_OA)) {
+               r = -EINVAL;
+               goto error_unlock;
+       }
+
         /* create a gem object to contain this object in */
         if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS |
             AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c

index 31375bdde6f1769ec674082141c77fb7383e302a..011800f621c6ce5574f740e85188aec215e1f2e5 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -788,7 +788,7 @@ static int sdma_v3_0_start(struct amdgpu_device *adev)
                 }
         }
  
-       /* disble sdma engine before programing it */
+       /* disable sdma engine before programing it */
         sdma_v3_0_ctx_switch_enable(adev, false);
         sdma_v3_0_enable(adev, false);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c

index f55e45b52fbce2b658135bc5fc48b084332f811c..c5dec210d5299995c72ea79f95c05415306f034f 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -3464,6 +3464,16 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev,
                     (adev->pdev->device == 0x6667)) {
                         max_sclk = 75000;
                 }
+       } else if (adev->asic_type == CHIP_OLAND) {
+               if ((adev->pdev->revision == 0xC7) ||
+                   (adev->pdev->revision == 0x80) ||
+                   (adev->pdev->revision == 0x81) ||
+                   (adev->pdev->revision == 0x83) ||
+                   (adev->pdev->revision == 0x87) ||
+                   (adev->pdev->device == 0x6604) ||
+                   (adev->pdev->device == 0x6605)) {
+                       max_sclk = 75000;
+               }
         }
  
         if (rps->vce_active) {
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c

index 50bdb24ef8d6e9f7e828ea661d873659beb3ce42..4a785d6acfb9afbde3b4f4b86116512134075759 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -1051,7 +1051,7 @@ static int vi_common_early_init(void *handle)
                 /* rev0 hardware requires workarounds to support PG */
                 adev->pg_flags = 0;
                 if (adev->rev_id != 0x00) {
-                       adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
+                       adev->pg_flags |=
                                 AMD_PG_SUPPORT_GFX_SMG |
                                 AMD_PG_SUPPORT_GFX_PIPELINE |
                                 AMD_PG_SUPPORT_CP |
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c

index 8cf71f3c6d0ea4706096222574c9d85871baba6c..261b828ad59086990f9f054906448a5526f4cbc4 100644 (file)
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c
@@ -178,7 +178,7 @@ int smu7_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate)
         if (bgate) {
                 cgs_set_powergating_state(hwmgr->device,
                                                 AMD_IP_BLOCK_TYPE_VCE,
-                                               AMD_PG_STATE_UNGATE);
+                                               AMD_PG_STATE_GATE);
                 cgs_set_clockgating_state(hwmgr->device,
                                 AMD_IP_BLOCK_TYPE_VCE,
                                 AMD_CG_STATE_GATE);
diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c

index 08e6a71f5d05f412946496f39ee82303d19a56a4..294b53697334cc0855daa73925b8c58a19cf2222 100644 (file)
--- a/drivers/gpu/drm/arm/malidp_crtc.c
+++ b/drivers/gpu/drm/arm/malidp_crtc.c
@@ -63,8 +63,7 @@ static void malidp_crtc_enable(struct drm_crtc *crtc)
  
         clk_prepare_enable(hwdev->pxlclk);
  
-       /* mclk needs to be set to the same or higher rate than pxlclk */
-       clk_set_rate(hwdev->mclk, crtc->state->adjusted_mode.crtc_clock * 1000);
+       /* We rely on firmware to set mclk to a sensible level. */
         clk_set_rate(hwdev->pxlclk, crtc->state->adjusted_mode.crtc_clock * 1000);
  
         hwdev->modeset(hwdev, &vm);
diff --git a/drivers/gpu/drm/arm/malidp_hw.c b/drivers/gpu/drm/arm/malidp_hw.c

index 488aedf5b58d54e7997b2339c75b7a90f30dcfc1..9f5513006eeef8b4e54f6727b44b0e97562935d6 100644 (file)
--- a/drivers/gpu/drm/arm/malidp_hw.c
+++ b/drivers/gpu/drm/arm/malidp_hw.c
@@ -83,7 +83,7 @@ static const struct malidp_layer malidp550_layers[] = {
         { DE_VIDEO1, MALIDP550_DE_LV1_BASE, MALIDP550_DE_LV1_PTR_BASE, MALIDP_DE_LV_STRIDE0 },
         { DE_GRAPHICS1, MALIDP550_DE_LG_BASE, MALIDP550_DE_LG_PTR_BASE, MALIDP_DE_LG_STRIDE },
         { DE_VIDEO2, MALIDP550_DE_LV2_BASE, MALIDP550_DE_LV2_PTR_BASE, MALIDP_DE_LV_STRIDE0 },
-       { DE_SMART, MALIDP550_DE_LS_BASE, MALIDP550_DE_LS_PTR_BASE, 0 },
+       { DE_SMART, MALIDP550_DE_LS_BASE, MALIDP550_DE_LS_PTR_BASE, MALIDP550_DE_LS_R1_STRIDE },
  };
  
  #define MALIDP_DE_DEFAULT_PREFETCH_START       5
diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c

index 414aada10fe5e7d43392aa835b4c01aba594bcb7..d5aec082294cbdde5a19986a5b1908aef974bb19 100644 (file)
--- a/drivers/gpu/drm/arm/malidp_planes.c
+++ b/drivers/gpu/drm/arm/malidp_planes.c
@@ -37,6 +37,8 @@
  #define   LAYER_V_VAL(x)               (((x) & 0x1fff) << 16)
  #define MALIDP_LAYER_COMP_SIZE         0x010
  #define MALIDP_LAYER_OFFSET            0x014
+#define MALIDP550_LS_ENABLE            0x01c
+#define MALIDP550_LS_R1_IN_SIZE                0x020
  
  /*
   * This 4-entry look-up-table is used to determine the full 8-bit alpha value
@@ -242,6 +244,11 @@ static void malidp_de_plane_update(struct drm_plane *plane,
                         LAYER_V_VAL(plane->state->crtc_y),
                         mp->layer->base + MALIDP_LAYER_OFFSET);
  
+       if (mp->layer->id == DE_SMART)
+               malidp_hw_write(mp->hwdev,
+                               LAYER_H_VAL(src_w) | LAYER_V_VAL(src_h),
+                               mp->layer->base + MALIDP550_LS_R1_IN_SIZE);
+
         /* first clear the rotation bits */
         val = malidp_hw_read(mp->hwdev, mp->layer->base + MALIDP_LAYER_CONTROL);
         val &= ~LAYER_ROT_MASK;
@@ -330,9 +337,16 @@ int malidp_de_planes_init(struct drm_device *drm)
                 plane->hwdev = malidp->dev;
                 plane->layer = &map->layers[i];
  
-               /* Skip the features which the SMART layer doesn't have */
-               if (id == DE_SMART)
+               if (id == DE_SMART) {
+                       /*
+                        * Enable the first rectangle in the SMART layer to be
+                        * able to use it as a drm plane.
+                        */
+                       malidp_hw_write(malidp->dev, 1,
+                                       plane->layer->base + MALIDP550_LS_ENABLE);
+                       /* Skip the features which the SMART layer doesn't have. */
                         continue;
+               }
  
                 drm_plane_create_rotation_property(&plane->base, DRM_ROTATE_0, flags);
                 malidp_hw_write(malidp->dev, MALIDP_ALPHA_LUT,
diff --git a/drivers/gpu/drm/arm/malidp_regs.h b/drivers/gpu/drm/arm/malidp_regs.h

index aff6d4a84e998c6cc1d01e3067d0f52712daa145..b816067a65c5727ab120000c5d5d080e022fee2c 100644 (file)
--- a/drivers/gpu/drm/arm/malidp_regs.h
+++ b/drivers/gpu/drm/arm/malidp_regs.h
@@ -84,6 +84,7 @@
  /* Stride register offsets relative to Lx_BASE */
  #define MALIDP_DE_LG_STRIDE            0x18
  #define MALIDP_DE_LV_STRIDE0           0x18
+#define MALIDP550_DE_LS_R1_STRIDE      0x28
  
  /* macros to set values into registers */
  #define MALIDP_DE_H_FRONTPORCH(x)      (((x) & 0xfff) << 0)
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c

index c8baab9bee0d05cf904153021db5601ade303586..ba58f1b11d1e16b141fe01359967a229a54da6b1 100644 (file)
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -148,6 +148,9 @@ static const struct edid_quirk {
  
         /* Panel in Samsung NP700G7A-S01PL notebook reports 6bpc */
         { "SEC", 0xd033, EDID_QUIRK_FORCE_8BPC },
+
+       /* Rotel RSX-1058 forwards sink's EDID but only does HDMI 1.1*/
+       { "ETR", 13896, EDID_QUIRK_FORCE_8BPC },
  };
  
  /*
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c

index f6d4d9700734e6d48792e90d47c7bfe4081116a1..324a688b3f3013e9020ce2b9f0f76083ad91f890 100644 (file)
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -1260,9 +1260,9 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var,
          * to KMS, hence fail if different settings are requested.
          */
         if (var->bits_per_pixel != fb->format->cpp[0] * 8 ||
-           var->xres != fb->width || var->yres != fb->height ||
-           var->xres_virtual != fb->width || var->yres_virtual != fb->height) {
-               DRM_DEBUG("fb userspace requested width/height/bpp different than current fb "
+           var->xres > fb->width || var->yres > fb->height ||
+           var->xres_virtual > fb->width || var->yres_virtual > fb->height) {
+               DRM_DEBUG("fb requested width/height/bpp can't fit in current fb "
                           "request %dx%d-%d (virtual %dx%d) > %dx%d-%d\n",
                           var->xres, var->yres, var->bits_per_pixel,
                           var->xres_virtual, var->yres_virtual,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c

index 130d7d517a19a180ca7f2e744131aeabc554564b..b78d9239e48fb0fc3b02129fe97795a6b94bed70 100644 (file)
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -1311,15 +1311,15 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
                 goto out_pm_put;
         }
  
+       mutex_lock(&gpu->lock);
+
         fence = etnaviv_gpu_fence_alloc(gpu);
         if (!fence) {
                 event_free(gpu, event);
                 ret = -ENOMEM;
-               goto out_pm_put;
+               goto out_unlock;
         }
  
-       mutex_lock(&gpu->lock);
-
         gpu->event[event].fence = fence;
         submit->fence = fence->seqno;
         gpu->active_fence = submit->fence;
@@ -1357,6 +1357,7 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
         hangcheck_timer_reset(gpu);
         ret = 0;
  
+out_unlock:
         mutex_unlock(&gpu->lock);
  
  out_pm_put:
diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c

index 0fd6f7a18364a69ba67cb000674593fc94041e19..c0e8d3302292c9c9329ca4795b4d9ffb3465a178 100644 (file)
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -68,6 +68,8 @@ struct decon_context {
         unsigned long                   flags;
         unsigned long                   out_type;
         int                             first_win;
+       spinlock_t                      vblank_lock;
+       u32                             frame_id;
  };
  
  static const uint32_t decon_formats[] = {
@@ -103,7 +105,7 @@ static int decon_enable_vblank(struct exynos_drm_crtc *crtc)
                 if (ctx->out_type & IFTYPE_I80)
                         val |= VIDINTCON0_FRAMEDONE;
                 else
-                       val |= VIDINTCON0_INTFRMEN;
+                       val |= VIDINTCON0_INTFRMEN | VIDINTCON0_FRAMESEL_FP;
  
                 writel(val, ctx->addr + DECON_VIDINTCON0);
         }
@@ -122,14 +124,56 @@ static void decon_disable_vblank(struct exynos_drm_crtc *crtc)
                 writel(0, ctx->addr + DECON_VIDINTCON0);
  }
  
+/* return number of starts/ends of frame transmissions since reset */
+static u32 decon_get_frame_count(struct decon_context *ctx, bool end)
+{
+       u32 frm, pfrm, status, cnt = 2;
+
+       /* To get a consistent result, repeat the read until the frame id is
+        * stable. Usually the loop executes only once; in the rare case that
+        * it runs while the frame is changing, a second pass is needed.
+        */
+       frm = readl(ctx->addr + DECON_CRFMID);
+       do {
+               status = readl(ctx->addr + DECON_VIDCON1);
+               pfrm = frm;
+               frm = readl(ctx->addr + DECON_CRFMID);
+       } while (frm != pfrm && --cnt);
+
+       /* CRFMID is incremented on BPORCH in case of I80 and on VSYNC in case
+        * of RGB; this must be taken into account.
+        */
+       if (!frm)
+               return 0;
+
+       switch (status & (VIDCON1_VSTATUS_MASK | VIDCON1_I80_ACTIVE)) {
+       case VIDCON1_VSTATUS_VS:
+               if (!(ctx->out_type & IFTYPE_I80))
+                       --frm;
+               break;
+       case VIDCON1_VSTATUS_BP:
+               --frm;
+               break;
+       case VIDCON1_I80_ACTIVE:
+       case VIDCON1_VSTATUS_AC:
+               if (end)
+                       --frm;
+               break;
+       default:
+               break;
+       }
+
+       return frm;
+}
+
  static void decon_setup_trigger(struct decon_context *ctx)
  {
         if (!(ctx->out_type & (IFTYPE_I80 | I80_HW_TRG)))
                 return;
  
         if (!(ctx->out_type & I80_HW_TRG)) {
-               writel(TRIGCON_TE_AUTO_MASK | TRIGCON_SWTRIGEN
-                      | TRIGCON_TE_AUTO_MASK | TRIGCON_SWTRIGEN,
+               writel(TRIGCON_TRIGEN_PER_F | TRIGCON_TRIGEN_F |
+                      TRIGCON_TE_AUTO_MASK | TRIGCON_SWTRIGEN,
                        ctx->addr + DECON_TRIGCON);
                 return;
         }
@@ -365,11 +409,14 @@ static void decon_disable_plane(struct exynos_drm_crtc *crtc,
  static void decon_atomic_flush(struct exynos_drm_crtc *crtc)
  {
         struct decon_context *ctx = crtc->ctx;
+       unsigned long flags;
         int i;
  
         if (test_bit(BIT_SUSPENDED, &ctx->flags))
                 return;
  
+       spin_lock_irqsave(&ctx->vblank_lock, flags);
+
         for (i = ctx->first_win; i < WINDOWS_NR; i++)
                 decon_shadow_protect_win(ctx, i, false);
  
@@ -378,11 +425,18 @@ static void decon_atomic_flush(struct exynos_drm_crtc *crtc)
  
         if (ctx->out_type & IFTYPE_I80)
                 set_bit(BIT_WIN_UPDATED, &ctx->flags);
+
+       ctx->frame_id = decon_get_frame_count(ctx, true);
+
+       exynos_crtc_handle_event(crtc);
+
+       spin_unlock_irqrestore(&ctx->vblank_lock, flags);
  }
  
  static void decon_swreset(struct decon_context *ctx)
  {
         unsigned int tries;
+       unsigned long flags;
  
         writel(0, ctx->addr + DECON_VIDCON0);
         for (tries = 2000; tries; --tries) {
@@ -400,6 +454,10 @@ static void decon_swreset(struct decon_context *ctx)
  
         WARN(tries == 0, "failed to software reset DECON\n");
  
+       spin_lock_irqsave(&ctx->vblank_lock, flags);
+       ctx->frame_id = 0;
+       spin_unlock_irqrestore(&ctx->vblank_lock, flags);
+
         if (!(ctx->out_type & IFTYPE_HDMI))
                 return;
  
@@ -578,6 +636,24 @@ static const struct component_ops decon_component_ops = {
         .unbind = decon_unbind,
  };
  
+static void decon_handle_vblank(struct decon_context *ctx)
+{
+       u32 frm;
+
+       spin_lock(&ctx->vblank_lock);
+
+       frm = decon_get_frame_count(ctx, true);
+
+       if (frm != ctx->frame_id) {
+               /* handle only if incremented, take care of wrap-around */
+               if ((s32)(frm - ctx->frame_id) > 0)
+                       drm_crtc_handle_vblank(&ctx->crtc->base);
+               ctx->frame_id = frm;
+       }
+
+       spin_unlock(&ctx->vblank_lock);
+}
+
  static irqreturn_t decon_irq_handler(int irq, void *dev_id)
  {
         struct decon_context *ctx = dev_id;
@@ -598,7 +674,7 @@ static irqreturn_t decon_irq_handler(int irq, void *dev_id)
                             (VIDOUT_INTERLACE_EN_F | VIDOUT_INTERLACE_FIELD_F))
                                 return IRQ_HANDLED;
                 }
-               drm_crtc_handle_vblank(&ctx->crtc->base);
+               decon_handle_vblank(ctx);
         }
  
  out:
@@ -671,6 +747,7 @@ static int exynos5433_decon_probe(struct platform_device *pdev)
         __set_bit(BIT_SUSPENDED, &ctx->flags);
         ctx->dev = dev;
         ctx->out_type = (unsigned long)of_device_get_match_data(dev);
+       spin_lock_init(&ctx->vblank_lock);
  
         if (ctx->out_type & IFTYPE_HDMI) {
                 ctx->first_win = 1;
@@ -678,7 +755,7 @@ static int exynos5433_decon_probe(struct platform_device *pdev)
                 ctx->out_type |= IFTYPE_I80;
         }
  
-       if (ctx->out_type | I80_HW_TRG) {
+       if (ctx->out_type & I80_HW_TRG) {
                 ctx->sysreg = syscon_regmap_lookup_by_phandle(dev->of_node,
                                                         "samsung,disp-sysreg");
                 if (IS_ERR(ctx->sysreg)) {
diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c

index f9ab19e205e243d931412257064455582747e92a..48811806fa2727c540e5f5e9d0ffb1700dc08c4f 100644 (file)
--- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
@@ -526,6 +526,7 @@ static void decon_atomic_flush(struct exynos_drm_crtc *crtc)
  
         for (i = 0; i < WINDOWS_NR; i++)
                 decon_shadow_protect_win(ctx, i, false);
+       exynos_crtc_handle_event(crtc);
  }
  
  static void decon_init(struct decon_context *ctx)
diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c

index 5367b6664fe37d6ea1825eac48e620a7c6d902e7..c65f4509932c56f18f869f81b293d707ce2e47d7 100644 (file)
--- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
@@ -85,16 +85,28 @@ static void exynos_crtc_atomic_flush(struct drm_crtc *crtc,
                                      struct drm_crtc_state *old_crtc_state)
  {
         struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc);
-       struct drm_pending_vblank_event *event;
-       unsigned long flags;
  
         if (exynos_crtc->ops->atomic_flush)
                 exynos_crtc->ops->atomic_flush(exynos_crtc);
+}
+
+static const struct drm_crtc_helper_funcs exynos_crtc_helper_funcs = {
+       .enable         = exynos_drm_crtc_enable,
+       .disable        = exynos_drm_crtc_disable,
+       .mode_set_nofb  = exynos_drm_crtc_mode_set_nofb,
+       .atomic_check   = exynos_crtc_atomic_check,
+       .atomic_begin   = exynos_crtc_atomic_begin,
+       .atomic_flush   = exynos_crtc_atomic_flush,
+};
+
+void exynos_crtc_handle_event(struct exynos_drm_crtc *exynos_crtc)
+{
+       struct drm_crtc *crtc = &exynos_crtc->base;
+       struct drm_pending_vblank_event *event = crtc->state->event;
+       unsigned long flags;
  
-       event = crtc->state->event;
         if (event) {
                 crtc->state->event = NULL;
-
                 spin_lock_irqsave(&crtc->dev->event_lock, flags);
                 if (drm_crtc_vblank_get(crtc) == 0)
                         drm_crtc_arm_vblank_event(crtc, event);
@@ -105,15 +117,6 @@ static void exynos_crtc_atomic_flush(struct drm_crtc *crtc,
  
  }
  
-static const struct drm_crtc_helper_funcs exynos_crtc_helper_funcs = {
-       .enable         = exynos_drm_crtc_enable,
-       .disable        = exynos_drm_crtc_disable,
-       .mode_set_nofb  = exynos_drm_crtc_mode_set_nofb,
-       .atomic_check   = exynos_crtc_atomic_check,
-       .atomic_begin   = exynos_crtc_atomic_begin,
-       .atomic_flush   = exynos_crtc_atomic_flush,
-};
-
  static void exynos_drm_crtc_destroy(struct drm_crtc *crtc)
  {
         struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.h b/drivers/gpu/drm/exynos/exynos_drm_crtc.h

index 6a581a8af4650fcf5f07ea3fa84c6b1dd5e81ad4..abd5d6ceac0c2fa0500650a139876d20ca9d3ea8 100644 (file)
--- a/drivers/gpu/drm/exynos/exynos_drm_crtc.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.h
@@ -40,4 +40,6 @@ int exynos_drm_crtc_get_pipe_from_type(struct drm_device *drm_dev,
   */
  void exynos_drm_crtc_te_handler(struct drm_crtc *crtc);
  
+void exynos_crtc_handle_event(struct exynos_drm_crtc *exynos_crtc);
+
  #endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c

index 812e2ec0761d0b2b6ad17c71c99a37995e09ca51..d7ef26370e67c59fa2825a8be9303ab4f5b0c708 100644 (file)
--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
@@ -86,7 +86,7 @@
  #define DSIM_SYNC_INFORM               (1 << 27)
  #define DSIM_EOT_DISABLE               (1 << 28)
  #define DSIM_MFLUSH_VS                 (1 << 29)
-/* This flag is valid only for exynos3250/3472/4415/5260/5430 */
+/* This flag is valid only for exynos3250/3472/5260/5430 */
  #define DSIM_CLKLANE_STOP              (1 << 30)
  
  /* DSIM_ESCMODE */
@@ -473,17 +473,6 @@ static const struct exynos_dsi_driver_data exynos4_dsi_driver_data = {
         .reg_values = reg_values,
  };
  
-static const struct exynos_dsi_driver_data exynos4415_dsi_driver_data = {
-       .reg_ofs = exynos_reg_ofs,
-       .plltmr_reg = 0x58,
-       .has_clklane_stop = 1,
-       .num_clks = 2,
-       .max_freq = 1000,
-       .wait_for_reset = 1,
-       .num_bits_resol = 11,
-       .reg_values = reg_values,
-};
-
  static const struct exynos_dsi_driver_data exynos5_dsi_driver_data = {
         .reg_ofs = exynos_reg_ofs,
         .plltmr_reg = 0x58,
@@ -521,8 +510,6 @@ static const struct of_device_id exynos_dsi_of_match[] = {
           .data = &exynos3_dsi_driver_data },
         { .compatible = "samsung,exynos4210-mipi-dsi",
           .data = &exynos4_dsi_driver_data },
-       { .compatible = "samsung,exynos4415-mipi-dsi",
-         .data = &exynos4415_dsi_driver_data },
         { .compatible = "samsung,exynos5410-mipi-dsi",
           .data = &exynos5_dsi_driver_data },
         { .compatible = "samsung,exynos5422-mipi-dsi",
@@ -979,7 +966,7 @@ static void exynos_dsi_send_to_fifo(struct exynos_dsi *dsi,
         bool first = !xfer->tx_done;
         u32 reg;
  
-       dev_dbg(dev, "< xfer %p: tx len %u, done %u, rx len %u, done %u\n",
+       dev_dbg(dev, "< xfer %pK: tx len %u, done %u, rx len %u, done %u\n",
                 xfer, length, xfer->tx_done, xfer->rx_len, xfer->rx_done);
  
         if (length > DSI_TX_FIFO_SIZE)
@@ -1177,7 +1164,7 @@ static bool exynos_dsi_transfer_finish(struct exynos_dsi *dsi)
         spin_unlock_irqrestore(&dsi->transfer_lock, flags);
  
         dev_dbg(dsi->dev,
-               "> xfer %p, tx_len %zu, tx_done %u, rx_len %u, rx_done %u\n",
+               "> xfer %pK, tx_len %zu, tx_done %u, rx_len %u, rx_done %u\n",
                 xfer, xfer->packet.payload_length, xfer->tx_done, xfer->rx_len,
                 xfer->rx_done);
  
@@ -1348,9 +1335,12 @@ static int exynos_dsi_register_te_irq(struct exynos_dsi *dsi)
         int te_gpio_irq;
  
         dsi->te_gpio = of_get_named_gpio(dsi->panel_node, "te-gpios", 0);
+       if (dsi->te_gpio == -ENOENT)
+               return 0;
+
         if (!gpio_is_valid(dsi->te_gpio)) {
-               dev_err(dsi->dev, "no te-gpios specified\n");
                 ret = dsi->te_gpio;
+               dev_err(dsi->dev, "cannot get te-gpios, %d\n", ret);
                 goto out;
         }
  
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c

index 95871577015d8a5389cdae3a0c87413af7f0448f..5b18b5c5fdf255f262d68fc6fc7103f8d4d06910 100644 (file)
--- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
@@ -1695,7 +1695,7 @@ static int fimc_probe(struct platform_device *pdev)
                 goto err_put_clk;
         }
  
-       DRM_DEBUG_KMS("id[%d]ippdrv[%p]\n", ctx->id, ippdrv);
+       DRM_DEBUG_KMS("id[%d]ippdrv[%pK]\n", ctx->id, ippdrv);
  
         spin_lock_init(&ctx->lock);
         platform_set_drvdata(pdev, ctx);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c

index a9fa444c6053c0c8d048598198938d7fa2647cce..3f04d72c448d386fc9f646e1033c8528406b107e 100644 (file)
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -71,10 +71,10 @@
  #define TRIGCON                                0x1A4
  #define TRGMODE_ENABLE                 (1 << 0)
  #define SWTRGCMD_ENABLE                        (1 << 1)
-/* Exynos3250, 3472, 4415, 5260 5410, 5420 and 5422 only supported. */
+/* Exynos3250, 3472, 5260, 5410, 5420 and 5422 only supported. */
  #define HWTRGEN_ENABLE                 (1 << 3)
  #define HWTRGMASK_ENABLE               (1 << 4)
-/* Exynos3250, 3472, 4415, 5260, 5420 and 5422 only supported. */
+/* Exynos3250, 3472, 5260, 5420 and 5422 only supported. */
  #define HWTRIGEN_PER_ENABLE            (1 << 31)
  
  /* display mode change control register except exynos4 */
@@ -138,18 +138,6 @@ static struct fimd_driver_data exynos4_fimd_driver_data = {
         .has_vtsel = 1,
  };
  
-static struct fimd_driver_data exynos4415_fimd_driver_data = {
-       .timing_base = 0x20000,
-       .lcdblk_offset = 0x210,
-       .lcdblk_vt_shift = 10,
-       .lcdblk_bypass_shift = 1,
-       .trg_type = I80_HW_TRG,
-       .has_shadowcon = 1,
-       .has_vidoutcon = 1,
-       .has_vtsel = 1,
-       .has_trigger_per_te = 1,
-};
-
  static struct fimd_driver_data exynos5_fimd_driver_data = {
         .timing_base = 0x20000,
         .lcdblk_offset = 0x214,
@@ -210,8 +198,6 @@ static const struct of_device_id fimd_driver_dt_match[] = {
           .data = &exynos3_fimd_driver_data },
         { .compatible = "samsung,exynos4210-fimd",
           .data = &exynos4_fimd_driver_data },
-       { .compatible = "samsung,exynos4415-fimd",
-         .data = &exynos4415_fimd_driver_data },
         { .compatible = "samsung,exynos5250-fimd",
           .data = &exynos5_fimd_driver_data },
         { .compatible = "samsung,exynos5420-fimd",
@@ -257,7 +243,7 @@ static int fimd_enable_vblank(struct exynos_drm_crtc *crtc)
                         val |= VIDINTCON0_INT_FRAME;
  
                         val &= ~VIDINTCON0_FRAMESEL0_MASK;
-                       val |= VIDINTCON0_FRAMESEL0_VSYNC;
+                       val |= VIDINTCON0_FRAMESEL0_FRONTPORCH;
                         val &= ~VIDINTCON0_FRAMESEL1_MASK;
                         val |= VIDINTCON0_FRAMESEL1_NONE;
                 }
@@ -723,6 +709,8 @@ static void fimd_atomic_flush(struct exynos_drm_crtc *crtc)
  
         for (i = 0; i < WINDOWS_NR; i++)
                 fimd_shadow_protect_win(ctx, i, false);
+
+       exynos_crtc_handle_event(crtc);
  }
  
  static void fimd_update_plane(struct exynos_drm_crtc *crtc,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c

index 4c28f7ffcc4dd1a0593e5c37bb878f6481f1ed70..55a1579d11b3d7c1ce604eeba2988de78de61e0b 100644 (file)
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -218,7 +218,7 @@ static struct exynos_drm_gem *exynos_drm_gem_init(struct drm_device *dev,
                 return ERR_PTR(ret);
         }
  
-       DRM_DEBUG_KMS("created file object = %p\n", obj->filp);
+       DRM_DEBUG_KMS("created file object = %pK\n", obj->filp);
  
         return exynos_gem;
  }
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c

index bef57987759d2c8d2da9451a80c02b7b1c370b7f..0506b2b17ac1c45a1bea639cb37ed90075be3677 100644 (file)
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -1723,7 +1723,7 @@ static int gsc_probe(struct platform_device *pdev)
                 return ret;
         }
  
-       DRM_DEBUG_KMS("id[%d]ippdrv[%p]\n", ctx->id, ippdrv);
+       DRM_DEBUG_KMS("id[%d]ippdrv[%pK]\n", ctx->id, ippdrv);
  
         mutex_init(&ctx->lock);
         platform_set_drvdata(pdev, ctx);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c

index 9c84ee76f18adc0ab1b8dcc3f406a4021346ece8..3edda18cc2d2d61b7010b57817aeecababe1d4c5 100644 (file)
--- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
@@ -208,7 +208,7 @@ static struct exynos_drm_ippdrv *ipp_find_drv_by_handle(u32 prop_id)
          * e.g PAUSE state, queue buf, command control.
          */
         list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) {
-               DRM_DEBUG_KMS("count[%d]ippdrv[%p]\n", count++, ippdrv);
+               DRM_DEBUG_KMS("count[%d]ippdrv[%pK]\n", count++, ippdrv);
  
                 mutex_lock(&ippdrv->cmd_lock);
                 list_for_each_entry(c_node, &ippdrv->cmd_list, list) {
@@ -388,7 +388,7 @@ int exynos_drm_ipp_set_property(struct drm_device *drm_dev, void *data,
         }
         property->prop_id = ret;
  
-       DRM_DEBUG_KMS("created prop_id[%d]cmd[%d]ippdrv[%p]\n",
+       DRM_DEBUG_KMS("created prop_id[%d]cmd[%d]ippdrv[%pK]\n",
                 property->prop_id, property->cmd, ippdrv);
  
         /* stored property information and ippdrv in private data */
@@ -518,7 +518,7 @@ static int ipp_put_mem_node(struct drm_device *drm_dev,
  {
         int i;
  
-       DRM_DEBUG_KMS("node[%p]\n", m_node);
+       DRM_DEBUG_KMS("node[%pK]\n", m_node);
  
         if (!m_node) {
                 DRM_ERROR("invalid dequeue node.\n");
@@ -562,7 +562,7 @@ static struct drm_exynos_ipp_mem_node
         m_node->buf_id = qbuf->buf_id;
         INIT_LIST_HEAD(&m_node->list);
  
-       DRM_DEBUG_KMS("m_node[%p]ops_id[%d]\n", m_node, qbuf->ops_id);
+       DRM_DEBUG_KMS("m_node[%pK]ops_id[%d]\n", m_node, qbuf->ops_id);
         DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]\n", qbuf->prop_id, m_node->buf_id);
  
         for_each_ipp_planar(i) {
@@ -659,7 +659,7 @@ static void ipp_put_event(struct drm_exynos_ipp_cmd_node *c_node,
  
         mutex_lock(&c_node->event_lock);
         list_for_each_entry_safe(e, te, &c_node->event_list, base.link) {
-               DRM_DEBUG_KMS("count[%d]e[%p]\n", count++, e);
+               DRM_DEBUG_KMS("count[%d]e[%pK]\n", count++, e);
  
                 /*
                  * qbuf == NULL condition means all event deletion.
@@ -750,7 +750,7 @@ static struct drm_exynos_ipp_mem_node
  
         /* find memory node from memory list */
         list_for_each_entry(m_node, head, list) {
-               DRM_DEBUG_KMS("count[%d]m_node[%p]\n", count++, m_node);
+               DRM_DEBUG_KMS("count[%d]m_node[%pK]\n", count++, m_node);
  
                 /* compare buffer id */
                 if (m_node->buf_id == qbuf->buf_id)
@@ -767,7 +767,7 @@ static int ipp_set_mem_node(struct exynos_drm_ippdrv *ippdrv,
         struct exynos_drm_ipp_ops *ops = NULL;
         int ret = 0;
  
-       DRM_DEBUG_KMS("node[%p]\n", m_node);
+       DRM_DEBUG_KMS("node[%pK]\n", m_node);
  
         if (!m_node) {
                 DRM_ERROR("invalid queue node.\n");
@@ -1232,7 +1232,7 @@ static int ipp_start_property(struct exynos_drm_ippdrv *ippdrv,
                         m_node = list_first_entry(head,
                                 struct drm_exynos_ipp_mem_node, list);
  
-                       DRM_DEBUG_KMS("m_node[%p]\n", m_node);
+                       DRM_DEBUG_KMS("m_node[%pK]\n", m_node);
  
                         ret = ipp_set_mem_node(ippdrv, c_node, m_node);
                         if (ret) {
@@ -1601,7 +1601,7 @@ static int ipp_subdrv_probe(struct drm_device *drm_dev, struct device *dev)
                 }
                 ippdrv->prop_list.ipp_id = ret;
  
-               DRM_DEBUG_KMS("count[%d]ippdrv[%p]ipp_id[%d]\n",
+               DRM_DEBUG_KMS("count[%d]ippdrv[%pK]ipp_id[%d]\n",
                         count++, ippdrv, ret);
  
                 /* store parent device for node */
@@ -1659,7 +1659,7 @@ static int ipp_subdrv_open(struct drm_device *drm_dev, struct device *dev,
  
         file_priv->ipp_dev = dev;
  
-       DRM_DEBUG_KMS("done priv[%p]\n", dev);
+       DRM_DEBUG_KMS("done priv[%pK]\n", dev);
  
         return 0;
  }
@@ -1676,7 +1676,7 @@ static void ipp_subdrv_close(struct drm_device *drm_dev, struct device *dev,
                 mutex_lock(&ippdrv->cmd_lock);
                 list_for_each_entry_safe(c_node, tc_node,
                         &ippdrv->cmd_list, list) {
-                       DRM_DEBUG_KMS("count[%d]ippdrv[%p]\n",
+                       DRM_DEBUG_KMS("count[%d]ippdrv[%pK]\n",
                                 count++, ippdrv);
  
                         if (c_node->filp == file) {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c

index 6591e406084c164d30c1945aa2f6d7ce559c67fd..79282a820ecce104a59c828beac7be68b65a21ee 100644 (file)
--- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
@@ -748,7 +748,7 @@ static int rotator_probe(struct platform_device *pdev)
                 goto err_ippdrv_register;
         }
  
-       DRM_DEBUG_KMS("ippdrv[%p]\n", ippdrv);
+       DRM_DEBUG_KMS("ippdrv[%pK]\n", ippdrv);
  
         platform_set_drvdata(pdev, rot);
  
diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c

index 57fe514d5c5bf9adc9f423d9f46a51e25aedfa46..5d9a62a87eec75f574ea0e696f31059d5802fa7d 100644 (file)
--- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
@@ -170,6 +170,7 @@ static const struct exynos_drm_crtc_ops vidi_crtc_ops = {
         .enable_vblank = vidi_enable_vblank,
         .disable_vblank = vidi_disable_vblank,
         .update_plane = vidi_update_plane,
+       .atomic_flush = exynos_crtc_handle_event,
  };
  
  static void vidi_fake_vblank_timer(unsigned long arg)
diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c

index 72143ac1052526ffc03332c5b6b211ae7bf30216..25edb635a197621871d583e26b3d31d3717a82fa 100644 (file)
--- a/drivers/gpu/drm/exynos/exynos_mixer.c
+++ b/drivers/gpu/drm/exynos/exynos_mixer.c
@@ -1012,6 +1012,7 @@ static void mixer_atomic_flush(struct exynos_drm_crtc *crtc)
                 return;
  
         mixer_vsync_set_update(mixer_ctx, true);
+       exynos_crtc_handle_event(crtc);
  }
  
  static void mixer_enable(struct exynos_drm_crtc *crtc)
diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c

index 3b6caaca975135d6b8d595393ffc372bc14107bc..325618d969feedf035196c9e3d211affe2fd82f6 100644 (file)
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -242,7 +242,7 @@ static int alloc_resource(struct intel_vgpu *vgpu,
         const char *item;
  
         if (!param->low_gm_sz || !param->high_gm_sz || !param->fence_sz) {
-               gvt_err("Invalid vGPU creation params\n");
+               gvt_vgpu_err("Invalid vGPU creation params\n");
                 return -EINVAL;
         }
  
@@ -285,9 +285,9 @@ static int alloc_resource(struct intel_vgpu *vgpu,
         return 0;
  
  no_enough_resource:
-       gvt_err("vgpu%d: fail to allocate resource %s\n", vgpu->id, item);
-       gvt_err("vgpu%d: request %luMB avail %luMB max %luMB taken %luMB\n",
-               vgpu->id, BYTES_TO_MB(request), BYTES_TO_MB(avail),
+       gvt_vgpu_err("fail to allocate resource %s\n", item);
+       gvt_vgpu_err("request %luMB avail %luMB max %luMB taken %luMB\n",
+               BYTES_TO_MB(request), BYTES_TO_MB(avail),
                 BYTES_TO_MB(max), BYTES_TO_MB(taken));
         return -ENOSPC;
  }
diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c

index 4a6a2ed65732e1fde39457148165274deda52db6..40af17ec6312533d4080cc1581faa4683a0405b9 100644 (file)
--- a/drivers/gpu/drm/i915/gvt/cfg_space.c
+++ b/drivers/gpu/drm/i915/gvt/cfg_space.c
@@ -41,6 +41,54 @@ enum {
         INTEL_GVT_PCI_BAR_MAX,
  };
  
+/* Per-byte bitmap of the writable bits (RW or RW1C; the two kinds cannot
+ * co-exist in one byte) of the standard PCI configuration space (not the
+ * full 256 bytes).
+ */
+static const u8 pci_cfg_space_rw_bmp[PCI_INTERRUPT_LINE + 4] = {
+       [PCI_COMMAND]           = 0xff, 0x07,
+       [PCI_STATUS]            = 0x00, 0xf9, /* the only one RW1C byte */
+       [PCI_CACHE_LINE_SIZE]   = 0xff,
+       [PCI_BASE_ADDRESS_0 ... PCI_CARDBUS_CIS - 1] = 0xff,
+       [PCI_ROM_ADDRESS]       = 0x01, 0xf8, 0xff, 0xff,
+       [PCI_INTERRUPT_LINE]    = 0xff,
+};
+
+/**
+ * vgpu_pci_cfg_mem_write - write virtual cfg space memory
+ *
+ * Use this function to write virtual cfg space memory.
+ * For standard cfg space, only RW bits can be changed,
+ * and we emulate the RW1C behavior of the PCI_STATUS register.
+ */
+static void vgpu_pci_cfg_mem_write(struct intel_vgpu *vgpu, unsigned int off,
+                                  u8 *src, unsigned int bytes)
+{
+       u8 *cfg_base = vgpu_cfg_space(vgpu);
+       u8 mask, new, old;
+       int i = 0;
+
+       for (; i < bytes && (off + i < sizeof(pci_cfg_space_rw_bmp)); i++) {
+               mask = pci_cfg_space_rw_bmp[off + i];
+               old = cfg_base[off + i];
+               new = src[i] & mask;
+
+               /*
+                * The PCI_STATUS high byte has RW1C bits; emulate
+                * write-1-to-clear for these bits here.
+                * Writing 0 to an RW1C bit has no effect.
+                */
+               if (off + i == PCI_STATUS + 1)
+                       new = (~new & old) & mask;
+
+               cfg_base[off + i] = (old & ~mask) | new;
+       }
+
+       /* The rest of the configuration space is copied directly, as-is. */
+       if (i < bytes)
+               memcpy(cfg_base + off + i, src + i, bytes - i);
+}
+
  /**
   * intel_vgpu_emulate_cfg_read - emulate vGPU configuration space read
   *
@@ -123,7 +171,7 @@ static int emulate_pci_command_write(struct intel_vgpu *vgpu,
         u8 changed = old ^ new;
         int ret;
  
-       memcpy(vgpu_cfg_space(vgpu) + offset, p_data, bytes);
+       vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes);
         if (!(changed & PCI_COMMAND_MEMORY))
                 return 0;
  
@@ -274,10 +322,10 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset,
                 if (ret)
                         return ret;
  
-               memcpy(vgpu_cfg_space(vgpu) + offset, p_data, bytes);
+               vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes);
                 break;
         default:
-               memcpy(vgpu_cfg_space(vgpu) + offset, p_data, bytes);
+               vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes);
                 break;
         }
         return 0;
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c

index b9c8e2407682fc5af454d70d8b1e881aed9e171f..2b92cc8a7d1aa551778917ed038bc6aa7961ce3e 100644 (file)
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -668,7 +668,7 @@ static inline void print_opcode(u32 cmd, int ring_id)
         if (d_info == NULL)
                 return;
  
-       gvt_err("opcode=0x%x %s sub_ops:",
+       gvt_dbg_cmd("opcode=0x%x %s sub_ops:",
                         cmd >> (32 - d_info->op_len), d_info->name);
  
         for (i = 0; i < d_info->nr_sub_op; i++)
@@ -693,23 +693,23 @@ static void parser_exec_state_dump(struct parser_exec_state *s)
         int cnt = 0;
         int i;
  
-       gvt_err("  vgpu%d RING%d: ring_start(%08lx) ring_end(%08lx)"
+       gvt_dbg_cmd("  vgpu%d RING%d: ring_start(%08lx) ring_end(%08lx)"
                         " ring_head(%08lx) ring_tail(%08lx)\n", s->vgpu->id,
                         s->ring_id, s->ring_start, s->ring_start + s->ring_size,
                         s->ring_head, s->ring_tail);
  
-       gvt_err("  %s %s ip_gma(%08lx) ",
+       gvt_dbg_cmd("  %s %s ip_gma(%08lx) ",
                         s->buf_type == RING_BUFFER_INSTRUCTION ?
                         "RING_BUFFER" : "BATCH_BUFFER",
                         s->buf_addr_type == GTT_BUFFER ?
                         "GTT" : "PPGTT", s->ip_gma);
  
         if (s->ip_va == NULL) {
-               gvt_err(" ip_va(NULL)");
+               gvt_dbg_cmd(" ip_va(NULL)");
                 return;
         }
  
-       gvt_err("  ip_va=%p: %08x %08x %08x %08x\n",
+       gvt_dbg_cmd("  ip_va=%p: %08x %08x %08x %08x\n",
                         s->ip_va, cmd_val(s, 0), cmd_val(s, 1),
                         cmd_val(s, 2), cmd_val(s, 3));
  
@@ -817,6 +817,25 @@ static bool is_shadowed_mmio(unsigned int offset)
         return ret;
  }
  
+static inline bool is_force_nonpriv_mmio(unsigned int offset)
+{
+       return (offset >= 0x24d0 && offset < 0x2500);
+}
+
+static int force_nonpriv_reg_handler(struct parser_exec_state *s,
+                                    unsigned int offset, unsigned int index)
+{
+       struct intel_gvt *gvt = s->vgpu->gvt;
+       unsigned int data = cmd_val(s, index + 1);
+
+       if (!intel_gvt_in_force_nonpriv_whitelist(gvt, data)) {
+               gvt_err("Unexpected forcenonpriv 0x%x LRI write, value=0x%x\n",
+                       offset, data);
+               return -EINVAL;
+       }
+       return 0;
+}
+
  static int cmd_reg_handler(struct parser_exec_state *s,
         unsigned int offset, unsigned int index, char *cmd)
  {
@@ -824,23 +843,26 @@ static int cmd_reg_handler(struct parser_exec_state *s,
         struct intel_gvt *gvt = vgpu->gvt;
  
         if (offset + 4 > gvt->device_info.mmio_size) {
-               gvt_err("%s access to (%x) outside of MMIO range\n",
+               gvt_vgpu_err("%s access to (%x) outside of MMIO range\n",
                                 cmd, offset);
                 return -EINVAL;
         }
  
         if (!intel_gvt_mmio_is_cmd_access(gvt, offset)) {
-               gvt_err("vgpu%d: %s access to non-render register (%x)\n",
-                               s->vgpu->id, cmd, offset);
+               gvt_vgpu_err("%s access to non-render register (%x)\n",
+                               cmd, offset);
                 return 0;
         }
  
         if (is_shadowed_mmio(offset)) {
-               gvt_err("vgpu%d: found access of shadowed MMIO %x\n",
-                               s->vgpu->id, offset);
+               gvt_vgpu_err("found access of shadowed MMIO %x\n", offset);
                 return 0;
         }
  
+       if (is_force_nonpriv_mmio(offset) &&
+           force_nonpriv_reg_handler(s, offset, index))
+               return -EINVAL;
+
         if (offset == i915_mmio_reg_offset(DERRMR) ||
                 offset == i915_mmio_reg_offset(FORCEWAKE_MT)) {
                 /* Writing to HW VGT_PVINFO_PAGE offset will be discarded */
@@ -1008,7 +1030,7 @@ static int cmd_handler_pipe_control(struct parser_exec_state *s)
                         ret = cmd_reg_handler(s, 0x2358, 1, "pipe_ctrl");
                 else if (post_sync == 1) {
                         /* check ggtt*/
-                       if ((cmd_val(s, 2) & (1 << 2))) {
+                       if ((cmd_val(s, 1) & PIPE_CONTROL_GLOBAL_GTT_IVB)) {
                                 gma = cmd_val(s, 2) & GENMASK(31, 3);
                                 if (gmadr_bytes == 8)
                                         gma |= (cmd_gma_hi(s, 3)) << 32;
@@ -1129,6 +1151,7 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s,
                 struct mi_display_flip_command_info *info)
  {
         struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv;
+       struct intel_vgpu *vgpu = s->vgpu;
         u32 dword0 = cmd_val(s, 0);
         u32 dword1 = cmd_val(s, 1);
         u32 dword2 = cmd_val(s, 2);
@@ -1167,7 +1190,7 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s,
                 break;
  
         default:
-               gvt_err("unknown plane code %d\n", plane);
+               gvt_vgpu_err("unknown plane code %d\n", plane);
                 return -EINVAL;
         }
  
@@ -1274,25 +1297,26 @@ static int update_plane_mmio_from_mi_display_flip(
  static int cmd_handler_mi_display_flip(struct parser_exec_state *s)
  {
         struct mi_display_flip_command_info info;
+       struct intel_vgpu *vgpu = s->vgpu;
         int ret;
         int i;
         int len = cmd_length(s);
  
         ret = decode_mi_display_flip(s, &info);
         if (ret) {
-               gvt_err("fail to decode MI display flip command\n");
+               gvt_vgpu_err("fail to decode MI display flip command\n");
                 return ret;
         }
  
         ret = check_mi_display_flip(s, &info);
         if (ret) {
-               gvt_err("invalid MI display flip command\n");
+               gvt_vgpu_err("invalid MI display flip command\n");
                 return ret;
         }
  
         ret = update_plane_mmio_from_mi_display_flip(s, &info);
         if (ret) {
-               gvt_err("fail to update plane mmio\n");
+               gvt_vgpu_err("fail to update plane mmio\n");
                 return ret;
         }
  
@@ -1350,7 +1374,8 @@ static inline int cmd_address_audit(struct parser_exec_state *s,
         int ret;
  
         if (op_size > max_surface_size) {
-               gvt_err("command address audit fail name %s\n", s->info->name);
+               gvt_vgpu_err("command address audit fail name %s\n",
+                       s->info->name);
                 return -EINVAL;
         }
  
@@ -1367,7 +1392,7 @@ static inline int cmd_address_audit(struct parser_exec_state *s,
         }
         return 0;
  err:
-       gvt_err("cmd_parser: Malicious %s detected, addr=0x%lx, len=%d!\n",
+       gvt_vgpu_err("cmd_parser: Malicious %s detected, addr=0x%lx, len=%d!\n",
                         s->info->name, guest_gma, op_size);
  
         pr_err("cmd dump: ");
@@ -1412,8 +1437,10 @@ static int cmd_handler_mi_store_data_imm(struct parser_exec_state *s)
  
  static inline int unexpected_cmd(struct parser_exec_state *s)
  {
-       gvt_err("vgpu%d: Unexpected %s in command buffer!\n",
-                       s->vgpu->id, s->info->name);
+       struct intel_vgpu *vgpu = s->vgpu;
+
+       gvt_vgpu_err("Unexpected %s in command buffer!\n", s->info->name);
+
         return -EINVAL;
  }
  
@@ -1516,7 +1543,7 @@ static int copy_gma_to_hva(struct intel_vgpu *vgpu, struct intel_vgpu_mm *mm,
         while (gma != end_gma) {
                 gpa = intel_vgpu_gma_to_gpa(mm, gma);
                 if (gpa == INTEL_GVT_INVALID_ADDR) {
-                       gvt_err("invalid gma address: %lx\n", gma);
+                       gvt_vgpu_err("invalid gma address: %lx\n", gma);
                         return -EFAULT;
                 }
  
@@ -1557,6 +1584,7 @@ static uint32_t find_bb_size(struct parser_exec_state *s)
         uint32_t bb_size = 0;
         uint32_t cmd_len = 0;
         bool met_bb_end = false;
+       struct intel_vgpu *vgpu = s->vgpu;
         u32 cmd;
  
         /* get the start gm address of the batch buffer */
@@ -1565,7 +1593,7 @@ static uint32_t find_bb_size(struct parser_exec_state *s)
  
         info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
         if (info == NULL) {
-               gvt_err("unknown cmd 0x%x, opcode=0x%x\n",
+               gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
                                 cmd, get_opcode(cmd, s->ring_id));
                 return -EINVAL;
         }
@@ -1574,7 +1602,7 @@ static uint32_t find_bb_size(struct parser_exec_state *s)
                                 gma, gma + 4, &cmd);
                 info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
                 if (info == NULL) {
-                       gvt_err("unknown cmd 0x%x, opcode=0x%x\n",
+                       gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
                                 cmd, get_opcode(cmd, s->ring_id));
                         return -EINVAL;
                 }
@@ -1599,6 +1627,7 @@ static uint32_t find_bb_size(struct parser_exec_state *s)
  static int perform_bb_shadow(struct parser_exec_state *s)
  {
         struct intel_shadow_bb_entry *entry_obj;
+       struct intel_vgpu *vgpu = s->vgpu;
         unsigned long gma = 0;
         uint32_t bb_size;
         void *dst = NULL;
@@ -1633,7 +1662,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
  
         ret = i915_gem_object_set_to_cpu_domain(entry_obj->obj, false);
         if (ret) {
-               gvt_err("failed to set shadow batch to CPU\n");
+               gvt_vgpu_err("failed to set shadow batch to CPU\n");
                 goto unmap_src;
         }
  
@@ -1645,7 +1674,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
                               gma, gma + bb_size,
                               dst);
         if (ret) {
-               gvt_err("fail to copy guest ring buffer\n");
+               gvt_vgpu_err("fail to copy guest ring buffer\n");
                 goto unmap_src;
         }
  
@@ -1676,15 +1705,16 @@ static int cmd_handler_mi_batch_buffer_start(struct parser_exec_state *s)
  {
         bool second_level;
         int ret = 0;
+       struct intel_vgpu *vgpu = s->vgpu;
  
         if (s->buf_type == BATCH_BUFFER_2ND_LEVEL) {
-               gvt_err("Found MI_BATCH_BUFFER_START in 2nd level BB\n");
+               gvt_vgpu_err("Found MI_BATCH_BUFFER_START in 2nd level BB\n");
                 return -EINVAL;
         }
  
         second_level = BATCH_BUFFER_2ND_LEVEL_BIT(cmd_val(s, 0)) == 1;
         if (second_level && (s->buf_type != BATCH_BUFFER_INSTRUCTION)) {
-               gvt_err("Jumping to 2nd level BB from RB is not allowed\n");
+               gvt_vgpu_err("Jumping to 2nd level BB from RB is not allowed\n");
                 return -EINVAL;
         }
  
@@ -1702,7 +1732,7 @@ static int cmd_handler_mi_batch_buffer_start(struct parser_exec_state *s)
         if (batch_buffer_needs_scan(s)) {
                 ret = perform_bb_shadow(s);
                 if (ret < 0)
-                       gvt_err("invalid shadow batch buffer\n");
+                       gvt_vgpu_err("invalid shadow batch buffer\n");
         } else {
                 /* emulate a batch buffer end to do return right */
                 ret = cmd_handler_mi_batch_buffer_end(s);
@@ -2429,6 +2459,7 @@ static int cmd_parser_exec(struct parser_exec_state *s)
         int ret = 0;
         cycles_t t0, t1, t2;
         struct parser_exec_state s_before_advance_custom;
+       struct intel_vgpu *vgpu = s->vgpu;
  
         t0 = get_cycles();
  
@@ -2436,7 +2467,7 @@ static int cmd_parser_exec(struct parser_exec_state *s)
  
         info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
         if (info == NULL) {
-               gvt_err("unknown cmd 0x%x, opcode=0x%x\n",
+               gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
                                 cmd, get_opcode(cmd, s->ring_id));
                 return -EINVAL;
         }
@@ -2452,7 +2483,7 @@ static int cmd_parser_exec(struct parser_exec_state *s)
         if (info->handler) {
                 ret = info->handler(s);
                 if (ret < 0) {
-                       gvt_err("%s handler error\n", info->name);
+                       gvt_vgpu_err("%s handler error\n", info->name);
                         return ret;
                 }
         }
@@ -2463,7 +2494,7 @@ static int cmd_parser_exec(struct parser_exec_state *s)
         if (!(info->flag & F_IP_ADVANCE_CUSTOM)) {
                 ret = cmd_advance_default(s);
                 if (ret) {
-                       gvt_err("%s IP advance error\n", info->name);
+                       gvt_vgpu_err("%s IP advance error\n", info->name);
                         return ret;
                 }
         }
@@ -2486,6 +2517,7 @@ static int command_scan(struct parser_exec_state *s,
  
         unsigned long gma_head, gma_tail, gma_bottom;
         int ret = 0;
+       struct intel_vgpu *vgpu = s->vgpu;
  
         gma_head = rb_start + rb_head;
         gma_tail = rb_start + rb_tail;
@@ -2497,7 +2529,7 @@ static int command_scan(struct parser_exec_state *s,
                 if (s->buf_type == RING_BUFFER_INSTRUCTION) {
                         if (!(s->ip_gma >= rb_start) ||
                                 !(s->ip_gma < gma_bottom)) {
-                               gvt_err("ip_gma %lx out of ring scope."
+                               gvt_vgpu_err("ip_gma %lx out of ring scope."
                                         "(base:0x%lx, bottom: 0x%lx)\n",
                                         s->ip_gma, rb_start,
                                         gma_bottom);
@@ -2505,7 +2537,7 @@ static int command_scan(struct parser_exec_state *s,
                                 return -EINVAL;
                         }
                         if (gma_out_of_range(s->ip_gma, gma_head, gma_tail)) {
-                               gvt_err("ip_gma %lx out of range."
+                               gvt_vgpu_err("ip_gma %lx out of range."
                                         "base 0x%lx head 0x%lx tail 0x%lx\n",
                                         s->ip_gma, rb_start,
                                         rb_head, rb_tail);
@@ -2515,7 +2547,7 @@ static int command_scan(struct parser_exec_state *s,
                 }
                 ret = cmd_parser_exec(s);
                 if (ret) {
-                       gvt_err("cmd parser error\n");
+                       gvt_vgpu_err("cmd parser error\n");
                         parser_exec_state_dump(s);
                         break;
                 }
@@ -2639,7 +2671,7 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
                                 gma_head, gma_top,
                                 workload->shadow_ring_buffer_va);
                 if (ret) {
-                       gvt_err("fail to copy guest ring buffer\n");
+                       gvt_vgpu_err("fail to copy guest ring buffer\n");
                         return ret;
                 }
                 copy_len = gma_top - gma_head;
@@ -2651,7 +2683,7 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
                         gma_head, gma_tail,
                         workload->shadow_ring_buffer_va + copy_len);
         if (ret) {
-               gvt_err("fail to copy guest ring buffer\n");
+               gvt_vgpu_err("fail to copy guest ring buffer\n");
                 return ret;
         }
         ring->tail += workload->rb_len;
@@ -2662,16 +2694,17 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
  int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
  {
         int ret;
+       struct intel_vgpu *vgpu = workload->vgpu;
  
         ret = shadow_workload_ring_buffer(workload);
         if (ret) {
-               gvt_err("fail to shadow workload ring_buffer\n");
+               gvt_vgpu_err("fail to shadow workload ring_buffer\n");
                 return ret;
         }
  
         ret = scan_workload(workload);
         if (ret) {
-               gvt_err("scan workload error\n");
+               gvt_vgpu_err("scan workload error\n");
                 return ret;
         }
         return 0;
@@ -2681,6 +2714,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
  {
         int ctx_size = wa_ctx->indirect_ctx.size;
         unsigned long guest_gma = wa_ctx->indirect_ctx.guest_gma;
+       struct intel_vgpu *vgpu = wa_ctx->workload->vgpu;
         struct drm_i915_gem_object *obj;
         int ret = 0;
         void *map;
@@ -2694,14 +2728,14 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
         /* get the va of the shadow batch buffer */
         map = i915_gem_object_pin_map(obj, I915_MAP_WB);
         if (IS_ERR(map)) {
-               gvt_err("failed to vmap shadow indirect ctx\n");
+               gvt_vgpu_err("failed to vmap shadow indirect ctx\n");
                 ret = PTR_ERR(map);
                 goto put_obj;
         }
  
         ret = i915_gem_object_set_to_cpu_domain(obj, false);
         if (ret) {
-               gvt_err("failed to set shadow indirect ctx to CPU\n");
+               gvt_vgpu_err("failed to set shadow indirect ctx to CPU\n");
                 goto unmap_src;
         }
  
@@ -2710,7 +2744,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
                                 guest_gma, guest_gma + ctx_size,
                                 map);
         if (ret) {
-               gvt_err("fail to copy guest indirect ctx\n");
+               gvt_vgpu_err("fail to copy guest indirect ctx\n");
                 goto unmap_src;
         }
  
@@ -2744,13 +2778,14 @@ static int combine_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
  int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
  {
         int ret;
+       struct intel_vgpu *vgpu = wa_ctx->workload->vgpu;
  
         if (wa_ctx->indirect_ctx.size == 0)
                 return 0;
  
         ret = shadow_indirect_ctx(wa_ctx);
         if (ret) {
-               gvt_err("fail to shadow indirect ctx\n");
+               gvt_vgpu_err("fail to shadow indirect ctx\n");
                 return ret;
         }
  
@@ -2758,7 +2793,7 @@ int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
  
         ret = scan_wa_ctx(wa_ctx);
         if (ret) {
-               gvt_err("scan wa ctx error\n");
+               gvt_vgpu_err("scan wa ctx error\n");
                 return ret;
         }
  
diff --git a/drivers/gpu/drm/i915/gvt/debug.h b/drivers/gpu/drm/i915/gvt/debug.h

index 68cba7bd980af8cb9a855ef3ff4a4c446947ebaf..b0cff4dc2684792271a5e648c889dbb5a4d04ac5 100644 (file)
--- a/drivers/gpu/drm/i915/gvt/debug.h
+++ b/drivers/gpu/drm/i915/gvt/debug.h
@@ -27,6 +27,14 @@
  #define gvt_err(fmt, args...) \
         DRM_ERROR("gvt: "fmt, ##args)
  
+#define gvt_vgpu_err(fmt, args...)                                     \
+do {                                                                   \
+       if (IS_ERR_OR_NULL(vgpu))                                       \
+               DRM_DEBUG_DRIVER("gvt: "fmt, ##args);                   \
+       else                                                            \
+               DRM_DEBUG_DRIVER("gvt: vgpu %d: "fmt, vgpu->id, ##args);\
+} while (0)
+
  #define gvt_dbg_core(fmt, args...) \
         DRM_DEBUG_DRIVER("gvt: core: "fmt, ##args)
  
diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c

index 6d8fde880c39936f816eae411320c38dac87662c..5419ae6ec6339cecee3ea7704c61c02689a86d22 100644 (file)
--- a/drivers/gpu/drm/i915/gvt/display.c
+++ b/drivers/gpu/drm/i915/gvt/display.c
@@ -83,44 +83,80 @@ static int pipe_is_enabled(struct intel_vgpu *vgpu, int pipe)
         return 0;
  }
  
+static unsigned char virtual_dp_monitor_edid[GVT_EDID_NUM][EDID_SIZE] = {
+       {
+/* EDID with 1024x768 as its resolution */
+               /*Header*/
+               0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
+               /* Vendor & Product Identification */
+               0x22, 0xf0, 0x54, 0x29, 0x00, 0x00, 0x00, 0x00, 0x04, 0x17,
+               /* Version & Revision */
+               0x01, 0x04,
+               /* Basic Display Parameters & Features */
+               0xa5, 0x34, 0x20, 0x78, 0x23,
+               /* Color Characteristics */
+               0xfc, 0x81, 0xa4, 0x55, 0x4d, 0x9d, 0x25, 0x12, 0x50, 0x54,
+               /* Established Timings: maximum resolution is 1024x768 */
+               0x21, 0x08, 0x00,
+               /* Standard Timings. All invalid */
+               0x00, 0xc0, 0x00, 0xc0, 0x00, 0x40, 0x00, 0x80, 0x00, 0x00,
+               0x00, 0x40, 0x00, 0x00, 0x00, 0x01,
+               /* 18 Byte Data Blocks 1: invalid */
+               0x00, 0x00, 0x80, 0xa0, 0x70, 0xb0,
+               0x23, 0x40, 0x30, 0x20, 0x36, 0x00, 0x06, 0x44, 0x21, 0x00, 0x00, 0x1a,
+               /* 18 Byte Data Blocks 2: invalid */
+               0x00, 0x00, 0x00, 0xfd, 0x00, 0x18, 0x3c, 0x18, 0x50, 0x11, 0x00, 0x0a,
+               0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+               /* 18 Byte Data Blocks 3: invalid */
+               0x00, 0x00, 0x00, 0xfc, 0x00, 0x48,
+               0x50, 0x20, 0x5a, 0x52, 0x32, 0x34, 0x34, 0x30, 0x77, 0x0a, 0x20, 0x20,
+               /* 18 Byte Data Blocks 4: invalid */
+               0x00, 0x00, 0x00, 0xff, 0x00, 0x43, 0x4e, 0x34, 0x33, 0x30, 0x34, 0x30,
+               0x44, 0x58, 0x51, 0x0a, 0x20, 0x20,
+               /* Extension Block Count */
+               0x00,
+               /* Checksum */
+               0xef,
+       },
+       {
  /* EDID with 1920x1200 as its resolution */
-static unsigned char virtual_dp_monitor_edid[] = {
-       /*Header*/
-       0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
-       /* Vendor & Product Identification */
-       0x22, 0xf0, 0x54, 0x29, 0x00, 0x00, 0x00, 0x00, 0x04, 0x17,
-       /* Version & Revision */
-       0x01, 0x04,
-       /* Basic Display Parameters & Features */
-       0xa5, 0x34, 0x20, 0x78, 0x23,
-       /* Color Characteristics */
-       0xfc, 0x81, 0xa4, 0x55, 0x4d, 0x9d, 0x25, 0x12, 0x50, 0x54,
-       /* Established Timings: maximum resolution is 1024x768 */
-       0x21, 0x08, 0x00,
-       /*
-        * Standard Timings.
-        * below new resolutions can be supported:
-        * 1920x1080, 1280x720, 1280x960, 1280x1024,
-        * 1440x900, 1600x1200, 1680x1050
-        */
-       0xd1, 0xc0, 0x81, 0xc0, 0x81, 0x40, 0x81, 0x80, 0x95, 0x00,
-       0xa9, 0x40, 0xb3, 0x00, 0x01, 0x01,
-       /* 18 Byte Data Blocks 1: max resolution is 1920x1200 */
-       0x28, 0x3c, 0x80, 0xa0, 0x70, 0xb0,
-       0x23, 0x40, 0x30, 0x20, 0x36, 0x00, 0x06, 0x44, 0x21, 0x00, 0x00, 0x1a,
-       /* 18 Byte Data Blocks 2: invalid */
-       0x00, 0x00, 0x00, 0xfd, 0x00, 0x18, 0x3c, 0x18, 0x50, 0x11, 0x00, 0x0a,
-       0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-       /* 18 Byte Data Blocks 3: invalid */
-       0x00, 0x00, 0x00, 0xfc, 0x00, 0x48,
-       0x50, 0x20, 0x5a, 0x52, 0x32, 0x34, 0x34, 0x30, 0x77, 0x0a, 0x20, 0x20,
-       /* 18 Byte Data Blocks 4: invalid */
-       0x00, 0x00, 0x00, 0xff, 0x00, 0x43, 0x4e, 0x34, 0x33, 0x30, 0x34, 0x30,
-       0x44, 0x58, 0x51, 0x0a, 0x20, 0x20,
-       /* Extension Block Count */
-       0x00,
-       /* Checksum */
-       0x45,
+               /*Header*/
+               0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
+               /* Vendor & Product Identification */
+               0x22, 0xf0, 0x54, 0x29, 0x00, 0x00, 0x00, 0x00, 0x04, 0x17,
+               /* Version & Revision */
+               0x01, 0x04,
+               /* Basic Display Parameters & Features */
+               0xa5, 0x34, 0x20, 0x78, 0x23,
+               /* Color Characteristics */
+               0xfc, 0x81, 0xa4, 0x55, 0x4d, 0x9d, 0x25, 0x12, 0x50, 0x54,
+               /* Established Timings: maximum resolution is 1024x768 */
+               0x21, 0x08, 0x00,
+               /*
+                * Standard Timings.
+                * below new resolutions can be supported:
+                * 1920x1080, 1280x720, 1280x960, 1280x1024,
+                * 1440x900, 1600x1200, 1680x1050
+                */
+               0xd1, 0xc0, 0x81, 0xc0, 0x81, 0x40, 0x81, 0x80, 0x95, 0x00,
+               0xa9, 0x40, 0xb3, 0x00, 0x01, 0x01,
+               /* 18 Byte Data Blocks 1: max resolution is 1920x1200 */
+               0x28, 0x3c, 0x80, 0xa0, 0x70, 0xb0,
+               0x23, 0x40, 0x30, 0x20, 0x36, 0x00, 0x06, 0x44, 0x21, 0x00, 0x00, 0x1a,
+               /* 18 Byte Data Blocks 2: invalid */
+               0x00, 0x00, 0x00, 0xfd, 0x00, 0x18, 0x3c, 0x18, 0x50, 0x11, 0x00, 0x0a,
+               0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+               /* 18 Byte Data Blocks 3: invalid */
+               0x00, 0x00, 0x00, 0xfc, 0x00, 0x48,
+               0x50, 0x20, 0x5a, 0x52, 0x32, 0x34, 0x34, 0x30, 0x77, 0x0a, 0x20, 0x20,
+               /* 18 Byte Data Blocks 4: invalid */
+               0x00, 0x00, 0x00, 0xff, 0x00, 0x43, 0x4e, 0x34, 0x33, 0x30, 0x34, 0x30,
+               0x44, 0x58, 0x51, 0x0a, 0x20, 0x20,
+               /* Extension Block Count */
+               0x00,
+               /* Checksum */
+               0x45,
+       },
  };
  
  #define DPCD_HEADER_SIZE        0xb
@@ -140,14 +176,20 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
                 vgpu_vreg(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT |
                                 SDE_PORTE_HOTPLUG_SPT);
  
-       if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B))
+       if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) {
                 vgpu_vreg(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT;
+               vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED;
+       }
  
-       if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C))
+       if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) {
                 vgpu_vreg(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT;
+               vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIC_DETECTED;
+       }
  
-       if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D))
+       if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) {
                 vgpu_vreg(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT;
+               vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED;
+       }
  
         if (IS_SKYLAKE(dev_priv) &&
                         intel_vgpu_has_monitor_on_port(vgpu, PORT_E)) {
@@ -160,6 +202,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
                                 GEN8_PORT_DP_A_HOTPLUG;
                 else
                         vgpu_vreg(vgpu, SDEISR) |= SDE_PORTA_HOTPLUG_SPT;
+
+               vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_A)) |= DDI_INIT_DISPLAY_DETECTED;
         }
  }
  
@@ -175,10 +219,13 @@ static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num)
  }
  
  static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num,
-               int type)
+                                   int type, unsigned int resolution)
  {
         struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num);
  
+       if (WARN_ON(resolution >= GVT_EDID_NUM))
+               return -EINVAL;
+
         port->edid = kzalloc(sizeof(*(port->edid)), GFP_KERNEL);
         if (!port->edid)
                 return -ENOMEM;
@@ -189,7 +236,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num,
                 return -ENOMEM;
         }
  
-       memcpy(port->edid->edid_block, virtual_dp_monitor_edid,
+       memcpy(port->edid->edid_block, virtual_dp_monitor_edid[resolution],
                         EDID_SIZE);
         port->edid->data_valid = true;
  
@@ -322,16 +369,18 @@ void intel_vgpu_clean_display(struct intel_vgpu *vgpu)
   * Zero on success, negative error code if failed.
   *
   */
-int intel_vgpu_init_display(struct intel_vgpu *vgpu)
+int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution)
  {
         struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
  
         intel_vgpu_init_i2c_edid(vgpu);
  
         if (IS_SKYLAKE(dev_priv))
-               return setup_virtual_dp_monitor(vgpu, PORT_D, GVT_DP_D);
+               return setup_virtual_dp_monitor(vgpu, PORT_D, GVT_DP_D,
+                                               resolution);
         else
-               return setup_virtual_dp_monitor(vgpu, PORT_B, GVT_DP_B);
+               return setup_virtual_dp_monitor(vgpu, PORT_B, GVT_DP_B,
+                                               resolution);
  }
  
  /**
diff --git a/drivers/gpu/drm/i915/gvt/display.h b/drivers/gpu/drm/i915/gvt/display.h

index 8b234ea961f67b96a185e3cd8bfd35728360a05d..d73de22102e2b77f1c4c166ee0688b86d2e29391 100644 (file)
--- a/drivers/gpu/drm/i915/gvt/display.h
+++ b/drivers/gpu/drm/i915/gvt/display.h
@@ -154,10 +154,28 @@ struct intel_vgpu_port {
         int type;
  };
  
+enum intel_vgpu_edid {
+       GVT_EDID_1024_768,
+       GVT_EDID_1920_1200,
+       GVT_EDID_NUM,
+};
+
+static inline char *vgpu_edid_str(enum intel_vgpu_edid id)
+{
+       switch (id) {
+       case GVT_EDID_1024_768:
+               return "1024x768";
+       case GVT_EDID_1920_1200:
+               return "1920x1200";
+       default:
+               return "";
+       }
+}
+
  void intel_gvt_emulate_vblank(struct intel_gvt *gvt);
  void intel_gvt_check_vblank_emulation(struct intel_gvt *gvt);
  
-int intel_vgpu_init_display(struct intel_vgpu *vgpu);
+int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution);
  void intel_vgpu_reset_display(struct intel_vgpu *vgpu);
  void intel_vgpu_clean_display(struct intel_vgpu *vgpu);
  
diff --git a/drivers/gpu/drm/i915/gvt/edid.c b/drivers/gpu/drm/i915/gvt/edid.c

index bda85dff7b2a998d68d1485479c9687eab206600..42cd09ec63fa7c41b69d684f8cf382a04f15b2ab 100644 (file)
--- a/drivers/gpu/drm/i915/gvt/edid.c
+++ b/drivers/gpu/drm/i915/gvt/edid.c
@@ -52,16 +52,16 @@ static unsigned char edid_get_byte(struct intel_vgpu *vgpu)
         unsigned char chr = 0;
  
         if (edid->state == I2C_NOT_SPECIFIED || !edid->slave_selected) {
-               gvt_err("Driver tries to read EDID without proper sequence!\n");
+               gvt_vgpu_err("Driver tries to read EDID without proper sequence!\n");
                 return 0;
         }
         if (edid->current_edid_read >= EDID_SIZE) {
-               gvt_err("edid_get_byte() exceeds the size of EDID!\n");
+               gvt_vgpu_err("edid_get_byte() exceeds the size of EDID!\n");
                 return 0;
         }
  
         if (!edid->edid_available) {
-               gvt_err("Reading EDID but EDID is not available!\n");
+               gvt_vgpu_err("Reading EDID but EDID is not available!\n");
                 return 0;
         }
  
@@ -72,7 +72,7 @@ static unsigned char edid_get_byte(struct intel_vgpu *vgpu)
                 chr = edid_data->edid_block[edid->current_edid_read];
                 edid->current_edid_read++;
         } else {
-               gvt_err("No EDID available during the reading?\n");
+               gvt_vgpu_err("No EDID available during the reading?\n");
         }
         return chr;
  }
@@ -223,7 +223,7 @@ static int gmbus1_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
                         vgpu_vreg(vgpu, PCH_GMBUS2) |= GMBUS_ACTIVE;
                         break;
                 default:
-                       gvt_err("Unknown/reserved GMBUS cycle detected!\n");
+                       gvt_vgpu_err("Unknown/reserved GMBUS cycle detected!\n");
                         break;
                 }
                 /*
@@ -292,8 +292,7 @@ static int gmbus3_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
                  */
         } else {
                 memcpy(p_data, &vgpu_vreg(vgpu, offset), bytes);
-               gvt_err("vgpu%d: warning: gmbus3 read with nothing returned\n",
-                               vgpu->id);
+               gvt_vgpu_err("warning: gmbus3 read with nothing returned\n");
         }
         return 0;
  }
@@ -496,7 +495,8 @@ void intel_gvt_i2c_handle_aux_ch_write(struct intel_vgpu *vgpu,
                         unsigned char val = edid_get_byte(vgpu);
  
                         aux_data_for_write = (val << 16);
-               }
+               } else
+                       aux_data_for_write = (0xff << 16);
         }
         /* write the return value in AUX_CH_DATA reg which includes:
          * ACK of I2C_WRITE
diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c

index 46eb9fd3c03f6b2fdb19fec6550a5f4347038f88..d186c157f65fefe3c64b45b86f0ffa6ab824df2f 100644 (file)
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -172,6 +172,7 @@ static int emulate_execlist_ctx_schedule_out(
                 struct intel_vgpu_execlist *execlist,
                 struct execlist_ctx_descriptor_format *ctx)
  {
+       struct intel_vgpu *vgpu = execlist->vgpu;
         struct intel_vgpu_execlist_slot *running = execlist->running_slot;
         struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
         struct execlist_ctx_descriptor_format *ctx0 = &running->ctx[0];
@@ -183,7 +184,7 @@ static int emulate_execlist_ctx_schedule_out(
         gvt_dbg_el("schedule out context id %x\n", ctx->context_id);
  
         if (WARN_ON(!same_context(ctx, execlist->running_context))) {
-               gvt_err("schedule out context is not running context,"
+               gvt_vgpu_err("schedule out context is not running context,"
                                 "ctx id %x running ctx id %x\n",
                                 ctx->context_id,
                                 execlist->running_context->context_id);
@@ -254,7 +255,7 @@ static struct intel_vgpu_execlist_slot *get_next_execlist_slot(
         status.udw = vgpu_vreg(vgpu, status_reg + 4);
  
         if (status.execlist_queue_full) {
-               gvt_err("virtual execlist slots are full\n");
+               gvt_vgpu_err("virtual execlist slots are full\n");
                 return NULL;
         }
  
@@ -270,11 +271,12 @@ static int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist,
  
         struct execlist_ctx_descriptor_format *ctx0, *ctx1;
         struct execlist_context_status_format status;
+       struct intel_vgpu *vgpu = execlist->vgpu;
  
         gvt_dbg_el("emulate schedule-in\n");
  
         if (!slot) {
-               gvt_err("no available execlist slot\n");
+               gvt_vgpu_err("no available execlist slot\n");
                 return -EINVAL;
         }
  
@@ -375,7 +377,6 @@ static void prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
  
                 vma = i915_gem_object_ggtt_pin(entry_obj->obj, NULL, 0, 4, 0);
                 if (IS_ERR(vma)) {
-                       gvt_err("Cannot pin\n");
                         return;
                 }
  
@@ -428,7 +429,6 @@ static void prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
         vma = i915_gem_object_ggtt_pin(wa_ctx->indirect_ctx.obj, NULL,
                                        0, CACHELINE_BYTES, 0);
         if (IS_ERR(vma)) {
-               gvt_err("Cannot pin indirect ctx obj\n");
                 return;
         }
  
@@ -561,6 +561,7 @@ static int prepare_mm(struct intel_vgpu_workload *workload)
  {
         struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc;
         struct intel_vgpu_mm *mm;
+       struct intel_vgpu *vgpu = workload->vgpu;
         int page_table_level;
         u32 pdp[8];
  
@@ -569,7 +570,7 @@ static int prepare_mm(struct intel_vgpu_workload *workload)
         } else if (desc->addressing_mode == 3) { /* legacy 64 bit */
                 page_table_level = 4;
         } else {
-               gvt_err("Advanced Context mode(SVM) is not supported!\n");
+               gvt_vgpu_err("Advanced Context mode(SVM) is not supported!\n");
                 return -EINVAL;
         }
  
@@ -583,7 +584,7 @@ static int prepare_mm(struct intel_vgpu_workload *workload)
                 mm = intel_vgpu_create_mm(workload->vgpu, INTEL_GVT_MM_PPGTT,
                                 pdp, page_table_level, 0);
                 if (IS_ERR(mm)) {
-                       gvt_err("fail to create mm object.\n");
+                       gvt_vgpu_err("fail to create mm object.\n");
                         return PTR_ERR(mm);
                 }
         }
@@ -609,7 +610,7 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id,
         ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
                         (u32)((desc->lrca + 1) << GTT_PAGE_SHIFT));
         if (ring_context_gpa == INTEL_GVT_INVALID_ADDR) {
-               gvt_err("invalid guest context LRCA: %x\n", desc->lrca);
+               gvt_vgpu_err("invalid guest context LRCA: %x\n", desc->lrca);
                 return -EINVAL;
         }
  
@@ -724,8 +725,7 @@ int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
                         continue;
  
                 if (!desc[i]->privilege_access) {
-                       gvt_err("vgpu%d: unexpected GGTT elsp submission\n",
-                                       vgpu->id);
+                       gvt_vgpu_err("unexpected GGTT elsp submission\n");
                         return -EINVAL;
                 }
  
@@ -735,15 +735,13 @@ int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
         }
  
         if (!valid_desc_bitmap) {
-               gvt_err("vgpu%d: no valid desc in a elsp submission\n",
-                               vgpu->id);
+               gvt_vgpu_err("no valid desc in a elsp submission\n");
                 return -EINVAL;
         }
  
         if (!test_bit(0, (void *)&valid_desc_bitmap) &&
                         test_bit(1, (void *)&valid_desc_bitmap)) {
-               gvt_err("vgpu%d: weird elsp submission, desc 0 is not valid\n",
-                               vgpu->id);
+               gvt_vgpu_err("weird elsp submission, desc 0 is not valid\n");
                 return -EINVAL;
         }
  
@@ -752,8 +750,7 @@ int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
                 ret = submit_context(vgpu, ring_id, &valid_desc[i],
                                 emulate_schedule_in);
                 if (ret) {
-                       gvt_err("vgpu%d: fail to schedule workload\n",
-                                       vgpu->id);
+                       gvt_vgpu_err("fail to schedule workload\n");
                         return ret;
                 }
                 emulate_schedule_in = false;
@@ -778,7 +775,8 @@ static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id)
                         _EL_OFFSET_STATUS_PTR);
  
         ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);
-       ctx_status_ptr.read_ptr = ctx_status_ptr.write_ptr = 0x7;
+       ctx_status_ptr.read_ptr = 0;
+       ctx_status_ptr.write_ptr = 0x7;
         vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;
  }
  
diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c

index 1cb29b2d7dc638bd701b4ec16eec9edc8fea1133..dce8d15f706f58b4cf1019ddc1d5b609c903980c 100644 (file)
--- a/drivers/gpu/drm/i915/gvt/firmware.c
+++ b/drivers/gpu/drm/i915/gvt/firmware.c
@@ -75,12 +75,12 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt)
         struct gvt_firmware_header *h;
         void *firmware;
         void *p;
-       unsigned long size;
+       unsigned long size, crc32_start;
         int i;
         int ret;
  
-       size = sizeof(*h) + info->mmio_size + info->cfg_space_size - 1;
-       firmware = vmalloc(size);
+       size = sizeof(*h) + info->mmio_size + info->cfg_space_size;
+       firmware = vzalloc(size);
         if (!firmware)
                 return -ENOMEM;
  
@@ -112,6 +112,9 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt)
  
         memcpy(gvt->firmware.mmio, p, info->mmio_size);
  
+       crc32_start = offsetof(struct gvt_firmware_header, crc32) + 4;
+       h->crc32 = crc32_le(0, firmware + crc32_start, size - crc32_start);
+
         firmware_attr.size = size;
         firmware_attr.private = firmware;
  
@@ -234,7 +237,7 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt)
  
         firmware->mmio = mem;
  
-       sprintf(path, "%s/vid_0x%04x_did_0x%04x_rid_0x%04x.golden_hw_state",
+       sprintf(path, "%s/vid_0x%04x_did_0x%04x_rid_0x%02x.golden_hw_state",
                  GVT_FIRMWARE_PATH, pdev->vendor, pdev->device,
                  pdev->revision);
  
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c

index 28c92346db0e4e3615c2b5c484b4421eeb097e6e..b832bea64e0367ed6c25bf031a2a71db82d7edcc 100644 (file)
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -49,8 +49,8 @@ bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
  {
         if ((!vgpu_gmadr_is_valid(vgpu, addr)) || (size
                         && !vgpu_gmadr_is_valid(vgpu, addr + size - 1))) {
-               gvt_err("vgpu%d: invalid range gmadr 0x%llx size 0x%x\n",
-                               vgpu->id, addr, size);
+               gvt_vgpu_err("invalid range gmadr 0x%llx size 0x%x\n",
+                               addr, size);
                 return false;
         }
         return true;
@@ -430,7 +430,7 @@ static int gtt_entry_p2m(struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *p,
  
         mfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, gfn);
         if (mfn == INTEL_GVT_INVALID_ADDR) {
-               gvt_err("fail to translate gfn: 0x%lx\n", gfn);
+               gvt_vgpu_err("fail to translate gfn: 0x%lx\n", gfn);
                 return -ENXIO;
         }
  
@@ -611,7 +611,7 @@ static inline int init_shadow_page(struct intel_vgpu *vgpu,
  
         daddr = dma_map_page(kdev, p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
         if (dma_mapping_error(kdev, daddr)) {
-               gvt_err("fail to map dma addr\n");
+               gvt_vgpu_err("fail to map dma addr\n");
                 return -EINVAL;
         }
  
@@ -735,7 +735,7 @@ retry:
                 if (reclaim_one_mm(vgpu->gvt))
                         goto retry;
  
-               gvt_err("fail to allocate ppgtt shadow page\n");
+               gvt_vgpu_err("fail to allocate ppgtt shadow page\n");
                 return ERR_PTR(-ENOMEM);
         }
  
@@ -750,14 +750,14 @@ retry:
          */
         ret = init_shadow_page(vgpu, &spt->shadow_page, type);
         if (ret) {
-               gvt_err("fail to initialize shadow page for spt\n");
+               gvt_vgpu_err("fail to initialize shadow page for spt\n");
                 goto err;
         }
  
         ret = intel_vgpu_init_guest_page(vgpu, &spt->guest_page,
                         gfn, ppgtt_write_protection_handler, NULL);
         if (ret) {
-               gvt_err("fail to initialize guest page for spt\n");
+               gvt_vgpu_err("fail to initialize guest page for spt\n");
                 goto err;
         }
  
@@ -776,8 +776,7 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_find_shadow_page(
         if (p)
                 return shadow_page_to_ppgtt_spt(p);
  
-       gvt_err("vgpu%d: fail to find ppgtt shadow page: 0x%lx\n",
-                       vgpu->id, mfn);
+       gvt_vgpu_err("fail to find ppgtt shadow page: 0x%lx\n", mfn);
         return NULL;
  }
  
@@ -827,8 +826,8 @@ static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu,
         }
         s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e));
         if (!s) {
-               gvt_err("vgpu%d: fail to find shadow page: mfn: 0x%lx\n",
-                               vgpu->id, ops->get_pfn(e));
+               gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n",
+                               ops->get_pfn(e));
                 return -ENXIO;
         }
         return ppgtt_invalidate_shadow_page(s);
@@ -836,6 +835,7 @@ static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu,
  
  static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
  {
+       struct intel_vgpu *vgpu = spt->vgpu;
         struct intel_gvt_gtt_entry e;
         unsigned long index;
         int ret;
@@ -854,7 +854,7 @@ static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
  
         for_each_present_shadow_entry(spt, &e, index) {
                 if (!gtt_type_is_pt(get_next_pt_type(e.type))) {
-                       gvt_err("GVT doesn't support pse bit for now\n");
+                       gvt_vgpu_err("GVT doesn't support pse bit for now\n");
                         return -EINVAL;
                 }
                 ret = ppgtt_invalidate_shadow_page_by_shadow_entry(
@@ -868,8 +868,8 @@ release:
         ppgtt_free_shadow_page(spt);
         return 0;
  fail:
-       gvt_err("vgpu%d: fail: shadow page %p shadow entry 0x%llx type %d\n",
-                       spt->vgpu->id, spt, e.val64, e.type);
+       gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n",
+                       spt, e.val64, e.type);
         return ret;
  }
  
@@ -914,8 +914,8 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_shadow_page_by_guest_entry(
         }
         return s;
  fail:
-       gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n",
-                       vgpu->id, s, we->val64, we->type);
+       gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
+                       s, we->val64, we->type);
         return ERR_PTR(ret);
  }
  
@@ -953,7 +953,7 @@ static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
  
         for_each_present_guest_entry(spt, &ge, i) {
                 if (!gtt_type_is_pt(get_next_pt_type(ge.type))) {
-                       gvt_err("GVT doesn't support pse bit now\n");
+                       gvt_vgpu_err("GVT doesn't support pse bit now\n");
                         ret = -EINVAL;
                         goto fail;
                 }
@@ -969,8 +969,8 @@ static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
         }
         return 0;
  fail:
-       gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n",
-                       vgpu->id, spt, ge.val64, ge.type);
+       gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
+                       spt, ge.val64, ge.type);
         return ret;
  }
  
@@ -999,7 +999,7 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt,
                 struct intel_vgpu_ppgtt_spt *s =
                         ppgtt_find_shadow_page(vgpu, ops->get_pfn(&e));
                 if (!s) {
-                       gvt_err("fail to find guest page\n");
+                       gvt_vgpu_err("fail to find guest page\n");
                         ret = -ENXIO;
                         goto fail;
                 }
@@ -1011,8 +1011,8 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt,
         ppgtt_set_shadow_entry(spt, &e, index);
         return 0;
  fail:
-       gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n",
-                       vgpu->id, spt, e.val64, e.type);
+       gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
+                       spt, e.val64, e.type);
         return ret;
  }
  
@@ -1046,8 +1046,8 @@ static int ppgtt_handle_guest_entry_add(struct intel_vgpu_guest_page *gpt,
         }
         return 0;
  fail:
-       gvt_err("vgpu%d: fail: spt %p guest entry 0x%llx type %d\n", vgpu->id,
-                       spt, we->val64, we->type);
+       gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n",
+               spt, we->val64, we->type);
         return ret;
  }
  
@@ -1250,8 +1250,8 @@ static int ppgtt_handle_guest_write_page_table(
         }
         return 0;
  fail:
-       gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d.\n",
-                       vgpu->id, spt, we->val64, we->type);
+       gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n",
+                       spt, we->val64, we->type);
         return ret;
  }
  
@@ -1493,7 +1493,7 @@ static int shadow_mm(struct intel_vgpu_mm *mm)
  
                 spt = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge);
                 if (IS_ERR(spt)) {
-                       gvt_err("fail to populate guest root pointer\n");
+                       gvt_vgpu_err("fail to populate guest root pointer\n");
                         ret = PTR_ERR(spt);
                         goto fail;
                 }
@@ -1566,7 +1566,7 @@ struct intel_vgpu_mm *intel_vgpu_create_mm(struct intel_vgpu *vgpu,
  
         ret = gtt->mm_alloc_page_table(mm);
         if (ret) {
-               gvt_err("fail to allocate page table for mm\n");
+               gvt_vgpu_err("fail to allocate page table for mm\n");
                 goto fail;
         }
  
@@ -1584,7 +1584,7 @@ struct intel_vgpu_mm *intel_vgpu_create_mm(struct intel_vgpu *vgpu,
         }
         return mm;
  fail:
-       gvt_err("fail to create mm\n");
+       gvt_vgpu_err("fail to create mm\n");
         if (mm)
                 intel_gvt_mm_unreference(mm);
         return ERR_PTR(ret);
@@ -1760,7 +1760,7 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
                         mm->page_table_level, gma, gpa);
         return gpa;
  err:
-       gvt_err("invalid mm type: %d gma %lx\n", mm->type, gma);
+       gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
         return INTEL_GVT_INVALID_ADDR;
  }
  
@@ -1825,11 +1825,8 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
         gma = g_gtt_index << GTT_PAGE_SHIFT;
  
         /* the VM may configure the whole GM space when ballooning is used */
-       if (WARN_ONCE(!vgpu_gmadr_is_valid(vgpu, gma),
-                               "vgpu%d: found oob ggtt write, offset %x\n",
-                               vgpu->id, off)) {
+       if (!vgpu_gmadr_is_valid(vgpu, gma))
                 return 0;
-       }
  
         ggtt_get_guest_entry(ggtt_mm, &e, g_gtt_index);
  
@@ -1839,13 +1836,16 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
         if (ops->test_present(&e)) {
                 ret = gtt_entry_p2m(vgpu, &e, &m);
                 if (ret) {
-                       gvt_err("vgpu%d: fail to translate guest gtt entry\n",
-                                       vgpu->id);
-                       return ret;
+                       gvt_vgpu_err("fail to translate guest gtt entry\n");
+                       /* The guest driver may read/write the entry while it
+                        * is only partially updated; in this situation p2m
+                        * fails, so point the shadow entry to a scratch page.
+                        */
+                       ops->set_pfn(&m, gvt->gtt.scratch_ggtt_mfn);
                 }
         } else {
                 m = e;
-               m.val64 = 0;
+               ops->set_pfn(&m, gvt->gtt.scratch_ggtt_mfn);
         }
  
         ggtt_set_shadow_entry(ggtt_mm, &m, g_gtt_index);
@@ -1896,14 +1896,14 @@ static int alloc_scratch_pages(struct intel_vgpu *vgpu,
  
         scratch_pt = (void *)get_zeroed_page(GFP_KERNEL);
         if (!scratch_pt) {
-               gvt_err("fail to allocate scratch page\n");
+               gvt_vgpu_err("fail to allocate scratch page\n");
                 return -ENOMEM;
         }
  
         daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0,
                         4096, PCI_DMA_BIDIRECTIONAL);
         if (dma_mapping_error(dev, daddr)) {
-               gvt_err("fail to dmamap scratch_pt\n");
+               gvt_vgpu_err("fail to dmamap scratch_pt\n");
                 __free_page(virt_to_page(scratch_pt));
                 return -ENOMEM;
         }
@@ -2006,7 +2006,7 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
         ggtt_mm = intel_vgpu_create_mm(vgpu, INTEL_GVT_MM_GGTT,
                         NULL, 1, 0);
         if (IS_ERR(ggtt_mm)) {
-               gvt_err("fail to create mm for ggtt.\n");
+               gvt_vgpu_err("fail to create mm for ggtt.\n");
                 return PTR_ERR(ggtt_mm);
         }
  
@@ -2015,6 +2015,22 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
         return create_scratch_page_tree(vgpu);
  }
  
+/* Release every mm object of the given type found on the vGPU's mm list. */
+static void intel_vgpu_free_mm(struct intel_vgpu *vgpu, int type)
+{
+       struct intel_vgpu_mm *mm, *next;
+
+       list_for_each_entry_safe(mm, next, &vgpu->gtt.mm_list_head, list) {
+               if (mm->type != type)
+                       continue;
+               vgpu->gvt->gtt.mm_free_page_table(mm);
+               list_del(&mm->list);
+               list_del(&mm->lru_list);
+               kfree(mm);
+       }
+}
+}
+
  /**
   * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virulization
   * @vgpu: a vGPU
@@ -2027,19 +2043,11 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
   */
  void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
  {
-       struct list_head *pos, *n;
-       struct intel_vgpu_mm *mm;
-
         ppgtt_free_all_shadow_page(vgpu);
         release_scratch_page_tree(vgpu);
  
-       list_for_each_safe(pos, n, &vgpu->gtt.mm_list_head) {
-               mm = container_of(pos, struct intel_vgpu_mm, list);
-               vgpu->gvt->gtt.mm_free_page_table(mm);
-               list_del(&mm->list);
-               list_del(&mm->lru_list);
-               kfree(mm);
-       }
+       intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_PPGTT);
+       intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_GGTT);
  }
  
  static void clean_spt_oos(struct intel_gvt *gvt)
@@ -2071,7 +2079,6 @@ static int setup_spt_oos(struct intel_gvt *gvt)
         for (i = 0; i < preallocated_oos_pages; i++) {
                 oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
                 if (!oos_page) {
-                       gvt_err("fail to pre-allocate oos page\n");
                         ret = -ENOMEM;
                         goto fail;
                 }
@@ -2161,7 +2168,7 @@ int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu,
                 mm = intel_vgpu_create_mm(vgpu, INTEL_GVT_MM_PPGTT,
                                 pdp, page_table_level, 0);
                 if (IS_ERR(mm)) {
-                       gvt_err("fail to create mm\n");
+                       gvt_vgpu_err("fail to create mm\n");
                         return PTR_ERR(mm);
                 }
         }
@@ -2191,7 +2198,7 @@ int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu,
  
         mm = intel_vgpu_find_ppgtt_mm(vgpu, page_table_level, pdp);
         if (!mm) {
-               gvt_err("fail to find ppgtt instance.\n");
+               gvt_vgpu_err("fail to find ppgtt instance.\n");
                 return -EINVAL;
         }
         intel_gvt_mm_unreference(mm);
@@ -2322,6 +2329,13 @@ void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu, bool dmlr)
         int i;
  
         ppgtt_free_all_shadow_page(vgpu);
+
+       /* Shadow pages are only created when there is no page
+        * table tracking data, so remove page tracking data after
+        * removing the shadow pages.
+        */
+       intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_PPGTT);
+
         if (!dmlr)
                 return;
  
diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c

index 3b9d59e457ba7dbf2a1baffff7cd4f4c9aa75f3f..ef3baa0c4754566319a4706d50a77d2e1c6e2255 100644 (file)
--- a/drivers/gpu/drm/i915/gvt/gvt.c
+++ b/drivers/gpu/drm/i915/gvt/gvt.c
@@ -52,6 +52,8 @@ static const struct intel_gvt_ops intel_gvt_ops = {
         .vgpu_create = intel_gvt_create_vgpu,
         .vgpu_destroy = intel_gvt_destroy_vgpu,
         .vgpu_reset = intel_gvt_reset_vgpu,
+       .vgpu_activate = intel_gvt_activate_vgpu,
+       .vgpu_deactivate = intel_gvt_deactivate_vgpu,
  };
  
  /**
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h

index e227caf5859ebdfd2c420bc994d42a5734ba4272..becae2fa3b29d9956cf69469968c6a1c7c74b770 100644 (file)
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -143,6 +143,8 @@ struct intel_vgpu {
         int id;
         unsigned long handle; /* vGPU handle used by hypervisor MPT modules */
         bool active;
+       bool pv_notified;
+       bool failsafe;
         bool resetting;
         void *sched_data;
  
@@ -160,7 +162,6 @@ struct intel_vgpu {
         atomic_t running_workload_num;
         DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES);
         struct i915_gem_context *shadow_ctx;
-       struct notifier_block shadow_ctx_notifier_block;
  
  #if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT)
         struct {
@@ -203,18 +204,18 @@ struct intel_gvt_firmware {
  };
  
  struct intel_gvt_opregion {
-       void __iomem *opregion_va;
+       void *opregion_va;
         u32 opregion_pa;
  };
  
  #define NR_MAX_INTEL_VGPU_TYPES 20
  struct intel_vgpu_type {
         char name[16];
-       unsigned int max_instance;
         unsigned int avail_instance;
         unsigned int low_gm_size;
         unsigned int high_gm_size;
         unsigned int fence;
+       enum intel_vgpu_edid resolution;
  };
  
  struct intel_gvt {
@@ -231,6 +232,7 @@ struct intel_gvt {
         struct intel_gvt_gtt gtt;
         struct intel_gvt_opregion opregion;
         struct intel_gvt_workload_scheduler scheduler;
+       struct notifier_block shadow_ctx_notifier_block[I915_NUM_ENGINES];
         DECLARE_HASHTABLE(cmd_table, GVT_CMD_HASH_BITS);
         struct intel_vgpu_type *types;
         unsigned int num_types;
@@ -317,6 +319,7 @@ struct intel_vgpu_creation_params {
         __u64 low_gm_sz;  /* in MB */
         __u64 high_gm_sz; /* in MB */
         __u64 fence_sz;
+       __u64 resolution;
         __s32 primary;
         __u64 vgpu_id;
  };
@@ -379,7 +382,8 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu);
  void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
                                  unsigned int engine_mask);
  void intel_gvt_reset_vgpu(struct intel_vgpu *vgpu);
-
+void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu);
+void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu);
  
  /* validating GM functions */
  #define vgpu_gmadr_is_aperture(vgpu, gmadr) \
@@ -446,9 +450,16 @@ struct intel_gvt_ops {
                                 struct intel_vgpu_type *);
         void (*vgpu_destroy)(struct intel_vgpu *);
         void (*vgpu_reset)(struct intel_vgpu *);
+       void (*vgpu_activate)(struct intel_vgpu *);
+       void (*vgpu_deactivate)(struct intel_vgpu *);
  };
  
  
+enum {
+       GVT_FAILSAFE_UNSUPPORTED_GUEST,
+       GVT_FAILSAFE_INSUFFICIENT_RESOURCE,
+};
+
  #include "mpt.h"
  
  #endif
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c

index 1d450627ff654025b56119a181864ed4f2b1c607..6da9ae1618e35e39fb06e99caec00e1455132873 100644 (file)
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -121,6 +121,7 @@ static int new_mmio_info(struct intel_gvt *gvt,
                 info->size = size;
                 info->length = (i + 4) < end ? 4 : (end - i);
                 info->addr_mask = addr_mask;
+               info->ro_mask = ro_mask;
                 info->device = device;
                 info->read = read ? read : intel_vgpu_default_mmio_read;
                 info->write = write ? write : intel_vgpu_default_mmio_write;
@@ -150,15 +151,42 @@ static int render_mmio_to_ring_id(struct intel_gvt *gvt, unsigned int reg)
  #define fence_num_to_offset(num) \
         (num * 8 + i915_mmio_reg_offset(FENCE_REG_GEN6_LO(0)))
  
+
+/*
+ * Log why the vGPU cannot continue normally and flag it as being in
+ * failsafe mode.
+ *
+ * @vgpu: vGPU whose failsafe flag is set
+ * @reason: one of the GVT_FAILSAFE_* codes; any other value only produces
+ *          the generic "will enter failsafe mode" message
+ */
+static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason)
+{
+       switch (reason) {
+       case GVT_FAILSAFE_UNSUPPORTED_GUEST:
+               pr_err("Detected your guest driver doesn't support GVT-g.\n");
+               break;
+       case GVT_FAILSAFE_INSUFFICIENT_RESOURCE:
+               pr_err("Graphics resource is not enough for the guest\n");
+               /* explicit break: do not rely on falling into default */
+               break;
+       default:
+               break;
+       }
+       pr_err("Now vgpu %d will enter failsafe mode.\n", vgpu->id);
+       vgpu->failsafe = true;
+}
+
  static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu,
                 unsigned int fence_num, void *p_data, unsigned int bytes)
  {
         if (fence_num >= vgpu_fence_sz(vgpu)) {
-               gvt_err("vgpu%d: found oob fence register access\n",
-                               vgpu->id);
-               gvt_err("vgpu%d: total fence num %d access fence num %d\n",
-                               vgpu->id, vgpu_fence_sz(vgpu), fence_num);
+
+               /* When the guest accesses out-of-bounds fence registers
+                * without accessing pv_info first, we treat the guest as
+                * not supporting GVT and let the vgpu enter failsafe mode.
+                */
+               if (!vgpu->pv_notified)
+                       enter_failsafe_mode(vgpu,
+                                       GVT_FAILSAFE_UNSUPPORTED_GUEST);
+
+               if (!vgpu->mmio.disable_warn_untrack) {
+                       gvt_vgpu_err("found oob fence register access\n");
+                       gvt_vgpu_err("total fence %d, access fence %d\n",
+                                       vgpu_fence_sz(vgpu), fence_num);
+               }
                 memset(p_data, 0, bytes);
+               return -EINVAL;
         }
         return 0;
  }
@@ -219,7 +247,7 @@ static int mul_force_wake_write(struct intel_vgpu *vgpu,
                         break;
                 default:
                         /*should not hit here*/
-                       gvt_err("invalid forcewake offset 0x%x\n", offset);
+                       gvt_vgpu_err("invalid forcewake offset 0x%x\n", offset);
                         return -EINVAL;
                 }
         } else {
@@ -369,6 +397,74 @@ static int pipeconf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
         return 0;
  }
  
+/*
+ * Register offsets accepted by in_whitelist(). The table must stay sorted
+ * in ascending offset order because in_whitelist() binary-searches it.
+ */
+static const i915_reg_t force_nonpriv_white_list[] = {
+       GEN9_CS_DEBUG_MODE1, /* _MMIO(0x20ec) */
+       GEN9_CTX_PREEMPT_REG, /* _MMIO(0x2248) */
+       GEN8_CS_CHICKEN1, /* _MMIO(0x2580) */
+       _MMIO(0x2690),
+       _MMIO(0x2694),
+       _MMIO(0x2698),
+       _MMIO(0x4de0),
+       _MMIO(0x4de4),
+       _MMIO(0x4dfc),
+       GEN7_COMMON_SLICE_CHICKEN1, /* _MMIO(0x7010) */
+       _MMIO(0x7014),
+       HDC_CHICKEN0, /* _MMIO(0x7300) */
+       GEN8_HDC_CHICKEN1, /* _MMIO(0x7304) */
+       _MMIO(0x7700),
+       _MMIO(0x7704),
+       _MMIO(0x7708),
+       _MMIO(0x770c),
+       _MMIO(0xb110),
+       GEN8_L3SQCREG4, /* _MMIO(0xb118) */
+       _MMIO(0xe100),
+       _MMIO(0xe18c),
+       _MMIO(0xe48c),
+       _MMIO(0xe5f4),
+};
+
+/*
+ * Binary search over force_nonpriv_white_list.
+ *
+ * Returns true when @reg equals the offset of one of the table entries,
+ * false otherwise.
+ */
+static inline bool in_whitelist(unsigned int reg)
+{
+       const i915_reg_t *array = force_nonpriv_white_list;
+       int left = 0, right = ARRAY_SIZE(force_nonpriv_white_list);
+
+       /* search the half-open range [left, right) */
+       while (left < right) {
+               int mid = left + (right - left) / 2;
+
+               if (reg > array[mid].reg)
+                       left = mid + 1;
+               else if (reg < array[mid].reg)
+                       right = mid;
+               else
+                       return true;
+       }
+       return false;
+}
+
+/*
+ * MMIO write handler that filters FORCE_NONPRIV writes (name taken from
+ * the log messages below).
+ *
+ * Only an aligned 4-byte write whose value passes in_whitelist() is
+ * forwarded to intel_vgpu_default_mmio_write(); anything else is logged
+ * and rejected.
+ *
+ * Returns the default handler's result for an accepted write, -EINVAL
+ * otherwise.
+ */
+static int force_nonpriv_write(struct intel_vgpu *vgpu,
+       unsigned int offset, void *p_data, unsigned int bytes)
+{
+       u32 reg_nonpriv;
+
+       if ((bytes != 4) || ((offset & (bytes - 1)) != 0)) {
+               gvt_err("vgpu(%d) Invalid FORCE_NONPRIV offset %x(%dB)\n",
+                       vgpu->id, offset, bytes);
+               return -EINVAL;
+       }
+
+       /* Read the payload only once it is known to be a full dword. */
+       reg_nonpriv = *(u32 *)p_data;
+       if (!in_whitelist(reg_nonpriv)) {
+               gvt_err("vgpu(%d) Invalid FORCE_NONPRIV write %x\n",
+                       vgpu->id, reg_nonpriv);
+               return -EINVAL;
+       }
+
+       return intel_vgpu_default_mmio_write(vgpu, offset, p_data, bytes);
+}
+
  static int ddi_buf_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
                 void *p_data, unsigned int bytes)
  {
@@ -432,7 +528,7 @@ static int check_fdi_rx_train_status(struct intel_vgpu *vgpu,
                 fdi_tx_train_bits = FDI_LINK_TRAIN_PATTERN_2;
                 fdi_iir_check_bits = FDI_RX_SYMBOL_LOCK;
         } else {
-               gvt_err("Invalid train pattern %d\n", train_pattern);
+               gvt_vgpu_err("Invalid train pattern %d\n", train_pattern);
                 return -EINVAL;
         }
  
@@ -490,7 +586,7 @@ static int update_fdi_rx_iir_status(struct intel_vgpu *vgpu,
         else if (FDI_RX_IMR_TO_PIPE(offset) != INVALID_INDEX)
                 index = FDI_RX_IMR_TO_PIPE(offset);
         else {
-               gvt_err("Unsupport registers %x\n", offset);
+               gvt_vgpu_err("Unsupport registers %x\n", offset);
                 return -EINVAL;
         }
  
@@ -720,7 +816,7 @@ static int dp_aux_ch_ctl_mmio_write(struct intel_vgpu *vgpu,
         u32 data;
  
         if (!dpy_is_valid_port(port_index)) {
-               gvt_err("GVT(%d): Unsupported DP port access!\n", vgpu->id);
+               gvt_vgpu_err("Unsupported DP port access!\n");
                 return 0;
         }
  
@@ -874,6 +970,14 @@ static int dp_aux_ch_ctl_mmio_write(struct intel_vgpu *vgpu,
         return 0;
  }
  
+/*
+ * MMIO write handler: clear GEN6_MBCTL_ENABLE_BOOT_FETCH in the value
+ * being written (the caller's buffer is modified in place), then hand the
+ * buffer to write_vreg(). Always returns 0.
+ */
+static int mbctl_write(struct intel_vgpu *vgpu, unsigned int offset,
+               void *p_data, unsigned int bytes)
+{
+       u32 *val = p_data;
+
+       *val &= ~GEN6_MBCTL_ENABLE_BOOT_FETCH;
+       write_vreg(vgpu, offset, p_data, bytes);
+       return 0;
+}
+
  static int vga_control_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
                 void *p_data, unsigned int bytes)
  {
@@ -918,8 +1022,7 @@ static void write_virtual_sbi_register(struct intel_vgpu *vgpu,
  
         if (i == num) {
                 if (num == SBI_REG_MAX) {
-                       gvt_err("vgpu%d: SBI caching meets maximum limits\n",
-                                       vgpu->id);
+                       gvt_vgpu_err("SBI caching meets maximum limits\n");
                         return;
                 }
                 display->sbi.number++;
@@ -999,8 +1102,9 @@ static int pvinfo_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
                 break;
         }
         if (invalid_read)
-               gvt_err("invalid pvinfo read: [%x:%x] = %x\n",
+               gvt_vgpu_err("invalid pvinfo read: [%x:%x] = %x\n",
                                 offset, bytes, *(u32 *)p_data);
+       vgpu->pv_notified = true;
         return 0;
  }
  
@@ -1026,7 +1130,7 @@ static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification)
         case 1: /* Remove this in guest driver. */
                 break;
         default:
-               gvt_err("Invalid PV notification %d\n", notification);
+               gvt_vgpu_err("Invalid PV notification %d\n", notification);
         }
         return ret;
  }
@@ -1039,7 +1143,7 @@ static int send_display_ready_uevent(struct intel_vgpu *vgpu, int ready)
         char vmid_str[20];
         char display_ready_str[20];
  
-       snprintf(display_ready_str, 20, "GVT_DISPLAY_READY=%d\n", ready);
+       snprintf(display_ready_str, 20, "GVT_DISPLAY_READY=%d", ready);
         env[0] = display_ready_str;
  
         snprintf(vmid_str, 20, "VMID=%d", vgpu->id);
@@ -1078,8 +1182,11 @@ static int pvinfo_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
         case _vgtif_reg(execlist_context_descriptor_lo):
         case _vgtif_reg(execlist_context_descriptor_hi):
                 break;
+       case _vgtif_reg(rsv5[0])..._vgtif_reg(rsv5[3]):
+               enter_failsafe_mode(vgpu, GVT_FAILSAFE_INSUFFICIENT_RESOURCE);
+               break;
         default:
-               gvt_err("invalid pvinfo write offset %x bytes %x data %x\n",
+               gvt_vgpu_err("invalid pvinfo write offset %x bytes %x data %x\n",
                                 offset, bytes, data);
                 break;
         }
@@ -1203,26 +1310,37 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset,
         u32 *data0 = &vgpu_vreg(vgpu, GEN6_PCODE_DATA);
  
         switch (cmd) {
-       case 0x6:
-               /**
-                * "Read memory latency" command on gen9.
-                * Below memory latency values are read
-                * from skylake platform.
-                */
-               if (!*data0)
-                       *data0 = 0x1e1a1100;
-               else
-                       *data0 = 0x61514b3d;
+       case GEN9_PCODE_READ_MEM_LATENCY:
+               if (IS_SKYLAKE(vgpu->gvt->dev_priv)) {
+                       /**
+                        * "Read memory latency" command on gen9.
+                        * Below memory latency values are read
+                        * from skylake platform.
+                        */
+                       if (!*data0)
+                               *data0 = 0x1e1a1100;
+                       else
+                               *data0 = 0x61514b3d;
+               }
                 break;
-       case 0x5:
+       case SKL_PCODE_CDCLK_CONTROL:
+               if (IS_SKYLAKE(vgpu->gvt->dev_priv))
+                       *data0 = SKL_CDCLK_READY_FOR_CHANGE;
+               break;
+       case GEN6_PCODE_READ_RC6VIDS:
                 *data0 |= 0x1;
                 break;
         }
  
         gvt_dbg_core("VM(%d) write %x to mailbox, return data0 %x\n",
                      vgpu->id, value, *data0);
-
-       value &= ~(1 << 31);
+       /**
+        * PCODE_READY clear means ready for pcode read/write,
+        * PCODE_ERROR_MASK clear means no error happened. In GVT-g we
+        * always emulate as pcode read/write success and ready for access
+        * anytime, since we don't touch real physical registers here.
+        */
+       value &= ~(GEN6_PCODE_READY | GEN6_PCODE_ERROR_MASK);
         return intel_vgpu_default_mmio_write(vgpu, offset, &value, bytes);
  }
  
@@ -1302,7 +1420,8 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
         if (execlist->elsp_dwords.index == 3) {
                 ret = intel_vgpu_submit_execlist(vgpu, ring_id);
                 if(ret)
-                       gvt_err("fail submit workload on ring %d\n", ring_id);
+                       gvt_vgpu_err("fail submit workload on ring %d\n",
+                               ring_id);
         }
  
         ++execlist->elsp_dwords.index;
@@ -1318,6 +1437,17 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
         bool enable_execlist;
  
         write_vreg(vgpu, offset, p_data, bytes);
+
+       /* when PPGTT mode enabled, we will check if guest has called
+        * pvinfo, if not, we will treat this guest as non-gvtg-aware
+        * guest, and stop emulating its cfg space, mmio, gtt, etc.
+        */
+       if (((data & _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)) ||
+                       (data & _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)))
+                       && !vgpu->pv_notified) {
+               enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST);
+               return 0;
+       }
         if ((data & _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE))
                         || (data & _MASKED_BIT_DISABLE(GFX_RUN_LIST_ENABLE))) {
                 enable_execlist = !!(data & GFX_RUN_LIST_ENABLE);
@@ -1400,6 +1530,9 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu,
  #define MMIO_GM(reg, d, r, w) \
         MMIO_F(reg, 4, F_GMADR, 0xFFFFF000, 0, d, r, w)
  
+#define MMIO_GM_RDR(reg, d, r, w) \
+       MMIO_F(reg, 4, F_GMADR | F_CMD_ACCESS, 0xFFFFF000, 0, d, r, w)
+
  #define MMIO_RO(reg, d, f, rm, r, w) \
         MMIO_F(reg, 4, F_RO | f, 0, rm, d, r, w)
  
@@ -1419,6 +1552,9 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu,
  #define MMIO_RING_GM(prefix, d, r, w) \
         MMIO_RING_F(prefix, 4, F_GMADR, 0xFFFF0000, 0, d, r, w)
  
+#define MMIO_RING_GM_RDR(prefix, d, r, w) \
+       MMIO_RING_F(prefix, 4, F_GMADR | F_CMD_ACCESS, 0xFFFF0000, 0, d, r, w)
+
  #define MMIO_RING_RO(prefix, d, f, rm, r, w) \
         MMIO_RING_F(prefix, 4, F_RO | f, 0, rm, d, r, w)
  
@@ -1427,73 +1563,81 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
         struct drm_i915_private *dev_priv = gvt->dev_priv;
         int ret;
  
-       MMIO_RING_DFH(RING_IMR, D_ALL, 0, NULL, intel_vgpu_reg_imr_handler);
+       MMIO_RING_DFH(RING_IMR, D_ALL, F_CMD_ACCESS, NULL,
+               intel_vgpu_reg_imr_handler);
  
         MMIO_DFH(SDEIMR, D_ALL, 0, NULL, intel_vgpu_reg_imr_handler);
         MMIO_DFH(SDEIER, D_ALL, 0, NULL, intel_vgpu_reg_ier_handler);
         MMIO_DFH(SDEIIR, D_ALL, 0, NULL, intel_vgpu_reg_iir_handler);
         MMIO_D(SDEISR, D_ALL);
  
-       MMIO_RING_D(RING_HWSTAM, D_ALL);
+       MMIO_RING_DFH(RING_HWSTAM, D_ALL, F_CMD_ACCESS, NULL, NULL);
  
-       MMIO_GM(RENDER_HWS_PGA_GEN7, D_ALL, NULL, NULL);
-       MMIO_GM(BSD_HWS_PGA_GEN7, D_ALL, NULL, NULL);
-       MMIO_GM(BLT_HWS_PGA_GEN7, D_ALL, NULL, NULL);
-       MMIO_GM(VEBOX_HWS_PGA_GEN7, D_ALL, NULL, NULL);
+       MMIO_GM_RDR(RENDER_HWS_PGA_GEN7, D_ALL, NULL, NULL);
+       MMIO_GM_RDR(BSD_HWS_PGA_GEN7, D_ALL, NULL, NULL);
+       MMIO_GM_RDR(BLT_HWS_PGA_GEN7, D_ALL, NULL, NULL);
+       MMIO_GM_RDR(VEBOX_HWS_PGA_GEN7, D_ALL, NULL, NULL);
  
  #define RING_REG(base) (base + 0x28)
-       MMIO_RING_D(RING_REG, D_ALL);
+       MMIO_RING_DFH(RING_REG, D_ALL, F_CMD_ACCESS, NULL, NULL);
  #undef RING_REG
  
  #define RING_REG(base) (base + 0x134)
-       MMIO_RING_D(RING_REG, D_ALL);
+       MMIO_RING_DFH(RING_REG, D_ALL, F_CMD_ACCESS, NULL, NULL);
  #undef RING_REG
  
-       MMIO_GM(0x2148, D_ALL, NULL, NULL);
-       MMIO_GM(CCID, D_ALL, NULL, NULL);
-       MMIO_GM(0x12198, D_ALL, NULL, NULL);
+       MMIO_GM_RDR(0x2148, D_ALL, NULL, NULL);
+       MMIO_GM_RDR(CCID, D_ALL, NULL, NULL);
+       MMIO_GM_RDR(0x12198, D_ALL, NULL, NULL);
         MMIO_D(GEN7_CXT_SIZE, D_ALL);
  
-       MMIO_RING_D(RING_TAIL, D_ALL);
-       MMIO_RING_D(RING_HEAD, D_ALL);
-       MMIO_RING_D(RING_CTL, D_ALL);
-       MMIO_RING_D(RING_ACTHD, D_ALL);
-       MMIO_RING_GM(RING_START, D_ALL, NULL, NULL);
+       MMIO_RING_DFH(RING_TAIL, D_ALL, F_CMD_ACCESS, NULL, NULL);
+       MMIO_RING_DFH(RING_HEAD, D_ALL, F_CMD_ACCESS, NULL, NULL);
+       MMIO_RING_DFH(RING_CTL, D_ALL, F_CMD_ACCESS, NULL, NULL);
+       MMIO_RING_DFH(RING_ACTHD, D_ALL, F_CMD_ACCESS, NULL, NULL);
+       MMIO_RING_GM_RDR(RING_START, D_ALL, NULL, NULL);
  
         /* RING MODE */
  #define RING_REG(base) (base + 0x29c)
-       MMIO_RING_DFH(RING_REG, D_ALL, F_MODE_MASK, NULL, ring_mode_mmio_write);
+       MMIO_RING_DFH(RING_REG, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL,
+               ring_mode_mmio_write);
  #undef RING_REG
  
-       MMIO_RING_DFH(RING_MI_MODE, D_ALL, F_MODE_MASK, NULL, NULL);
-       MMIO_RING_DFH(RING_INSTPM, D_ALL, F_MODE_MASK, NULL, NULL);
+       MMIO_RING_DFH(RING_MI_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS,
+               NULL, NULL);
+       MMIO_RING_DFH(RING_INSTPM, D_ALL, F_MODE_MASK | F_CMD_ACCESS,
+                       NULL, NULL);
         MMIO_RING_DFH(RING_TIMESTAMP, D_ALL, F_CMD_ACCESS,
                         ring_timestamp_mmio_read, NULL);
         MMIO_RING_DFH(RING_TIMESTAMP_UDW, D_ALL, F_CMD_ACCESS,
                         ring_timestamp_mmio_read, NULL);
  
-       MMIO_DFH(GEN7_GT_MODE, D_ALL, F_MODE_MASK, NULL, NULL);
-       MMIO_DFH(CACHE_MODE_0_GEN7, D_ALL, F_MODE_MASK, NULL, NULL);
+       MMIO_DFH(GEN7_GT_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(CACHE_MODE_0_GEN7, D_ALL, F_MODE_MASK | F_CMD_ACCESS,
+               NULL, NULL);
         MMIO_DFH(CACHE_MODE_1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
-
-       MMIO_DFH(0x20dc, D_ALL, F_MODE_MASK, NULL, NULL);
-       MMIO_DFH(_3D_CHICKEN3, D_ALL, F_MODE_MASK, NULL, NULL);
-       MMIO_DFH(0x2088, D_ALL, F_MODE_MASK, NULL, NULL);
-       MMIO_DFH(0x20e4, D_ALL, F_MODE_MASK, NULL, NULL);
-       MMIO_DFH(0x2470, D_ALL, F_MODE_MASK, NULL, NULL);
-       MMIO_D(GAM_ECOCHK, D_ALL);
-       MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK, NULL, NULL);
+       MMIO_DFH(CACHE_MODE_0, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0x2124, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+
+       MMIO_DFH(0x20dc, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(_3D_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0x2088, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0x20e4, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0x2470, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(GAM_ECOCHK, D_ALL, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS,
+               NULL, NULL);
         MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
-       MMIO_D(0x9030, D_ALL);
-       MMIO_D(0x20a0, D_ALL);
-       MMIO_D(0x2420, D_ALL);
-       MMIO_D(0x2430, D_ALL);
-       MMIO_D(0x2434, D_ALL);
-       MMIO_D(0x2438, D_ALL);
-       MMIO_D(0x243c, D_ALL);
-       MMIO_DFH(0x7018, D_ALL, F_MODE_MASK, NULL, NULL);
+       MMIO_DFH(0x9030, D_ALL, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0x20a0, D_ALL, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0x2420, D_ALL, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0x2430, D_ALL, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0x2434, D_ALL, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0x2438, D_ALL, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0x243c, D_ALL, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0x7018, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
         MMIO_DFH(HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
-       MMIO_DFH(0xe100, D_ALL, F_MODE_MASK, NULL, NULL);
+       MMIO_DFH(GEN7_HALF_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
  
         /* display */
         MMIO_F(0x60220, 0x20, 0, 0, 0, D_ALL, NULL, NULL);
@@ -2022,8 +2166,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
         MMIO_D(FORCEWAKE_ACK, D_ALL);
         MMIO_D(GEN6_GT_CORE_STATUS, D_ALL);
         MMIO_D(GEN6_GT_THREAD_STATUS_REG, D_ALL);
-       MMIO_D(GTFIFODBG, D_ALL);
-       MMIO_D(GTFIFOCTL, D_ALL);
+       MMIO_DFH(GTFIFODBG, D_ALL, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(GTFIFOCTL, D_ALL, F_CMD_ACCESS, NULL, NULL);
         MMIO_DH(FORCEWAKE_MT, D_PRE_SKL, NULL, mul_force_wake_write);
         MMIO_DH(FORCEWAKE_ACK_HSW, D_HSW | D_BDW, NULL, NULL);
         MMIO_D(ECOBUS, D_ALL);
@@ -2080,7 +2224,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
  
         MMIO_F(0x4f000, 0x90, 0, 0, 0, D_ALL, NULL, NULL);
  
-       MMIO_D(GEN6_PCODE_MAILBOX, D_PRE_SKL);
+       MMIO_D(GEN6_PCODE_MAILBOX, D_PRE_BDW);
         MMIO_D(GEN6_PCODE_DATA, D_ALL);
         MMIO_D(0x13812c, D_ALL);
         MMIO_DH(GEN7_ERR_INT, D_ALL, NULL, NULL);
@@ -2102,7 +2246,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
         MMIO_D(0x7180, D_ALL);
         MMIO_D(0x7408, D_ALL);
         MMIO_D(0x7c00, D_ALL);
-       MMIO_D(GEN6_MBCTL, D_ALL);
+       MMIO_DH(GEN6_MBCTL, D_ALL, NULL, mbctl_write);
         MMIO_D(0x911c, D_ALL);
         MMIO_D(0x9120, D_ALL);
         MMIO_DFH(GEN7_UCGCTL4, D_ALL, F_CMD_ACCESS, NULL, NULL);
@@ -2159,36 +2303,35 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
         MMIO_D(0x1a054, D_ALL);
  
         MMIO_D(0x44070, D_ALL);
-
-       MMIO_D(0x215c, D_HSW_PLUS);
+       MMIO_DFH(0x215c, D_HSW_PLUS, F_CMD_ACCESS, NULL, NULL);
         MMIO_DFH(0x2178, D_ALL, F_CMD_ACCESS, NULL, NULL);
         MMIO_DFH(0x217c, D_ALL, F_CMD_ACCESS, NULL, NULL);
         MMIO_DFH(0x12178, D_ALL, F_CMD_ACCESS, NULL, NULL);
         MMIO_DFH(0x1217c, D_ALL, F_CMD_ACCESS, NULL, NULL);
  
-       MMIO_F(0x2290, 8, 0, 0, 0, D_HSW_PLUS, NULL, NULL);
-       MMIO_D(GEN7_OACONTROL, D_HSW);
+       MMIO_F(0x2290, 8, F_CMD_ACCESS, 0, 0, D_HSW_PLUS, NULL, NULL);
+       MMIO_DFH(GEN7_OACONTROL, D_HSW, F_CMD_ACCESS, NULL, NULL);
         MMIO_D(0x2b00, D_BDW_PLUS);
         MMIO_D(0x2360, D_BDW_PLUS);
-       MMIO_F(0x5200, 32, 0, 0, 0, D_ALL, NULL, NULL);
-       MMIO_F(0x5240, 32, 0, 0, 0, D_ALL, NULL, NULL);
-       MMIO_F(0x5280, 16, 0, 0, 0, D_ALL, NULL, NULL);
+       MMIO_F(0x5200, 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+       MMIO_F(0x5240, 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+       MMIO_F(0x5280, 16, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
  
         MMIO_DFH(0x1c17c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
         MMIO_DFH(0x1c178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
-       MMIO_D(BCS_SWCTRL, D_ALL);
-
-       MMIO_F(HS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL);
-       MMIO_F(DS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL);
-       MMIO_F(IA_VERTICES_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL);
-       MMIO_F(IA_PRIMITIVES_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL);
-       MMIO_F(VS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL);
-       MMIO_F(GS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL);
-       MMIO_F(GS_PRIMITIVES_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL);
-       MMIO_F(CL_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL);
-       MMIO_F(CL_PRIMITIVES_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL);
-       MMIO_F(PS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL);
-       MMIO_F(PS_DEPTH_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL);
+       MMIO_DFH(BCS_SWCTRL, D_ALL, F_CMD_ACCESS, NULL, NULL);
+
+       MMIO_F(HS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+       MMIO_F(DS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+       MMIO_F(IA_VERTICES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+       MMIO_F(IA_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+       MMIO_F(VS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+       MMIO_F(GS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+       MMIO_F(GS_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+       MMIO_F(CL_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+       MMIO_F(CL_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+       MMIO_F(PS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+       MMIO_F(PS_DEPTH_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
         MMIO_DH(0x4260, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler);
         MMIO_DH(0x4264, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler);
         MMIO_DH(0x4268, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler);
@@ -2196,6 +2339,17 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
         MMIO_DH(0x4270, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler);
         MMIO_DFH(0x4094, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
  
+       MMIO_DFH(ARB_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+       MMIO_RING_GM_RDR(RING_BBADDR, D_ALL, NULL, NULL);
+       MMIO_DFH(0x2220, D_ALL, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0x12220, D_ALL, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0x22220, D_ALL, F_CMD_ACCESS, NULL, NULL);
+       MMIO_RING_DFH(RING_SYNC_1, D_ALL, F_CMD_ACCESS, NULL, NULL);
+       MMIO_RING_DFH(RING_SYNC_0, D_ALL, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0x22178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0x1a178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0x1a17c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0x2217c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
         return 0;
  }
  
@@ -2204,7 +2358,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
         struct drm_i915_private *dev_priv = gvt->dev_priv;
         int ret;
  
-       MMIO_DH(RING_IMR(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL,
+       MMIO_DFH(RING_IMR(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, NULL,
                         intel_vgpu_reg_imr_handler);
  
         MMIO_DH(GEN8_GT_IMR(0), D_BDW_PLUS, NULL, intel_vgpu_reg_imr_handler);
@@ -2269,24 +2423,31 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
         MMIO_DH(GEN8_MASTER_IRQ, D_BDW_PLUS, NULL,
                 intel_vgpu_reg_master_irq_handler);
  
-       MMIO_D(RING_HWSTAM(GEN8_BSD2_RING_BASE), D_BDW_PLUS);
-       MMIO_D(0x1c134, D_BDW_PLUS);
-
-       MMIO_D(RING_TAIL(GEN8_BSD2_RING_BASE), D_BDW_PLUS);
-       MMIO_D(RING_HEAD(GEN8_BSD2_RING_BASE),  D_BDW_PLUS);
-       MMIO_GM(RING_START(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL);
-       MMIO_D(RING_CTL(GEN8_BSD2_RING_BASE), D_BDW_PLUS);
-       MMIO_D(RING_ACTHD(GEN8_BSD2_RING_BASE), D_BDW_PLUS);
-       MMIO_D(RING_ACTHD_UDW(GEN8_BSD2_RING_BASE), D_BDW_PLUS);
-       MMIO_DFH(0x1c29c, D_BDW_PLUS, F_MODE_MASK, NULL, ring_mode_mmio_write);
-       MMIO_DFH(RING_MI_MODE(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_MODE_MASK,
-                       NULL, NULL);
-       MMIO_DFH(RING_INSTPM(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_MODE_MASK,
-                       NULL, NULL);
+       MMIO_DFH(RING_HWSTAM(GEN8_BSD2_RING_BASE), D_BDW_PLUS,
+               F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0x1c134, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+
+       MMIO_DFH(RING_TAIL(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS,
+               NULL, NULL);
+       MMIO_DFH(RING_HEAD(GEN8_BSD2_RING_BASE),  D_BDW_PLUS,
+               F_CMD_ACCESS, NULL, NULL);
+       MMIO_GM_RDR(RING_START(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL);
+       MMIO_DFH(RING_CTL(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS,
+               NULL, NULL);
+       MMIO_DFH(RING_ACTHD(GEN8_BSD2_RING_BASE), D_BDW_PLUS,
+               F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(RING_ACTHD_UDW(GEN8_BSD2_RING_BASE), D_BDW_PLUS,
+               F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0x1c29c, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL,
+               ring_mode_mmio_write);
+       MMIO_DFH(RING_MI_MODE(GEN8_BSD2_RING_BASE), D_BDW_PLUS,
+               F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(RING_INSTPM(GEN8_BSD2_RING_BASE), D_BDW_PLUS,
+               F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
         MMIO_DFH(RING_TIMESTAMP(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS,
                         ring_timestamp_mmio_read, NULL);
  
-       MMIO_RING_D(RING_ACTHD_UDW, D_BDW_PLUS);
+       MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
  
  #define RING_REG(base) (base + 0xd0)
         MMIO_RING_F(RING_REG, 4, F_RO, 0,
@@ -2303,13 +2464,16 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
  #undef RING_REG
  
  #define RING_REG(base) (base + 0x234)
-       MMIO_RING_F(RING_REG, 8, F_RO, 0, ~0, D_BDW_PLUS, NULL, NULL);
-       MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 4, F_RO, 0, ~0LL, D_BDW_PLUS, NULL, NULL);
+       MMIO_RING_F(RING_REG, 8, F_RO | F_CMD_ACCESS, 0, ~0, D_BDW_PLUS,
+               NULL, NULL);
+       MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 4, F_RO | F_CMD_ACCESS, 0,
+               ~0LL, D_BDW_PLUS, NULL, NULL);
  #undef RING_REG
  
  #define RING_REG(base) (base + 0x244)
-       MMIO_RING_D(RING_REG, D_BDW_PLUS);
-       MMIO_D(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS);
+       MMIO_RING_DFH(RING_REG, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS,
+               NULL, NULL);
  #undef RING_REG
  
  #define RING_REG(base) (base + 0x370)
@@ -2331,6 +2495,8 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
         MMIO_D(GEN7_MISCCPCTL, D_BDW_PLUS);
         MMIO_D(0x1c054, D_BDW_PLUS);
  
+       MMIO_DH(GEN6_PCODE_MAILBOX, D_BDW_PLUS, NULL, mailbox_write);
+
         MMIO_D(GEN8_PRIVATE_PAT_LO, D_BDW_PLUS);
         MMIO_D(GEN8_PRIVATE_PAT_HI, D_BDW_PLUS);
  
@@ -2341,14 +2507,14 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
         MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 32, 0, 0, 0, D_BDW_PLUS, NULL, NULL);
  #undef RING_REG
  
-       MMIO_RING_GM(RING_HWS_PGA, D_BDW_PLUS, NULL, NULL);
-       MMIO_GM(0x1c080, D_BDW_PLUS, NULL, NULL);
+       MMIO_RING_GM_RDR(RING_HWS_PGA, D_BDW_PLUS, NULL, NULL);
+       MMIO_GM_RDR(RING_HWS_PGA(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL);
  
         MMIO_DFH(HDC_CHICKEN0, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
  
-       MMIO_D(CHICKEN_PIPESL_1(PIPE_A), D_BDW);
-       MMIO_D(CHICKEN_PIPESL_1(PIPE_B), D_BDW);
-       MMIO_D(CHICKEN_PIPESL_1(PIPE_C), D_BDW);
+       MMIO_D(CHICKEN_PIPESL_1(PIPE_A), D_BDW_PLUS);
+       MMIO_D(CHICKEN_PIPESL_1(PIPE_B), D_BDW_PLUS);
+       MMIO_D(CHICKEN_PIPESL_1(PIPE_C), D_BDW_PLUS);
  
         MMIO_D(WM_MISC, D_BDW);
         MMIO_D(BDW_EDP_PSR_BASE, D_BDW);
@@ -2362,27 +2528,31 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
         MMIO_D(GEN8_EU_DISABLE1, D_BDW_PLUS);
         MMIO_D(GEN8_EU_DISABLE2, D_BDW_PLUS);
  
-       MMIO_D(0xfdc, D_BDW);
-       MMIO_DFH(GEN8_ROW_CHICKEN, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
-       MMIO_D(GEN7_ROW_CHICKEN2, D_BDW_PLUS);
-       MMIO_D(GEN8_UCGCTL6, D_BDW_PLUS);
+       MMIO_D(0xfdc, D_BDW_PLUS);
+       MMIO_DFH(GEN8_ROW_CHICKEN, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS,
+               NULL, NULL);
+       MMIO_DFH(GEN7_ROW_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS,
+               NULL, NULL);
+       MMIO_DFH(GEN8_UCGCTL6, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
  
-       MMIO_D(0xb1f0, D_BDW);
-       MMIO_D(0xb1c0, D_BDW);
+       MMIO_DFH(0xb1f0, D_BDW, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0xb1c0, D_BDW, F_CMD_ACCESS, NULL, NULL);
         MMIO_DFH(GEN8_L3SQCREG4, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
-       MMIO_D(0xb100, D_BDW);
-       MMIO_D(0xb10c, D_BDW);
+       MMIO_DFH(0xb100, D_BDW, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0xb10c, D_BDW, F_CMD_ACCESS, NULL, NULL);
         MMIO_D(0xb110, D_BDW);
  
-       MMIO_DFH(0x24d0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
-       MMIO_DFH(0x24d4, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
-       MMIO_DFH(0x24d8, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
-       MMIO_DFH(0x24dc, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+       MMIO_F(0x24d0, 48, F_CMD_ACCESS, 0, 0, D_BDW_PLUS,
+               NULL, force_nonpriv_write);
  
-       MMIO_D(0x83a4, D_BDW);
+       MMIO_D(0x22040, D_BDW_PLUS);
+       MMIO_D(0x44484, D_BDW_PLUS);
+       MMIO_D(0x4448c, D_BDW_PLUS);
+
+       MMIO_DFH(0x83a4, D_BDW, F_CMD_ACCESS, NULL, NULL);
         MMIO_D(GEN8_L3_LRA_1_GPGPU, D_BDW_PLUS);
  
-       MMIO_D(0x8430, D_BDW);
+       MMIO_DFH(0x8430, D_BDW, F_CMD_ACCESS, NULL, NULL);
  
         MMIO_D(0x110000, D_BDW_PLUS);
  
@@ -2394,10 +2564,19 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
         MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
         MMIO_DFH(0xe188, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
         MMIO_DFH(HALF_SLICE_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
-       MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK, NULL, NULL);
-
-       MMIO_D(0x2248, D_BDW);
-
+       MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+
+       MMIO_DFH(0x2248, D_BDW, F_CMD_ACCESS, NULL, NULL);
+
+       MMIO_DFH(0xe220, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0xe230, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0xe240, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0xe260, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0xe270, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0xe280, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0xe2a0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0xe2b0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0xe2c0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
         return 0;
  }
  
@@ -2420,7 +2599,6 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
         MMIO_D(HSW_PWR_WELL_BIOS, D_SKL);
         MMIO_DH(HSW_PWR_WELL_DRIVER, D_SKL, NULL, skl_power_well_ctl_write);
  
-       MMIO_DH(GEN6_PCODE_MAILBOX, D_SKL, NULL, mailbox_write);
         MMIO_D(0xa210, D_SKL_PLUS);
         MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS);
         MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS);
@@ -2578,16 +2756,16 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
         MMIO_F(0xb020, 0x80, F_CMD_ACCESS, 0, 0, D_SKL, NULL, NULL);
  
         MMIO_D(0xd08, D_SKL);
-       MMIO_D(0x20e0, D_SKL);
-       MMIO_D(0x20ec, D_SKL);
+       MMIO_DFH(0x20e0, D_SKL, F_MODE_MASK, NULL, NULL);
+       MMIO_DFH(0x20ec, D_SKL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
  
         /* TRTT */
-       MMIO_D(0x4de0, D_SKL);
-       MMIO_D(0x4de4, D_SKL);
-       MMIO_D(0x4de8, D_SKL);
-       MMIO_D(0x4dec, D_SKL);
-       MMIO_D(0x4df0, D_SKL);
-       MMIO_DH(0x4df4, D_SKL, NULL, gen9_trtte_write);
+       MMIO_DFH(0x4de0, D_SKL, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0x4de4, D_SKL, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0x4de8, D_SKL, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0x4dec, D_SKL, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0x4df0, D_SKL, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(0x4df4, D_SKL, F_CMD_ACCESS, NULL, gen9_trtte_write);
         MMIO_DH(0x4dfc, D_SKL, NULL, gen9_trtt_chicken_write);
  
         MMIO_D(0x45008, D_SKL);
@@ -2611,7 +2789,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
         MMIO_D(0x65f08, D_SKL);
         MMIO_D(0x320f0, D_SKL);
  
-       MMIO_D(_REG_VCS2_EXCC, D_SKL);
+       MMIO_DFH(_REG_VCS2_EXCC, D_SKL, F_CMD_ACCESS, NULL, NULL);
         MMIO_D(0x70034, D_SKL);
         MMIO_D(0x71034, D_SKL);
         MMIO_D(0x72034, D_SKL);
@@ -2624,6 +2802,9 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
         MMIO_D(_PLANE_KEYMSK_1(PIPE_C), D_SKL);
  
         MMIO_D(0x44500, D_SKL);
+       MMIO_DFH(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL, F_MODE_MASK | F_CMD_ACCESS,
+               NULL, NULL);
         return 0;
  }
  
@@ -2813,3 +2994,20 @@ int intel_vgpu_default_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
         write_vreg(vgpu, offset, p_data, bytes);
         return 0;
  }
+
+/**
+ * intel_gvt_in_force_nonpriv_whitelist - if a mmio is in whitelist to be
+ * force-nonpriv register
+ *
+ * @gvt: a GVT device
+ * @offset: register offset
+ *
+ * Returns:
+ * True if the register is in force-nonpriv whitelist;
+ * False if outside;
+ */
+bool intel_gvt_in_force_nonpriv_whitelist(struct intel_gvt *gvt,
+                                         unsigned int offset)
+{
+       return in_whitelist(offset);
+}
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c

index 0f7f5d97f5829d65aeaf7392d0fcb4b19fd4d713..e466259034e24b2c62b82265978298c3500b79c5 100644 (file)
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -96,10 +96,10 @@ static int gvt_dma_map_iova(struct intel_vgpu *vgpu, kvm_pfn_t pfn,
         struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
         dma_addr_t daddr;
  
-       page = pfn_to_page(pfn);
-       if (is_error_page(page))
+       if (unlikely(!pfn_valid(pfn)))
                 return -EFAULT;
  
+       page = pfn_to_page(pfn);
         daddr = dma_map_page(dev, page, 0, PAGE_SIZE,
                         PCI_DMA_BIDIRECTIONAL);
         if (dma_mapping_error(dev, daddr))
@@ -295,10 +295,10 @@ static ssize_t description_show(struct kobject *kobj, struct device *dev,
                 return 0;
  
         return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
-                               "fence: %d\n",
-                               BYTES_TO_MB(type->low_gm_size),
-                               BYTES_TO_MB(type->high_gm_size),
-                               type->fence);
+                      "fence: %d\nresolution: %s\n",
+                      BYTES_TO_MB(type->low_gm_size),
+                      BYTES_TO_MB(type->high_gm_size),
+                      type->fence, vgpu_edid_str(type->resolution));
  }
  
  static MDEV_TYPE_ATTR_RO(available_instances);
@@ -426,7 +426,7 @@ static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
  
  static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
  {
-       struct intel_vgpu *vgpu;
+       struct intel_vgpu *vgpu = NULL;
         struct intel_vgpu_type *type;
         struct device *pdev;
         void *gvt;
@@ -437,7 +437,7 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
  
         type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
         if (!type) {
-               gvt_err("failed to find type %s to create\n",
+               gvt_vgpu_err("failed to find type %s to create\n",
                                                 kobject_name(kobj));
                 ret = -EINVAL;
                 goto out;
@@ -446,7 +446,7 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
         vgpu = intel_gvt_ops->vgpu_create(gvt, type);
         if (IS_ERR_OR_NULL(vgpu)) {
                 ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu);
-               gvt_err("failed to create intel vgpu: %d\n", ret);
+               gvt_vgpu_err("failed to create intel vgpu: %d\n", ret);
                 goto out;
         }
  
@@ -526,7 +526,8 @@ static int intel_vgpu_open(struct mdev_device *mdev)
         ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events,
                                 &vgpu->vdev.iommu_notifier);
         if (ret != 0) {
-               gvt_err("vfio_register_notifier for iommu failed: %d\n", ret);
+               gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
+                       ret);
                 goto out;
         }
  
@@ -534,7 +535,8 @@ static int intel_vgpu_open(struct mdev_device *mdev)
         ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events,
                                 &vgpu->vdev.group_notifier);
         if (ret != 0) {
-               gvt_err("vfio_register_notifier for group failed: %d\n", ret);
+               gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
+                       ret);
                 goto undo_iommu;
         }
  
@@ -542,6 +544,8 @@ static int intel_vgpu_open(struct mdev_device *mdev)
         if (ret)
                 goto undo_group;
  
+       intel_gvt_ops->vgpu_activate(vgpu);
+
         atomic_set(&vgpu->vdev.released, 0);
         return ret;
  
@@ -567,6 +571,8 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu)
         if (atomic_cmpxchg(&vgpu->vdev.released, 0, 1))
                 return;
  
+       intel_gvt_ops->vgpu_deactivate(vgpu);
+
         ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY,
                                         &vgpu->vdev.iommu_notifier);
         WARN(ret, "vfio_unregister_notifier for iommu failed: %d\n", ret);
@@ -635,7 +641,7 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
  
  
         if (index >= VFIO_PCI_NUM_REGIONS) {
-               gvt_err("invalid index: %u\n", index);
+               gvt_vgpu_err("invalid index: %u\n", index);
                 return -EINVAL;
         }
  
@@ -669,7 +675,7 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
         case VFIO_PCI_VGA_REGION_INDEX:
         case VFIO_PCI_ROM_REGION_INDEX:
         default:
-               gvt_err("unsupported region: %u\n", index);
+               gvt_vgpu_err("unsupported region: %u\n", index);
         }
  
         return ret == 0 ? count : ret;
@@ -861,7 +867,7 @@ static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
  
                 trigger = eventfd_ctx_fdget(fd);
                 if (IS_ERR(trigger)) {
-                       gvt_err("eventfd_ctx_fdget failed\n");
+                       gvt_vgpu_err("eventfd_ctx_fdget failed\n");
                         return PTR_ERR(trigger);
                 }
                 vgpu->vdev.msi_trigger = trigger;
@@ -1120,7 +1126,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
                         ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
                                                 VFIO_PCI_NUM_IRQS, &data_size);
                         if (ret) {
-                               gvt_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
+                               gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
                                 return -EINVAL;
                         }
                         if (data_size) {
@@ -1310,7 +1316,7 @@ static int kvmgt_guest_init(struct mdev_device *mdev)
  
         kvm = vgpu->vdev.kvm;
         if (!kvm || kvm->mm != current->mm) {
-               gvt_err("KVM is required to use Intel vGPU\n");
+               gvt_vgpu_err("KVM is required to use Intel vGPU\n");
                 return -ESRCH;
         }
  
@@ -1324,6 +1330,7 @@ static int kvmgt_guest_init(struct mdev_device *mdev)
         vgpu->handle = (unsigned long)info;
         info->vgpu = vgpu;
         info->kvm = kvm;
+       kvm_get_kvm(info->kvm);
  
         kvmgt_protect_table_init(info);
         gvt_cache_init(vgpu);
@@ -1337,12 +1344,8 @@ static int kvmgt_guest_init(struct mdev_device *mdev)
  
  static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
  {
-       if (!info) {
-               gvt_err("kvmgt_guest_info invalid\n");
-               return false;
-       }
-
         kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
+       kvm_put_kvm(info->kvm);
         kvmgt_protect_table_destroy(info);
         gvt_cache_destroy(info->vgpu);
         vfree(info);
@@ -1383,12 +1386,14 @@ static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
         unsigned long iova, pfn;
         struct kvmgt_guest_info *info;
         struct device *dev;
+       struct intel_vgpu *vgpu;
         int rc;
  
         if (!handle_valid(handle))
                 return INTEL_GVT_INVALID_ADDR;
  
         info = (struct kvmgt_guest_info *)handle;
+       vgpu = info->vgpu;
         iova = gvt_cache_find(info->vgpu, gfn);
         if (iova != INTEL_GVT_INVALID_ADDR)
                 return iova;
@@ -1397,13 +1402,14 @@ static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
         dev = mdev_dev(info->vgpu->vdev.mdev);
         rc = vfio_pin_pages(dev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &pfn);
         if (rc != 1) {
-               gvt_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", gfn, rc);
+               gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n",
+                       gfn, rc);
                 return INTEL_GVT_INVALID_ADDR;
         }
         /* transfer to host iova for GFX to use DMA */
         rc = gvt_dma_map_iova(info->vgpu, pfn, &iova);
         if (rc) {
-               gvt_err("gvt_dma_map_iova failed for gfn: 0x%lx\n", gfn);
+               gvt_vgpu_err("gvt_dma_map_iova failed for gfn: 0x%lx\n", gfn);
                 vfio_unpin_pages(dev, &gfn, 1);
                 return INTEL_GVT_INVALID_ADDR;
         }
@@ -1417,7 +1423,7 @@ static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
  {
         struct kvmgt_guest_info *info;
         struct kvm *kvm;
-       int ret;
+       int idx, ret;
         bool kthread = current->mm == NULL;
  
         if (!handle_valid(handle))
@@ -1429,8 +1435,10 @@ static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
         if (kthread)
                 use_mm(kvm->mm);
  
+       idx = srcu_read_lock(&kvm->srcu);
         ret = write ? kvm_write_guest(kvm, gpa, buf, len) :
                       kvm_read_guest(kvm, gpa, buf, len);
+       srcu_read_unlock(&kvm->srcu, idx);
  
         if (kthread)
                 unuse_mm(kvm->mm);
diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c

index 4df078bc5d042b1f4fc411fbb0f98c83a3cba729..1ba3bdb093416674c2f44014942971bcdcd8ea9e 100644 (file)
--- a/drivers/gpu/drm/i915/gvt/mmio.c
+++ b/drivers/gpu/drm/i915/gvt/mmio.c
@@ -57,6 +57,58 @@ int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa)
         (reg >= gvt->device_info.gtt_start_offset \
          && reg < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt))
  
+static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa,
+               void *p_data, unsigned int bytes, bool read)
+{
+       struct intel_gvt *gvt = NULL;
+       void *pt = NULL;
+       unsigned int offset = 0;
+
+       if (!vgpu || !p_data)
+               return;
+
+       gvt = vgpu->gvt;
+       mutex_lock(&gvt->lock);
+       offset = intel_vgpu_gpa_to_mmio_offset(vgpu, pa);
+       if (reg_is_mmio(gvt, offset)) {
+               if (read)
+                       intel_vgpu_default_mmio_read(vgpu, offset, p_data,
+                                       bytes);
+               else
+                       intel_vgpu_default_mmio_write(vgpu, offset, p_data,
+                                       bytes);
+       } else if (reg_is_gtt(gvt, offset) &&
+                       vgpu->gtt.ggtt_mm->virtual_page_table) {
+               offset -= gvt->device_info.gtt_start_offset;
+               pt = vgpu->gtt.ggtt_mm->virtual_page_table + offset;
+               if (read)
+                       memcpy(p_data, pt, bytes);
+               else
+                       memcpy(pt, p_data, bytes);
+
+       } else if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) {
+               struct intel_vgpu_guest_page *gp;
+
+               /* Since we enter the failsafe mode early during guest boot,
+                * guest may not have chance to set up its ppgtt table, so
+                * there should not be any wp pages for guest. Keep the wp
+                * related code here in case we need to handle it in future.
+                */
+               gp = intel_vgpu_find_guest_page(vgpu, pa >> PAGE_SHIFT);
+               if (gp) {
+                       /* remove write protection to prevent future traps */
+                       intel_vgpu_clean_guest_page(vgpu, gp);
+                       if (read)
+                               intel_gvt_hypervisor_read_gpa(vgpu, pa,
+                                               p_data, bytes);
+                       else
+                               intel_gvt_hypervisor_write_gpa(vgpu, pa,
+                                               p_data, bytes);
+               }
+       }
+       mutex_unlock(&gvt->lock);
+}
+
  /**
   * intel_vgpu_emulate_mmio_read - emulate MMIO read
   * @vgpu: a vGPU
@@ -75,6 +127,11 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa,
         unsigned int offset = 0;
         int ret = -EINVAL;
  
+
+       if (vgpu->failsafe) {
+               failsafe_emulate_mmio_rw(vgpu, pa, p_data, bytes, true);
+               return 0;
+       }
         mutex_lock(&gvt->lock);
  
         if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) {
@@ -85,10 +142,10 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa,
                         ret = intel_gvt_hypervisor_read_gpa(vgpu, pa,
                                         p_data, bytes);
                         if (ret) {
-                               gvt_err("vgpu%d: guest page read error %d, "
+                               gvt_vgpu_err("guest page read error %d, "
                                         "gfn 0x%lx, pa 0x%llx, var 0x%x, len %d\n",
-                                       vgpu->id, ret,
-                                       gp->gfn, pa, *(u32 *)p_data, bytes);
+                                       ret, gp->gfn, pa, *(u32 *)p_data,
+                                       bytes);
                         }
                         mutex_unlock(&gvt->lock);
                         return ret;
@@ -143,14 +200,13 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa,
                 ret = intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes);
  
                 if (!vgpu->mmio.disable_warn_untrack) {
-                       gvt_err("vgpu%d: read untracked MMIO %x(%dB) val %x\n",
-                               vgpu->id, offset, bytes, *(u32 *)p_data);
+                       gvt_vgpu_err("read untracked MMIO %x(%dB) val %x\n",
+                               offset, bytes, *(u32 *)p_data);
  
                         if (offset == 0x206c) {
-                               gvt_err("------------------------------------------\n");
-                               gvt_err("vgpu%d: likely triggers a gfx reset\n",
-                                       vgpu->id);
-                               gvt_err("------------------------------------------\n");
+                               gvt_vgpu_err("------------------------------------------\n");
+                               gvt_vgpu_err("likely triggers a gfx reset\n");
+                               gvt_vgpu_err("------------------------------------------\n");
                                 vgpu->mmio.disable_warn_untrack = true;
                         }
                 }
@@ -163,8 +219,8 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa,
         mutex_unlock(&gvt->lock);
         return 0;
  err:
-       gvt_err("vgpu%d: fail to emulate MMIO read %08x len %d\n",
-                       vgpu->id, offset, bytes);
+       gvt_vgpu_err("fail to emulate MMIO read %08x len %d\n",
+                       offset, bytes);
         mutex_unlock(&gvt->lock);
         return ret;
  }
@@ -188,6 +244,11 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa,
         u32 old_vreg = 0, old_sreg = 0;
         int ret = -EINVAL;
  
+       if (vgpu->failsafe) {
+               failsafe_emulate_mmio_rw(vgpu, pa, p_data, bytes, false);
+               return 0;
+       }
+
         mutex_lock(&gvt->lock);
  
         if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) {
@@ -197,10 +258,11 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa,
                 if (gp) {
                         ret = gp->handler(gp, pa, p_data, bytes);
                         if (ret) {
-                               gvt_err("vgpu%d: guest page write error %d, "
-                                       "gfn 0x%lx, pa 0x%llx, var 0x%x, len %d\n",
-                                       vgpu->id, ret,
-                                       gp->gfn, pa, *(u32 *)p_data, bytes);
+                               gvt_err("guest page write error %d, "
+                                       "gfn 0x%lx, pa 0x%llx, "
+                                       "var 0x%x, len %d\n",
+                                       ret, gp->gfn, pa,
+                                       *(u32 *)p_data, bytes);
                         }
                         mutex_unlock(&gvt->lock);
                         return ret;
@@ -236,7 +298,7 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa,
  
         mmio = intel_gvt_find_mmio_info(gvt, rounddown(offset, 4));
         if (!mmio && !vgpu->mmio.disable_warn_untrack)
-               gvt_err("vgpu%d: write untracked MMIO %x len %d val %x\n",
+               gvt_dbg_mmio("vgpu%d: write untracked MMIO %x len %d val %x\n",
                                 vgpu->id, offset, bytes, *(u32 *)p_data);
  
         if (!intel_gvt_mmio_is_unalign(gvt, offset)) {
@@ -267,8 +329,8 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa,
  
                         /* all register bits are RO. */
                         if (ro_mask == ~(u64)0) {
-                               gvt_err("vgpu%d: try to write RO reg %x\n",
-                                               vgpu->id, offset);
+                               gvt_vgpu_err("try to write RO reg %x\n",
+                                       offset);
                                 ret = 0;
                                 goto out;
                         }
@@ -298,8 +360,8 @@ out:
         mutex_unlock(&gvt->lock);
         return 0;
  err:
-       gvt_err("vgpu%d: fail to emulate MMIO write %08x len %d\n",
-                       vgpu->id, offset, bytes);
+       gvt_vgpu_err("fail to emulate MMIO write %08x len %d\n", offset,
+                    bytes);
         mutex_unlock(&gvt->lock);
         return ret;
  }
@@ -322,6 +384,8 @@ void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu)
  
         /* set the bit 0:2(Core C-State ) to C0 */
         vgpu_vreg(vgpu, GEN6_GT_CORE_STATUS) = 0;
+
+       vgpu->mmio.disable_warn_untrack = false;
  }
  
  /**
diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h

index 3bc620f56f351e774dc8658c9f06c79d0b24446b..a3a027025cd0a40f9543e6ee76b385a4ad761dcc 100644 (file)
--- a/drivers/gpu/drm/i915/gvt/mmio.h
+++ b/drivers/gpu/drm/i915/gvt/mmio.h
@@ -107,4 +107,7 @@ int intel_vgpu_default_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
                                  void *p_data, unsigned int bytes);
  int intel_vgpu_default_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
                                   void *p_data, unsigned int bytes);
+
+bool intel_gvt_in_force_nonpriv_whitelist(struct intel_gvt *gvt,
+                                         unsigned int offset);
  #endif
diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c

index d9fb41ab71198cb19b1ade4796f687af49444c80..311799136d7f6e9e2fd96537da3c69918d7258ee 100644 (file)
--- a/drivers/gpu/drm/i915/gvt/opregion.c
+++ b/drivers/gpu/drm/i915/gvt/opregion.c
@@ -27,7 +27,6 @@
  
  static int init_vgpu_opregion(struct intel_vgpu *vgpu, u32 gpa)
  {
-       void __iomem *host_va = vgpu->gvt->opregion.opregion_va;
         u8 *buf;
         int i;
  
@@ -43,8 +42,8 @@ static int init_vgpu_opregion(struct intel_vgpu *vgpu, u32 gpa)
         if (!vgpu_opregion(vgpu)->va)
                 return -ENOMEM;
  
-       memcpy_fromio(vgpu_opregion(vgpu)->va, host_va,
-                       INTEL_GVT_OPREGION_SIZE);
+       memcpy(vgpu_opregion(vgpu)->va, vgpu->gvt->opregion.opregion_va,
+              INTEL_GVT_OPREGION_SIZE);
  
         for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++)
                 vgpu_opregion(vgpu)->gfn[i] = (gpa >> PAGE_SHIFT) + i;
@@ -68,14 +67,15 @@ static int map_vgpu_opregion(struct intel_vgpu *vgpu, bool map)
                 mfn = intel_gvt_hypervisor_virt_to_mfn(vgpu_opregion(vgpu)->va
                         + i * PAGE_SIZE);
                 if (mfn == INTEL_GVT_INVALID_ADDR) {
-                       gvt_err("fail to get MFN from VA\n");
+                       gvt_vgpu_err("fail to get MFN from VA\n");
                         return -EINVAL;
                 }
                 ret = intel_gvt_hypervisor_map_gfn_to_mfn(vgpu,
                                 vgpu_opregion(vgpu)->gfn[i],
                                 mfn, 1, map);
                 if (ret) {
-                       gvt_err("fail to map GFN to MFN, errno: %d\n", ret);
+                       gvt_vgpu_err("fail to map GFN to MFN, errno: %d\n",
+                               ret);
                         return ret;
                 }
         }
@@ -288,7 +288,7 @@ int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci)
         parm = vgpu_opregion(vgpu)->va + INTEL_GVT_OPREGION_PARM;
  
         if (!(swsci & SWSCI_SCI_SELECT)) {
-               gvt_err("vgpu%d: requesting SMI service\n", vgpu->id);
+               gvt_vgpu_err("requesting SMI service\n");
                 return 0;
         }
         /* ignore non 0->1 trasitions */
@@ -301,9 +301,8 @@ int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci)
         func = GVT_OPREGION_FUNC(*scic);
         subfunc = GVT_OPREGION_SUBFUNC(*scic);
         if (!querying_capabilities(*scic)) {
-               gvt_err("vgpu%d: requesting runtime service: func \"%s\","
+               gvt_vgpu_err("requesting runtime service: func \"%s\","
                                 " subfunc \"%s\"\n",
-                               vgpu->id,
                                 opregion_func_name(func),
                                 opregion_subfunc_name(subfunc));
                 /*
diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c

index 2b3a642284b6da67f8f5d821256314d896799298..0beb83563b0870edecae35cf5a5807ba1bfc4ae8 100644 (file)
--- a/drivers/gpu/drm/i915/gvt/render.c
+++ b/drivers/gpu/drm/i915/gvt/render.c
@@ -53,6 +53,14 @@ static struct render_mmio gen8_render_mmio_list[] = {
         {RCS, _MMIO(0x24d4), 0, false},
         {RCS, _MMIO(0x24d8), 0, false},
         {RCS, _MMIO(0x24dc), 0, false},
+       {RCS, _MMIO(0x24e0), 0, false},
+       {RCS, _MMIO(0x24e4), 0, false},
+       {RCS, _MMIO(0x24e8), 0, false},
+       {RCS, _MMIO(0x24ec), 0, false},
+       {RCS, _MMIO(0x24f0), 0, false},
+       {RCS, _MMIO(0x24f4), 0, false},
+       {RCS, _MMIO(0x24f8), 0, false},
+       {RCS, _MMIO(0x24fc), 0, false},
         {RCS, _MMIO(0x7004), 0xffff, true},
         {RCS, _MMIO(0x7008), 0xffff, true},
         {RCS, _MMIO(0x7000), 0xffff, true},
@@ -76,6 +84,14 @@ static struct render_mmio gen9_render_mmio_list[] = {
         {RCS, _MMIO(0x24d4), 0, false},
         {RCS, _MMIO(0x24d8), 0, false},
         {RCS, _MMIO(0x24dc), 0, false},
+       {RCS, _MMIO(0x24e0), 0, false},
+       {RCS, _MMIO(0x24e4), 0, false},
+       {RCS, _MMIO(0x24e8), 0, false},
+       {RCS, _MMIO(0x24ec), 0, false},
+       {RCS, _MMIO(0x24f0), 0, false},
+       {RCS, _MMIO(0x24f4), 0, false},
+       {RCS, _MMIO(0x24f8), 0, false},
+       {RCS, _MMIO(0x24fc), 0, false},
         {RCS, _MMIO(0x7004), 0xffff, true},
         {RCS, _MMIO(0x7008), 0xffff, true},
         {RCS, _MMIO(0x7000), 0xffff, true},
@@ -151,7 +167,7 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id)
         I915_WRITE_FW(reg, 0x1);
  
         if (wait_for_atomic((I915_READ_FW(reg) == 0), 50))
-               gvt_err("timeout in invalidate ring (%d) tlb\n", ring_id);
+               gvt_vgpu_err("timeout in invalidate ring (%d) tlb\n", ring_id);
         else
                 vgpu_vreg(vgpu, regs[ring_id]) = 0;
  
@@ -191,7 +207,7 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
                 l3_offset.reg = 0xb020;
                 for (i = 0; i < 32; i++) {
                         gen9_render_mocs_L3[i] = I915_READ(l3_offset);
-                       I915_WRITE(l3_offset, vgpu_vreg(vgpu, offset));
+                       I915_WRITE(l3_offset, vgpu_vreg(vgpu, l3_offset));
                         POSTING_READ(l3_offset);
                         l3_offset.reg += 4;
                 }
diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c

index 06c9584ac5f0333c28d628d797686b82d8f82806..34b9acdf34791c84170cd6c96203a5b84f860b77 100644 (file)
--- a/drivers/gpu/drm/i915/gvt/sched_policy.c
+++ b/drivers/gpu/drm/i915/gvt/sched_policy.c
@@ -101,7 +101,7 @@ struct tbs_sched_data {
         struct list_head runq_head;
  };
  
-#define GVT_DEFAULT_TIME_SLICE (1 * HZ / 1000)
+#define GVT_DEFAULT_TIME_SLICE (msecs_to_jiffies(1))
  
  static void tbs_sched_func(struct work_struct *work)
  {
@@ -223,7 +223,7 @@ static void tbs_sched_start_schedule(struct intel_vgpu *vgpu)
                 return;
  
         list_add_tail(&vgpu_data->list, &sched_data->runq_head);
-       schedule_delayed_work(&sched_data->work, sched_data->period);
+       schedule_delayed_work(&sched_data->work, 0);
  }
  
  static void tbs_sched_stop_schedule(struct intel_vgpu *vgpu)
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c

index d6b6d0efdd1aeef15463e9504a4054ff3f2c3f8f..a44782412f2c9922bd4ec4fa5160bdb588db6cf0 100644 (file)
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -84,7 +84,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
                                 (u32)((workload->ctx_desc.lrca + i) <<
                                 GTT_PAGE_SHIFT));
                 if (context_gpa == INTEL_GVT_INVALID_ADDR) {
-                       gvt_err("Invalid guest context descriptor\n");
+                       gvt_vgpu_err("Invalid guest context descriptor\n");
                         return -EINVAL;
                 }
  
@@ -127,18 +127,24 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
         return 0;
  }
  
+static inline bool is_gvt_request(struct drm_i915_gem_request *req)
+{
+       return i915_gem_context_force_single_submission(req->ctx);
+}
+
  static int shadow_context_status_change(struct notifier_block *nb,
                 unsigned long action, void *data)
  {
-       struct intel_vgpu *vgpu = container_of(nb,
-                       struct intel_vgpu, shadow_ctx_notifier_block);
-       struct drm_i915_gem_request *req =
-               (struct drm_i915_gem_request *)data;
-       struct intel_gvt_workload_scheduler *scheduler =
-               &vgpu->gvt->scheduler;
+       struct drm_i915_gem_request *req = (struct drm_i915_gem_request *)data;
+       struct intel_gvt *gvt = container_of(nb, struct intel_gvt,
+                               shadow_ctx_notifier_block[req->engine->id]);
+       struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
         struct intel_vgpu_workload *workload =
                 scheduler->current_workload[req->engine->id];
  
+       if (!is_gvt_request(req) || unlikely(!workload))
+               return NOTIFY_OK;
+
         switch (action) {
         case INTEL_CONTEXT_SCHEDULE_IN:
                 intel_gvt_load_render_mmio(workload->vgpu,
@@ -148,6 +154,15 @@ static int shadow_context_status_change(struct notifier_block *nb,
         case INTEL_CONTEXT_SCHEDULE_OUT:
                 intel_gvt_restore_render_mmio(workload->vgpu,
                                               workload->ring_id);
+               /* If the status is -EINPROGRESS means this workload
+                * doesn't meet any issue during dispatching so when
+                * get the SCHEDULE_OUT set the status to be zero for
+                * good. If the status is NOT -EINPROGRESS means there
+                * is something wrong happened during dispatching and
+                * the status should not be set to zero
+                */
+               if (workload->status == -EINPROGRESS)
+                       workload->status = 0;
                 atomic_set(&workload->shadow_ctx_active, 0);
                 break;
         default:
@@ -163,7 +178,9 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
         int ring_id = workload->ring_id;
         struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
         struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
+       struct intel_engine_cs *engine = dev_priv->engine[ring_id];
         struct drm_i915_gem_request *rq;
+       struct intel_vgpu *vgpu = workload->vgpu;
         int ret;
  
         gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n",
@@ -175,9 +192,24 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
  
         mutex_lock(&dev_priv->drm.struct_mutex);
  
+       /* pin shadow context by gvt even the shadow context will be pinned
+        * when i915 alloc request. That is because gvt will update the guest
+        * context from shadow context when workload is completed, and at that
+        * moment, i915 may have already unpinned the shadow context to make the
+        * shadow_ctx pages invalid. So gvt need to pin itself. After update
+        * the guest context, gvt can unpin the shadow_ctx safely.
+        */
+       ret = engine->context_pin(engine, shadow_ctx);
+       if (ret) {
+               gvt_vgpu_err("fail to pin shadow context\n");
+               workload->status = ret;
+               mutex_unlock(&dev_priv->drm.struct_mutex);
+               return ret;
+       }
+
         rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx);
         if (IS_ERR(rq)) {
-               gvt_err("fail to allocate gem request\n");
+               gvt_vgpu_err("fail to allocate gem request\n");
                 ret = PTR_ERR(rq);
                 goto out;
         }
@@ -190,9 +222,12 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
         if (ret)
                 goto out;
  
-       ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx);
-       if (ret)
-               goto out;
+       if ((workload->ring_id == RCS) &&
+           (workload->wa_ctx.indirect_ctx.size != 0)) {
+               ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx);
+               if (ret)
+                       goto out;
+       }
  
         ret = populate_shadow_context(workload);
         if (ret)
@@ -215,6 +250,9 @@ out:
  
         if (!IS_ERR_OR_NULL(rq))
                 i915_add_request_no_flush(rq);
+       else
+               engine->context_unpin(engine, shadow_ctx);
+
         mutex_unlock(&dev_priv->drm.struct_mutex);
         return ret;
  }
@@ -310,7 +348,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
                                 (u32)((workload->ctx_desc.lrca + i) <<
                                         GTT_PAGE_SHIFT));
                 if (context_gpa == INTEL_GVT_INVALID_ADDR) {
-                       gvt_err("invalid guest context descriptor\n");
+                       gvt_vgpu_err("invalid guest context descriptor\n");
                         return;
                 }
  
@@ -359,15 +397,31 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
         workload = scheduler->current_workload[ring_id];
         vgpu = workload->vgpu;
  
-       if (!workload->status && !vgpu->resetting) {
+       /* For the workload w/ request, needs to wait for the context
+        * switch to make sure request is completed.
+        * For the workload w/o request, directly complete the workload.
+        */
+       if (workload->req) {
+               struct drm_i915_private *dev_priv =
+                       workload->vgpu->gvt->dev_priv;
+               struct intel_engine_cs *engine =
+                       dev_priv->engine[workload->ring_id];
                 wait_event(workload->shadow_ctx_status_wq,
                            !atomic_read(&workload->shadow_ctx_active));
  
-               update_guest_context(workload);
+               i915_gem_request_put(fetch_and_zero(&workload->req));
+
+               if (!workload->status && !vgpu->resetting) {
+                       update_guest_context(workload);
  
-               for_each_set_bit(event, workload->pending_events,
-                                INTEL_GVT_EVENT_MAX)
-                       intel_vgpu_trigger_virtual_event(vgpu, event);
+                       for_each_set_bit(event, workload->pending_events,
+                                        INTEL_GVT_EVENT_MAX)
+                               intel_vgpu_trigger_virtual_event(vgpu, event);
+               }
+               mutex_lock(&dev_priv->drm.struct_mutex);
+               /* unpin shadow ctx as the shadow_ctx update is done */
+               engine->context_unpin(engine, workload->vgpu->shadow_ctx);
+               mutex_unlock(&dev_priv->drm.struct_mutex);
         }
  
         gvt_dbg_sched("ring id %d complete workload %p status %d\n",
@@ -397,7 +451,7 @@ static int workload_thread(void *priv)
         int ring_id = p->ring_id;
         struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
         struct intel_vgpu_workload *workload = NULL;
-       long lret;
+       struct intel_vgpu *vgpu = NULL;
         int ret;
         bool need_force_wake = IS_SKYLAKE(gvt->dev_priv);
         DEFINE_WAIT_FUNC(wait, woken_wake_function);
@@ -440,29 +494,19 @@ static int workload_thread(void *priv)
                 mutex_unlock(&gvt->lock);
  
                 if (ret) {
-                       gvt_err("fail to dispatch workload, skip\n");
+                       vgpu = workload->vgpu;
+                       gvt_vgpu_err("fail to dispatch workload, skip\n");
                         goto complete;
                 }
  
                 gvt_dbg_sched("ring id %d wait workload %p\n",
                                 workload->ring_id, workload);
-
-               lret = i915_wait_request(workload->req,
-                                        0, MAX_SCHEDULE_TIMEOUT);
-               if (lret < 0) {
-                       workload->status = lret;
-                       gvt_err("fail to wait workload, skip\n");
-               } else {
-                       workload->status = 0;
-               }
+               i915_wait_request(workload->req, 0, MAX_SCHEDULE_TIMEOUT);
  
  complete:
                 gvt_dbg_sched("will complete workload %p, status: %d\n",
                                 workload, workload->status);
  
-               if (workload->req)
-                       i915_gem_request_put(fetch_and_zero(&workload->req));
-
                 complete_current_workload(gvt, ring_id);
  
                 if (need_force_wake)
@@ -493,15 +537,16 @@ void intel_gvt_wait_vgpu_idle(struct intel_vgpu *vgpu)
  void intel_gvt_clean_workload_scheduler(struct intel_gvt *gvt)
  {
         struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
-       int i;
+       struct intel_engine_cs *engine;
+       enum intel_engine_id i;
  
         gvt_dbg_core("clean workload scheduler\n");
  
-       for (i = 0; i < I915_NUM_ENGINES; i++) {
-               if (scheduler->thread[i]) {
-                       kthread_stop(scheduler->thread[i]);
-                       scheduler->thread[i] = NULL;
-               }
+       for_each_engine(engine, gvt->dev_priv, i) {
+               atomic_notifier_chain_unregister(
+                                       &engine->context_status_notifier,
+                                       &gvt->shadow_ctx_notifier_block[i]);
+               kthread_stop(scheduler->thread[i]);
         }
  }
  
@@ -509,18 +554,15 @@ int intel_gvt_init_workload_scheduler(struct intel_gvt *gvt)
  {
         struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
         struct workload_thread_param *param = NULL;
+       struct intel_engine_cs *engine;
+       enum intel_engine_id i;
         int ret;
-       int i;
  
         gvt_dbg_core("init workload scheduler\n");
  
         init_waitqueue_head(&scheduler->workload_complete_wq);
  
-       for (i = 0; i < I915_NUM_ENGINES; i++) {
-               /* check ring mask at init time */
-               if (!HAS_ENGINE(gvt->dev_priv, i))
-                       continue;
-
+       for_each_engine(engine, gvt->dev_priv, i) {
                 init_waitqueue_head(&scheduler->waitq[i]);
  
                 param = kzalloc(sizeof(*param), GFP_KERNEL);
@@ -539,6 +581,11 @@ int intel_gvt_init_workload_scheduler(struct intel_gvt *gvt)
                         ret = PTR_ERR(scheduler->thread[i]);
                         goto err;
                 }
+
+               gvt->shadow_ctx_notifier_block[i].notifier_call =
+                                       shadow_context_status_change;
+               atomic_notifier_chain_register(&engine->context_status_notifier,
+                                       &gvt->shadow_ctx_notifier_block[i]);
         }
         return 0;
  err:
@@ -550,9 +597,6 @@ err:
  
  void intel_vgpu_clean_gvt_context(struct intel_vgpu *vgpu)
  {
-       atomic_notifier_chain_unregister(&vgpu->shadow_ctx->status_notifier,
-                       &vgpu->shadow_ctx_notifier_block);
-
         i915_gem_context_put_unlocked(vgpu->shadow_ctx);
  }
  
@@ -567,10 +611,5 @@ int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu)
  
         vgpu->shadow_ctx->engine[RCS].initialised = true;
  
-       vgpu->shadow_ctx_notifier_block.notifier_call =
-               shadow_context_status_change;
-
-       atomic_notifier_chain_register(&vgpu->shadow_ctx->status_notifier,
-                                      &vgpu->shadow_ctx_notifier_block);
         return 0;
  }
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c

index 95a97aa0051e787430fff4266be7ac559973b78f..649ef280cc9a5bc10f4bebdc2f43c27e5249bd7f 100644 (file)
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -64,6 +64,20 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu)
         WARN_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE);
  }
  
+static struct {
+       unsigned int low_mm;
+       unsigned int high_mm;
+       unsigned int fence;
+       enum intel_vgpu_edid edid;
+       char *name;
+} vgpu_types[] = {
+/* Fixed vGPU type table */
+       { MB_TO_BYTES(64), MB_TO_BYTES(384), 4, GVT_EDID_1024_768, "8" },
+       { MB_TO_BYTES(128), MB_TO_BYTES(512), 4, GVT_EDID_1920_1200, "4" },
+       { MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, GVT_EDID_1920_1200, "2" },
+       { MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, GVT_EDID_1920_1200, "1" },
+};
+
  /**
   * intel_gvt_init_vgpu_types - initialize vGPU type list
   * @gvt : GVT device
@@ -78,9 +92,8 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt)
         unsigned int min_low;
  
         /* vGPU type name is defined as GVTg_Vx_y which contains
-        * physical GPU generation type and 'y' means maximum vGPU
-        * instances user can create on one physical GPU for this
-        * type.
+        * physical GPU generation type (e.g. V4 as BDW server, V5 as
+        * SKL server).
          *
          * Depend on physical SKU resource, might see vGPU types like
          * GVTg_V4_8, GVTg_V4_4, GVTg_V4_2, etc. We can create
@@ -92,7 +105,7 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt)
          */
         low_avail = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE;
         high_avail = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE;
-       num_types = 4;
+       num_types = sizeof(vgpu_types) / sizeof(vgpu_types[0]);
  
         gvt->types = kzalloc(num_types * sizeof(struct intel_vgpu_type),
                              GFP_KERNEL);
@@ -101,28 +114,29 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt)
  
         min_low = MB_TO_BYTES(32);
         for (i = 0; i < num_types; ++i) {
-               if (low_avail / min_low == 0)
+               if (low_avail / vgpu_types[i].low_mm == 0)
                         break;
-               gvt->types[i].low_gm_size = min_low;
-               gvt->types[i].high_gm_size = max((min_low<<3), MB_TO_BYTES(384U));
-               gvt->types[i].fence = 4;
-               gvt->types[i].max_instance = min(low_avail / min_low,
-                                                high_avail / gvt->types[i].high_gm_size);
-               gvt->types[i].avail_instance = gvt->types[i].max_instance;
+
+               gvt->types[i].low_gm_size = vgpu_types[i].low_mm;
+               gvt->types[i].high_gm_size = vgpu_types[i].high_mm;
+               gvt->types[i].fence = vgpu_types[i].fence;
+               gvt->types[i].resolution = vgpu_types[i].edid;
+               gvt->types[i].avail_instance = min(low_avail / vgpu_types[i].low_mm,
+                                                  high_avail / vgpu_types[i].high_mm);
  
                 if (IS_GEN8(gvt->dev_priv))
-                       sprintf(gvt->types[i].name, "GVTg_V4_%u",
-                                               gvt->types[i].max_instance);
+                       sprintf(gvt->types[i].name, "GVTg_V4_%s",
+                                               vgpu_types[i].name);
                 else if (IS_GEN9(gvt->dev_priv))
-                       sprintf(gvt->types[i].name, "GVTg_V5_%u",
-                                               gvt->types[i].max_instance);
+                       sprintf(gvt->types[i].name, "GVTg_V5_%s",
+                                               vgpu_types[i].name);
  
-               min_low <<= 1;
-               gvt_dbg_core("type[%d]: %s max %u avail %u low %u high %u fence %u\n",
-                            i, gvt->types[i].name, gvt->types[i].max_instance,
+               gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u res %s\n",
+                            i, gvt->types[i].name,
                              gvt->types[i].avail_instance,
                              gvt->types[i].low_gm_size,
-                            gvt->types[i].high_gm_size, gvt->types[i].fence);
+                            gvt->types[i].high_gm_size, gvt->types[i].fence,
+                            vgpu_edid_str(gvt->types[i].resolution));
         }
  
         gvt->num_types = i;
@@ -138,7 +152,7 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt)
  {
         int i;
         unsigned int low_gm_avail, high_gm_avail, fence_avail;
-       unsigned int low_gm_min, high_gm_min, fence_min, total_min;
+       unsigned int low_gm_min, high_gm_min, fence_min;
  
         /* Need to depend on maxium hw resource size but keep on
          * static config for now.
@@ -154,32 +168,45 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt)
                 low_gm_min = low_gm_avail / gvt->types[i].low_gm_size;
                 high_gm_min = high_gm_avail / gvt->types[i].high_gm_size;
                 fence_min = fence_avail / gvt->types[i].fence;
-               total_min = min(min(low_gm_min, high_gm_min), fence_min);
-               gvt->types[i].avail_instance = min(gvt->types[i].max_instance,
-                                                  total_min);
+               gvt->types[i].avail_instance = min(min(low_gm_min, high_gm_min),
+                                                  fence_min);
  
-               gvt_dbg_core("update type[%d]: %s max %u avail %u low %u high %u fence %u\n",
-                      i, gvt->types[i].name, gvt->types[i].max_instance,
+               gvt_dbg_core("update type[%d]: %s avail %u low %u high %u fence %u\n",
+                      i, gvt->types[i].name,
                        gvt->types[i].avail_instance, gvt->types[i].low_gm_size,
                        gvt->types[i].high_gm_size, gvt->types[i].fence);
         }
  }
  
  /**
- * intel_gvt_destroy_vgpu - destroy a virtual GPU
+ * intel_gvt_activate_vgpu - activate a virtual GPU
   * @vgpu: virtual GPU
   *
- * This function is called when user wants to destroy a virtual GPU.
+ * This function is called when user wants to activate a virtual GPU.
   *
   */
-void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu)
+void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu)
+{
+       mutex_lock(&vgpu->gvt->lock);
+       vgpu->active = true;
+       mutex_unlock(&vgpu->gvt->lock);
+}
+
+/**
+ * intel_gvt_deactivate_vgpu - deactivate a virtual GPU
+ * @vgpu: virtual GPU
+ *
+ * This function is called when user wants to deactivate a virtual GPU.
+ * All virtual GPU runtime information will be destroyed.
+ *
+ */
+void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu)
  {
         struct intel_gvt *gvt = vgpu->gvt;
  
         mutex_lock(&gvt->lock);
  
         vgpu->active = false;
-       idr_remove(&gvt->vgpu_idr, vgpu->id);
  
         if (atomic_read(&vgpu->running_workload_num)) {
                 mutex_unlock(&gvt->lock);
@@ -188,6 +215,26 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu)
         }
  
         intel_vgpu_stop_schedule(vgpu);
+
+       mutex_unlock(&gvt->lock);
+}
+
+/**
+ * intel_gvt_destroy_vgpu - destroy a virtual GPU
+ * @vgpu: virtual GPU
+ *
+ * This function is called when user wants to destroy a virtual GPU.
+ *
+ */
+void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu)
+{
+       struct intel_gvt *gvt = vgpu->gvt;
+
+       mutex_lock(&gvt->lock);
+
+       WARN(vgpu->active, "vGPU is still active!\n");
+
+       idr_remove(&gvt->vgpu_idr, vgpu->id);
         intel_vgpu_clean_sched_policy(vgpu);
         intel_vgpu_clean_gvt_context(vgpu);
         intel_vgpu_clean_execlist(vgpu);
@@ -248,7 +295,7 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,
         if (ret)
                 goto out_detach_hypervisor_vgpu;
  
-       ret = intel_vgpu_init_display(vgpu);
+       ret = intel_vgpu_init_display(vgpu, param->resolution);
         if (ret)
                 goto out_clean_gtt;
  
@@ -264,7 +311,6 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,
         if (ret)
                 goto out_clean_shadow_ctx;
  
-       vgpu->active = true;
         mutex_unlock(&gvt->lock);
  
         return vgpu;
@@ -312,6 +358,7 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt,
         param.low_gm_sz = type->low_gm_size;
         param.high_gm_sz = type->high_gm_size;
         param.fence_sz = type->fence;
+       param.resolution = type->resolution;
  
         /* XXX current param based on MB */
         param.low_gm_sz = BYTES_TO_MB(param.low_gm_sz);
@@ -387,8 +434,12 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
                 populate_pvinfo_page(vgpu);
                 intel_vgpu_reset_display(vgpu);
  
-               if (dmlr)
+               if (dmlr) {
                         intel_vgpu_reset_cfg_space(vgpu);
+                       /* only reset the failsafe mode when dmlr reset */
+                       vgpu->failsafe = false;
+                       vgpu->pv_notified = false;
+               }
         }
  
         vgpu->resetting = false;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c

index e703556eba999a95fd1a728538f6412db28068ae..5c089b3c2a7efdb29343de77a43dfc1e1f720057 100644 (file)
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -248,6 +248,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
         case I915_PARAM_IRQ_ACTIVE:
         case I915_PARAM_ALLOW_BATCHBUFFER:
         case I915_PARAM_LAST_DISPATCH:
+       case I915_PARAM_HAS_EXEC_CONSTANTS:
                 /* Reject all old ums/dri params. */
                 return -ENODEV;
         case I915_PARAM_CHIPSET_ID:
@@ -274,9 +275,6 @@ static int i915_getparam(struct drm_device *dev, void *data,
         case I915_PARAM_HAS_BSD2:
                 value = !!dev_priv->engine[VCS2];
                 break;
-       case I915_PARAM_HAS_EXEC_CONSTANTS:
-               value = INTEL_GEN(dev_priv) >= 4;
-               break;
         case I915_PARAM_HAS_LLC:
                 value = HAS_LLC(dev_priv);
                 break;
@@ -1436,8 +1434,6 @@ static int i915_drm_suspend(struct drm_device *dev)
                 goto out;
         }
  
-       intel_guc_suspend(dev_priv);
-
         intel_display_suspend(dev);
  
         intel_dp_mst_suspend(dev);
@@ -1788,7 +1784,7 @@ void i915_reset(struct drm_i915_private *dev_priv)
                 goto error;
         }
  
-       i915_gem_reset_finish(dev_priv);
+       i915_gem_reset(dev_priv);
         intel_overlay_reset(dev_priv);
  
         /* Ok, now get things going again... */
@@ -1814,6 +1810,7 @@ void i915_reset(struct drm_i915_private *dev_priv)
         i915_queue_hangcheck(dev_priv);
  
  wakeup:
+       i915_gem_reset_finish(dev_priv);
         enable_irq(dev_priv->drm.irq);
         wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS);
         return;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h

index 0a4b42d313912c3c5b56a449cfac33e63afeb16e..46fcd8b7080aafca8d589ca25ef6d57a9dc27a48 100644 (file)
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -293,6 +293,7 @@ enum plane_id {
         PLANE_PRIMARY,
         PLANE_SPRITE0,
         PLANE_SPRITE1,
+       PLANE_SPRITE2,
         PLANE_CURSOR,
         I915_MAX_PLANES,
  };
@@ -805,6 +806,7 @@ struct intel_csr {
         func(has_resource_streamer); \
         func(has_runtime_pm); \
         func(has_snoop); \
+       func(unfenced_needs_alignment); \
         func(cursor_needs_physical); \
         func(hws_needs_physical); \
         func(overlay_needs_physical); \
@@ -1324,7 +1326,7 @@ struct intel_gen6_power_mgmt {
         unsigned boosts;
  
         /* manual wa residency calculations */
-       struct intel_rps_ei up_ei, down_ei;
+       struct intel_rps_ei ei;
  
         /*
          * Protects RPS/RC6 register access and PCU communication.
@@ -2063,8 +2065,6 @@ struct drm_i915_private {
  
         const struct intel_device_info info;
  
-       int relative_constants_mode;
-
         void __iomem *regs;
  
         struct intel_uncore uncore;
@@ -3341,6 +3341,7 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
  }
  
  int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
+void i915_gem_reset(struct drm_i915_private *dev_priv);
  void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
  void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
  void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c

index 6908123162d17cd998c1e7f0bf54a27064e67588..fe531f90406241bfa2cfc89e6155f1ef25802404 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1434,6 +1434,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
  
         trace_i915_gem_object_pwrite(obj, args->offset, args->size);
  
+       ret = -ENODEV;
+       if (obj->ops->pwrite)
+               ret = obj->ops->pwrite(obj, args);
+       if (ret != -ENODEV)
+               goto err;
+
         ret = i915_gem_object_wait(obj,
                                    I915_WAIT_INTERRUPTIBLE |
                                    I915_WAIT_ALL,
@@ -2119,6 +2125,7 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj)
          */
         shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
         obj->mm.madv = __I915_MADV_PURGED;
+       obj->mm.pages = ERR_PTR(-EFAULT);
  }
  
  /* Try to discard unwanted pages */
@@ -2218,7 +2225,9 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
  
         __i915_gem_object_reset_page_iter(obj);
  
-       obj->ops->put_pages(obj, pages);
+       if (!IS_ERR(pages))
+               obj->ops->put_pages(obj, pages);
+
  unlock:
         mutex_unlock(&obj->mm.lock);
  }
@@ -2437,7 +2446,7 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
         if (err)
                 return err;
  
-       if (unlikely(!obj->mm.pages)) {
+       if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) {
                 err = ____i915_gem_object_get_pages(obj);
                 if (err)
                         goto unlock;
@@ -2515,7 +2524,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
  
         pinned = true;
         if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
-               if (unlikely(!obj->mm.pages)) {
+               if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) {
                         ret = ____i915_gem_object_get_pages(obj);
                         if (ret)
                                 goto err_unlock;
@@ -2563,6 +2572,75 @@ err_unlock:
         goto out_unlock;
  }
  
+static int
+i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
+                          const struct drm_i915_gem_pwrite *arg)
+{
+       struct address_space *mapping = obj->base.filp->f_mapping;
+       char __user *user_data = u64_to_user_ptr(arg->data_ptr);
+       u64 remain, offset;
+       unsigned int pg;
+
+       /* Before we instantiate/pin the backing store for our use, we
+        * can prepopulate the shmemfs filp efficiently using a write into
+        * the pagecache. We avoid the penalty of instantiating all the
+        * pages, important if the user is just writing to a few and never
+        * uses the object on the GPU, and using a direct write into shmemfs
+        * allows it to avoid the cost of retrieving a page (either swapin
+        * or clearing-before-use) before it is overwritten.
+        */
+       if (READ_ONCE(obj->mm.pages))
+               return -ENODEV;
+
+       /* Before the pages are instantiated the object is treated as being
+        * in the CPU domain. The pages will be clflushed as required before
+        * use, and we can freely write into the pages directly. If userspace
+        * races pwrite with any other operation; corruption will ensue -
+        * that is userspace's prerogative!
+        */
+
+       remain = arg->size;
+       offset = arg->offset;
+       pg = offset_in_page(offset);
+
+       do {
+               unsigned int len, unwritten;
+               struct page *page;
+               void *data, *vaddr;
+               int err;
+
+               len = PAGE_SIZE - pg;
+               if (len > remain)
+                       len = remain;
+
+               err = pagecache_write_begin(obj->base.filp, mapping,
+                                           offset, len, 0,
+                                           &page, &data);
+               if (err < 0)
+                       return err;
+
+               vaddr = kmap(page);
+               unwritten = copy_from_user(vaddr + pg, user_data, len);
+               kunmap(page);
+
+               err = pagecache_write_end(obj->base.filp, mapping,
+                                         offset, len, len - unwritten,
+                                         page, data);
+               if (err < 0)
+                       return err;
+
+               if (unwritten)
+                       return -EFAULT;
+
+               remain -= len;
+               user_data += len;
+               offset += len;
+               pg = 0;
+       } while (remain);
+
+       return 0;
+}
+
  static bool ban_context(const struct i915_gem_context *ctx)
  {
         return (i915_gem_context_is_bannable(ctx) &&
@@ -2641,7 +2719,16 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
         for_each_engine(engine, dev_priv, id) {
                 struct drm_i915_gem_request *request;
  
+               /* Prevent request submission to the hardware until we have
+                * completed the reset in i915_gem_reset_finish(). If a request
+                * is completed by one engine, it may then queue a request
+                * to a second via its engine->irq_tasklet *just* as we are
+                * calling engine->init_hw() and also writing the ELSP.
+                * Turning off the engine->irq_tasklet until the reset is over
+                * prevents the race.
+                */
                 tasklet_kill(&engine->irq_tasklet);
+               tasklet_disable(&engine->irq_tasklet);
  
                 if (engine_stalled(engine)) {
                         request = i915_gem_find_active_request(engine);
@@ -2756,7 +2843,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
         engine->reset_hw(engine, request);
  }
  
-void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
+void i915_gem_reset(struct drm_i915_private *dev_priv)
  {
         struct intel_engine_cs *engine;
         enum intel_engine_id id;
@@ -2778,6 +2865,17 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
         }
  }
  
+void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
+{
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+
+       lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
+       for_each_engine(engine, dev_priv, id)
+               tasklet_enable(&engine->irq_tasklet);
+}
+
  static void nop_submit_request(struct drm_i915_gem_request *request)
  {
         dma_fence_set_error(&request->fence, -EIO);
@@ -3029,6 +3127,16 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
                 args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
                 if (args->timeout_ns < 0)
                         args->timeout_ns = 0;
+
+               /*
+                * Apparently ktime isn't accurate enough and occasionally has a
+                * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
+                * things up to make the test happy. We allow up to 1 jiffy.
+                *
+                * This is a regression from the timespec->ktime conversion.
+                */
+               if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
+                       args->timeout_ns = 0;
         }
  
         i915_gem_object_put(obj);
@@ -3974,8 +4082,11 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
  static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
         .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
                  I915_GEM_OBJECT_IS_SHRINKABLE,
+
         .get_pages = i915_gem_object_get_pages_gtt,
         .put_pages = i915_gem_object_put_pages_gtt,
+
+       .pwrite = i915_gem_object_pwrite_gtt,
  };
  
  struct drm_i915_gem_object *
@@ -4237,6 +4348,8 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
         i915_gem_context_lost(dev_priv);
         mutex_unlock(&dev->struct_mutex);
  
+       intel_guc_suspend(dev_priv);
+
         cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
         cancel_delayed_work_sync(&dev_priv->gt.retire_work);
  
@@ -4583,8 +4696,6 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
         init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
         init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
  
-       dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
-
         init_waitqueue_head(&dev_priv->pending_flip_queue);
  
         dev_priv->mm.interruptible = true;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c

index 17f90c6182081c932652715ed34724f1c71b373d..e2d83b6d376b03e8bdef54bc9ed9e7c9a3d0138c 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -311,7 +311,6 @@ __create_hw_context(struct drm_i915_private *dev_priv,
         ctx->ring_size = 4 * PAGE_SIZE;
         ctx->desc_template = GEN8_CTX_ADDRESSING_MODE(dev_priv) <<
                              GEN8_CTX_ADDRESSING_MODE_SHIFT;
-       ATOMIC_INIT_NOTIFIER_HEAD(&ctx->status_notifier);
  
         /* GuC requires the ring to be placed above GUC_WOPCM_TOP. If GuC is not
          * present or not in use we still need a small bias as ring wraparound
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h

index 0ac750b90f3d33f5585351fe284da68eaf6cb1e2..e9c008fe14b1d77500e7d456ef37705053685130 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -160,9 +160,6 @@ struct i915_gem_context {
         /** desc_template: invariant fields for the HW context descriptor */
         u32 desc_template;
  
-       /** status_notifier: list of callbacks for context-switch changes */
-       struct atomic_notifier_head status_notifier;
-
         /** guilty_count: How many times this context has caused a GPU hang. */
         unsigned int guilty_count;
         /**
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c

index c181b1bb3d2c9e72addb040ee8a0d5a4b52f06c9..3be2503aa042c0c48cb2745ad26e9316a2409484 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -293,12 +293,12 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
                  * those as well to make room for our guard pages.
                  */
                 if (check_color) {
-                       if (vma->node.start + vma->node.size == node->start) {
-                               if (vma->node.color == node->color)
+                       if (node->start + node->size == target->start) {
+                               if (node->color == target->color)
                                         continue;
                         }
-                       if (vma->node.start == node->start + node->size) {
-                               if (vma->node.color == node->color)
+                       if (node->start == target->start + target->size) {
+                               if (node->color == target->color)
                                         continue;
                         }
                 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c

index d02cfaefe1c84e86cf39d74aa1ef09a4d1345af1..15a15d00a6bfa07cbe93ac2669cefaee3cb5ed2c 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -888,6 +888,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
         struct list_head ordered_vmas;
         struct list_head pinned_vmas;
         bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4;
+       bool needs_unfenced_map = INTEL_INFO(engine->i915)->unfenced_needs_alignment;
         int retry;
  
         vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
@@ -908,7 +909,8 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
                 if (!has_fenced_gpu_access)
                         entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
                 need_fence =
-                       entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
+                       (entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
+                        needs_unfenced_map) &&
                         i915_gem_object_is_tiled(obj);
                 need_mappable = need_fence || need_reloc_mappable(vma);
  
@@ -1408,10 +1410,7 @@ execbuf_submit(struct i915_execbuffer_params *params,
                struct drm_i915_gem_execbuffer2 *args,
                struct list_head *vmas)
  {
-       struct drm_i915_private *dev_priv = params->request->i915;
         u64 exec_start, exec_len;
-       int instp_mode;
-       u32 instp_mask;
         int ret;
  
         ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
@@ -1422,56 +1421,11 @@ execbuf_submit(struct i915_execbuffer_params *params,
         if (ret)
                 return ret;
  
-       instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
-       instp_mask = I915_EXEC_CONSTANTS_MASK;
-       switch (instp_mode) {
-       case I915_EXEC_CONSTANTS_REL_GENERAL:
-       case I915_EXEC_CONSTANTS_ABSOLUTE:
-       case I915_EXEC_CONSTANTS_REL_SURFACE:
-               if (instp_mode != 0 && params->engine->id != RCS) {
-                       DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
-                       return -EINVAL;
-               }
-
-               if (instp_mode != dev_priv->relative_constants_mode) {
-                       if (INTEL_INFO(dev_priv)->gen < 4) {
-                               DRM_DEBUG("no rel constants on pre-gen4\n");
-                               return -EINVAL;
-                       }
-
-                       if (INTEL_INFO(dev_priv)->gen > 5 &&
-                           instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
-                               DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
-                               return -EINVAL;
-                       }
-
-                       /* The HW changed the meaning on this bit on gen6 */
-                       if (INTEL_INFO(dev_priv)->gen >= 6)
-                               instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
-               }
-               break;
-       default:
-               DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
+       if (args->flags & I915_EXEC_CONSTANTS_MASK) {
+               DRM_DEBUG("I915_EXEC_CONSTANTS_* unsupported\n");
                 return -EINVAL;
         }
  
-       if (params->engine->id == RCS &&
-           instp_mode != dev_priv->relative_constants_mode) {
-               struct intel_ring *ring = params->request->ring;
-
-               ret = intel_ring_begin(params->request, 4);
-               if (ret)
-                       return ret;
-
-               intel_ring_emit(ring, MI_NOOP);
-               intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-               intel_ring_emit_reg(ring, INSTPM);
-               intel_ring_emit(ring, instp_mask << 16 | instp_mode);
-               intel_ring_advance(ring);
-
-               dev_priv->relative_constants_mode = instp_mode;
-       }
-
         if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
                 ret = i915_reset_gen7_sol_offsets(params->request);
                 if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c

index 2801a4d5632491787009c4ae62130dc732215136..96e45a4d54410b085191e09ab33c6f4ceca0100a 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2704,7 +2704,7 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
         struct i915_ggtt *ggtt = &dev_priv->ggtt;
  
         if (unlikely(ggtt->do_idle_maps)) {
-               if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) {
+               if (i915_gem_wait_for_idle(dev_priv, 0)) {
                         DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
                         /* Wait a bit, in hopes it avoids the hang */
                         udelay(10);
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h

index bf90b07163d1266a6bb0c87f036e84fa78181991..76b80a0be79767be189c94694434c338c1f97e6a 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -54,6 +54,9 @@ struct drm_i915_gem_object_ops {
         struct sg_table *(*get_pages)(struct drm_i915_gem_object *);
         void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
  
+       int (*pwrite)(struct drm_i915_gem_object *,
+                     const struct drm_i915_gem_pwrite *);
+
         int (*dmabuf_export)(struct drm_i915_gem_object *);
         void (*release)(struct drm_i915_gem_object *);
  };
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c

index e7c3c0318ff60f2bf60b3c5afce405d11ce54a5c..da70bfe97ec5843adbdac276e5c86fa608266087 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -37,6 +37,17 @@ static const char *i915_fence_get_driver_name(struct dma_fence *fence)
  
  static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
  {
+       /* The timeline struct (as part of the ppgtt underneath a context)
+        * may be freed when the request is no longer in use by the GPU.
+        * We could extend the life of a context to beyond that of all
+        * fences, possibly keeping the hw resource around indefinitely,
+        * or we just give them a false name. Since
+        * dma_fence_ops.get_timeline_name is a debug feature, the occasional
+        * lie seems justifiable.
+        */
+       if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+               return "signaled";
+
         return to_request(fence)->timeline->common->name;
  }
  
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c

index 401006b4c6a36bf2a8058c2b47eb38105ab5baf4..70b3832a79dd40d066a6f2deb34cbc6d436559e1 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -53,6 +53,17 @@ static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
         BUG();
  }
  
+static void i915_gem_shrinker_unlock(struct drm_device *dev, bool unlock)
+{
+       if (!unlock)
+               return;
+
+       mutex_unlock(&dev->struct_mutex);
+
+       /* expedite the RCU grace period to free some request slabs */
+       synchronize_rcu_expedited();
+}
+
  static bool any_vma_pinned(struct drm_i915_gem_object *obj)
  {
         struct i915_vma *vma;
@@ -232,11 +243,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
                 intel_runtime_pm_put(dev_priv);
  
         i915_gem_retire_requests(dev_priv);
-       if (unlock)
-               mutex_unlock(&dev_priv->drm.struct_mutex);
  
-       /* expedite the RCU grace period to free some request slabs */
-       synchronize_rcu_expedited();
+       i915_gem_shrinker_unlock(&dev_priv->drm, unlock);
  
         return count;
  }
@@ -263,7 +271,7 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv)
                                 I915_SHRINK_BOUND |
                                 I915_SHRINK_UNBOUND |
                                 I915_SHRINK_ACTIVE);
-       rcu_barrier(); /* wait until our RCU delayed slab frees are completed */
+       synchronize_rcu(); /* wait for our earlier RCU delayed slab frees */
  
         return freed;
  }
@@ -293,8 +301,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
                         count += obj->base.size >> PAGE_SHIFT;
         }
  
-       if (unlock)
-               mutex_unlock(&dev->struct_mutex);
+       i915_gem_shrinker_unlock(dev, unlock);
  
         return count;
  }
@@ -321,8 +328,8 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
                                          sc->nr_to_scan - freed,
                                          I915_SHRINK_BOUND |
                                          I915_SHRINK_UNBOUND);
-       if (unlock)
-               mutex_unlock(&dev->struct_mutex);
+
+       i915_gem_shrinker_unlock(dev, unlock);
  
         return freed;
  }
@@ -364,8 +371,7 @@ i915_gem_shrinker_unlock_uninterruptible(struct drm_i915_private *dev_priv,
                                          struct shrinker_lock_uninterruptible *slu)
  {
         dev_priv->mm.interruptible = slu->was_interruptible;
-       if (slu->unlock)
-               mutex_unlock(&dev_priv->drm.struct_mutex);
+       i915_gem_shrinker_unlock(&dev_priv->drm, slu->unlock);
  }
  
  static int
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c

index e6ffef2f707a01934a3a6f777b1dc7548ef370c8..b6c886ac901bd78cfa7beb1a3aaaf706821a6d2b 100644 (file)
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1046,68 +1046,51 @@ static void vlv_c0_read(struct drm_i915_private *dev_priv,
         ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
  }
  
-static bool vlv_c0_above(struct drm_i915_private *dev_priv,
-                        const struct intel_rps_ei *old,
-                        const struct intel_rps_ei *now,
-                        int threshold)
-{
-       u64 time, c0;
-       unsigned int mul = 100;
-
-       if (old->cz_clock == 0)
-               return false;
-
-       if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
-               mul <<= 8;
-
-       time = now->cz_clock - old->cz_clock;
-       time *= threshold * dev_priv->czclk_freq;
-
-       /* Workload can be split between render + media, e.g. SwapBuffers
-        * being blitted in X after being rendered in mesa. To account for
-        * this we need to combine both engines into our activity counter.
-        */
-       c0 = now->render_c0 - old->render_c0;
-       c0 += now->media_c0 - old->media_c0;
-       c0 *= mul * VLV_CZ_CLOCK_TO_MILLI_SEC;
-
-       return c0 >= time;
-}
-
  void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
  {
-       vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
-       dev_priv->rps.up_ei = dev_priv->rps.down_ei;
+       memset(&dev_priv->rps.ei, 0, sizeof(dev_priv->rps.ei));
  }
  
  static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
  {
+       const struct intel_rps_ei *prev = &dev_priv->rps.ei;
         struct intel_rps_ei now;
         u32 events = 0;
  
-       if ((pm_iir & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) == 0)
+       if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
                 return 0;
  
         vlv_c0_read(dev_priv, &now);
         if (now.cz_clock == 0)
                 return 0;
  
-       if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
-               if (!vlv_c0_above(dev_priv,
-                                 &dev_priv->rps.down_ei, &now,
-                                 dev_priv->rps.down_threshold))
-                       events |= GEN6_PM_RP_DOWN_THRESHOLD;
-               dev_priv->rps.down_ei = now;
-       }
+       if (prev->cz_clock) {
+               u64 time, c0;
+               unsigned int mul;
+
+               mul = VLV_CZ_CLOCK_TO_MILLI_SEC * 100; /* scale to threshold% */
+               if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
+                       mul <<= 8;
  
-       if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
-               if (vlv_c0_above(dev_priv,
-                                &dev_priv->rps.up_ei, &now,
-                                dev_priv->rps.up_threshold))
-                       events |= GEN6_PM_RP_UP_THRESHOLD;
-               dev_priv->rps.up_ei = now;
+               time = now.cz_clock - prev->cz_clock;
+               time *= dev_priv->czclk_freq;
+
+               /* Workload can be split between render + media,
+                * e.g. SwapBuffers being blitted in X after being rendered in
+                * mesa. To account for this we need to combine both engines
+                * into our activity counter.
+                */
+               c0 = now.render_c0 - prev->render_c0;
+               c0 += now.media_c0 - prev->media_c0;
+               c0 *= mul;
+
+               if (c0 > time * dev_priv->rps.up_threshold)
+                       events = GEN6_PM_RP_UP_THRESHOLD;
+               else if (c0 < time * dev_priv->rps.down_threshold)
+                       events = GEN6_PM_RP_DOWN_THRESHOLD;
         }
  
+       dev_priv->rps.ei = now;
         return events;
  }
  
@@ -4228,7 +4211,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
         /* Let's track the enabled rps events */
         if (IS_VALLEYVIEW(dev_priv))
                 /* WaGsvRC0ResidencyMethod:vlv */
-               dev_priv->pm_rps_events = GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED;
+               dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
         else
                 dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
  
@@ -4266,6 +4249,16 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
         if (!IS_GEN2(dev_priv))
                 dev->vblank_disable_immediate = true;
  
+       /* Most platforms treat the display irq block as an always-on
+        * power domain. vlv/chv can disable it at runtime and need
+        * special care to avoid writing any of the display block registers
+        * outside of the power domain. We defer setting up the display irqs
+        * in this case to the runtime pm.
+        */
+       dev_priv->display_irqs_enabled = true;
+       if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+               dev_priv->display_irqs_enabled = false;
+
         dev->driver->get_vblank_timestamp = i915_get_vblank_timestamp;
         dev->driver->get_scanout_position = i915_get_crtc_scanoutpos;
  
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c

index ecb487b5356fe68696b19d3054dc61339a61f406..9bbbd4e83e3c5d99cb4cdc9d4422f586159826bf 100644 (file)
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -60,6 +60,7 @@
         .has_overlay = 1, .overlay_needs_physical = 1, \
         .has_gmch_display = 1, \
         .hws_needs_physical = 1, \
+       .unfenced_needs_alignment = 1, \
         .ring_mask = RENDER_RING, \
         GEN_DEFAULT_PIPEOFFSETS, \
         CURSOR_OFFSETS
@@ -101,6 +102,7 @@ static const struct intel_device_info intel_i915g_info = {
         .platform = INTEL_I915G, .cursor_needs_physical = 1,
         .has_overlay = 1, .overlay_needs_physical = 1,
         .hws_needs_physical = 1,
+       .unfenced_needs_alignment = 1,
  };
  
  static const struct intel_device_info intel_i915gm_info = {
@@ -112,6 +114,7 @@ static const struct intel_device_info intel_i915gm_info = {
         .supports_tv = 1,
         .has_fbc = 1,
         .hws_needs_physical = 1,
+       .unfenced_needs_alignment = 1,
  };
  
  static const struct intel_device_info intel_i945g_info = {
@@ -120,6 +123,7 @@ static const struct intel_device_info intel_i945g_info = {
         .has_hotplug = 1, .cursor_needs_physical = 1,
         .has_overlay = 1, .overlay_needs_physical = 1,
         .hws_needs_physical = 1,
+       .unfenced_needs_alignment = 1,
  };
  
  static const struct intel_device_info intel_i945gm_info = {
@@ -130,6 +134,7 @@ static const struct intel_device_info intel_i945gm_info = {
         .supports_tv = 1,
         .has_fbc = 1,
         .hws_needs_physical = 1,
+       .unfenced_needs_alignment = 1,
  };
  
  static const struct intel_device_info intel_g33_info = {
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c

index a1b7eec58be2742e6d94e5566b09fbb61e54df9d..70964ca9251e04939225c87a773e9a6bf760b1ac 100644 (file)
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1705,7 +1705,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
          */
         if (WARN_ON(stream->sample_flags != props->sample_flags)) {
                 ret = -ENODEV;
-               goto err_alloc;
+               goto err_flags;
         }
  
         list_add(&stream->link, &dev_priv->perf.streams);
@@ -1728,6 +1728,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
  
  err_open:
         list_del(&stream->link);
+err_flags:
         if (stream->ops->destroy)
                 stream->ops->destroy(stream);
  err_alloc:
@@ -1793,6 +1794,11 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
                 if (ret)
                         return ret;
  
+               if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) {
+                       DRM_DEBUG("Unknown i915 perf property ID\n");
+                       return -EINVAL;
+               }
+
                 switch ((enum drm_i915_perf_property_id)id) {
                 case DRM_I915_PERF_PROP_CTX_HANDLE:
                         props->single_context = 1;
@@ -1862,9 +1868,8 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
                         props->oa_periodic = true;
                         props->oa_period_exponent = value;
                         break;
-               default:
+               case DRM_I915_PERF_PROP_MAX:
                         MISSING_CASE(id);
-                       DRM_DEBUG("Unknown i915 perf property ID\n");
                         return -EINVAL;
                 }
  
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c

index 155906e848120ae2e1de533d81658080c546888d..df20e9bc1c0f3dee67eb555ae20741d907a6b430 100644 (file)
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -512,10 +512,36 @@ err_unpin:
         return ret;
  }
  
+static void
+i915_vma_remove(struct i915_vma *vma)
+{
+       struct drm_i915_gem_object *obj = vma->obj;
+
+       GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+       GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
+
+       drm_mm_remove_node(&vma->node);
+       list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
+
+       /* Since the unbound list is global, only move to that list if
+        * no more VMAs exist.
+        */
+       if (--obj->bind_count == 0)
+               list_move_tail(&obj->global_link,
+                              &to_i915(obj->base.dev)->mm.unbound_list);
+
+       /* And finally now the object is completely decoupled from this vma,
+        * we can drop its hold on the backing storage and allow it to be
+        * reaped by the shrinker.
+        */
+       i915_gem_object_unpin_pages(obj);
+       GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count);
+}
+
  int __i915_vma_do_pin(struct i915_vma *vma,
                       u64 size, u64 alignment, u64 flags)
  {
-       unsigned int bound = vma->flags;
+       const unsigned int bound = vma->flags;
         int ret;
  
         lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
@@ -524,18 +550,18 @@ int __i915_vma_do_pin(struct i915_vma *vma,
  
         if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
                 ret = -EBUSY;
-               goto err;
+               goto err_unpin;
         }
  
         if ((bound & I915_VMA_BIND_MASK) == 0) {
                 ret = i915_vma_insert(vma, size, alignment, flags);
                 if (ret)
-                       goto err;
+                       goto err_unpin;
         }
  
         ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
         if (ret)
-               goto err;
+               goto err_remove;
  
         if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
                 __i915_vma_set_map_and_fenceable(vma);
@@ -544,7 +570,12 @@ int __i915_vma_do_pin(struct i915_vma *vma,
         GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
         return 0;
  
-err:
+err_remove:
+       if ((bound & I915_VMA_BIND_MASK) == 0) {
+               GEM_BUG_ON(vma->pages);
+               i915_vma_remove(vma);
+       }
+err_unpin:
         __i915_vma_unpin(vma);
         return ret;
  }
@@ -657,9 +688,6 @@ int i915_vma_unbind(struct i915_vma *vma)
         }
         vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
  
-       drm_mm_remove_node(&vma->node);
-       list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
-
         if (vma->pages != obj->mm.pages) {
                 GEM_BUG_ON(!vma->pages);
                 sg_free_table(vma->pages);
@@ -667,18 +695,7 @@ int i915_vma_unbind(struct i915_vma *vma)
         }
         vma->pages = NULL;
  
-       /* Since the unbound list is global, only move to that list if
-        * no more VMAs exist. */
-       if (--obj->bind_count == 0)
-               list_move_tail(&obj->global_link,
-                              &to_i915(obj->base.dev)->mm.unbound_list);
-
-       /* And finally now the object is completely decoupled from this vma,
-        * we can drop its hold on the backing storage and allow it to be
-        * reaped by the shrinker.
-        */
-       i915_gem_object_unpin_pages(obj);
-       GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count);
+       i915_vma_remove(vma);
  
  destroy:
         if (unlikely(i915_vma_is_closed(vma)))
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c

index 0085bc745f6aa5256cf21c1ae76dd2cbce80e387..de219b71fb76ecbfab51d58bd379064b555c8241 100644 (file)
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -35,7 +35,6 @@
   */
  
  #define I915_CSR_GLK "i915/glk_dmc_ver1_01.bin"
-MODULE_FIRMWARE(I915_CSR_GLK);
  #define GLK_CSR_VERSION_REQUIRED       CSR_VERSION(1, 1)
  
  #define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin"
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c

index 01341670738fbb118d8402bbda62d7234c8c3863..ed1f4f272b4fb3907adeea175216173fea5253fb 100644 (file)
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3669,10 +3669,6 @@ static void intel_update_pipe_config(struct intel_crtc *crtc,
         /* drm_atomic_helper_update_legacy_modeset_state might not be called. */
         crtc->base.mode = crtc->base.state->mode;
  
-       DRM_DEBUG_KMS("Updating pipe size %ix%i -> %ix%i\n",
-                     old_crtc_state->pipe_src_w, old_crtc_state->pipe_src_h,
-                     pipe_config->pipe_src_w, pipe_config->pipe_src_h);
-
         /*
          * Update pipe size and adjust fitter if needed: the reason for this is
          * that in compute_mode_changes we check the native mode (not the pfit
@@ -4796,23 +4792,17 @@ static void skylake_pfit_enable(struct intel_crtc *crtc)
         struct intel_crtc_scaler_state *scaler_state =
                 &crtc->config->scaler_state;
  
-       DRM_DEBUG_KMS("for crtc_state = %p\n", crtc->config);
-
         if (crtc->config->pch_pfit.enabled) {
                 int id;
  
-               if (WARN_ON(crtc->config->scaler_state.scaler_id < 0)) {
-                       DRM_ERROR("Requesting pfit without getting a scaler first\n");
+               if (WARN_ON(crtc->config->scaler_state.scaler_id < 0))
                         return;
-               }
  
                 id = scaler_state->scaler_id;
                 I915_WRITE(SKL_PS_CTRL(pipe, id), PS_SCALER_EN |
                         PS_FILTER_MEDIUM | scaler_state->scalers[id].mode);
                 I915_WRITE(SKL_PS_WIN_POS(pipe, id), crtc->config->pch_pfit.pos);
                 I915_WRITE(SKL_PS_WIN_SZ(pipe, id), crtc->config->pch_pfit.size);
-
-               DRM_DEBUG_KMS("for crtc_state = %p scaler_id = %d\n", crtc->config, id);
         }
  }
  
@@ -14379,6 +14369,24 @@ static void skl_update_crtcs(struct drm_atomic_state *state,
         } while (progress);
  }
  
+static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv)
+{
+       struct intel_atomic_state *state, *next;
+       struct llist_node *freed;
+
+       freed = llist_del_all(&dev_priv->atomic_helper.free_list);
+       llist_for_each_entry_safe(state, next, freed, freed)
+               drm_atomic_state_put(&state->base);
+}
+
+static void intel_atomic_helper_free_state_worker(struct work_struct *work)
+{
+       struct drm_i915_private *dev_priv =
+               container_of(work, typeof(*dev_priv), atomic_helper.free_work);
+
+       intel_atomic_helper_free_state(dev_priv);
+}
+
  static void intel_atomic_commit_tail(struct drm_atomic_state *state)
  {
         struct drm_device *dev = state->dev;
@@ -14545,6 +14553,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
          * can happen also when the device is completely off.
          */
         intel_uncore_arm_unclaimed_mmio_detection(dev_priv);
+
+       intel_atomic_helper_free_state(dev_priv);
  }
  
  static void intel_atomic_commit_work(struct work_struct *work)
@@ -14946,17 +14956,19 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc,
                 to_intel_atomic_state(old_crtc_state->state);
         bool modeset = needs_modeset(crtc->state);
  
+       if (!modeset &&
+           (intel_cstate->base.color_mgmt_changed ||
+            intel_cstate->update_pipe)) {
+               intel_color_set_csc(crtc->state);
+               intel_color_load_luts(crtc->state);
+       }
+
         /* Perform vblank evasion around commit operation */
         intel_pipe_update_start(intel_crtc);
  
         if (modeset)
                 goto out;
  
-       if (crtc->state->color_mgmt_changed || to_intel_crtc_state(crtc->state)->update_pipe) {
-               intel_color_set_csc(crtc->state);
-               intel_color_load_luts(crtc->state);
-       }
-
         if (intel_cstate->update_pipe)
                 intel_update_pipe_config(intel_crtc, old_intel_cstate);
         else if (INTEL_GEN(dev_priv) >= 9)
@@ -16599,18 +16611,6 @@ fail:
         drm_modeset_acquire_fini(&ctx);
  }
  
-static void intel_atomic_helper_free_state(struct work_struct *work)
-{
-       struct drm_i915_private *dev_priv =
-               container_of(work, typeof(*dev_priv), atomic_helper.free_work);
-       struct intel_atomic_state *state, *next;
-       struct llist_node *freed;
-
-       freed = llist_del_all(&dev_priv->atomic_helper.free_list);
-       llist_for_each_entry_safe(state, next, freed, freed)
-               drm_atomic_state_put(&state->base);
-}
-
  int intel_modeset_init(struct drm_device *dev)
  {
         struct drm_i915_private *dev_priv = to_i915(dev);
@@ -16631,7 +16631,7 @@ int intel_modeset_init(struct drm_device *dev)
         dev->mode_config.funcs = &intel_mode_funcs;
  
         INIT_WORK(&dev_priv->atomic_helper.free_work,
-                 intel_atomic_helper_free_state);
+                 intel_atomic_helper_free_state_worker);
  
         intel_init_quirks(dev);
  
@@ -16696,12 +16696,11 @@ int intel_modeset_init(struct drm_device *dev)
                 }
         }
  
-       intel_update_czclk(dev_priv);
-       intel_update_cdclk(dev_priv);
-       dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq;
-
         intel_shared_dpll_init(dev);
  
+       intel_update_czclk(dev_priv);
+       intel_modeset_init_hw(dev);
+
         if (dev_priv->max_cdclk_freq == 0)
                 intel_update_max_cdclk(dev_priv);
  
@@ -17258,8 +17257,6 @@ void intel_modeset_gem_init(struct drm_device *dev)
  
         intel_init_gt_powersave(dev_priv);
  
-       intel_modeset_init_hw(dev);
-
         intel_setup_overlay(dev_priv);
  }
  
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c

index 371acf109e343295ae060c4cf49908bf5607118e..ab1be5c80ea5960df983940e70f98f0d49cbd8e1 100644 (file)
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -105,6 +105,8 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
         /* Nothing to do here, execute in order of dependencies */
         engine->schedule = NULL;
  
+       ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
+
         dev_priv->engine[id] = engine;
         return 0;
  }
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c

index 1b8ba2e77539577f5eb997f9e1eb315f1f7ae078..2d449fb5d1d2b02dc016ebb50a026733b50acbf3 100644 (file)
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -357,14 +357,13 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper,
                                     bool *enabled, int width, int height)
  {
         struct drm_i915_private *dev_priv = to_i915(fb_helper->dev);
-       unsigned long conn_configured, mask;
+       unsigned long conn_configured, conn_seq, mask;
         unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG);
         int i, j;
         bool *save_enabled;
         bool fallback = true;
         int num_connectors_enabled = 0;
         int num_connectors_detected = 0;
-       int pass = 0;
  
         save_enabled = kcalloc(count, sizeof(bool), GFP_KERNEL);
         if (!save_enabled)
@@ -374,6 +373,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper,
         mask = BIT(count) - 1;
         conn_configured = 0;
  retry:
+       conn_seq = conn_configured;
         for (i = 0; i < count; i++) {
                 struct drm_fb_helper_connector *fb_conn;
                 struct drm_connector *connector;
@@ -387,7 +387,7 @@ retry:
                 if (conn_configured & BIT(i))
                         continue;
  
-               if (pass == 0 && !connector->has_tile)
+               if (conn_seq == 0 && !connector->has_tile)
                         continue;
  
                 if (connector->status == connector_status_connected)
@@ -498,10 +498,8 @@ retry:
                 conn_configured |= BIT(i);
         }
  
-       if ((conn_configured & mask) != mask) {
-               pass++;
+       if ((conn_configured & mask) != mask && conn_configured != conn_seq)
                 goto retry;
-       }
  
         /*
          * If the BIOS didn't enable everything it could, fall back to have the
diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c

index d23c0fcff7516a9f3df4363010e2c3dfa84e3c00..8c04eca84351cbbe5d7f385fd6d262f40a080a28 100644 (file)
--- a/drivers/gpu/drm/i915/intel_gvt.c
+++ b/drivers/gpu/drm/i915/intel_gvt.c
@@ -77,6 +77,11 @@ int intel_gvt_init(struct drm_i915_private *dev_priv)
                 goto bail;
         }
  
+       if (!i915.enable_execlists) {
+               DRM_INFO("GPU guest virtualisation [GVT-g] disabled due to disabled execlist submission [i915.enable_execlists module parameter]\n");
+               goto bail;
+       }
+
         /*
          * We're not in host or fail to find a MPT module, disable GVT-g
          */
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c

index ebae2bd839189c07588e88a526f3f804d08157b3..24b2fa5b62824dfa86d87c9e5d1c630957192867 100644 (file)
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -1298,16 +1298,34 @@ intel_hdmi_mode_valid(struct drm_connector *connector,
  
  static bool hdmi_12bpc_possible(struct intel_crtc_state *crtc_state)
  {
-       struct drm_device *dev = crtc_state->base.crtc->dev;
+       struct drm_i915_private *dev_priv =
+               to_i915(crtc_state->base.crtc->dev);
+       struct drm_atomic_state *state = crtc_state->base.state;
+       struct drm_connector_state *connector_state;
+       struct drm_connector *connector;
+       int i;
  
-       if (HAS_GMCH_DISPLAY(to_i915(dev)))
+       if (HAS_GMCH_DISPLAY(dev_priv))
                 return false;
  
         /*
          * HDMI 12bpc affects the clocks, so it's only possible
          * when not cloning with other encoder types.
          */
-       return crtc_state->output_types == 1 << INTEL_OUTPUT_HDMI;
+       if (crtc_state->output_types != 1 << INTEL_OUTPUT_HDMI)
+               return false;
+
+       for_each_connector_in_state(state, connector, connector_state, i) {
+               const struct drm_display_info *info = &connector->display_info;
+
+               if (connector_state->crtc != crtc_state->base.crtc)
+                       continue;
+
+               if ((info->edid_hdmi_dc_modes & DRM_EDID_HDMI_DC_36) == 0)
+                       return false;
+       }
+
+       return true;
  }
  
  bool intel_hdmi_compute_config(struct intel_encoder *encoder,
diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c

index b62e3f8ad415f6173470c90a3cb1b35b04f91c4b..54208bef7a83561eb72c29d26079fa447211203c 100644 (file)
--- a/drivers/gpu/drm/i915/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/intel_hotplug.c
@@ -219,7 +219,7 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work)
                         }
                 }
         }
-       if (dev_priv->display.hpd_irq_setup)
+       if (dev_priv->display_irqs_enabled && dev_priv->display.hpd_irq_setup)
                 dev_priv->display.hpd_irq_setup(dev_priv);
         spin_unlock_irq(&dev_priv->irq_lock);
  
@@ -425,7 +425,7 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv,
                 }
         }
  
-       if (storm_detected)
+       if (storm_detected && dev_priv->display_irqs_enabled)
                 dev_priv->display.hpd_irq_setup(dev_priv);
         spin_unlock(&dev_priv->irq_lock);
  
@@ -471,10 +471,12 @@ void intel_hpd_init(struct drm_i915_private *dev_priv)
          * Interrupt setup is already guaranteed to be single-threaded, this is
          * just to make the assert_spin_locked checks happy.
          */
-       spin_lock_irq(&dev_priv->irq_lock);
-       if (dev_priv->display.hpd_irq_setup)
-               dev_priv->display.hpd_irq_setup(dev_priv);
-       spin_unlock_irq(&dev_priv->irq_lock);
+       if (dev_priv->display_irqs_enabled && dev_priv->display.hpd_irq_setup) {
+               spin_lock_irq(&dev_priv->irq_lock);
+               if (dev_priv->display_irqs_enabled)
+                       dev_priv->display.hpd_irq_setup(dev_priv);
+               spin_unlock_irq(&dev_priv->irq_lock);
+       }
  }
  
  static void i915_hpd_poll_init_work(struct work_struct *work)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c

index ebf8023d21e6fba52c01b54d46fb3456b4709731..47517a02f0a439125b3b3a769e6848a4c4928ca2 100644 (file)
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -345,7 +345,8 @@ execlists_context_status_change(struct drm_i915_gem_request *rq,
         if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
                 return;
  
-       atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq);
+       atomic_notifier_call_chain(&rq->engine->context_status_notifier,
+                                  status, rq);
  }
  
  static void
@@ -669,15 +670,14 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
  static struct intel_engine_cs *
  pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
  {
-       struct intel_engine_cs *engine;
+       struct intel_engine_cs *engine =
+               container_of(pt, struct drm_i915_gem_request, priotree)->engine;
+
+       GEM_BUG_ON(!locked);
  
-       engine = container_of(pt,
-                             struct drm_i915_gem_request,
-                             priotree)->engine;
         if (engine != locked) {
-               if (locked)
-                       spin_unlock_irq(&locked->timeline->lock);
-               spin_lock_irq(&engine->timeline->lock);
+               spin_unlock(&locked->timeline->lock);
+               spin_lock(&engine->timeline->lock);
         }
  
         return engine;
@@ -685,7 +685,7 @@ pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
  
  static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
  {
-       struct intel_engine_cs *engine = NULL;
+       struct intel_engine_cs *engine;
         struct i915_dependency *dep, *p;
         struct i915_dependency stack;
         LIST_HEAD(dfs);
@@ -719,26 +719,23 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
         list_for_each_entry_safe(dep, p, &dfs, dfs_link) {
                 struct i915_priotree *pt = dep->signaler;
  
-               list_for_each_entry(p, &pt->signalers_list, signal_link)
+               /* Within an engine, there can be no cycle, but we may
+                * refer to the same dependency chain multiple times
+                * (redundant dependencies are not eliminated) and across
+                * engines.
+                */
+               list_for_each_entry(p, &pt->signalers_list, signal_link) {
+                       GEM_BUG_ON(p->signaler->priority < pt->priority);
                         if (prio > READ_ONCE(p->signaler->priority))
                                 list_move_tail(&p->dfs_link, &dfs);
+               }
  
                 list_safe_reset_next(dep, p, dfs_link);
-               if (!RB_EMPTY_NODE(&pt->node))
-                       continue;
-
-               engine = pt_lock_engine(pt, engine);
-
-               /* If it is not already in the rbtree, we can update the
-                * priority inplace and skip over it (and its dependencies)
-                * if it is referenced *again* as we descend the dfs.
-                */
-               if (prio > pt->priority && RB_EMPTY_NODE(&pt->node)) {
-                       pt->priority = prio;
-                       list_del_init(&dep->dfs_link);
-               }
         }
  
+       engine = request->engine;
+       spin_lock_irq(&engine->timeline->lock);
+
         /* Fifo and depth-first replacement ensure our deps execute before us */
         list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
                 struct i915_priotree *pt = dep->signaler;
@@ -750,16 +747,15 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
                 if (prio <= pt->priority)
                         continue;
  
-               GEM_BUG_ON(RB_EMPTY_NODE(&pt->node));
-
                 pt->priority = prio;
-               rb_erase(&pt->node, &engine->execlist_queue);
-               if (insert_request(pt, &engine->execlist_queue))
-                       engine->execlist_first = &pt->node;
+               if (!RB_EMPTY_NODE(&pt->node)) {
+                       rb_erase(&pt->node, &engine->execlist_queue);
+                       if (insert_request(pt, &engine->execlist_queue))
+                               engine->execlist_first = &pt->node;
+               }
         }
  
-       if (engine)
-               spin_unlock_irq(&engine->timeline->lock);
+       spin_unlock_irq(&engine->timeline->lock);
  
         /* XXX Do we need to preempt to make room for us and our deps? */
  }
@@ -1439,7 +1435,9 @@ static void reset_common_ring(struct intel_engine_cs *engine,
         GEM_BUG_ON(request->ctx != port[0].request->ctx);
  
         /* Reset WaIdleLiteRestore:bdw,skl as well */
-       request->tail = request->wa_tail - WA_TAIL_DWORDS * sizeof(u32);
+       request->tail =
+               intel_ring_wrap(request->ring,
+                               request->wa_tail - WA_TAIL_DWORDS*sizeof(u32));
  }
  
  static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c

index 249623d45be0caa3e891e8a272706dff84dbc4be..6a29784d2b4137c9805e85ffb80265e05e46af53 100644 (file)
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4891,6 +4891,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
                 break;
         }
  
+       /* When byt can survive without system hang with dynamic
+        * sw freq adjustments, this restriction can be lifted.
+        */
+       if (IS_VALLEYVIEW(dev_priv))
+               goto skip_hw_write;
+
         I915_WRITE(GEN6_RP_UP_EI,
                    GT_INTERVAL_FROM_US(dev_priv, ei_up));
         I915_WRITE(GEN6_RP_UP_THRESHOLD,
@@ -4911,6 +4917,7 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
                    GEN6_RP_UP_BUSY_AVG |
                    GEN6_RP_DOWN_IDLE_AVG);
  
+skip_hw_write:
         dev_priv->rps.power = new_power;
         dev_priv->rps.up_threshold = threshold_up;
         dev_priv->rps.down_threshold = threshold_down;
@@ -4921,8 +4928,9 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
  {
         u32 mask = 0;
  
+       /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
         if (val > dev_priv->rps.min_freq_softlimit)
-               mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
+               mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
         if (val < dev_priv->rps.max_freq_softlimit)
                 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
  
@@ -5032,7 +5040,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
  {
         mutex_lock(&dev_priv->rps.hw_lock);
         if (dev_priv->rps.enabled) {
-               if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED))
+               if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
                         gen6_rps_reset_ei(dev_priv);
                 I915_WRITE(GEN6_PMINTRMSK,
                            gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
@@ -7916,10 +7924,10 @@ static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
   * @timeout_base_ms: timeout for polling with preemption enabled
   *
   * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
- * reports an error or an overall timeout of @timeout_base_ms+10 ms expires.
+ * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
   * The request is acknowledged once the PCODE reply dword equals @reply after
   * applying @reply_mask. Polling is first attempted with preemption enabled
- * for @timeout_base_ms and if this times out for another 10 ms with
+ * for @timeout_base_ms and if this times out for another 50 ms with
   * preemption disabled.
   *
   * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
@@ -7955,14 +7963,15 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
          * worst case) _and_ PCODE was busy for some reason even after a
          * (queued) request and @timeout_base_ms delay. As a workaround retry
          * the poll with preemption disabled to maximize the number of
-        * requests. Increase the timeout from @timeout_base_ms to 10ms to
+        * requests. Increase the timeout from @timeout_base_ms to 50ms to
          * account for interrupts that could reduce the number of these
-        * requests.
+        * requests, and for any quirks of the PCODE firmware that delay
+        * the request completion.
          */
         DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n");
         WARN_ON_ONCE(timeout_base_ms > 3);
         preempt_disable();
-       ret = wait_for_atomic(COND, 10);
+       ret = wait_for_atomic(COND, 50);
         preempt_enable();
  
  out:
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c

index 91bc4abf5d3e578ae9dffe2dd2adfd9c4305e31e..6c5f9958197d5541381e7c2838c909fe19fa1520 100644 (file)
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2024,6 +2024,8 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine,
                 ret = context_pin(ctx, flags);
                 if (ret)
                         goto error;
+
+               ce->state->obj->mm.dirty = true;
         }
  
         /* The kernel context is only used as a placeholder for flushing the
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h

index 79c2b8d72322cf58cacd3f732c5a55ce0ef68f3b..8cb2078c5bfc4abc7aeaa7fe51974266edcc8016 100644 (file)
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -403,6 +403,9 @@ struct intel_engine_cs {
          */
         struct i915_gem_context *legacy_active_context;
  
+       /* context_status_notifier: list of callbacks for context-switch changes */
+       struct atomic_notifier_head context_status_notifier;
+
         struct intel_engine_hangcheck hangcheck;
  
         bool needs_cmd_parser;
@@ -518,11 +521,17 @@ static inline void intel_ring_advance(struct intel_ring *ring)
          */
  }
  
+static inline u32
+intel_ring_wrap(const struct intel_ring *ring, u32 pos)
+{
+       return pos & (ring->size - 1);
+}
+
  static inline u32 intel_ring_offset(struct intel_ring *ring, void *addr)
  {
         /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
         u32 offset = addr - ring->vaddr;
-       return offset & (ring->size - 1);
+       return intel_ring_wrap(ring, offset);
  }
  
  int __intel_ring_space(int head, int tail, int size);
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c

index 9ef54688872a86a70ab020a64b7209e040de70e0..9481ca9a3ae7e0a342957baf655a34f570a51eae 100644 (file)
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -254,9 +254,6 @@ skl_update_plane(struct drm_plane *drm_plane,
                 int scaler_id = plane_state->scaler_id;
                 const struct intel_scaler *scaler;
  
-               DRM_DEBUG_KMS("plane = %d PS_PLANE_SEL(plane) = 0x%x\n",
-                             plane_id, PS_PLANE_SEL(plane_id));
-
                 scaler = &crtc_state->scaler_state.scalers[scaler_id];
  
                 I915_WRITE(SKL_PS_CTRL(pipe, scaler_id),
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c

index abe08885a5ba4ef1726d67809544534cf35a57df..b7ff592b14f5e00d68ff1cf6440dd45d6959606d 100644 (file)
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -119,6 +119,8 @@ fw_domains_get(struct drm_i915_private *dev_priv, enum forcewake_domains fw_doma
  
         for_each_fw_domain_masked(d, fw_domains, dev_priv)
                 fw_domain_wait_ack(d);
+
+       dev_priv->uncore.fw_domains_active |= fw_domains;
  }
  
  static void
@@ -130,6 +132,8 @@ fw_domains_put(struct drm_i915_private *dev_priv, enum forcewake_domains fw_doma
                 fw_domain_put(d);
                 fw_domain_posting_read(d);
         }
+
+       dev_priv->uncore.fw_domains_active &= ~fw_domains;
  }
  
  static void
@@ -240,10 +244,8 @@ intel_uncore_fw_release_timer(struct hrtimer *timer)
         if (WARN_ON(domain->wake_count == 0))
                 domain->wake_count++;
  
-       if (--domain->wake_count == 0) {
+       if (--domain->wake_count == 0)
                 dev_priv->uncore.funcs.force_wake_put(dev_priv, domain->mask);
-               dev_priv->uncore.fw_domains_active &= ~domain->mask;
-       }
  
         spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
  
@@ -454,10 +456,8 @@ static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv,
                         fw_domains &= ~domain->mask;
         }
  
-       if (fw_domains) {
+       if (fw_domains)
                 dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains);
-               dev_priv->uncore.fw_domains_active |= fw_domains;
-       }
  }
  
  /**
@@ -968,7 +968,6 @@ static noinline void ___force_wake_auto(struct drm_i915_private *dev_priv,
                 fw_domain_arm_timer(domain);
  
         dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains);
-       dev_priv->uncore.fw_domains_active |= fw_domains;
  }
  
  static inline void __force_wake_auto(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c

index 4414cf73735d26ccb655756327af8226f539f4f5..36602ac7e24835fb9350b3040037524f4c95b7d1 100644 (file)
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2016 The Linux Foundation. All rights reserved.
+/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License version 2 and
@@ -534,7 +534,7 @@ static void a5xx_destroy(struct msm_gpu *gpu)
         }
  
         if (a5xx_gpu->gpmu_bo) {
-               if (a5xx_gpu->gpmu_bo)
+               if (a5xx_gpu->gpmu_iova)
                         msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->id);
                 drm_gem_object_unreference_unlocked(a5xx_gpu->gpmu_bo);
         }
@@ -860,7 +860,9 @@ static const struct adreno_gpu_funcs funcs = {
                 .idle = a5xx_idle,
                 .irq = a5xx_irq,
                 .destroy = a5xx_destroy,
+#ifdef CONFIG_DEBUG_FS
                 .show = a5xx_show,
+#endif
         },
         .get_timestamp = a5xx_get_timestamp,
  };
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c

index c9bd1e6225f4f96e3b8a79a101508f244de17baf..5ae65426b4e5593c5f88b19627c10b403995ed73 100644 (file)
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -418,18 +418,27 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
         return 0;
  }
  
-void adreno_gpu_cleanup(struct adreno_gpu *gpu)
+void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu)
  {
-       if (gpu->memptrs_bo) {
-               if (gpu->memptrs)
-                       msm_gem_put_vaddr(gpu->memptrs_bo);
+       struct msm_gpu *gpu = &adreno_gpu->base;
+
+       if (adreno_gpu->memptrs_bo) {
+               if (adreno_gpu->memptrs)
+                       msm_gem_put_vaddr(adreno_gpu->memptrs_bo);
+
+               if (adreno_gpu->memptrs_iova)
+                       msm_gem_put_iova(adreno_gpu->memptrs_bo, gpu->id);
+
+               drm_gem_object_unreference_unlocked(adreno_gpu->memptrs_bo);
+       }
+       release_firmware(adreno_gpu->pm4);
+       release_firmware(adreno_gpu->pfp);
  
-               if (gpu->memptrs_iova)
-                       msm_gem_put_iova(gpu->memptrs_bo, gpu->base.id);
+       msm_gpu_cleanup(gpu);
  
-               drm_gem_object_unreference_unlocked(gpu->memptrs_bo);
+       if (gpu->aspace) {
+               gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu,
+                       iommu_ports, ARRAY_SIZE(iommu_ports));
+               msm_gem_address_space_destroy(gpu->aspace);
         }
-       release_firmware(gpu->pm4);
-       release_firmware(gpu->pfp);
-       msm_gpu_cleanup(&gpu->base);
  }
diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c

index 921270ea6059debadab1a785ed19ca79b064811c..a879ffa534b4d845007d3f974f9ce9c7df69dff0 100644 (file)
--- a/drivers/gpu/drm/msm/dsi/dsi_manager.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c
@@ -171,7 +171,7 @@ dsi_mgr_phy_enable(int id,
                         }
                 }
         } else {
-               msm_dsi_host_reset_phy(mdsi->host);
+               msm_dsi_host_reset_phy(msm_dsi->host);
                 ret = enable_phy(msm_dsi, src_pll_id, &shared_timings[id]);
                 if (ret)
                         return ret;
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_audio.c b/drivers/gpu/drm/msm/hdmi/hdmi_audio.c

index a54d3bb5baad9c01047a582ad3cad98a9efb7f66..8177e8511afd8c6827b1a731688e159da652a8d2 100644 (file)
--- a/drivers/gpu/drm/msm/hdmi/hdmi_audio.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_audio.c
@@ -18,13 +18,6 @@
  #include <linux/hdmi.h>
  #include "hdmi.h"
  
-
-/* Supported HDMI Audio channels */
-#define MSM_HDMI_AUDIO_CHANNEL_2               0
-#define MSM_HDMI_AUDIO_CHANNEL_4               1
-#define MSM_HDMI_AUDIO_CHANNEL_6               2
-#define MSM_HDMI_AUDIO_CHANNEL_8               3
-
  /* maps MSM_HDMI_AUDIO_CHANNEL_n consts used by audio driver to # of channels: */
  static int nchannels[] = { 2, 4, 6, 8 };
  
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h

index 611da7a660c9426ed6c165ae90c8257f7a777360..238901987e00b0c1bc74979647a6cfadcfee084a 100644 (file)
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h
@@ -18,7 +18,8 @@
  #ifndef __MDP5_PIPE_H__
  #define __MDP5_PIPE_H__
  
-#define SSPP_MAX       (SSPP_RGB3 + 1) /* TODO: Add SSPP_MAX in mdp5.xml.h */
+/* TODO: Add SSPP_MAX in mdp5.xml.h */
+#define SSPP_MAX       (SSPP_CURSOR1 + 1)
  
  /* represents a hw pipe, which is dynamically assigned to a plane */
  struct mdp5_hw_pipe {
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c

index 59811f29607de60f2a22d53f9ec969e62bb39d98..68e509b3b9e4d08730e3901f46a397519c33e77c 100644 (file)
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -812,6 +812,12 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev,
  
         size = PAGE_ALIGN(size);
  
+       /* Disallow zero sized objects as they make the underlying
+        * infrastructure grumpy
+        */
+       if (size == 0)
+               return ERR_PTR(-EINVAL);
+
         ret = msm_gem_new_impl(dev, size, flags, NULL, &obj);
         if (ret)
                 goto fail;
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c

index 99e05aacbee181f4341625af8abbc91ad284f381..af5b6ba4095b06f6a24069f62e178618b547209e 100644 (file)
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -706,9 +706,6 @@ void msm_gpu_cleanup(struct msm_gpu *gpu)
                 msm_ringbuffer_destroy(gpu->rb);
         }
  
-       if (gpu->aspace)
-               msm_gem_address_space_destroy(gpu->aspace);
-
         if (gpu->fctx)
                 msm_fence_context_free(gpu->fctx);
  }
diff --git a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c

index e10a4eda4078ba9b211ca49530da2b659865c8ee..1144e0c9e8942ddb6226a7f409ed07f25a0eb96a 100644 (file)
--- a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c
+++ b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c
@@ -65,13 +65,11 @@ static int mxsfb_set_pixel_fmt(struct mxsfb_drm_private *mxsfb)
         switch (format) {
         case DRM_FORMAT_RGB565:
                 dev_dbg(drm->dev, "Setting up RGB565 mode\n");
-               ctrl |= CTRL_SET_BUS_WIDTH(STMLCDIF_16BIT);
                 ctrl |= CTRL_SET_WORD_LENGTH(0);
                 ctrl1 |= CTRL1_SET_BYTE_PACKAGING(0xf);
                 break;
         case DRM_FORMAT_XRGB8888:
                 dev_dbg(drm->dev, "Setting up XRGB8888 mode\n");
-               ctrl |= CTRL_SET_BUS_WIDTH(STMLCDIF_24BIT);
                 ctrl |= CTRL_SET_WORD_LENGTH(3);
                 /* Do not use packed pixels = one pixel per word instead. */
                 ctrl1 |= CTRL1_SET_BYTE_PACKAGING(0x7);
@@ -87,6 +85,36 @@ static int mxsfb_set_pixel_fmt(struct mxsfb_drm_private *mxsfb)
         return 0;
  }
  
+static void mxsfb_set_bus_fmt(struct mxsfb_drm_private *mxsfb)
+{
+       struct drm_crtc *crtc = &mxsfb->pipe.crtc;
+       struct drm_device *drm = crtc->dev;
+       u32 bus_format = MEDIA_BUS_FMT_RGB888_1X24;
+       u32 reg;
+
+       reg = readl(mxsfb->base + LCDC_CTRL);
+
+       if (mxsfb->connector.display_info.num_bus_formats)
+               bus_format = mxsfb->connector.display_info.bus_formats[0];
+
+       reg &= ~CTRL_BUS_WIDTH_MASK;
+       switch (bus_format) {
+       case MEDIA_BUS_FMT_RGB565_1X16:
+               reg |= CTRL_SET_BUS_WIDTH(STMLCDIF_16BIT);
+               break;
+       case MEDIA_BUS_FMT_RGB666_1X18:
+               reg |= CTRL_SET_BUS_WIDTH(STMLCDIF_18BIT);
+               break;
+       case MEDIA_BUS_FMT_RGB888_1X24:
+               reg |= CTRL_SET_BUS_WIDTH(STMLCDIF_24BIT);
+               break;
+       default:
+               dev_err(drm->dev, "Unknown media bus format %d\n", bus_format);
+               break;
+       }
+       writel(reg, mxsfb->base + LCDC_CTRL);
+}
+
  static void mxsfb_enable_controller(struct mxsfb_drm_private *mxsfb)
  {
         u32 reg;
@@ -168,13 +196,22 @@ static void mxsfb_crtc_mode_set_nofb(struct mxsfb_drm_private *mxsfb)
                 vdctrl0 |= VDCTRL0_HSYNC_ACT_HIGH;
         if (m->flags & DRM_MODE_FLAG_PVSYNC)
                 vdctrl0 |= VDCTRL0_VSYNC_ACT_HIGH;
-       if (bus_flags & DRM_BUS_FLAG_DE_HIGH)
+       /* Make sure Data Enable is high active by default */
+       if (!(bus_flags & DRM_BUS_FLAG_DE_LOW))
                 vdctrl0 |= VDCTRL0_ENABLE_ACT_HIGH;
-       if (bus_flags & DRM_BUS_FLAG_PIXDATA_NEGEDGE)
+       /*
+        * DRM_BUS_FLAG_PIXDATA_ defines are controller centric,
+        * controllers VDCTRL0_DOTCLK is display centric.
+        * Drive on positive edge       -> display samples on falling edge
+        * DRM_BUS_FLAG_PIXDATA_POSEDGE -> VDCTRL0_DOTCLK_ACT_FALLING
+        */
+       if (bus_flags & DRM_BUS_FLAG_PIXDATA_POSEDGE)
                 vdctrl0 |= VDCTRL0_DOTCLK_ACT_FALLING;
  
         writel(vdctrl0, mxsfb->base + LCDC_VDCTRL0);
  
+       mxsfb_set_bus_fmt(mxsfb);
+
         /* Frame length in lines. */
         writel(m->crtc_vtotal, mxsfb->base + LCDC_VDCTRL1);
  
@@ -184,8 +221,8 @@ static void mxsfb_crtc_mode_set_nofb(struct mxsfb_drm_private *mxsfb)
                VDCTRL2_SET_HSYNC_PERIOD(m->crtc_htotal),
                mxsfb->base + LCDC_VDCTRL2);
  
-       writel(SET_HOR_WAIT_CNT(m->crtc_hblank_end - m->crtc_hsync_end) |
-              SET_VERT_WAIT_CNT(m->crtc_vblank_end - m->crtc_vsync_end),
+       writel(SET_HOR_WAIT_CNT(m->crtc_htotal - m->crtc_hsync_start) |
+              SET_VERT_WAIT_CNT(m->crtc_vtotal - m->crtc_vsync_start),
                mxsfb->base + LCDC_VDCTRL3);
  
         writel(SET_DOTCLK_H_VALID_DATA_CNT(m->hdisplay),
diff --git a/drivers/gpu/drm/mxsfb/mxsfb_drv.c b/drivers/gpu/drm/mxsfb/mxsfb_drv.c

index cdfbe0284635decf262db79c0b048291cc87c026..ff6d6a6f842e5a61def5c23264b10eef06fa1e75 100644 (file)
--- a/drivers/gpu/drm/mxsfb/mxsfb_drv.c
+++ b/drivers/gpu/drm/mxsfb/mxsfb_drv.c
@@ -102,14 +102,18 @@ static void mxsfb_pipe_enable(struct drm_simple_display_pipe *pipe,
  {
         struct mxsfb_drm_private *mxsfb = drm_pipe_to_mxsfb_drm_private(pipe);
  
+       drm_panel_prepare(mxsfb->panel);
         mxsfb_crtc_enable(mxsfb);
+       drm_panel_enable(mxsfb->panel);
  }
  
  static void mxsfb_pipe_disable(struct drm_simple_display_pipe *pipe)
  {
         struct mxsfb_drm_private *mxsfb = drm_pipe_to_mxsfb_drm_private(pipe);
  
+       drm_panel_disable(mxsfb->panel);
         mxsfb_crtc_disable(mxsfb);
+       drm_panel_unprepare(mxsfb->panel);
  }
  
  static void mxsfb_pipe_update(struct drm_simple_display_pipe *pipe,
diff --git a/drivers/gpu/drm/mxsfb/mxsfb_out.c b/drivers/gpu/drm/mxsfb/mxsfb_out.c

index fa8d173994071d64f0b772ec435e1e5038916a7f..b8e81422d4e26f9f8c3f692955753bb046aebcca 100644 (file)
--- a/drivers/gpu/drm/mxsfb/mxsfb_out.c
+++ b/drivers/gpu/drm/mxsfb/mxsfb_out.c
@@ -112,6 +112,7 @@ static int mxsfb_attach_endpoint(struct drm_device *drm,
  
  int mxsfb_create_output(struct drm_device *drm)
  {
+       struct mxsfb_drm_private *mxsfb = drm->dev_private;
         struct device_node *ep_np = NULL;
         struct of_endpoint ep;
         int ret;
@@ -127,5 +128,8 @@ int mxsfb_create_output(struct drm_device *drm)
                 }
         }
  
+       if (!mxsfb->panel)
+               return -EPROBE_DEFER;
+
         return 0;
  }
diff --git a/drivers/gpu/drm/mxsfb/mxsfb_regs.h b/drivers/gpu/drm/mxsfb/mxsfb_regs.h

index 31d62cd0d3d78abe465559992ea75f8dd988ff22..66a6ba9ec533ffe96c04915937bd6b8fba68a1b0 100644 (file)
--- a/drivers/gpu/drm/mxsfb/mxsfb_regs.h
+++ b/drivers/gpu/drm/mxsfb/mxsfb_regs.h
@@ -44,6 +44,7 @@
  #define CTRL_DATA_SELECT               (1 << 16)
  #define CTRL_SET_BUS_WIDTH(x)          (((x) & 0x3) << 10)
  #define CTRL_GET_BUS_WIDTH(x)          (((x) >> 10) & 0x3)
+#define CTRL_BUS_WIDTH_MASK            (0x3 << 10)
  #define CTRL_SET_WORD_LENGTH(x)                (((x) & 0x3) << 8)
  #define CTRL_GET_WORD_LENGTH(x)                (((x) >> 8) & 0x3)
  #define CTRL_MASTER                    (1 << 5)
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c

index 0b4440ffbeae21a3d33e67ed1c727e00cf3884b3..a9182d5e60117321a1a354b9b97288d11cc13cf4 100644 (file)
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -995,7 +995,6 @@ nv50_wndw_atomic_destroy_state(struct drm_plane *plane,
  {
         struct nv50_wndw_atom *asyw = nv50_wndw_atom(state);
         __drm_atomic_helper_plane_destroy_state(&asyw->state);
-       dma_fence_put(asyw->state.fence);
         kfree(asyw);
  }
  
@@ -1007,7 +1006,6 @@ nv50_wndw_atomic_duplicate_state(struct drm_plane *plane)
         if (!(asyw = kmalloc(sizeof(*asyw), GFP_KERNEL)))
                 return NULL;
         __drm_atomic_helper_plane_duplicate_state(plane, &asyw->state);
-       asyw->state.fence = NULL;
         asyw->interval = 1;
         asyw->sema = armw->sema;
         asyw->ntfy = armw->ntfy;
@@ -2036,6 +2034,7 @@ nv50_head_atomic_check_mode(struct nv50_head *head, struct nv50_head_atom *asyh)
         u32 vbackp  = (mode->vtotal - mode->vsync_end) * vscan / ilace;
         u32 hfrontp =  mode->hsync_start - mode->hdisplay;
         u32 vfrontp = (mode->vsync_start - mode->vdisplay) * vscan / ilace;
+       u32 blankus;
         struct nv50_head_mode *m = &asyh->mode;
  
         m->h.active = mode->htotal;
@@ -2049,9 +2048,10 @@ nv50_head_atomic_check_mode(struct nv50_head *head, struct nv50_head_atom *asyh)
         m->v.blanks = m->v.active - vfrontp - 1;
  
         /*XXX: Safe underestimate, even "0" works */
-       m->v.blankus = (m->v.active - mode->vdisplay - 2) * m->h.active;
-       m->v.blankus *= 1000;
-       m->v.blankus /= mode->clock;
+       blankus = (m->v.active - mode->vdisplay - 2) * m->h.active;
+       blankus *= 1000;
+       blankus /= mode->clock;
+       m->v.blankus = blankus;
  
         if (mode->flags & DRM_MODE_FLAG_INTERLACE) {
                 m->v.blank2e =  m->v.active + m->v.synce + vbackp;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c

index 273562dd6bbdb1a138c2e73417fa0e071716749b..3b86a73995672220b5e71b0f39898129782abf28 100644 (file)
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -714,7 +714,7 @@ nv4a_chipset = {
         .i2c = nv04_i2c_new,
         .imem = nv40_instmem_new,
         .mc = nv44_mc_new,
-       .mmu = nv44_mmu_new,
+       .mmu = nv04_mmu_new,
         .pci = nv40_pci_new,
         .therm = nv40_therm_new,
         .timer = nv41_timer_new,
@@ -2271,6 +2271,35 @@ nv136_chipset = {
         .fifo = gp100_fifo_new,
  };
  
+static const struct nvkm_device_chip
+nv137_chipset = {
+       .name = "GP107",
+       .bar = gf100_bar_new,
+       .bios = nvkm_bios_new,
+       .bus = gf100_bus_new,
+       .devinit = gm200_devinit_new,
+       .fb = gp102_fb_new,
+       .fuse = gm107_fuse_new,
+       .gpio = gk104_gpio_new,
+       .i2c = gm200_i2c_new,
+       .ibus = gm200_ibus_new,
+       .imem = nv50_instmem_new,
+       .ltc = gp100_ltc_new,
+       .mc = gp100_mc_new,
+       .mmu = gf100_mmu_new,
+       .pci = gp100_pci_new,
+       .pmu = gp102_pmu_new,
+       .timer = gk20a_timer_new,
+       .top = gk104_top_new,
+       .ce[0] = gp102_ce_new,
+       .ce[1] = gp102_ce_new,
+       .ce[2] = gp102_ce_new,
+       .ce[3] = gp102_ce_new,
+       .disp = gp102_disp_new,
+       .dma = gf119_dma_new,
+       .fifo = gp100_fifo_new,
+};
+
  static int
  nvkm_device_event_ctor(struct nvkm_object *object, void *data, u32 size,
                        struct nvkm_notify *notify)
@@ -2708,6 +2737,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
                 case 0x132: device->chip = &nv132_chipset; break;
                 case 0x134: device->chip = &nv134_chipset; break;
                 case 0x136: device->chip = &nv136_chipset; break;
+               case 0x137: device->chip = &nv137_chipset; break;
                 default:
                         nvdev_error(device, "unknown chipset (%08x)\n", boot0);
                         goto done;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c

index 003ac915eaadad44c5b0ad2c1bbab9d18377596e..8a8895246d26a23be65eb6fb29cbb001f71f728b 100644 (file)
--- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c
@@ -198,7 +198,7 @@ nv31_mpeg_intr(struct nvkm_engine *engine)
                 }
  
                 if (type == 0x00000010) {
-                       if (!nv31_mpeg_mthd(mpeg, mthd, data))
+                       if (nv31_mpeg_mthd(mpeg, mthd, data))
                                 show &= ~0x01000000;
                 }
         }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c

index e536f37e24b0c75fbf7882eccae37c5b1d0aeae8..c3cf02ed468ea1ccf6212a5e9c42dbee00fa612e 100644 (file)
--- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c
@@ -172,7 +172,7 @@ nv44_mpeg_intr(struct nvkm_engine *engine)
                 }
  
                 if (type == 0x00000010) {
-                       if (!nv44_mpeg_mthd(subdev->device, mthd, data))
+                       if (nv44_mpeg_mthd(subdev->device, mthd, data))
                                 show &= ~0x01000000;
                 }
         }
diff --git a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c

index af267c35d813cc7548f060ef5771d6cd4232b4c9..ee5883f59be5a1992c6bdd20c751285079f5d3c1 100644 (file)
--- a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
@@ -147,9 +147,6 @@ static int omap_gem_dmabuf_mmap(struct dma_buf *buffer,
         struct drm_gem_object *obj = buffer->priv;
         int ret = 0;
  
-       if (WARN_ON(!obj->filp))
-               return -EINVAL;
-
         ret = drm_gem_mmap_obj(obj, omap_gem_mmap_size(obj), vma);
         if (ret < 0)
                 return ret;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c

index 684f1703aa5c7189bc55ca1633aeafd78a97c55a..aaa3e80fecb425164a3e5bcf799081aa7d2cf049 100644 (file)
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -213,8 +213,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
                         rbo->placement.num_busy_placement = 0;
                         for (i = 0; i < rbo->placement.num_placement; i++) {
                                 if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) {
-                                       if (rbo->placements[0].fpfn < fpfn)
-                                               rbo->placements[0].fpfn = fpfn;
+                                       if (rbo->placements[i].fpfn < fpfn)
+                                               rbo->placements[i].fpfn = fpfn;
                                 } else {
                                         rbo->placement.busy_placement =
                                                 &rbo->placements[i];
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c

index d12b8978142f69b52e19a159f9a628080f7a18e5..c7af9fdd20c729184654222dfc8b4bc6a870c066 100644 (file)
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -2984,6 +2984,16 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
                     (rdev->pdev->device == 0x6667)) {
                         max_sclk = 75000;
                 }
+       } else if (rdev->family == CHIP_OLAND) {
+               if ((rdev->pdev->revision == 0xC7) ||
+                   (rdev->pdev->revision == 0x80) ||
+                   (rdev->pdev->revision == 0x81) ||
+                   (rdev->pdev->revision == 0x83) ||
+                   (rdev->pdev->revision == 0x87) ||
+                   (rdev->pdev->device == 0x6604) ||
+                   (rdev->pdev->device == 0x6605)) {
+                       max_sclk = 75000;
+               }
         }
  
         if (rps->vce_active) {
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c

index b5bfbe50bd87167a7b28f528a74a034b1f68a738..b0ff304ce3dc4a9ac18f359a73498efede18cbd4 100644 (file)
--- a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
@@ -32,6 +32,10 @@ void rcar_du_vsp_enable(struct rcar_du_crtc *crtc)
  {
         const struct drm_display_mode *mode = &crtc->crtc.state->adjusted_mode;
         struct rcar_du_device *rcdu = crtc->group->dev;
+       struct vsp1_du_lif_config cfg = {
+               .width = mode->hdisplay,
+               .height = mode->vdisplay,
+       };
         struct rcar_du_plane_state state = {
                 .state = {
                         .crtc = &crtc->crtc,
@@ -66,12 +70,12 @@ void rcar_du_vsp_enable(struct rcar_du_crtc *crtc)
          */
         crtc->group->need_restart = true;
  
-       vsp1_du_setup_lif(crtc->vsp->vsp, mode->hdisplay, mode->vdisplay);
+       vsp1_du_setup_lif(crtc->vsp->vsp, &cfg);
  }
  
  void rcar_du_vsp_disable(struct rcar_du_crtc *crtc)
  {
-       vsp1_du_setup_lif(crtc->vsp->vsp, 0, 0);
+       vsp1_du_setup_lif(crtc->vsp->vsp, NULL);
  }
  
  void rcar_du_vsp_atomic_begin(struct rcar_du_crtc *crtc)
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c

index f80bf9385e412db766424bf00cacd76458a64a8e..d745e8b50fb86458d09e400f5c35c9d257f4de2b 100644 (file)
--- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
@@ -464,6 +464,7 @@ static void tilcdc_crtc_enable(struct drm_crtc *crtc)
  {
         struct drm_device *dev = crtc->dev;
         struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc);
+       unsigned long flags;
  
         WARN_ON(!drm_modeset_is_locked(&crtc->mutex));
         mutex_lock(&tilcdc_crtc->enable_lock);
@@ -484,7 +485,17 @@ static void tilcdc_crtc_enable(struct drm_crtc *crtc)
         tilcdc_write_mask(dev, LCDC_RASTER_CTRL_REG,
                           LCDC_PALETTE_LOAD_MODE(DATA_ONLY),
                           LCDC_PALETTE_LOAD_MODE_MASK);
+
+       /* There is no real chance for a race here as the time stamp
+        * is taken before the raster DMA is started. The spin-lock is
+        * taken to have a memory barrier after taking the time-stamp
+        * and to avoid a context switch between taking the stamp and
+        * enabling the raster.
+        */
+       spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags);
+       tilcdc_crtc->last_vblank = ktime_get();
         tilcdc_set(dev, LCDC_RASTER_CTRL_REG, LCDC_RASTER_ENABLE);
+       spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags);
  
         drm_crtc_vblank_on(crtc);
  
@@ -539,7 +550,6 @@ static void tilcdc_crtc_off(struct drm_crtc *crtc, bool shutdown)
         }
  
         drm_flip_work_commit(&tilcdc_crtc->unref_work, priv->wq);
-       tilcdc_crtc->last_vblank = 0;
  
         tilcdc_crtc->enabled = false;
         mutex_unlock(&tilcdc_crtc->enable_lock);
@@ -602,7 +612,6 @@ int tilcdc_crtc_update_fb(struct drm_crtc *crtc,
  {
         struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc);
         struct drm_device *dev = crtc->dev;
-       unsigned long flags;
  
         WARN_ON(!drm_modeset_is_locked(&crtc->mutex));
  
@@ -614,28 +623,30 @@ int tilcdc_crtc_update_fb(struct drm_crtc *crtc,
         drm_framebuffer_reference(fb);
  
         crtc->primary->fb = fb;
+       tilcdc_crtc->event = event;
  
-       spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags);
+       mutex_lock(&tilcdc_crtc->enable_lock);
  
-       if (crtc->hwmode.vrefresh && ktime_to_ns(tilcdc_crtc->last_vblank)) {
+       if (tilcdc_crtc->enabled) {
+               unsigned long flags;
                 ktime_t next_vblank;
                 s64 tdiff;
  
-               next_vblank = ktime_add_us(tilcdc_crtc->last_vblank,
-                       1000000 / crtc->hwmode.vrefresh);
+               spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags);
  
+               next_vblank = ktime_add_us(tilcdc_crtc->last_vblank,
+                                          1000000 / crtc->hwmode.vrefresh);
                 tdiff = ktime_to_us(ktime_sub(next_vblank, ktime_get()));
  
                 if (tdiff < TILCDC_VBLANK_SAFETY_THRESHOLD_US)
                         tilcdc_crtc->next_fb = fb;
-       }
-
-       if (tilcdc_crtc->next_fb != fb)
-               set_scanout(crtc, fb);
+               else
+                       set_scanout(crtc, fb);
  
-       tilcdc_crtc->event = event;
+               spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags);
+       }
  
-       spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags);
+       mutex_unlock(&tilcdc_crtc->enable_lock);
  
         return 0;
  }
@@ -1036,5 +1047,5 @@ int tilcdc_crtc_create(struct drm_device *dev)
  
  fail:
         tilcdc_crtc_destroy(crtc);
-       return -ENOMEM;
+       return ret;
  }
diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c

index fdb451e3ec01184a4642e6facd6ddf8e5f0cad47..26a7ad0f478978205be87f9409f309021b610884 100644 (file)
--- a/drivers/gpu/drm/ttm/ttm_object.c
+++ b/drivers/gpu/drm/ttm/ttm_object.c
@@ -179,7 +179,7 @@ int ttm_base_object_init(struct ttm_object_file *tfile,
         if (unlikely(ret != 0))
                 goto out_err0;
  
-       ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL);
+       ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL, false);
         if (unlikely(ret != 0))
                 goto out_err1;
  
@@ -318,7 +318,8 @@ EXPORT_SYMBOL(ttm_ref_object_exists);
  
  int ttm_ref_object_add(struct ttm_object_file *tfile,
                        struct ttm_base_object *base,
-                      enum ttm_ref_type ref_type, bool *existed)
+                      enum ttm_ref_type ref_type, bool *existed,
+                      bool require_existed)
  {
         struct drm_open_hash *ht = &tfile->ref_hash[ref_type];
         struct ttm_ref_object *ref;
@@ -345,6 +346,9 @@ int ttm_ref_object_add(struct ttm_object_file *tfile,
                 }
  
                 rcu_read_unlock();
+               if (require_existed)
+                       return -EPERM;
+
                 ret = ttm_mem_global_alloc(mem_glob, sizeof(*ref),
                                            false, false);
                 if (unlikely(ret != 0))
@@ -449,10 +453,10 @@ void ttm_object_file_release(struct ttm_object_file **p_tfile)
                 ttm_ref_object_release(&ref->kref);
         }
  
+       spin_unlock(&tfile->lock);
         for (i = 0; i < TTM_REF_NUM; ++i)
                 drm_ht_remove(&tfile->ref_hash[i]);
  
-       spin_unlock(&tfile->lock);
         ttm_object_file_unref(&tfile);
  }
  EXPORT_SYMBOL(ttm_object_file_release);
@@ -529,9 +533,7 @@ void ttm_object_device_release(struct ttm_object_device **p_tdev)
  
         *p_tdev = NULL;
  
-       spin_lock(&tdev->object_lock);
         drm_ht_remove(&tdev->object_hash);
-       spin_unlock(&tdev->object_lock);
  
         kfree(tdev);
  }
@@ -635,7 +637,7 @@ int ttm_prime_fd_to_handle(struct ttm_object_file *tfile,
         prime = (struct ttm_prime_object *) dma_buf->priv;
         base = &prime->base;
         *handle = base->hash.key;
-       ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL);
+       ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL, false);
  
         dma_buf_put(dma_buf);
  
diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c

index 917dcb978c2ccc921c1dfcf90329973ff3044b59..0c87b1ac6b68f0d41cfd01851a14b9a092455f4f 100644 (file)
--- a/drivers/gpu/drm/udl/udl_transfer.c
+++ b/drivers/gpu/drm/udl/udl_transfer.c
@@ -14,6 +14,7 @@
  #include <linux/slab.h>
  #include <linux/fb.h>
  #include <linux/prefetch.h>
+#include <asm/unaligned.h>
  
  #include <drm/drmP.h>
  #include "udl_drv.h"
@@ -163,7 +164,7 @@ static void udl_compress_hline16(
                         const u8 *const start = pixel;
                         const uint16_t repeating_pixel_val16 = pixel_val16;
  
-                       *(uint16_t *)cmd = cpu_to_be16(pixel_val16);
+                       put_unaligned_be16(pixel_val16, cmd);
  
                         cmd += 2;
                         pixel += bpp;
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c

index 0c06844af4455d6319e83c183fbbd594e61cd678..9fcf05ca492b0c065a323f69be32ed3cc93b630f 100644 (file)
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -846,6 +846,17 @@ static void vc4_crtc_destroy_state(struct drm_crtc *crtc,
         drm_atomic_helper_crtc_destroy_state(crtc, state);
  }
  
+static void
+vc4_crtc_reset(struct drm_crtc *crtc)
+{
+       if (crtc->state)
+               __drm_atomic_helper_crtc_destroy_state(crtc->state);
+
+       crtc->state = kzalloc(sizeof(struct vc4_crtc_state), GFP_KERNEL);
+       if (crtc->state)
+               crtc->state->crtc = crtc;
+}
+
  static const struct drm_crtc_funcs vc4_crtc_funcs = {
         .set_config = drm_atomic_helper_set_config,
         .destroy = vc4_crtc_destroy,
@@ -853,7 +864,7 @@ static const struct drm_crtc_funcs vc4_crtc_funcs = {
         .set_property = NULL,
         .cursor_set = NULL, /* handled by drm_mode_cursor_universal */
         .cursor_move = NULL, /* handled by drm_mode_cursor_universal */
-       .reset = drm_atomic_helper_crtc_reset,
+       .reset = vc4_crtc_reset,
         .atomic_duplicate_state = vc4_crtc_duplicate_state,
         .atomic_destroy_state = vc4_crtc_destroy_state,
         .gamma_set = vc4_crtc_gamma_set,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c

index 6541dd8b82dc0747433403b64795843a29e0544c..6b2708b4eafe84c832d41a10f75c2be9e65fc0b9 100644 (file)
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -538,7 +538,7 @@ int vmw_fence_create(struct vmw_fence_manager *fman,
                      struct vmw_fence_obj **p_fence)
  {
         struct vmw_fence_obj *fence;
-       int ret;
+       int ret;
  
         fence = kzalloc(sizeof(*fence), GFP_KERNEL);
         if (unlikely(fence == NULL))
@@ -701,6 +701,41 @@ void vmw_fence_fifo_up(struct vmw_fence_manager *fman)
  }
  
  
+/**
+ * vmw_fence_obj_lookup - Look up a user-space fence object
+ *
+ * @tfile: A struct ttm_object_file identifying the caller.
+ * @handle: A handle identifying the fence object.
+ * @return: A struct vmw_user_fence base ttm object on success or
+ * an error pointer on failure.
+ *
+ * The fence object is looked up and type-checked. The caller needs
+ * to have opened the fence object first, but since that happens on
+ * creation and fence objects aren't shareable, that's not an
+ * issue currently.
+ */
+static struct ttm_base_object *
+vmw_fence_obj_lookup(struct ttm_object_file *tfile, u32 handle)
+{
+       struct ttm_base_object *base = ttm_base_object_lookup(tfile, handle);
+
+       if (!base) {
+               pr_err("Invalid fence object handle 0x%08lx.\n",
+                      (unsigned long)handle);
+               return ERR_PTR(-EINVAL);
+       }
+
+       if (base->refcount_release != vmw_user_fence_base_release) {
+               pr_err("Invalid fence object handle 0x%08lx.\n",
+                      (unsigned long)handle);
+               ttm_base_object_unref(&base);
+               return ERR_PTR(-EINVAL);
+       }
+
+       return base;
+}
+
+
  int vmw_fence_obj_wait_ioctl(struct drm_device *dev, void *data,
                              struct drm_file *file_priv)
  {
@@ -726,13 +761,9 @@ int vmw_fence_obj_wait_ioctl(struct drm_device *dev, void *data,
                 arg->kernel_cookie = jiffies + wait_timeout;
         }
  
-       base = ttm_base_object_lookup(tfile, arg->handle);
-       if (unlikely(base == NULL)) {
-               printk(KERN_ERR "Wait invalid fence object handle "
-                      "0x%08lx.\n",
-                      (unsigned long)arg->handle);
-               return -EINVAL;
-       }
+       base = vmw_fence_obj_lookup(tfile, arg->handle);
+       if (IS_ERR(base))
+               return PTR_ERR(base);
  
         fence = &(container_of(base, struct vmw_user_fence, base)->fence);
  
@@ -771,13 +802,9 @@ int vmw_fence_obj_signaled_ioctl(struct drm_device *dev, void *data,
         struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
         struct vmw_private *dev_priv = vmw_priv(dev);
  
-       base = ttm_base_object_lookup(tfile, arg->handle);
-       if (unlikely(base == NULL)) {
-               printk(KERN_ERR "Fence signaled invalid fence object handle "
-                      "0x%08lx.\n",
-                      (unsigned long)arg->handle);
-               return -EINVAL;
-       }
+       base = vmw_fence_obj_lookup(tfile, arg->handle);
+       if (IS_ERR(base))
+               return PTR_ERR(base);
  
         fence = &(container_of(base, struct vmw_user_fence, base)->fence);
         fman = fman_from_fence(fence);
@@ -1024,6 +1051,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data,
                 (struct drm_vmw_fence_event_arg *) data;
         struct vmw_fence_obj *fence = NULL;
         struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv);
+       struct ttm_object_file *tfile = vmw_fp->tfile;
         struct drm_vmw_fence_rep __user *user_fence_rep =
                 (struct drm_vmw_fence_rep __user *)(unsigned long)
                 arg->fence_rep;
@@ -1037,24 +1065,18 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data,
          */
         if (arg->handle) {
                 struct ttm_base_object *base =
-                       ttm_base_object_lookup_for_ref(dev_priv->tdev,
-                                                      arg->handle);
-
-               if (unlikely(base == NULL)) {
-                       DRM_ERROR("Fence event invalid fence object handle "
-                                 "0x%08lx.\n",
-                                 (unsigned long)arg->handle);
-                       return -EINVAL;
-               }
+                       vmw_fence_obj_lookup(tfile, arg->handle);
+
+               if (IS_ERR(base))
+                       return PTR_ERR(base);
+
                 fence = &(container_of(base, struct vmw_user_fence,
                                        base)->fence);
                 (void) vmw_fence_obj_reference(fence);
  
                 if (user_fence_rep != NULL) {
-                       bool existed;
-
                         ret = ttm_ref_object_add(vmw_fp->tfile, base,
-                                                TTM_REF_USAGE, &existed);
+                                                TTM_REF_USAGE, NULL, false);
                         if (unlikely(ret != 0)) {
                                 DRM_ERROR("Failed to reference a fence "
                                           "object.\n");
@@ -1097,8 +1119,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data,
         return 0;
  out_no_create:
         if (user_fence_rep != NULL)
-               ttm_ref_object_base_unref(vmw_fpriv(file_priv)->tfile,
-                                         handle, TTM_REF_USAGE);
+               ttm_ref_object_base_unref(tfile, handle, TTM_REF_USAGE);
  out_no_ref_obj:
         vmw_fence_obj_unreference(&fence);
         return ret;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c

index b8c6a03c8c54df15def2d359ee5253f213f1816e..5ec24fd801cd2bb1b32b66540b91edcca2ef3b6d 100644 (file)
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -114,8 +114,6 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data,
                 param->value = dev_priv->has_dx;
                 break;
         default:
-               DRM_ERROR("Illegal vmwgfx get param request: %d\n",
-                         param->param);
                 return -EINVAL;
         }
  
@@ -186,7 +184,7 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data,
         bool gb_objects = !!(dev_priv->capabilities & SVGA_CAP_GBOBJECTS);
         struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv);
  
-       if (unlikely(arg->pad64 != 0)) {
+       if (unlikely(arg->pad64 != 0 || arg->max_size == 0)) {
                 DRM_ERROR("Illegal GET_3D_CAP argument.\n");
                 return -EINVAL;
         }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c

index 65b3f0369636710eda49086f72e250478dfe288d..bf23153d4f55515b54c66f5a250b8f04aa7f5051 100644 (file)
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -589,7 +589,7 @@ static int vmw_user_dmabuf_synccpu_grab(struct vmw_user_dma_buffer *user_bo,
                 return ret;
  
         ret = ttm_ref_object_add(tfile, &user_bo->prime.base,
-                                TTM_REF_SYNCCPU_WRITE, &existed);
+                                TTM_REF_SYNCCPU_WRITE, &existed, false);
         if (ret != 0 || existed)
                 ttm_bo_synccpu_write_release(&user_bo->dma.base);
  
@@ -773,7 +773,7 @@ int vmw_user_dmabuf_reference(struct ttm_object_file *tfile,
  
         *handle = user_bo->prime.base.hash.key;
         return ttm_ref_object_add(tfile, &user_bo->prime.base,
-                                 TTM_REF_USAGE, NULL);
+                                 TTM_REF_USAGE, NULL, false);
  }
  
  /*
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c

index b445ce9b9757861ecc1ece1071f1c8c3a02a166f..05fa092c942beedb209b25777971a0c196d19d85 100644 (file)
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
@@ -713,11 +713,14 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
                         128;
  
         num_sizes = 0;
-       for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i)
+       for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) {
+               if (req->mip_levels[i] > DRM_VMW_MAX_MIP_LEVELS)
+                       return -EINVAL;
                 num_sizes += req->mip_levels[i];
+       }
  
-       if (num_sizes > DRM_VMW_MAX_SURFACE_FACES *
-           DRM_VMW_MAX_MIP_LEVELS)
+       if (num_sizes > DRM_VMW_MAX_SURFACE_FACES * DRM_VMW_MAX_MIP_LEVELS ||
+           num_sizes == 0)
                 return -EINVAL;
  
         size = vmw_user_surface_size + 128 +
@@ -891,17 +894,16 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv,
         uint32_t handle;
         struct ttm_base_object *base;
         int ret;
+       bool require_exist = false;
  
         if (handle_type == DRM_VMW_HANDLE_PRIME) {
                 ret = ttm_prime_fd_to_handle(tfile, u_handle, &handle);
                 if (unlikely(ret != 0))
                         return ret;
         } else {
-               if (unlikely(drm_is_render_client(file_priv))) {
-                       DRM_ERROR("Render client refused legacy "
-                                 "surface reference.\n");
-                       return -EACCES;
-               }
+               if (unlikely(drm_is_render_client(file_priv)))
+                       require_exist = true;
+
                 if (ACCESS_ONCE(vmw_fpriv(file_priv)->locked_master)) {
                         DRM_ERROR("Locked master refused legacy "
                                   "surface reference.\n");
@@ -929,17 +931,14 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv,
  
                 /*
                  * Make sure the surface creator has the same
-                * authenticating master.
+                * authenticating master, or is already registered with us.
                  */
                 if (drm_is_primary_client(file_priv) &&
-                   user_srf->master != file_priv->master) {
-                       DRM_ERROR("Trying to reference surface outside of"
-                                 " master domain.\n");
-                       ret = -EACCES;
-                       goto out_bad_resource;
-               }
+                   user_srf->master != file_priv->master)
+                       require_exist = true;
  
-               ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL);
+               ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL,
+                                        require_exist);
                 if (unlikely(ret != 0)) {
                         DRM_ERROR("Could not add a reference to a surface.\n");
                         goto out_bad_resource;
diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig

index 1aeb80e5242461830f1d4075f0fb59bcb6ddc898..8c54cb8f5d6d1013ec1f4a39e8f88fcfe3333758 100644 (file)
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -175,11 +175,11 @@ config HID_CHERRY
         Support for Cherry Cymotion keyboard.
  
  config HID_CHICONY
-       tristate "Chicony Tactical pad"
+       tristate "Chicony devices"
         depends on HID
         default !EXPERT
         ---help---
-       Support for Chicony Tactical pad.
+       Support for Chicony Tactical pad and special keys on Chicony keyboards.
  
  config HID_CORSAIR
         tristate "Corsair devices"
@@ -190,6 +190,7 @@ config HID_CORSAIR
  
         Supported devices:
         - Vengeance K90
+       - Scimitar PRO RGB
  
  config HID_PRODIKEYS
         tristate "Prodikeys PC-MIDI Keyboard support"
diff --git a/drivers/hid/hid-chicony.c b/drivers/hid/hid-chicony.c

index bc3cec199feefdf437d0c0141c5ff6f73aa10308..f04ed9aabc3f9fea0baf5b074acd83b6d07527c6 100644 (file)
--- a/drivers/hid/hid-chicony.c
+++ b/drivers/hid/hid-chicony.c
@@ -86,6 +86,7 @@ static const struct hid_device_id ch_devices[] = {
         { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) },
         { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) },
         { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) },
         { }
  };
  MODULE_DEVICE_TABLE(hid, ch_devices);
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c

index e9e87d337446918f672771551f41041755d83d22..d162f0dc76e3f44e2134eafa5509137ddf0cc411 100644 (file)
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -819,8 +819,7 @@ static int hid_scan_report(struct hid_device *hid)
                 hid->group = HID_GROUP_WACOM;
                 break;
         case USB_VENDOR_ID_SYNAPTICS:
-               if (hid->group == HID_GROUP_GENERIC ||
-                   hid->group == HID_GROUP_MULTITOUCH_WIN_8)
+               if (hid->group == HID_GROUP_GENERIC)
                         if ((parser->scan_flags & HID_SCAN_FLAG_VENDOR_SPECIFIC)
                             && (parser->scan_flags & HID_SCAN_FLAG_GD_POINTER))
                                 /*
@@ -1870,6 +1869,7 @@ static const struct hid_device_id hid_have_special_driver[] = {
         { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) },
         { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) },
         { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) },
         { HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) },
         { HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_CP2112) },
         { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1) },
@@ -1910,6 +1910,7 @@ static const struct hid_device_id hid_have_special_driver[] = {
         { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) },
         { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A0C2) },
         { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) },
         { HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) },
         { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ION, USB_DEVICE_ID_ICADE) },
         { HID_USB_DEVICE(USB_VENDOR_ID_KENSINGTON, USB_DEVICE_ID_KS_SLIMBLADE) },
@@ -2094,6 +2095,7 @@ static const struct hid_device_id hid_have_special_driver[] = {
         { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UGEE_TABLET_45) },
         { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_DRAWIMAGE_G3) },
         { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_GP0610) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_TABLET_EX07S) },
         { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SMARTJOY_PLUS) },
         { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SUPER_JOY_BOX_3) },
         { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_DUAL_USB_JOYPAD) },
@@ -2110,6 +2112,7 @@ static const struct hid_device_id hid_have_special_driver[] = {
         { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET) },
         { HID_USB_DEVICE(USB_VENDOR_ID_X_TENSIONS, USB_DEVICE_ID_SPEEDLINK_VAD_CEZANNE) },
         { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_XIN_MO_DUAL_ARCADE) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_THT_2P_ARCADE) },
         { HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0005) },
         { HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0030) },
         { HID_USB_DEVICE(USB_VENDOR_ID_ZYDACRON, USB_DEVICE_ID_ZYDACRON_REMOTE_CONTROL) },
diff --git a/drivers/hid/hid-corsair.c b/drivers/hid/hid-corsair.c

index c0303f61c26a94f1998f6883d42a0fc8cb41f432..9ba5d98a118042a52dc40b895c3b2e8df67c0b39 100644 (file)
--- a/drivers/hid/hid-corsair.c
+++ b/drivers/hid/hid-corsair.c
@@ -3,8 +3,10 @@
   *
   * Supported devices:
   *  - Vengeance K90 Keyboard
+ *  - Scimitar PRO RGB Gaming Mouse
   *
   * Copyright (c) 2015 Clement Vuchener
+ * Copyright (c) 2017 Oscar Campos
   */
  
  /*
@@ -670,10 +672,51 @@ static int corsair_input_mapping(struct hid_device *dev,
         return 0;
  }
  
+/*
+ * The report descriptor of the Corsair Scimitar RGB Pro gaming mouse is
+ * not parseable because it defines two consecutive Logical Minimum items
+ * for the Usage Page (Consumer) at rdesc bytes 75 and 77. Byte 77 is 0x16
+ * but should obviously be 0x26, a 16-bit Logical Maximum. This prevents
+ * proper parsing of the report descriptor because the Logical Minimum
+ * ends up larger than the Logical Maximum.
+ *
+ * This driver fixes the report descriptor for:
+ * - USB ID 1b1c:1b3e, sold as Scimitar RGB Pro Gaming mouse
+ */
+
+static __u8 *corsair_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+        unsigned int *rsize)
+{
+       struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
+
+       if (intf->cur_altsetting->desc.bInterfaceNumber == 1) {
+               /*
+                * The Corsair Scimitar RGB Pro report descriptor is broken
+                * and defines two different Logical Minimum items for the
+                * Consumer Application. Byte 77 should be 0x26, introducing
+                * a 16-bit integer Logical Maximum, but it is 0x16
+                * (Logical Minimum) instead.
+                */
+               switch (hdev->product) {
+               case USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB:
+                       if (*rsize >= 172 && rdesc[75] == 0x15 && rdesc[77] == 0x16
+                       && rdesc[78] == 0xff && rdesc[79] == 0x0f) {
+                               hid_info(hdev, "Fixing up report descriptor\n");
+                               rdesc[77] = 0x26;
+                       }
+                       break;
+               }
+
+       }
+       return rdesc;
+}
+
  static const struct hid_device_id corsair_devices[] = {
         { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90),
                 .driver_data = CORSAIR_USE_K90_MACRO |
                                CORSAIR_USE_K90_BACKLIGHT },
+       { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR,
+            USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) },
         {}
  };
  
@@ -686,10 +729,14 @@ static struct hid_driver corsair_driver = {
         .event = corsair_event,
         .remove = corsair_remove,
         .input_mapping = corsair_input_mapping,
+       .report_fixup = corsair_mouse_report_fixup,
  };
  
  module_hid_driver(corsair_driver);
  
  MODULE_LICENSE("GPL");
+/* Original K90 driver author */
  MODULE_AUTHOR("Clement Vuchener");
+/* Scimitar PRO RGB driver author */
+MODULE_AUTHOR("Oscar Campos");
  MODULE_DESCRIPTION("HID driver for Corsair devices");
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h

index 86c95d30ac801f2895caef97a575955289d352a4..b26c030926c188aff2dd054cd2ae9e9910837887 100644 (file)
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -278,6 +278,9 @@
  #define USB_DEVICE_ID_CORSAIR_K70RGB    0x1b13
  #define USB_DEVICE_ID_CORSAIR_STRAFE    0x1b15
  #define USB_DEVICE_ID_CORSAIR_K65RGB    0x1b17
+#define USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE  0x1b38
+#define USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE  0x1b39
+#define USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB  0x1b3e
  
  #define USB_VENDOR_ID_CREATIVELABS     0x041e
  #define USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51     0x322c
@@ -557,6 +560,7 @@
  
  #define USB_VENDOR_ID_JESS             0x0c45
  #define USB_DEVICE_ID_JESS_YUREX       0x1010
+#define USB_DEVICE_ID_JESS_ZEN_AIO_KBD 0x5112
  
  #define USB_VENDOR_ID_JESS2            0x0f30
  #define USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD 0x0111
@@ -1024,6 +1028,9 @@
  #define USB_DEVICE_ID_UGEE_TABLET_45           0x0045
  #define USB_DEVICE_ID_YIYNOVA_TABLET           0x004d
  
+#define USB_VENDOR_ID_UGEE             0x28bd
+#define USB_DEVICE_ID_UGEE_TABLET_EX07S                0x0071
+
  #define USB_VENDOR_ID_UNITEC   0x227d
  #define USB_DEVICE_ID_UNITEC_USB_TOUCH_0709    0x0709
  #define USB_DEVICE_ID_UNITEC_USB_TOUCH_0A19    0x0a19
@@ -1078,6 +1085,7 @@
  
  #define USB_VENDOR_ID_XIN_MO                   0x16c0
  #define USB_DEVICE_ID_XIN_MO_DUAL_ARCADE       0x05e1
+#define USB_DEVICE_ID_THT_2P_ARCADE            0x75e1
  
  #define USB_VENDOR_ID_XIROKU           0x1477
  #define USB_DEVICE_ID_XIROKU_SPX       0x1006
diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c

index f405b07d03816506215bd19fe3c878393370484a..740996f9bdd49dde3d26659f68d2addbff291c60 100644 (file)
--- a/drivers/hid/hid-sony.c
+++ b/drivers/hid/hid-sony.c
@@ -2632,6 +2632,8 @@ err_stop:
                 sony_leds_remove(sc);
         if (sc->quirks & SONY_BATTERY_SUPPORT)
                 sony_battery_remove(sc);
+       if (sc->touchpad)
+               sony_unregister_touchpad(sc);
         sony_cancel_work_sync(sc);
         kfree(sc->output_report_dmabuf);
         sony_remove_dev_list(sc);
diff --git a/drivers/hid/hid-uclogic.c b/drivers/hid/hid-uclogic.c

index 1509d7287ff3e60e79c845214e47e901fae650f8..e3e6e5c893cc05e0c934c8c9f505de9fdd06e26a 100644 (file)
--- a/drivers/hid/hid-uclogic.c
+++ b/drivers/hid/hid-uclogic.c
@@ -977,6 +977,7 @@ static int uclogic_probe(struct hid_device *hdev,
                 }
                 break;
         case USB_DEVICE_ID_UGTIZER_TABLET_GP0610:
+       case USB_DEVICE_ID_UGEE_TABLET_EX07S:
                 /* If this is the pen interface */
                 if (intf->cur_altsetting->desc.bInterfaceNumber == 1) {
                         rc = uclogic_tablet_enable(hdev);
@@ -1069,6 +1070,7 @@ static const struct hid_device_id uclogic_devices[] = {
         { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UGEE_TABLET_45) },
         { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_DRAWIMAGE_G3) },
         { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_GP0610) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_TABLET_EX07S) },
         { }
  };
  MODULE_DEVICE_TABLE(hid, uclogic_devices);
diff --git a/drivers/hid/hid-xinmo.c b/drivers/hid/hid-xinmo.c

index 7df5227a7e61d6ff79acd62cb009e29ffa4b79d8..9ad7731d2e10dad45268b55fbcf9dfc025570682 100644 (file)
--- a/drivers/hid/hid-xinmo.c
+++ b/drivers/hid/hid-xinmo.c
@@ -46,6 +46,7 @@ static int xinmo_event(struct hid_device *hdev, struct hid_field *field,
  
  static const struct hid_device_id xinmo_devices[] = {
         { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_XIN_MO_DUAL_ARCADE) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_THT_2P_ARCADE) },
         { }
  };
  
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c

index d6847a664446529831395a962aacab7cb49ab8f5..a69a3c88ab29f5fd736ad18a358fc185f63be99c 100644 (file)
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -80,6 +80,9 @@ static const struct hid_blacklist {
         { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB, HID_QUIRK_NO_INIT_REPORTS },
         { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB, HID_QUIRK_NO_INIT_REPORTS },
         { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_STRAFE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
+       { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
+       { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
+       { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
         { USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51, HID_QUIRK_NOGET },
         { USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET },
         { USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT },
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c

index be8f7e2a026f428f51200e395792dd715a612eeb..e2666ef84dc1ca479646fd1211f45341704755a8 100644 (file)
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -2165,6 +2165,14 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless)
  
         wacom_update_name(wacom, wireless ? " (WL)" : "");
  
+       /* pen only Bamboo neither support touch nor pad */
+       if ((features->type == BAMBOO_PEN) &&
+           ((features->device_type & WACOM_DEVICETYPE_TOUCH) ||
+           (features->device_type & WACOM_DEVICETYPE_PAD))) {
+               error = -ENODEV;
+               goto fail;
+       }
+
         error = wacom_add_shared_data(hdev);
         if (error)
                 goto fail;
@@ -2208,14 +2216,8 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless)
         /* touch only Bamboo doesn't support pen */
         if ((features->type == BAMBOO_TOUCH) &&
             (features->device_type & WACOM_DEVICETYPE_PEN)) {
-               error = -ENODEV;
-               goto fail_quirks;
-       }
-
-       /* pen only Bamboo neither support touch nor pad */
-       if ((features->type == BAMBOO_PEN) &&
-           ((features->device_type & WACOM_DEVICETYPE_TOUCH) ||
-           (features->device_type & WACOM_DEVICETYPE_PAD))) {
+               cancel_delayed_work_sync(&wacom->init_work);
+               _wacom_query_tablet_data(wacom);
                 error = -ENODEV;
                 goto fail_quirks;
         }
@@ -2579,7 +2581,9 @@ static void wacom_remove(struct hid_device *hdev)
  
         /* make sure we don't trigger the LEDs */
         wacom_led_groups_release(wacom);
-       wacom_release_resources(wacom);
+
+       if (wacom->wacom_wac.features.type != REMOTE)
+               wacom_release_resources(wacom);
  
         hid_set_drvdata(hdev, NULL);
  }
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c

index 4aa3de9f1163b30eb64b4304f285a4167aef0cf0..c68ac65db7ffec361169c326ede5bbf263a00b1b 100644 (file)
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -1959,8 +1959,10 @@ static void wacom_wac_pen_usage_mapping(struct hid_device *hdev,
                 input_set_capability(input, EV_KEY, BTN_TOOL_BRUSH);
                 input_set_capability(input, EV_KEY, BTN_TOOL_PENCIL);
                 input_set_capability(input, EV_KEY, BTN_TOOL_AIRBRUSH);
-               input_set_capability(input, EV_KEY, BTN_TOOL_MOUSE);
-               input_set_capability(input, EV_KEY, BTN_TOOL_LENS);
+               if (!(features->device_type & WACOM_DEVICETYPE_DIRECT)) {
+                       input_set_capability(input, EV_KEY, BTN_TOOL_MOUSE);
+                       input_set_capability(input, EV_KEY, BTN_TOOL_LENS);
+               }
                 break;
         case WACOM_HID_WD_FINGERWHEEL:
                 wacom_map_usage(input, usage, field, EV_ABS, ABS_WHEEL, 0);
@@ -2004,7 +2006,7 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field
                 return;
         case HID_DG_TOOLSERIALNUMBER:
                 wacom_wac->serial[0] = (wacom_wac->serial[0] & ~0xFFFFFFFFULL);
-               wacom_wac->serial[0] |= value;
+               wacom_wac->serial[0] |= (__u32)value;
                 return;
         case WACOM_HID_WD_SENSE:
                 wacom_wac->hid_data.sense_state = value;
@@ -2174,6 +2176,16 @@ static void wacom_wac_finger_usage_mapping(struct hid_device *hdev,
                 wacom_wac->hid_data.cc_index = field->index;
                 wacom_wac->hid_data.cc_value_index = usage->usage_index;
                 break;
+       case HID_DG_CONTACTID:
+               if ((field->logical_maximum - field->logical_minimum) < touch_max) {
+                       /*
+                        * The HID descriptor for G11 sensors leaves logical
+                        * maximum set to '1' despite it being a multitouch
+                        * device. Override to a sensible number.
+                        */
+                       field->logical_maximum = 255;
+               }
+               break;
         }
  }
  
@@ -4197,10 +4209,10 @@ static const struct wacom_features wacom_features_0x343 =
           WACOM_DTU_OFFSET, WACOM_DTU_OFFSET };
  static const struct wacom_features wacom_features_0x360 =
         { "Wacom Intuos Pro M", 44800, 29600, 8191, 63,
-         INTUOSP2_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 9, .touch_max = 10 };
+         INTUOSP2_BT, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 9, .touch_max = 10 };
  static const struct wacom_features wacom_features_0x361 =
         { "Wacom Intuos Pro L", 62200, 43200, 8191, 63,
-         INTUOSP2_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 9, .touch_max = 10 };
+         INTUOSP2_BT, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 9, .touch_max = 10 };
  
  static const struct wacom_features wacom_features_HID_ANY_ID =
         { "Wacom HID", .type = HID_GENERIC, .oVid = HID_ANY_ID, .oPid = HID_ANY_ID };
diff --git a/drivers/hsi/clients/ssi_protocol.c b/drivers/hsi/clients/ssi_protocol.c

index 7ef819680acda13077f773c5fe65f12f7349cbb4..26b05106f0d3152e74bc0377dc9006203fc2c7db 100644 (file)
--- a/drivers/hsi/clients/ssi_protocol.c
+++ b/drivers/hsi/clients/ssi_protocol.c
@@ -980,7 +980,7 @@ static int ssip_pn_xmit(struct sk_buff *skb, struct net_device *dev)
                 goto drop;
         /* Pad to 32-bits - FIXME: Revisit*/
         if ((skb->len & 3) && skb_pad(skb, 4 - (skb->len & 3)))
-               goto drop;
+               goto inc_dropped;
  
         /*
          * Modem sends Phonet messages over SSI with its own endianess...
@@ -1032,8 +1032,9 @@ static int ssip_pn_xmit(struct sk_buff *skb, struct net_device *dev)
  drop2:
         hsi_free_msg(msg);
  drop:
-       dev->stats.tx_dropped++;
         dev_kfree_skb(skb);
+inc_dropped:
+       dev->stats.tx_dropped++;
  
         return 0;
  }
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c

index 81a80c82f1bd2b6a55df393a3df55376d709adfd..321b8833fa6f35a5a1941d319816ed2ffc83a6d2 100644 (file)
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -502,12 +502,15 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle)
  
         wait_for_completion(&info->waitevent);
  
-       if (channel->rescind) {
-               ret = -ENODEV;
-               goto post_msg_err;
-       }
-
  post_msg_err:
+       /*
+        * If the channel has been rescinded;
+        * we will be awakened by the rescind
+        * handler; set the error code to zero so we don't leak memory.
+        */
+       if (channel->rescind)
+               ret = 0;
+
         spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
         list_del(&info->msglistentry);
         spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
@@ -530,20 +533,18 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
         int ret;
  
         /*
-        * vmbus_on_event(), running in the tasklet, can race
+        * vmbus_on_event(), running in the per-channel tasklet, can race
          * with vmbus_close_internal() in the case of SMP guest, e.g., when
          * the former is accessing channel->inbound.ring_buffer, the latter
-        * could be freeing the ring_buffer pages.
-        *
-        * To resolve the race, we can serialize them by disabling the
-        * tasklet when the latter is running here.
+        * could be freeing the ring_buffer pages, so here we must stop it
+        * first.
          */
-       hv_event_tasklet_disable(channel);
+       tasklet_disable(&channel->callback_event);
  
         /*
          * In case a device driver's probe() fails (e.g.,
          * util_probe() -> vmbus_open() returns -ENOMEM) and the device is
-        * rescinded later (e.g., we dynamically disble an Integrated Service
+        * rescinded later (e.g., we dynamically disable an Integrated Service
          * in Hyper-V Manager), the driver's remove() invokes vmbus_close():
          * here we should skip most of the below cleanup work.
          */
@@ -605,8 +606,6 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
                 get_order(channel->ringbuffer_pagecount * PAGE_SIZE));
  
  out:
-       hv_event_tasklet_enable(channel);
-
         return ret;
  }
  
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c

index f33465d78a025680d7515978fdbcc432d51a8062..fbcb063523082840b61bf23d7852f36da4682647 100644 (file)
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -350,7 +350,8 @@ static struct vmbus_channel *alloc_channel(void)
  static void free_channel(struct vmbus_channel *channel)
  {
         tasklet_kill(&channel->callback_event);
-       kfree(channel);
+
+       kfree_rcu(channel, rcu);
  }
  
  static void percpu_channel_enq(void *arg)
@@ -359,14 +360,14 @@ static void percpu_channel_enq(void *arg)
         struct hv_per_cpu_context *hv_cpu
                 = this_cpu_ptr(hv_context.cpu_context);
  
-       list_add_tail(&channel->percpu_list, &hv_cpu->chan_list);
+       list_add_tail_rcu(&channel->percpu_list, &hv_cpu->chan_list);
  }
  
  static void percpu_channel_deq(void *arg)
  {
         struct vmbus_channel *channel = arg;
  
-       list_del(&channel->percpu_list);
+       list_del_rcu(&channel->percpu_list);
  }
  
  
@@ -381,19 +382,6 @@ static void vmbus_release_relid(u32 relid)
                        true);
  }
  
-void hv_event_tasklet_disable(struct vmbus_channel *channel)
-{
-       tasklet_disable(&channel->callback_event);
-}
-
-void hv_event_tasklet_enable(struct vmbus_channel *channel)
-{
-       tasklet_enable(&channel->callback_event);
-
-       /* In case there is any pending event */
-       tasklet_schedule(&channel->callback_event);
-}
-
  void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
  {
         unsigned long flags;
@@ -402,7 +390,6 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
         BUG_ON(!channel->rescind);
         BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
  
-       hv_event_tasklet_disable(channel);
         if (channel->target_cpu != get_cpu()) {
                 put_cpu();
                 smp_call_function_single(channel->target_cpu,
@@ -411,7 +398,6 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
                 percpu_channel_deq(channel);
                 put_cpu();
         }
-       hv_event_tasklet_enable(channel);
  
         if (channel->primary_channel == NULL) {
                 list_del(&channel->listentry);
@@ -505,7 +491,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
  
         init_vp_index(newchannel, dev_type);
  
-       hv_event_tasklet_disable(newchannel);
         if (newchannel->target_cpu != get_cpu()) {
                 put_cpu();
                 smp_call_function_single(newchannel->target_cpu,
@@ -515,7 +500,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
                 percpu_channel_enq(newchannel);
                 put_cpu();
         }
-       hv_event_tasklet_enable(newchannel);
  
         /*
          * This state is used to indicate a successful open
@@ -565,7 +549,6 @@ err_deq_chan:
         list_del(&newchannel->listentry);
         mutex_unlock(&vmbus_connection.channel_mutex);
  
-       hv_event_tasklet_disable(newchannel);
         if (newchannel->target_cpu != get_cpu()) {
                 put_cpu();
                 smp_call_function_single(newchannel->target_cpu,
@@ -574,7 +557,6 @@ err_deq_chan:
                 percpu_channel_deq(newchannel);
                 put_cpu();
         }
-       hv_event_tasklet_enable(newchannel);
  
         vmbus_release_relid(newchannel->offermsg.child_relid);
  
@@ -814,6 +796,7 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
         /* Allocate the channel object and save this offer. */
         newchannel = alloc_channel();
         if (!newchannel) {
+               vmbus_release_relid(offer->child_relid);
                 pr_err("Unable to allocate channel object\n");
                 return;
         }
diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c

index 9aee6014339dffc8627d173446e7bfe0dc06247c..a5596a642ed06b31beb5b2e19347e10b9abb0ed2 100644 (file)
--- a/drivers/hv/hv_fcopy.c
+++ b/drivers/hv/hv_fcopy.c
@@ -71,7 +71,6 @@ static DECLARE_WORK(fcopy_send_work, fcopy_send_data);
  static const char fcopy_devname[] = "vmbus/hv_fcopy";
  static u8 *recv_buffer;
  static struct hvutil_transport *hvt;
-static struct completion release_event;
  /*
   * This state maintains the version number registered by the daemon.
   */
@@ -331,7 +330,6 @@ static void fcopy_on_reset(void)
  
         if (cancel_delayed_work_sync(&fcopy_timeout_work))
                 fcopy_respond_to_host(HV_E_FAIL);
-       complete(&release_event);
  }
  
  int hv_fcopy_init(struct hv_util_service *srv)
@@ -339,7 +337,6 @@ int hv_fcopy_init(struct hv_util_service *srv)
         recv_buffer = srv->recv_buffer;
         fcopy_transaction.recv_channel = srv->channel;
  
-       init_completion(&release_event);
         /*
          * When this driver loads, the user level daemon that
          * processes the host requests may not yet be running.
@@ -361,5 +358,4 @@ void hv_fcopy_deinit(void)
         fcopy_transaction.state = HVUTIL_DEVICE_DYING;
         cancel_delayed_work_sync(&fcopy_timeout_work);
         hvutil_transport_destroy(hvt);
-       wait_for_completion(&release_event);
  }
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c

index de263712e247c2b8c47f450f3d1b7eb387f1e073..a1adfe2cfb34244e8f86a48cc1143c0f439be709 100644 (file)
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -101,7 +101,6 @@ static DECLARE_WORK(kvp_sendkey_work, kvp_send_key);
  static const char kvp_devname[] = "vmbus/hv_kvp";
  static u8 *recv_buffer;
  static struct hvutil_transport *hvt;
-static struct completion release_event;
  /*
   * Register the kernel component with the user-level daemon.
   * As part of this registration, pass the LIC version number.
@@ -714,7 +713,6 @@ static void kvp_on_reset(void)
         if (cancel_delayed_work_sync(&kvp_timeout_work))
                 kvp_respond_to_host(NULL, HV_E_FAIL);
         kvp_transaction.state = HVUTIL_DEVICE_INIT;
-       complete(&release_event);
  }
  
  int
@@ -723,7 +721,6 @@ hv_kvp_init(struct hv_util_service *srv)
         recv_buffer = srv->recv_buffer;
         kvp_transaction.recv_channel = srv->channel;
  
-       init_completion(&release_event);
         /*
          * When this driver loads, the user level daemon that
          * processes the host requests may not yet be running.
@@ -747,5 +744,4 @@ void hv_kvp_deinit(void)
         cancel_delayed_work_sync(&kvp_timeout_work);
         cancel_work_sync(&kvp_sendkey_work);
         hvutil_transport_destroy(hvt);
-       wait_for_completion(&release_event);
  }
diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c

index bcc03f0748d61cd5a8c9f31447b77fd8b2e7e13b..e659d1b94a5794f11272086374e45b5fc99a3df6 100644 (file)
--- a/drivers/hv/hv_snapshot.c
+++ b/drivers/hv/hv_snapshot.c
@@ -79,7 +79,6 @@ static int dm_reg_value;
  static const char vss_devname[] = "vmbus/hv_vss";
  static __u8 *recv_buffer;
  static struct hvutil_transport *hvt;
-static struct completion release_event;
  
  static void vss_timeout_func(struct work_struct *dummy);
  static void vss_handle_request(struct work_struct *dummy);
@@ -361,13 +360,11 @@ static void vss_on_reset(void)
         if (cancel_delayed_work_sync(&vss_timeout_work))
                 vss_respond_to_host(HV_E_FAIL);
         vss_transaction.state = HVUTIL_DEVICE_INIT;
-       complete(&release_event);
  }
  
  int
  hv_vss_init(struct hv_util_service *srv)
  {
-       init_completion(&release_event);
         if (vmbus_proto_version < VERSION_WIN8_1) {
                 pr_warn("Integration service 'Backup (volume snapshot)'"
                         " not supported on this host version.\n");
@@ -400,5 +397,4 @@ void hv_vss_deinit(void)
         cancel_delayed_work_sync(&vss_timeout_work);
         cancel_work_sync(&vss_handle_request_work);
         hvutil_transport_destroy(hvt);
-       wait_for_completion(&release_event);
  }
diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c

index 3042eaa13062bbdfbdba853521b7632d35e619e8..186b10083c552b1e026cc056bd131fe8548b2f03 100644 (file)
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -590,6 +590,8 @@ static int hv_timesync_init(struct hv_util_service *srv)
         if (!hyperv_cs)
                 return -ENODEV;
  
+       spin_lock_init(&host_ts.lock);
+
         INIT_WORK(&wrk.work, hv_set_host_time);
  
         /*
diff --git a/drivers/hv/hv_utils_transport.c b/drivers/hv/hv_utils_transport.c

index c235a95152671104cd7042a451b16ccd0d0d06ab..4402a71e23f7f7277c6d1561c4a8db5d46489b8d 100644 (file)
--- a/drivers/hv/hv_utils_transport.c
+++ b/drivers/hv/hv_utils_transport.c
@@ -182,10 +182,11 @@ static int hvt_op_release(struct inode *inode, struct file *file)
          * connects back.
          */
         hvt_reset(hvt);
-       mutex_unlock(&hvt->lock);
  
         if (mode_old == HVUTIL_TRANSPORT_DESTROY)
-               hvt_transport_free(hvt);
+               complete(&hvt->release);
+
+       mutex_unlock(&hvt->lock);
  
         return 0;
  }
@@ -304,6 +305,7 @@ struct hvutil_transport *hvutil_transport_init(const char *name,
  
         init_waitqueue_head(&hvt->outmsg_q);
         mutex_init(&hvt->lock);
+       init_completion(&hvt->release);
  
         spin_lock(&hvt_list_lock);
         list_add(&hvt->list, &hvt_list);
@@ -351,6 +353,8 @@ void hvutil_transport_destroy(struct hvutil_transport *hvt)
         if (hvt->cn_id.idx > 0 && hvt->cn_id.val > 0)
                 cn_del_callback(&hvt->cn_id);
  
-       if (mode_old != HVUTIL_TRANSPORT_CHARDEV)
-               hvt_transport_free(hvt);
+       if (mode_old == HVUTIL_TRANSPORT_CHARDEV)
+               wait_for_completion(&hvt->release);
+
+       hvt_transport_free(hvt);
  }
diff --git a/drivers/hv/hv_utils_transport.h b/drivers/hv/hv_utils_transport.h

index d98f5225c3e693468fdb27ccc03de8d17e2a8576..79afb626e1668981beaa740b4e7d618f22ab194d 100644 (file)
--- a/drivers/hv/hv_utils_transport.h
+++ b/drivers/hv/hv_utils_transport.h
@@ -41,6 +41,7 @@ struct hvutil_transport {
         int outmsg_len;                     /* its length */
         wait_queue_head_t outmsg_q;         /* poll/read wait queue */
         struct mutex lock;                  /* protects struct members */
+       struct completion release;          /* synchronize with fd release */
  };
  
  struct hvutil_transport *hvutil_transport_init(const char *name,
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c

index da6b59ba594039e6d490e8cc9903f41f99032900..8370b9dc6037c17959abc3d9c45c9b233bd0004f 100644 (file)
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -939,8 +939,10 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
                 if (relid == 0)
                         continue;
  
+               rcu_read_lock();
+
                 /* Find channel based on relid */
-               list_for_each_entry(channel, &hv_cpu->chan_list, percpu_list) {
+               list_for_each_entry_rcu(channel, &hv_cpu->chan_list, percpu_list) {
                         if (channel->offermsg.child_relid != relid)
                                 continue;
  
@@ -956,6 +958,8 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
                                 tasklet_schedule(&channel->callback_event);
                         }
                 }
+
+               rcu_read_unlock();
         }
  }
  
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig

index 0649d53f3d16eeaef7516f830a1a5046a7f3d107..22d5eafd681541374817314a1a2db164bb235ce7 100644 (file)
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -341,6 +341,15 @@ config SENSORS_ASB100
           This driver can also be built as a module.  If so, the module
           will be called asb100.
  
+config SENSORS_ASPEED
+       tristate "ASPEED AST2400/AST2500 PWM and Fan tach driver"
+       help
+         This driver provides support for ASPEED AST2400/AST2500 PWM
+         and Fan Tacho controllers.
+
+         This driver can also be built as a module. If so, the module
+         will be called aspeed_pwm_tacho.
+
  config SENSORS_ATXP1
         tristate "Attansic ATXP1 VID controller"
         depends on I2C
@@ -1643,16 +1652,6 @@ config SENSORS_TMP421
           This driver can also be built as a module.  If so, the module
           will be called tmp421.
  
-config SENSORS_TWL4030_MADC
-       tristate "Texas Instruments TWL4030 MADC Hwmon"
-       depends on TWL4030_MADC
-       help
-       If you say yes here you get hwmon support for triton
-       TWL4030-MADC.
-
-       This driver can also be built as a module. If so it will be called
-       twl4030-madc-hwmon.
-
  config SENSORS_VEXPRESS
         tristate "Versatile Express"
         depends on VEXPRESS_CONFIG
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile

index 5509edf6186acef711284bbde2cbebbc0afdac8f..d4641a9f16c1978514b8cc09e25bbd81f467d75e 100644 (file)
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_SENSORS_ADT7475) += adt7475.o
  obj-$(CONFIG_SENSORS_APPLESMC) += applesmc.o
  obj-$(CONFIG_SENSORS_ARM_SCPI) += scpi-hwmon.o
  obj-$(CONFIG_SENSORS_ASC7621)  += asc7621.o
+obj-$(CONFIG_SENSORS_ASPEED)   += aspeed-pwm-tacho.o
  obj-$(CONFIG_SENSORS_ATXP1)    += atxp1.o
  obj-$(CONFIG_SENSORS_CORETEMP) += coretemp.o
  obj-$(CONFIG_SENSORS_DA9052_ADC)+= da9052-hwmon.o
@@ -157,7 +158,6 @@ obj-$(CONFIG_SENSORS_TMP103)        += tmp103.o
  obj-$(CONFIG_SENSORS_TMP108)   += tmp108.o
  obj-$(CONFIG_SENSORS_TMP401)   += tmp401.o
  obj-$(CONFIG_SENSORS_TMP421)   += tmp421.o
-obj-$(CONFIG_SENSORS_TWL4030_MADC)+= twl4030-madc-hwmon.o
  obj-$(CONFIG_SENSORS_VEXPRESS) += vexpress-hwmon.o
  obj-$(CONFIG_SENSORS_VIA_CPUTEMP)+= via-cputemp.o
  obj-$(CONFIG_SENSORS_VIA686A)  += via686a.o
diff --git a/drivers/hwmon/ad7414.c b/drivers/hwmon/ad7414.c

index 763490acc0df55aba814b1121ff83055d0b12e3d..cec227f138742931193c03b07b441fcf985db488 100644 (file)
--- a/drivers/hwmon/ad7414.c
+++ b/drivers/hwmon/ad7414.c
@@ -217,9 +217,16 @@ static const struct i2c_device_id ad7414_id[] = {
  };
  MODULE_DEVICE_TABLE(i2c, ad7414_id);
  
+static const struct of_device_id ad7414_of_match[] = {
+       { .compatible = "ad,ad7414" },
+       { },
+};
+MODULE_DEVICE_TABLE(of, ad7414_of_match);
+
  static struct i2c_driver ad7414_driver = {
         .driver = {
                 .name   = "ad7414",
+               .of_match_table = of_match_ptr(ad7414_of_match),
         },
         .probe  = ad7414_probe,
         .id_table = ad7414_id,
diff --git a/drivers/hwmon/adc128d818.c b/drivers/hwmon/adc128d818.c

index bbe3a5c5b3f51f338661f60f0f6c76390f4489e8..a557b46dbe8ed4ef3af89015b7347469a9d0a101 100644 (file)
--- a/drivers/hwmon/adc128d818.c
+++ b/drivers/hwmon/adc128d818.c
@@ -546,10 +546,17 @@ static const struct i2c_device_id adc128_id[] = {
  };
  MODULE_DEVICE_TABLE(i2c, adc128_id);
  
+static const struct of_device_id adc128_of_match[] = {
+       { .compatible = "ti,adc128d818" },
+       { },
+};
+MODULE_DEVICE_TABLE(of, adc128_of_match);
+
  static struct i2c_driver adc128_driver = {
         .class          = I2C_CLASS_HWMON,
         .driver = {
                 .name   = "adc128d818",
+               .of_match_table = of_match_ptr(adc128_of_match),
         },
         .probe          = adc128_probe,
         .remove         = adc128_remove,
diff --git a/drivers/hwmon/ads1015.c b/drivers/hwmon/ads1015.c

index 2b3105c8aed399f8ce9cd3df03d6a265378131fc..5140c27d16dd033b92bb52bbce08fb896db8c080 100644 (file)
--- a/drivers/hwmon/ads1015.c
+++ b/drivers/hwmon/ads1015.c
@@ -31,6 +31,7 @@
  #include <linux/hwmon-sysfs.h>
  #include <linux/err.h>
  #include <linux/mutex.h>
+#include <linux/of_device.h>
  #include <linux/of.h>
  
  #include <linux/i2c/ads1015.h>
@@ -268,7 +269,12 @@ static int ads1015_probe(struct i2c_client *client,
                             GFP_KERNEL);
         if (!data)
                 return -ENOMEM;
-       data->id = id->driver_data;
+
+       if (client->dev.of_node)
+               data->id = (enum ads1015_chips)
+                       of_device_get_match_data(&client->dev);
+       else
+               data->id = id->driver_data;
         i2c_set_clientdata(client, data);
         mutex_init(&data->update_lock);
  
@@ -303,9 +309,23 @@ static const struct i2c_device_id ads1015_id[] = {
  };
  MODULE_DEVICE_TABLE(i2c, ads1015_id);
  
+static const struct of_device_id ads1015_of_match[] = {
+       {
+               .compatible = "ti,ads1015",
+               .data = (void *)ads1015
+       },
+       {
+               .compatible = "ti,ads1115",
+               .data = (void *)ads1115
+       },
+       { },
+};
+MODULE_DEVICE_TABLE(of, ads1015_of_match);
+
  static struct i2c_driver ads1015_driver = {
         .driver = {
                 .name = "ads1015",
+               .of_match_table = of_match_ptr(ads1015_of_match),
         },
         .probe = ads1015_probe,
         .remove = ads1015_remove,
diff --git a/drivers/hwmon/ads7828.c b/drivers/hwmon/ads7828.c

index ee396ff167d9d1c2070eeb254475d53dbe4a3f0a..898607bf682b73637a8818e6959c419a5e1acafa 100644 (file)
--- a/drivers/hwmon/ads7828.c
+++ b/drivers/hwmon/ads7828.c
@@ -31,9 +31,11 @@
  #include <linux/i2c.h>
  #include <linux/init.h>
  #include <linux/module.h>
+#include <linux/of_device.h>
  #include <linux/platform_data/ads7828.h>
  #include <linux/regmap.h>
  #include <linux/slab.h>
+#include <linux/regulator/consumer.h>
  
  /* The ADS7828 registers */
  #define ADS7828_CMD_SD_SE      0x80    /* Single ended inputs */
@@ -118,9 +120,12 @@ static int ads7828_probe(struct i2c_client *client,
         struct ads7828_data *data;
         struct device *hwmon_dev;
         unsigned int vref_mv = ADS7828_INT_VREF_MV;
+       unsigned int vref_uv;
         bool diff_input = false;
         bool ext_vref = false;
         unsigned int regval;
+       enum ads7828_chips chip;
+       struct regulator *reg;
  
         data = devm_kzalloc(dev, sizeof(struct ads7828_data), GFP_KERNEL);
         if (!data)
@@ -131,14 +136,32 @@ static int ads7828_probe(struct i2c_client *client,
                 ext_vref = pdata->ext_vref;
                 if (ext_vref && pdata->vref_mv)
                         vref_mv = pdata->vref_mv;
+       } else if (dev->of_node) {
+               diff_input = of_property_read_bool(dev->of_node,
+                                                  "ti,differential-input");
+               reg = devm_regulator_get_optional(dev, "vref");
+               if (!IS_ERR(reg)) {
+                       vref_uv = regulator_get_voltage(reg);
+                       vref_mv = DIV_ROUND_CLOSEST(vref_uv, 1000);
+                       if (vref_mv < ADS7828_EXT_VREF_MV_MIN ||
+                           vref_mv > ADS7828_EXT_VREF_MV_MAX)
+                               return -EINVAL;
+                       ext_vref = true;
+               }
         }
  
+       if (client->dev.of_node)
+               chip = (enum ads7828_chips)
+                       of_device_get_match_data(&client->dev);
+       else
+               chip = id->driver_data;
+
         /* Bound Vref with min/max values */
         vref_mv = clamp_val(vref_mv, ADS7828_EXT_VREF_MV_MIN,
                             ADS7828_EXT_VREF_MV_MAX);
  
         /* ADS7828 uses 12-bit samples, while ADS7830 is 8-bit */
-       if (id->driver_data == ads7828) {
+       if (chip == ads7828) {
                 data->lsb_resol = DIV_ROUND_CLOSEST(vref_mv * 1000, 4096);
                 data->regmap = devm_regmap_init_i2c(client,
                                                     &ads2828_regmap_config);
@@ -177,9 +200,23 @@ static const struct i2c_device_id ads7828_device_ids[] = {
  };
  MODULE_DEVICE_TABLE(i2c, ads7828_device_ids);
  
+static const struct of_device_id ads7828_of_match[] = {
+       {
+               .compatible = "ti,ads7828",
+               .data = (void *)ads7828
+       },
+       {
+               .compatible = "ti,ads7830",
+               .data = (void *)ads7830
+       },
+       { },
+};
+MODULE_DEVICE_TABLE(of, ads7828_of_match);
+
  static struct i2c_driver ads7828_driver = {
         .driver = {
                 .name = "ads7828",
+               .of_match_table = of_match_ptr(ads7828_of_match),
         },
  
         .id_table = ads7828_device_ids,
diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c

index c646670b9ea9269b95d6a37f26daeb186a939833..c803e3c5fcd41af9602823a753051811408ecb02 100644 (file)
--- a/drivers/hwmon/adt7475.c
+++ b/drivers/hwmon/adt7475.c
@@ -13,6 +13,7 @@
   */
  
  #include <linux/module.h>
+#include <linux/of_device.h>
  #include <linux/init.h>
  #include <linux/slab.h>
  #include <linux/i2c.h>
@@ -58,6 +59,8 @@
  #define REG_VENDID             0x3E
  #define REG_DEVID2             0x3F
  
+#define REG_CONFIG1            0x40
+
  #define REG_STATUS1            0x41
  #define REG_STATUS2            0x42
  
@@ -161,6 +164,27 @@ static const struct i2c_device_id adt7475_id[] = {
  };
  MODULE_DEVICE_TABLE(i2c, adt7475_id);
  
+/* OF match table; .data carries the enum chips value used by probe. */
+static const struct of_device_id adt7475_of_match[] = {
+       { .compatible = "adi,adt7473", .data = (void *)adt7473 },
+       { .compatible = "adi,adt7475", .data = (void *)adt7475 },
+       { .compatible = "adi,adt7476", .data = (void *)adt7476 },
+       { .compatible = "adi,adt7490", .data = (void *)adt7490 },
+       { },
+};
+MODULE_DEVICE_TABLE(of, adt7475_of_match);
+
  struct adt7475_data {
         struct device *hwmon_dev;
         struct mutex lock;
@@ -1250,6 +1274,7 @@ static void adt7475_remove_files(struct i2c_client *client,
  static int adt7475_probe(struct i2c_client *client,
                          const struct i2c_device_id *id)
  {
+       enum chips chip;
         static const char * const names[] = {
                 [adt7473] = "ADT7473",
                 [adt7475] = "ADT7475",
@@ -1268,8 +1293,13 @@ static int adt7475_probe(struct i2c_client *client,
         mutex_init(&data->lock);
         i2c_set_clientdata(client, data);
  
+       if (client->dev.of_node)
+               chip = (enum chips)of_device_get_match_data(&client->dev);
+       else
+               chip = id->driver_data;
+
         /* Initialize device-specific values */
-       switch (id->driver_data) {
+       switch (chip) {
         case adt7476:
                 data->has_voltage = 0x0e;       /* in1 to in3 */
                 revision = adt7475_read(REG_DEVID2) & 0x07;
@@ -1343,6 +1373,17 @@ static int adt7475_probe(struct i2c_client *client,
         for (i = 0; i < ADT7475_PWM_COUNT; i++)
                 adt7475_read_pwm(client, i);
  
+       /* Start monitoring */
+       switch (chip) {
+       case adt7475:
+       case adt7476:
+               i2c_smbus_write_byte_data(client, REG_CONFIG1,
+                                         adt7475_read(REG_CONFIG1) | 0x01);
+               break;
+       default:
+               break;
+       }
+
         ret = sysfs_create_group(&client->dev.kobj, &adt7475_attr_group);
         if (ret)
                 return ret;
@@ -1428,6 +1469,7 @@ static struct i2c_driver adt7475_driver = {
         .class          = I2C_CLASS_HWMON,
         .driver = {
                 .name   = "adt7475",
+               .of_match_table = of_match_ptr(adt7475_of_match),
         },
         .probe          = adt7475_probe,
         .remove         = adt7475_remove,
diff --git a/drivers/hwmon/aspeed-pwm-tacho.c b/drivers/hwmon/aspeed-pwm-tacho.c

new file mode 100644 (file)

index 0000000..48403a2
--- /dev/null
+++ b/drivers/hwmon/aspeed-pwm-tacho.c
@@ -0,0 +1,835 @@
+/*
+ * Copyright (c) 2016 Google, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/gpio/consumer.h>
+#include <linux/delay.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/sysfs.h>
+#include <linux/regmap.h>
+
+/* ASPEED PWM & FAN Tach Register Definition */
+#define ASPEED_PTCR_CTRL               0x00
+#define ASPEED_PTCR_CLK_CTRL           0x04
+#define ASPEED_PTCR_DUTY0_CTRL         0x08
+#define ASPEED_PTCR_DUTY1_CTRL         0x0c
+#define ASPEED_PTCR_TYPEM_CTRL         0x10
+#define ASPEED_PTCR_TYPEM_CTRL1                0x14
+#define ASPEED_PTCR_TYPEN_CTRL         0x18
+#define ASPEED_PTCR_TYPEN_CTRL1                0x1c
+#define ASPEED_PTCR_TACH_SOURCE                0x20
+#define ASPEED_PTCR_TRIGGER            0x28
+#define ASPEED_PTCR_RESULT             0x2c
+#define ASPEED_PTCR_INTR_CTRL          0x30
+#define ASPEED_PTCR_INTR_STS           0x34
+#define ASPEED_PTCR_TYPEM_LIMIT                0x38
+#define ASPEED_PTCR_TYPEN_LIMIT                0x3C
+#define ASPEED_PTCR_CTRL_EXT           0x40
+#define ASPEED_PTCR_CLK_CTRL_EXT       0x44
+#define ASPEED_PTCR_DUTY2_CTRL         0x48
+#define ASPEED_PTCR_DUTY3_CTRL         0x4c
+#define ASPEED_PTCR_TYPEO_CTRL         0x50
+#define ASPEED_PTCR_TYPEO_CTRL1                0x54
+#define ASPEED_PTCR_TACH_SOURCE_EXT    0x60
+#define ASPEED_PTCR_TYPEO_LIMIT                0x78
+
+/* ASPEED_PTCR_CTRL : 0x00 - General Control Register */
+#define ASPEED_PTCR_CTRL_SET_PWMD_TYPE_PART1   15
+#define ASPEED_PTCR_CTRL_SET_PWMD_TYPE_PART2   6
+#define ASPEED_PTCR_CTRL_SET_PWMD_TYPE_MASK    (BIT(7) | BIT(15))
+
+#define ASPEED_PTCR_CTRL_SET_PWMC_TYPE_PART1   14
+#define ASPEED_PTCR_CTRL_SET_PWMC_TYPE_PART2   5
+#define ASPEED_PTCR_CTRL_SET_PWMC_TYPE_MASK    (BIT(6) | BIT(14))
+
+#define ASPEED_PTCR_CTRL_SET_PWMB_TYPE_PART1   13
+#define ASPEED_PTCR_CTRL_SET_PWMB_TYPE_PART2   4
+#define ASPEED_PTCR_CTRL_SET_PWMB_TYPE_MASK    (BIT(5) | BIT(13))
+
+#define ASPEED_PTCR_CTRL_SET_PWMA_TYPE_PART1   12
+#define ASPEED_PTCR_CTRL_SET_PWMA_TYPE_PART2   3
+#define ASPEED_PTCR_CTRL_SET_PWMA_TYPE_MASK    (BIT(4) | BIT(12))
+
+#define        ASPEED_PTCR_CTRL_FAN_NUM_EN(x)  BIT(16 + (x))
+
+#define        ASPEED_PTCR_CTRL_PWMD_EN        BIT(11)
+#define        ASPEED_PTCR_CTRL_PWMC_EN        BIT(10)
+#define        ASPEED_PTCR_CTRL_PWMB_EN        BIT(9)
+#define        ASPEED_PTCR_CTRL_PWMA_EN        BIT(8)
+
+#define        ASPEED_PTCR_CTRL_CLK_SRC        BIT(1)
+#define        ASPEED_PTCR_CTRL_CLK_EN         BIT(0)
+
+/* ASPEED_PTCR_CLK_CTRL : 0x04 - Clock Control Register */
+/* TYPE N */
+#define ASPEED_PTCR_CLK_CTRL_TYPEN_MASK                GENMASK(31, 16)
+#define ASPEED_PTCR_CLK_CTRL_TYPEN_UNIT                24
+#define ASPEED_PTCR_CLK_CTRL_TYPEN_H           20
+#define ASPEED_PTCR_CLK_CTRL_TYPEN_L           16
+/* TYPE M */
+#define ASPEED_PTCR_CLK_CTRL_TYPEM_MASK         GENMASK(15, 0)
+#define ASPEED_PTCR_CLK_CTRL_TYPEM_UNIT                8
+#define ASPEED_PTCR_CLK_CTRL_TYPEM_H           4
+#define ASPEED_PTCR_CLK_CTRL_TYPEM_L           0
+
+/*
+ * ASPEED_PTCR_DUTY0/1/2/3_CTRL : 0x08/0x0C/0x48/0x4C - PWM-FAN duty control
+ * 0/1/2/3 registers
+ */
+#define DUTY_CTRL_PWM2_FALL_POINT      24
+#define DUTY_CTRL_PWM2_RISE_POINT      16
+#define DUTY_CTRL_PWM2_RISE_FALL_MASK  GENMASK(31, 16)
+#define DUTY_CTRL_PWM1_FALL_POINT      8
+#define DUTY_CTRL_PWM1_RISE_POINT      0
+#define DUTY_CTRL_PWM1_RISE_FALL_MASK   GENMASK(15, 0)
+
+/* ASPEED_PTCR_TYPEM/N/O_CTRL : 0x10/0x18/0x50 - Type M/N/O Ctrl 0 Register */
+#define TYPE_CTRL_FAN_MASK             (GENMASK(5, 1) | GENMASK(31, 16))
+#define TYPE_CTRL_FAN1_MASK            GENMASK(31, 0)
+#define TYPE_CTRL_FAN_PERIOD           16
+#define TYPE_CTRL_FAN_MODE             4
+#define TYPE_CTRL_FAN_DIVISION         1
+#define TYPE_CTRL_FAN_TYPE_EN          1
+
+/* ASPEED_PTCR_TACH_SOURCE/_EXT : 0x20/0x60 - Tach Source Registers */
+/* source bits [1:0] are written at 0x20, source bit [2] at 0x60 */
+#define TACH_PWM_SOURCE_BIT01(x)       ((x) * 2)
+#define TACH_PWM_SOURCE_BIT2(x)                ((x) * 2)
+#define TACH_PWM_SOURCE_MASK_BIT01(x)  (0x3 << ((x) * 2))
+#define TACH_PWM_SOURCE_MASK_BIT2(x)   BIT((x) * 2)
+
+/* ASPEED_PTCR_RESULT : 0x2c - Result Register */
+#define RESULT_STATUS_MASK             BIT(31)
+#define RESULT_VALUE_MASK              0xfffff
+
+/* ASPEED_PTCR_CTRL_EXT : 0x40 - General Control Extension #1 Register */
+#define ASPEED_PTCR_CTRL_SET_PWMH_TYPE_PART1   15
+#define ASPEED_PTCR_CTRL_SET_PWMH_TYPE_PART2   6
+#define ASPEED_PTCR_CTRL_SET_PWMH_TYPE_MASK    (BIT(7) | BIT(15))
+
+#define ASPEED_PTCR_CTRL_SET_PWMG_TYPE_PART1   14
+#define ASPEED_PTCR_CTRL_SET_PWMG_TYPE_PART2   5
+#define ASPEED_PTCR_CTRL_SET_PWMG_TYPE_MASK    (BIT(6) | BIT(14))
+
+#define ASPEED_PTCR_CTRL_SET_PWMF_TYPE_PART1   13
+#define ASPEED_PTCR_CTRL_SET_PWMF_TYPE_PART2   4
+#define ASPEED_PTCR_CTRL_SET_PWMF_TYPE_MASK    (BIT(5) | BIT(13))
+
+#define ASPEED_PTCR_CTRL_SET_PWME_TYPE_PART1   12
+#define ASPEED_PTCR_CTRL_SET_PWME_TYPE_PART2   3
+#define ASPEED_PTCR_CTRL_SET_PWME_TYPE_MASK    (BIT(4) | BIT(12))
+
+#define        ASPEED_PTCR_CTRL_PWMH_EN        BIT(11)
+#define        ASPEED_PTCR_CTRL_PWMG_EN        BIT(10)
+#define        ASPEED_PTCR_CTRL_PWMF_EN        BIT(9)
+#define        ASPEED_PTCR_CTRL_PWME_EN        BIT(8)
+
+/* ASPEED_PTCR_CLK_CTRL_EXT : 0x44 - Clock Control Extension #1 Register */
+/* TYPE O */
+#define ASPEED_PTCR_CLK_CTRL_TYPEO_MASK         GENMASK(15, 0)
+#define ASPEED_PTCR_CLK_CTRL_TYPEO_UNIT                8
+#define ASPEED_PTCR_CLK_CTRL_TYPEO_H           4
+#define ASPEED_PTCR_CLK_CTRL_TYPEO_L           0
+
+/* Largest duty value accepted through the pwmN sysfs attributes. */
+#define PWM_MAX 255
+
+/* Settings programmed for clock type M by aspeed_create_type(). */
+#define M_PWM_DIV_H 0x00
+#define M_PWM_DIV_L 0x05
+#define M_PWM_PERIOD 0x5F
+#define M_TACH_CLK_DIV 0x00
+#define M_TACH_MODE 0x00
+#define M_TACH_UNIT 0x1000
+/* Duty value applied to every PWM port when it is created. */
+#define INIT_FAN_CTRL 0xFF
+
+/*
+ * Per-device driver state. The type_* arrays are indexed by enum type, the
+ * pwm_* arrays by PWM port number and the fan_tach_* arrays by tach channel.
+ */
+struct aspeed_pwm_tacho_data {
+       /* Register access to the PWM/tach controller. */
+       struct regmap *regmap;
+       /* Rate of the controller clock as returned by clk_get_rate(). */
+       unsigned long clk_freq;
+       /* Ports/channels created from the device tree; drive sysfs visibility. */
+       bool pwm_present[8];
+       bool fan_tach_present[16];
+       /* Cached copies of the per-type clock settings written to hardware. */
+       u8 type_pwm_clock_unit[3];
+       u8 type_pwm_clock_division_h[3];
+       u8 type_pwm_clock_division_l[3];
+       u8 type_fan_tach_clock_division[3];
+       u16 type_fan_tach_unit[3];
+       /* Type assigned to each PWM port. */
+       u8 pwm_port_type[8];
+       /* Last duty value (0..PWM_MAX) requested for each PWM port. */
+       u8 pwm_port_fan_ctrl[8];
+       /* PWM port each tach channel is associated with. */
+       u8 fan_tach_ch_source[16];
+       /* NULL-terminated attribute group list handed to the hwmon core. */
+       const struct attribute_group *groups[3];
+};
+
+/* Clock/tach type selectors; used as indices into type_params[]. */
+enum type { TYPEM, TYPEN, TYPEO };
+
+/* Register layout description for one clock/tach type. */
+struct type_params {
+       /* Bit positions (shift amounts) of the L divider, H divider and unit. */
+       u32 l_value;
+       u32 h_value;
+       u32 unit_value;
+       /* Mask covering this type's fields inside clk_ctrl_reg. */
+       u32 clk_ctrl_mask;
+       /* Offset of the clock control register holding those fields. */
+       u32 clk_ctrl_reg;
+       /* Offsets of the type's two tach control registers. */
+       u32 ctrl_reg;
+       u32 ctrl_reg1;
+};
+
+/*
+ * Per-type register layout, indexed by enum type. Types M and N share
+ * ASPEED_PTCR_CLK_CTRL (different bit ranges); type O uses the extension
+ * register ASPEED_PTCR_CLK_CTRL_EXT.
+ */
+static const struct type_params type_params[] = {
+       [TYPEM] = {
+               .l_value = ASPEED_PTCR_CLK_CTRL_TYPEM_L,
+               .h_value = ASPEED_PTCR_CLK_CTRL_TYPEM_H,
+               .unit_value = ASPEED_PTCR_CLK_CTRL_TYPEM_UNIT,
+               .clk_ctrl_mask = ASPEED_PTCR_CLK_CTRL_TYPEM_MASK,
+               .clk_ctrl_reg = ASPEED_PTCR_CLK_CTRL,
+               .ctrl_reg = ASPEED_PTCR_TYPEM_CTRL,
+               .ctrl_reg1 = ASPEED_PTCR_TYPEM_CTRL1,
+       },
+       [TYPEN] = {
+               .l_value = ASPEED_PTCR_CLK_CTRL_TYPEN_L,
+               .h_value = ASPEED_PTCR_CLK_CTRL_TYPEN_H,
+               .unit_value = ASPEED_PTCR_CLK_CTRL_TYPEN_UNIT,
+               .clk_ctrl_mask = ASPEED_PTCR_CLK_CTRL_TYPEN_MASK,
+               .clk_ctrl_reg = ASPEED_PTCR_CLK_CTRL,
+               .ctrl_reg = ASPEED_PTCR_TYPEN_CTRL,
+               .ctrl_reg1 = ASPEED_PTCR_TYPEN_CTRL1,
+       },
+       [TYPEO] = {
+               .l_value = ASPEED_PTCR_CLK_CTRL_TYPEO_L,
+               .h_value = ASPEED_PTCR_CLK_CTRL_TYPEO_H,
+               .unit_value = ASPEED_PTCR_CLK_CTRL_TYPEO_UNIT,
+               .clk_ctrl_mask = ASPEED_PTCR_CLK_CTRL_TYPEO_MASK,
+               .clk_ctrl_reg = ASPEED_PTCR_CLK_CTRL_EXT,
+               .ctrl_reg = ASPEED_PTCR_TYPEO_CTRL,
+               .ctrl_reg1 = ASPEED_PTCR_TYPEO_CTRL1,
+       }
+};
+
+/* PWM output ports; used as indices into pwm_port_params[]. */
+enum pwm_port { PWMA, PWMB, PWMC, PWMD, PWME, PWMF, PWMG, PWMH };
+
+/* Register layout description for one PWM port. */
+struct pwm_port_params {
+       /* Enable bit of the port inside ctrl_reg. */
+       u32 pwm_en;
+       /* Offset of the general control register serving this port. */
+       u32 ctrl_reg;
+       /*
+        * Shift amounts applied to (type & 0x1) and (type & 0x2) respectively
+        * when encoding the port's type; type_mask covers the resulting bits.
+        */
+       u32 type_part1;
+       u32 type_part2;
+       u32 type_mask;
+       /* Bit positions of the rising and falling points in duty_ctrl_reg. */
+       u32 duty_ctrl_rise_point;
+       u32 duty_ctrl_fall_point;
+       /* Offset of the duty control register serving this port. */
+       u32 duty_ctrl_reg;
+       /* Mask covering both duty points of this port. */
+       u32 duty_ctrl_rise_fall_mask;
+};
+
+/*
+ * Per-port register layout, indexed by enum pwm_port. Ports A-D are
+ * controlled through ASPEED_PTCR_CTRL and ports E-H through
+ * ASPEED_PTCR_CTRL_EXT. Each duty control register is shared by two ports:
+ * one uses the PWM1 fields (bits 15:0), the other the PWM2 fields
+ * (bits 31:16).
+ */
+static const struct pwm_port_params pwm_port_params[] = {
+       [PWMA] = {
+               .pwm_en = ASPEED_PTCR_CTRL_PWMA_EN,
+               .ctrl_reg = ASPEED_PTCR_CTRL,
+               .type_part1 = ASPEED_PTCR_CTRL_SET_PWMA_TYPE_PART1,
+               .type_part2 = ASPEED_PTCR_CTRL_SET_PWMA_TYPE_PART2,
+               .type_mask = ASPEED_PTCR_CTRL_SET_PWMA_TYPE_MASK,
+               .duty_ctrl_rise_point = DUTY_CTRL_PWM1_RISE_POINT,
+               .duty_ctrl_fall_point = DUTY_CTRL_PWM1_FALL_POINT,
+               .duty_ctrl_reg = ASPEED_PTCR_DUTY0_CTRL,
+               .duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM1_RISE_FALL_MASK,
+       },
+       [PWMB] = {
+               .pwm_en = ASPEED_PTCR_CTRL_PWMB_EN,
+               .ctrl_reg = ASPEED_PTCR_CTRL,
+               .type_part1 = ASPEED_PTCR_CTRL_SET_PWMB_TYPE_PART1,
+               .type_part2 = ASPEED_PTCR_CTRL_SET_PWMB_TYPE_PART2,
+               .type_mask = ASPEED_PTCR_CTRL_SET_PWMB_TYPE_MASK,
+               .duty_ctrl_rise_point = DUTY_CTRL_PWM2_RISE_POINT,
+               .duty_ctrl_fall_point = DUTY_CTRL_PWM2_FALL_POINT,
+               .duty_ctrl_reg = ASPEED_PTCR_DUTY0_CTRL,
+               .duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM2_RISE_FALL_MASK,
+       },
+       [PWMC] = {
+               .pwm_en = ASPEED_PTCR_CTRL_PWMC_EN,
+               .ctrl_reg = ASPEED_PTCR_CTRL,
+               .type_part1 = ASPEED_PTCR_CTRL_SET_PWMC_TYPE_PART1,
+               .type_part2 = ASPEED_PTCR_CTRL_SET_PWMC_TYPE_PART2,
+               .type_mask = ASPEED_PTCR_CTRL_SET_PWMC_TYPE_MASK,
+               .duty_ctrl_rise_point = DUTY_CTRL_PWM1_RISE_POINT,
+               .duty_ctrl_fall_point = DUTY_CTRL_PWM1_FALL_POINT,
+               .duty_ctrl_reg = ASPEED_PTCR_DUTY1_CTRL,
+               .duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM1_RISE_FALL_MASK,
+       },
+       [PWMD] = {
+               .pwm_en = ASPEED_PTCR_CTRL_PWMD_EN,
+               .ctrl_reg = ASPEED_PTCR_CTRL,
+               .type_part1 = ASPEED_PTCR_CTRL_SET_PWMD_TYPE_PART1,
+               .type_part2 = ASPEED_PTCR_CTRL_SET_PWMD_TYPE_PART2,
+               .type_mask = ASPEED_PTCR_CTRL_SET_PWMD_TYPE_MASK,
+               .duty_ctrl_rise_point = DUTY_CTRL_PWM2_RISE_POINT,
+               .duty_ctrl_fall_point = DUTY_CTRL_PWM2_FALL_POINT,
+               .duty_ctrl_reg = ASPEED_PTCR_DUTY1_CTRL,
+               .duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM2_RISE_FALL_MASK,
+       },
+       [PWME] = {
+               .pwm_en = ASPEED_PTCR_CTRL_PWME_EN,
+               .ctrl_reg = ASPEED_PTCR_CTRL_EXT,
+               .type_part1 = ASPEED_PTCR_CTRL_SET_PWME_TYPE_PART1,
+               .type_part2 = ASPEED_PTCR_CTRL_SET_PWME_TYPE_PART2,
+               .type_mask = ASPEED_PTCR_CTRL_SET_PWME_TYPE_MASK,
+               .duty_ctrl_rise_point = DUTY_CTRL_PWM1_RISE_POINT,
+               .duty_ctrl_fall_point = DUTY_CTRL_PWM1_FALL_POINT,
+               .duty_ctrl_reg = ASPEED_PTCR_DUTY2_CTRL,
+               .duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM1_RISE_FALL_MASK,
+       },
+       [PWMF] = {
+               .pwm_en = ASPEED_PTCR_CTRL_PWMF_EN,
+               .ctrl_reg = ASPEED_PTCR_CTRL_EXT,
+               .type_part1 = ASPEED_PTCR_CTRL_SET_PWMF_TYPE_PART1,
+               .type_part2 = ASPEED_PTCR_CTRL_SET_PWMF_TYPE_PART2,
+               .type_mask = ASPEED_PTCR_CTRL_SET_PWMF_TYPE_MASK,
+               .duty_ctrl_rise_point = DUTY_CTRL_PWM2_RISE_POINT,
+               .duty_ctrl_fall_point = DUTY_CTRL_PWM2_FALL_POINT,
+               .duty_ctrl_reg = ASPEED_PTCR_DUTY2_CTRL,
+               .duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM2_RISE_FALL_MASK,
+       },
+       [PWMG] = {
+               .pwm_en = ASPEED_PTCR_CTRL_PWMG_EN,
+               .ctrl_reg = ASPEED_PTCR_CTRL_EXT,
+               .type_part1 = ASPEED_PTCR_CTRL_SET_PWMG_TYPE_PART1,
+               .type_part2 = ASPEED_PTCR_CTRL_SET_PWMG_TYPE_PART2,
+               .type_mask = ASPEED_PTCR_CTRL_SET_PWMG_TYPE_MASK,
+               .duty_ctrl_rise_point = DUTY_CTRL_PWM1_RISE_POINT,
+               .duty_ctrl_fall_point = DUTY_CTRL_PWM1_FALL_POINT,
+               .duty_ctrl_reg = ASPEED_PTCR_DUTY3_CTRL,
+               .duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM1_RISE_FALL_MASK,
+       },
+       [PWMH] = {
+               .pwm_en = ASPEED_PTCR_CTRL_PWMH_EN,
+               .ctrl_reg = ASPEED_PTCR_CTRL_EXT,
+               .type_part1 = ASPEED_PTCR_CTRL_SET_PWMH_TYPE_PART1,
+               .type_part2 = ASPEED_PTCR_CTRL_SET_PWMH_TYPE_PART2,
+               .type_mask = ASPEED_PTCR_CTRL_SET_PWMH_TYPE_MASK,
+               .duty_ctrl_rise_point = DUTY_CTRL_PWM2_RISE_POINT,
+               .duty_ctrl_fall_point = DUTY_CTRL_PWM2_FALL_POINT,
+               .duty_ctrl_reg = ASPEED_PTCR_DUTY3_CTRL,
+               .duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM2_RISE_FALL_MASK,
+       }
+};
+
+/*
+ * regmap write callback: @context is the mapped register base, @reg the
+ * byte offset of the register and @val the 32-bit value to store.
+ * Always returns 0.
+ */
+static int regmap_aspeed_pwm_tacho_reg_write(void *context, unsigned int reg,
+                                            unsigned int val)
+{
+       void __iomem *base = (void __iomem *)context;
+       void __iomem *addr = base + reg;
+
+       writel(val, addr);
+
+       return 0;
+}
+
+/*
+ * regmap read callback: @context is the mapped register base, @reg the
+ * byte offset of the register; the 32-bit value read is stored in @val.
+ * Always returns 0.
+ */
+static int regmap_aspeed_pwm_tacho_reg_read(void *context, unsigned int reg,
+                                           unsigned int *val)
+{
+       void __iomem *base = (void __iomem *)context;
+       void __iomem *addr = base + reg;
+
+       *val = readl(addr);
+
+       return 0;
+}
+
+/*
+ * regmap description of the controller: 32-bit register offsets and values
+ * with a stride of 4, the last register being ASPEED_PTCR_TYPEO_LIMIT, and
+ * all accesses routed through the MMIO read/write callbacks.
+ */
+static const struct regmap_config aspeed_pwm_tacho_regmap_config = {
+       .reg_bits = 32,
+       .val_bits = 32,
+       .reg_stride = 4,
+       .max_register = ASPEED_PTCR_TYPEO_LIMIT,
+       .reg_write = regmap_aspeed_pwm_tacho_reg_write,
+       .reg_read = regmap_aspeed_pwm_tacho_reg_read,
+       .fast_io = true,
+};
+
+/* Set (@val true) or clear the CLK_EN bit of the general control register. */
+static void aspeed_set_clock_enable(struct regmap *regmap, bool val)
+{
+       unsigned int bits = 0;
+
+       if (val)
+               bits = ASPEED_PTCR_CTRL_CLK_EN;
+
+       regmap_update_bits(regmap, ASPEED_PTCR_CTRL,
+                          ASPEED_PTCR_CTRL_CLK_EN, bits);
+}
+
+/* Set (@val non-zero) or clear the CLK_SRC bit of the general control register. */
+static void aspeed_set_clock_source(struct regmap *regmap, int val)
+{
+       unsigned int bits = 0;
+
+       if (val)
+               bits = ASPEED_PTCR_CTRL_CLK_SRC;
+
+       regmap_update_bits(regmap, ASPEED_PTCR_CTRL,
+                          ASPEED_PTCR_CTRL_CLK_SRC, bits);
+}
+
+/*
+ * Program the PWM clock fields of @type: the high divider, the low divider
+ * and the unit, each shifted to the position given by type_params[].
+ */
+static void aspeed_set_pwm_clock_values(struct regmap *regmap, u8 type,
+                                       u8 div_high, u8 div_low, u8 unit)
+{
+       const struct type_params *tp = &type_params[type];
+       u32 fields;
+
+       fields = div_high << tp->h_value;
+       fields |= div_low << tp->l_value;
+       fields |= unit << tp->unit_value;
+
+       regmap_update_bits(regmap, tp->clk_ctrl_reg, tp->clk_ctrl_mask,
+                          fields);
+}
+
+/* Set or clear the enable bit of PWM port @pwm_port. */
+static void aspeed_set_pwm_port_enable(struct regmap *regmap, u8 pwm_port,
+                                      bool enable)
+{
+       const struct pwm_port_params *pp = &pwm_port_params[pwm_port];
+       u32 bits = 0;
+
+       if (enable)
+               bits = pp->pwm_en;
+
+       regmap_update_bits(regmap, pp->ctrl_reg, pp->pwm_en, bits);
+}
+
+/*
+ * Select the clock type of PWM port @pwm_port. The two type bits are stored
+ * in separate places of the control register, so each is shifted on its own.
+ */
+static void aspeed_set_pwm_port_type(struct regmap *regmap,
+                                    u8 pwm_port, u8 type)
+{
+       const struct pwm_port_params *pp = &pwm_port_params[pwm_port];
+       u32 low_bit = (type & 0x1) << pp->type_part1;
+       u32 high_bit = (type & 0x2) << pp->type_part2;
+
+       regmap_update_bits(regmap, pp->ctrl_reg, pp->type_mask,
+                          low_bit | high_bit);
+}
+
+/* Write the rising and falling duty points of PWM port @pwm_port. */
+static void aspeed_set_pwm_port_duty_rising_falling(struct regmap *regmap,
+                                                   u8 pwm_port, u8 rising,
+                                                   u8 falling)
+{
+       const struct pwm_port_params *pp = &pwm_port_params[pwm_port];
+       u32 points;
+
+       points = rising << pp->duty_ctrl_rise_point;
+       points |= falling << pp->duty_ctrl_fall_point;
+
+       regmap_update_bits(regmap, pp->duty_ctrl_reg,
+                          pp->duty_ctrl_rise_fall_mask, points);
+}
+
+/* Set or clear the tach enable bit in the control register of @type. */
+static void aspeed_set_tacho_type_enable(struct regmap *regmap, u8 type,
+                                        bool enable)
+{
+       unsigned int bits = 0;
+
+       if (enable)
+               bits = TYPE_CTRL_FAN_TYPE_EN;
+
+       regmap_update_bits(regmap, type_params[type].ctrl_reg,
+                          TYPE_CTRL_FAN_TYPE_EN, bits);
+}
+
+/*
+ * Program the tach mode, unit and clock division of @type. The unit is
+ * written both to the type's control register and to its second control
+ * register, in bits 31:16 each time.
+ */
+static void aspeed_set_tacho_type_values(struct regmap *regmap, u8 type,
+                                        u8 mode, u16 unit, u8 division)
+{
+       const struct type_params *tp = &type_params[type];
+       u32 unit_bits = unit << TYPE_CTRL_FAN_PERIOD;
+       u32 ctrl = unit_bits;
+
+       ctrl |= mode << TYPE_CTRL_FAN_MODE;
+       ctrl |= division << TYPE_CTRL_FAN_DIVISION;
+
+       regmap_update_bits(regmap, tp->ctrl_reg, TYPE_CTRL_FAN_MASK, ctrl);
+       regmap_update_bits(regmap, tp->ctrl_reg1, TYPE_CTRL_FAN1_MASK,
+                          unit_bits);
+}
+
+/* Set or clear the enable bit of tach channel @fan_tach_ch. */
+static void aspeed_set_fan_tach_ch_enable(struct regmap *regmap, u8 fan_tach_ch,
+                                         bool enable)
+{
+       unsigned int ch_bit = ASPEED_PTCR_CTRL_FAN_NUM_EN(fan_tach_ch);
+       unsigned int bits = 0;
+
+       if (enable)
+               bits = ch_bit;
+
+       regmap_update_bits(regmap, ASPEED_PTCR_CTRL, ch_bit, bits);
+}
+
+/*
+ * Record which PWM port drives tach channel @fan_tach_ch. The low two bits
+ * of @fan_tach_ch_source go to the tach source register, bit 2 goes to the
+ * tach source extension register.
+ */
+static void aspeed_set_fan_tach_ch_source(struct regmap *regmap, u8 fan_tach_ch,
+                                         u8 fan_tach_ch_source)
+{
+       u32 low_bits = fan_tach_ch_source & 0x3;
+       u32 high_bit = (fan_tach_ch_source >> 2) & 0x1;
+
+       low_bits <<= TACH_PWM_SOURCE_BIT01(fan_tach_ch);
+       high_bit <<= TACH_PWM_SOURCE_BIT2(fan_tach_ch);
+
+       regmap_update_bits(regmap, ASPEED_PTCR_TACH_SOURCE,
+                          TACH_PWM_SOURCE_MASK_BIT01(fan_tach_ch), low_bits);
+       regmap_update_bits(regmap, ASPEED_PTCR_TACH_SOURCE_EXT,
+                          TACH_PWM_SOURCE_MASK_BIT2(fan_tach_ch), high_bit);
+}
+
+/*
+ * Apply duty value @fan_ctrl (0..PWM_MAX) to PWM port @index.
+ *
+ * The value is scaled to the port's PWM period (clock unit + 1). A scaled
+ * on-time of zero disables the port; an on-time equal to the full period is
+ * written as a falling point of 0.
+ */
+static void aspeed_set_pwm_port_fan_ctrl(struct aspeed_pwm_tacho_data *priv,
+                                        u8 index, u8 fan_ctrl)
+{
+       u8 type = priv->pwm_port_type[index];
+       u16 period = priv->type_pwm_clock_unit[type];
+       u16 on_time;
+
+       period += 1;
+       on_time = (fan_ctrl * period) / PWM_MAX;
+
+       if (on_time == 0) {
+               aspeed_set_pwm_port_enable(priv->regmap, index, false);
+               return;
+       }
+
+       if (on_time == period)
+               on_time = 0;
+
+       aspeed_set_pwm_port_duty_rising_falling(priv->regmap, index, 0,
+                                               on_time);
+       aspeed_set_pwm_port_enable(priv->regmap, index, true);
+}
+
+/*
+ * Return the controller clock divided by the product of the PWM clock unit,
+ * both PWM clock dividers, the tach clock divider and the tach unit of
+ * @type. The caller uses the result as a per-second rate (1000 / result is
+ * its wait time in milliseconds).
+ *
+ * All divider terms are kept in u32. With u8 locals, (1 << div_h) and
+ * (4 << (2 * tacho_div)) were truncated for larger settings - a tach clock
+ * division of 3 or more became 0 and made this function divide by zero.
+ */
+static u32 aspeed_get_fan_tach_ch_measure_period(struct aspeed_pwm_tacho_data
+                                                *priv, u8 type)
+{
+       u32 clk, clk_unit, div_h, div_l, tacho_unit, tacho_div;
+
+       clk = priv->clk_freq;
+       clk_unit = priv->type_pwm_clock_unit[type];
+
+       /* The stored H division is an exponent: divider = 2^n. */
+       div_h = 0x1 << priv->type_pwm_clock_division_h[type];
+
+       /* The stored L division n means divide by 2 * n, with 0 meaning 1. */
+       div_l = priv->type_pwm_clock_division_l[type];
+       div_l = div_l ? div_l * 2 : 1;
+
+       tacho_unit = priv->type_fan_tach_unit[type];
+
+       /* The stored tach division n means divide by 4 * 4^n. */
+       tacho_div = 0x4 << (priv->type_fan_tach_clock_division[type] * 2);
+
+       return clk / (clk_unit * div_h * div_l * tacho_div * tacho_unit);
+}
+
+/*
+ * Trigger a measurement on tach channel @fan_tach_ch, wait for it and
+ * convert the raw result to RPM.
+ *
+ * Returns 0 when the hardware reports a raw value of 0, and also when the
+ * measurement rate computes to 0 (controller clock smaller than the
+ * combined divider): the wait time is 1000 / rate, so the rate is checked
+ * first instead of dividing by zero.
+ */
+static u32 aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv,
+                                     u8 fan_tach_ch)
+{
+       u32 raw_data, tach_div, clk_source, rate, msec, val;
+       u8 fan_tach_ch_source, type;
+
+       fan_tach_ch_source = priv->fan_tach_ch_source[fan_tach_ch];
+       type = priv->pwm_port_type[fan_tach_ch_source];
+
+       rate = aspeed_get_fan_tach_ch_measure_period(priv, type);
+       if (rate == 0)
+               return 0;
+       msec = 1000 / rate;
+
+       /* Clear the trigger register, then start this channel only. */
+       regmap_write(priv->regmap, ASPEED_PTCR_TRIGGER, 0);
+       regmap_write(priv->regmap, ASPEED_PTCR_TRIGGER, 0x1 << fan_tach_ch);
+
+       msleep(msec);
+
+       regmap_read(priv->regmap, ASPEED_PTCR_RESULT, &val);
+       raw_data = val & RESULT_VALUE_MASK;
+       if (raw_data == 0)
+               return 0;
+
+       tach_div = priv->type_fan_tach_clock_division[type];
+       tach_div = 0x4 << (tach_div * 2);
+       clk_source = priv->clk_freq;
+
+       return (clk_source * 60) / (2 * raw_data * tach_div);
+}
+
+/*
+ * sysfs store handler for pwmN: parse a decimal duty value, reject anything
+ * outside 0..PWM_MAX with -EINVAL and apply it to the port unless it equals
+ * the value already recorded. Returns @count on success.
+ */
+static ssize_t set_pwm(struct device *dev, struct device_attribute *attr,
+                      const char *buf, size_t count)
+{
+       struct aspeed_pwm_tacho_data *priv = dev_get_drvdata(dev);
+       int port = to_sensor_dev_attr(attr)->index;
+       long duty;
+       int err;
+
+       err = kstrtol(buf, 10, &duty);
+       if (err)
+               return err;
+
+       if (duty < 0 || duty > PWM_MAX)
+               return -EINVAL;
+
+       if (priv->pwm_port_fan_ctrl[port] != duty) {
+               priv->pwm_port_fan_ctrl[port] = duty;
+               aspeed_set_pwm_port_fan_ctrl(priv, port, duty);
+       }
+
+       return count;
+}
+
+/* sysfs show handler for pwmN: print the duty value recorded for the port. */
+static ssize_t show_pwm(struct device *dev, struct device_attribute *attr,
+                       char *buf)
+{
+       struct aspeed_pwm_tacho_data *priv = dev_get_drvdata(dev);
+       int port = to_sensor_dev_attr(attr)->index;
+
+       return sprintf(buf, "%u\n", priv->pwm_port_fan_ctrl[port]);
+}
+
+/* sysfs show handler for fanN_input: measure and print the channel's RPM. */
+static ssize_t show_rpm(struct device *dev, struct device_attribute *attr,
+                       char *buf)
+{
+       struct aspeed_pwm_tacho_data *priv = dev_get_drvdata(dev);
+       int channel = to_sensor_dev_attr(attr)->index;
+       u32 speed = aspeed_get_fan_tach_ch_rpm(priv, channel);
+
+       return sprintf(buf, "%u\n", speed);
+}
+
+/* Expose a pwm attribute only when the matching port was created. */
+static umode_t pwm_is_visible(struct kobject *kobj,
+                             struct attribute *a, int index)
+{
+       struct device *dev = container_of(kobj, struct device, kobj);
+       struct aspeed_pwm_tacho_data *priv = dev_get_drvdata(dev);
+
+       return priv->pwm_present[index] ? a->mode : 0;
+}
+
+/* Expose a fan attribute only when the matching tach channel was created. */
+static umode_t fan_dev_is_visible(struct kobject *kobj,
+                                 struct attribute *a, int index)
+{
+       struct device *dev = container_of(kobj, struct device, kobj);
+       struct aspeed_pwm_tacho_data *priv = dev_get_drvdata(dev);
+
+       return priv->fan_tach_present[index] ? a->mode : 0;
+}
+
+/*
+ * One read/write attribute per PWM port. The last macro argument is the
+ * port number; the attributes are listed in port order so that the array
+ * position passed to pwm_is_visible() equals the port number.
+ *
+ * NOTE(review): the hwmon sysfs naming convention numbers pwm attributes
+ * from 1; these start at pwm0 - confirm that this is intended.
+ */
+static SENSOR_DEVICE_ATTR(pwm0, 0644, show_pwm, set_pwm, 0);
+static SENSOR_DEVICE_ATTR(pwm1, 0644, show_pwm, set_pwm, 1);
+static SENSOR_DEVICE_ATTR(pwm2, 0644, show_pwm, set_pwm, 2);
+static SENSOR_DEVICE_ATTR(pwm3, 0644, show_pwm, set_pwm, 3);
+static SENSOR_DEVICE_ATTR(pwm4, 0644, show_pwm, set_pwm, 4);
+static SENSOR_DEVICE_ATTR(pwm5, 0644, show_pwm, set_pwm, 5);
+static SENSOR_DEVICE_ATTR(pwm6, 0644, show_pwm, set_pwm, 6);
+static SENSOR_DEVICE_ATTR(pwm7, 0644, show_pwm, set_pwm, 7);
+
+static struct attribute *pwm_dev_attrs[] = {
+       &sensor_dev_attr_pwm0.dev_attr.attr,
+       &sensor_dev_attr_pwm1.dev_attr.attr,
+       &sensor_dev_attr_pwm2.dev_attr.attr,
+       &sensor_dev_attr_pwm3.dev_attr.attr,
+       &sensor_dev_attr_pwm4.dev_attr.attr,
+       &sensor_dev_attr_pwm5.dev_attr.attr,
+       &sensor_dev_attr_pwm6.dev_attr.attr,
+       &sensor_dev_attr_pwm7.dev_attr.attr,
+       NULL,
+};
+
+static const struct attribute_group pwm_dev_group = {
+       .attrs = pwm_dev_attrs,
+       .is_visible = pwm_is_visible,
+};
+
+/*
+ * One read-only attribute per tach channel. The last macro argument is the
+ * channel number; the attributes are listed in channel order so that the
+ * array position passed to fan_dev_is_visible() equals the channel number.
+ *
+ * NOTE(review): the hwmon sysfs naming convention numbers fan attributes
+ * from 1; these start at fan0_input - confirm that this is intended.
+ */
+static SENSOR_DEVICE_ATTR(fan0_input, 0444, show_rpm, NULL, 0);
+static SENSOR_DEVICE_ATTR(fan1_input, 0444, show_rpm, NULL, 1);
+static SENSOR_DEVICE_ATTR(fan2_input, 0444, show_rpm, NULL, 2);
+static SENSOR_DEVICE_ATTR(fan3_input, 0444, show_rpm, NULL, 3);
+static SENSOR_DEVICE_ATTR(fan4_input, 0444, show_rpm, NULL, 4);
+static SENSOR_DEVICE_ATTR(fan5_input, 0444, show_rpm, NULL, 5);
+static SENSOR_DEVICE_ATTR(fan6_input, 0444, show_rpm, NULL, 6);
+static SENSOR_DEVICE_ATTR(fan7_input, 0444, show_rpm, NULL, 7);
+static SENSOR_DEVICE_ATTR(fan8_input, 0444, show_rpm, NULL, 8);
+static SENSOR_DEVICE_ATTR(fan9_input, 0444, show_rpm, NULL, 9);
+static SENSOR_DEVICE_ATTR(fan10_input, 0444, show_rpm, NULL, 10);
+static SENSOR_DEVICE_ATTR(fan11_input, 0444, show_rpm, NULL, 11);
+static SENSOR_DEVICE_ATTR(fan12_input, 0444, show_rpm, NULL, 12);
+static SENSOR_DEVICE_ATTR(fan13_input, 0444, show_rpm, NULL, 13);
+static SENSOR_DEVICE_ATTR(fan14_input, 0444, show_rpm, NULL, 14);
+static SENSOR_DEVICE_ATTR(fan15_input, 0444, show_rpm, NULL, 15);
+
+static struct attribute *fan_dev_attrs[] = {
+       &sensor_dev_attr_fan0_input.dev_attr.attr,
+       &sensor_dev_attr_fan1_input.dev_attr.attr,
+       &sensor_dev_attr_fan2_input.dev_attr.attr,
+       &sensor_dev_attr_fan3_input.dev_attr.attr,
+       &sensor_dev_attr_fan4_input.dev_attr.attr,
+       &sensor_dev_attr_fan5_input.dev_attr.attr,
+       &sensor_dev_attr_fan6_input.dev_attr.attr,
+       &sensor_dev_attr_fan7_input.dev_attr.attr,
+       &sensor_dev_attr_fan8_input.dev_attr.attr,
+       &sensor_dev_attr_fan9_input.dev_attr.attr,
+       &sensor_dev_attr_fan10_input.dev_attr.attr,
+       &sensor_dev_attr_fan11_input.dev_attr.attr,
+       &sensor_dev_attr_fan12_input.dev_attr.attr,
+       &sensor_dev_attr_fan13_input.dev_attr.attr,
+       &sensor_dev_attr_fan14_input.dev_attr.attr,
+       &sensor_dev_attr_fan15_input.dev_attr.attr,
+       NULL
+};
+
+static const struct attribute_group fan_dev_group = {
+       .attrs = fan_dev_attrs,
+       .is_visible = fan_dev_is_visible,
+};
+
+/*
+ * Set up clock type M, the only type this driver configures.
+ *
+ * The PWM frequency = 24MHz / (type M clock division L bit *
+ * type M clock division H bit * (type M PWM period bit + 1))
+ *
+ * The chosen values are cached in @priv and written to the PWM clock and
+ * tach control registers.
+ */
+static void aspeed_create_type(struct aspeed_pwm_tacho_data *priv)
+{
+       struct regmap *map = priv->regmap;
+
+       /* Cache the settings for the later duty and RPM calculations. */
+       priv->type_pwm_clock_division_h[TYPEM] = M_PWM_DIV_H;
+       priv->type_pwm_clock_division_l[TYPEM] = M_PWM_DIV_L;
+       priv->type_pwm_clock_unit[TYPEM] = M_PWM_PERIOD;
+       priv->type_fan_tach_clock_division[TYPEM] = M_TACH_CLK_DIV;
+       priv->type_fan_tach_unit[TYPEM] = M_TACH_UNIT;
+
+       aspeed_set_pwm_clock_values(map, TYPEM, M_PWM_DIV_H, M_PWM_DIV_L,
+                                   M_PWM_PERIOD);
+       aspeed_set_tacho_type_enable(map, TYPEM, true);
+       aspeed_set_tacho_type_values(map, TYPEM, M_TACH_MODE, M_TACH_UNIT,
+                                    M_TACH_CLK_DIV);
+}
+
+/*
+ * Bring up PWM port @pwm_port: enable it, mark it present, bind it to
+ * type M and apply the initial duty value INIT_FAN_CTRL.
+ */
+static void aspeed_create_pwm_port(struct aspeed_pwm_tacho_data *priv,
+                                  u8 pwm_port)
+{
+       struct regmap *map = priv->regmap;
+
+       aspeed_set_pwm_port_enable(map, pwm_port, true);
+       priv->pwm_present[pwm_port] = true;
+
+       /* The type must be recorded before the duty is scaled against it. */
+       priv->pwm_port_type[pwm_port] = TYPEM;
+       aspeed_set_pwm_port_type(map, pwm_port, TYPEM);
+
+       priv->pwm_port_fan_ctrl[pwm_port] = INIT_FAN_CTRL;
+       aspeed_set_pwm_port_fan_ctrl(priv, pwm_port, INIT_FAN_CTRL);
+}
+
+/*
+ * Enable every tach channel listed in @fan_tach_ch (@count entries) and
+ * associate it with PWM port @pwm_source.
+ *
+ * The loop counter is an int: a u8 counter compared against an int @count
+ * wraps and never terminates for 256 or more entries. Channel numbers
+ * outside the per-channel arrays are skipped instead of being written out
+ * of bounds.
+ */
+static void aspeed_create_fan_tach_channel(struct aspeed_pwm_tacho_data *priv,
+                                          u8 *fan_tach_ch,
+                                          int count,
+                                          u8 pwm_source)
+{
+       u8 index;
+       int i;
+
+       for (i = 0; i < count; i++) {
+               index = fan_tach_ch[i];
+               if (index >= ARRAY_SIZE(priv->fan_tach_present))
+                       continue;
+
+               aspeed_set_fan_tach_ch_enable(priv->regmap, index, true);
+               priv->fan_tach_present[index] = true;
+               priv->fan_tach_ch_source[index] = pwm_source;
+               aspeed_set_fan_tach_ch_source(priv->regmap, index, pwm_source);
+       }
+}
+
+/*
+ * Configure one fan described by device-tree node @child: "reg" names the
+ * PWM port and "aspeed,fan-tach-ch" lists the tach channels attached to it.
+ *
+ * Both come from the device tree and are used as array indices, so they
+ * are range-checked before use.
+ *
+ * Returns 0 on success, -EINVAL for an out-of-range port or channel or an
+ * empty/missing channel list, -ENOMEM on allocation failure, or the error
+ * returned by the property accessors.
+ */
+static int aspeed_create_fan(struct device *dev,
+                            struct device_node *child,
+                            struct aspeed_pwm_tacho_data *priv)
+{
+       u8 *fan_tach_ch;
+       u32 pwm_port;
+       int ret, count, i;
+
+       ret = of_property_read_u32(child, "reg", &pwm_port);
+       if (ret)
+               return ret;
+       /* pwm_port indexes pwm_port_params[] and the per-port priv arrays. */
+       if (pwm_port >= ARRAY_SIZE(pwm_port_params))
+               return -EINVAL;
+       aspeed_create_pwm_port(priv, (u8)pwm_port);
+
+       count = of_property_count_u8_elems(child, "aspeed,fan-tach-ch");
+       if (count < 1)
+               return -EINVAL;
+       fan_tach_ch = devm_kzalloc(dev, sizeof(*fan_tach_ch) * count,
+                                  GFP_KERNEL);
+       if (!fan_tach_ch)
+               return -ENOMEM;
+       ret = of_property_read_u8_array(child, "aspeed,fan-tach-ch",
+                                       fan_tach_ch, count);
+       if (ret)
+               return ret;
+
+       /* Each channel number indexes the per-channel priv arrays. */
+       for (i = 0; i < count; i++) {
+               if (fan_tach_ch[i] >= ARRAY_SIZE(priv->fan_tach_present))
+                       return -EINVAL;
+       }
+       aspeed_create_fan_tach_channel(priv, fan_tach_ch, count, pwm_port);
+
+       return 0;
+}
+
+/*
+ * Probe: map the controller registers, wrap them in a regmap, enable the
+ * controller clock, configure type M, create one fan per device-tree child
+ * node and register the hwmon device with the pwm and fan attribute groups.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int aspeed_pwm_tacho_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct device_node *np, *child;
+       struct aspeed_pwm_tacho_data *priv;
+       void __iomem *regs;
+       struct resource *res;
+       struct device *hwmon;
+       struct clk *clk;
+       int ret;
+
+       np = dev->of_node;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res)
+               return -ENOENT;
+       regs = devm_ioremap_resource(dev, res);
+       if (IS_ERR(regs))
+               return PTR_ERR(regs);
+       priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+       priv->regmap = devm_regmap_init(dev, NULL, (__force void *)regs,
+                       &aspeed_pwm_tacho_regmap_config);
+       if (IS_ERR(priv->regmap))
+               return PTR_ERR(priv->regmap);
+       /* Start from a known state: no tach channel has a PWM source yet. */
+       regmap_write(priv->regmap, ASPEED_PTCR_TACH_SOURCE, 0);
+       regmap_write(priv->regmap, ASPEED_PTCR_TACH_SOURCE_EXT, 0);
+
+       clk = devm_clk_get(dev, NULL);
+       /*
+        * NOTE(review): this hides the real error code, including a possible
+        * -EPROBE_DEFER - confirm whether PTR_ERR(clk) should be returned.
+        */
+       if (IS_ERR(clk))
+               return -ENODEV;
+       priv->clk_freq = clk_get_rate(clk);
+       aspeed_set_clock_enable(priv->regmap, true);
+       aspeed_set_clock_source(priv->regmap, 0);
+
+       aspeed_create_type(priv);
+
+       /*
+        * for_each_child_of_node() drops the reference on each child when it
+        * advances, so the reference is released by hand only when leaving
+        * the loop early. @np was not taken with of_node_get() here, so it
+        * is not put either.
+        */
+       for_each_child_of_node(np, child) {
+               ret = aspeed_create_fan(dev, child, priv);
+               if (ret) {
+                       of_node_put(child);
+                       return ret;
+               }
+       }
+
+       priv->groups[0] = &pwm_dev_group;
+       priv->groups[1] = &fan_dev_group;
+       priv->groups[2] = NULL;
+       hwmon = devm_hwmon_device_register_with_groups(dev,
+                                                      "aspeed_pwm_tacho",
+                                                      priv, priv->groups);
+       return PTR_ERR_OR_ZERO(hwmon);
+}
+
+/* Device-tree compatibles handled by this driver. */
+static const struct of_device_id of_pwm_tacho_match_table[] = {
+       { .compatible = "aspeed,ast2400-pwm-tacho" },
+       { .compatible = "aspeed,ast2500-pwm-tacho" },
+       { },
+};
+MODULE_DEVICE_TABLE(of, of_pwm_tacho_match_table);
+
+/*
+ * Platform driver glue: devices are matched through the OF match table and
+ * set up by aspeed_pwm_tacho_probe(); no remove callback is provided.
+ */
+static struct platform_driver aspeed_pwm_tacho_driver = {
+       .probe          = aspeed_pwm_tacho_probe,
+       .driver         = {
+               .name   = "aspeed_pwm_tacho",
+               .of_match_table = of_pwm_tacho_match_table,
+       },
+};
+
+module_platform_driver(aspeed_pwm_tacho_driver);
+
+MODULE_AUTHOR("Jaghathiswari Rankappagounder Natarajan <jaghu@google.com>");
+MODULE_DESCRIPTION("ASPEED PWM and Fan Tacho device driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c

index cccef87963e050afb99181d63d3ed5dcf401d5a1..975c43d446f8593d0e701efeaf8da133717cff74 100644 (file)
--- a/drivers/hwmon/asus_atk0110.c
+++ b/drivers/hwmon/asus_atk0110.c
@@ -646,6 +646,9 @@ static int atk_read_value(struct atk_sensor_data *sensor, u64 *value)
                 else
                         err = atk_read_value_new(sensor, value);
  
+               if (err)
+                       return err;
+
                 sensor->is_valid = true;
                 sensor->last_updated = jiffies;
                 sensor->cached_value = *value;
diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c

index 34704b0451b49700b5bf5ce7eb47dea905680a17..3189246302a6b33940072df8fb1e7d05a611024d 100644 (file)
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -995,6 +995,13 @@ static struct dmi_system_id i8k_dmi_table[] __initdata = {
                 },
                 .driver_data = (void *)&i8k_config_data[DELL_XPS],
         },
+       {
+               .ident = "Dell XPS 15 9560",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "XPS 15 9560"),
+               },
+       },
         { }
  };
  
diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c

index 28375d59cc36c8bebfb08c7f86a009fcd4e74678..dd6e17c1076be92407f2d232ffd27f37a1f18682 100644 (file)
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -186,7 +186,7 @@ static ssize_t hwmon_attr_show_string(struct device *dev,
                                       char *buf)
  {
         struct hwmon_device_attribute *hattr = to_hwmon_attr(devattr);
-       char *s;
+       const char *s;
         int ret;
  
         ret = hattr->ops->read_string(dev, hattr->type, hattr->attr,
diff --git a/drivers/hwmon/ina209.c b/drivers/hwmon/ina209.c

index 5378fdefc1f76d7782d9073e5a98206d953b9e1d..aa0768ce8aeab54a0b3cc7988008d434df6eaa3c 100644 (file)
--- a/drivers/hwmon/ina209.c
+++ b/drivers/hwmon/ina209.c
@@ -117,7 +117,7 @@ static long ina209_from_reg(const u8 reg, const u16 val)
         case INA209_SHUNT_VOLTAGE_POS_WARN:
         case INA209_SHUNT_VOLTAGE_NEG_WARN:
                 /* LSB=10 uV. Convert to mV. */
-               return DIV_ROUND_CLOSEST(val, 100);
+               return DIV_ROUND_CLOSEST((s16)val, 100);
  
         case INA209_BUS_VOLTAGE:
         case INA209_BUS_VOLTAGE_MAX_PEAK:
@@ -146,7 +146,7 @@ static long ina209_from_reg(const u8 reg, const u16 val)
  
         case INA209_CURRENT:
                 /* LSB=1 mA (selected). Is in mA */
-               return val;
+               return (s16)val;
         }
  
         /* programmer goofed */
@@ -608,11 +608,18 @@ static const struct i2c_device_id ina209_id[] = {
  };
  MODULE_DEVICE_TABLE(i2c, ina209_id);
  
+static const struct of_device_id ina209_of_match[] = {
+       { .compatible = "ti,ina209" },
+       { },
+};
+MODULE_DEVICE_TABLE(of, ina209_of_match);
+
  /* This is the driver that will be inserted */
  static struct i2c_driver ina209_driver = {
         .class          = I2C_CLASS_HWMON,
         .driver = {
                 .name   = "ina209",
+               .of_match_table = of_match_ptr(ina209_of_match),
         },
         .probe          = ina209_probe,
         .remove         = ina209_remove,
diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c

index b24f1d3045f04386f7dc3a1c5af4b7ee7724e891..62e38fa8cda23c790635664364ba90e6d8292414 100644 (file)
--- a/drivers/hwmon/ina2xx.c
+++ b/drivers/hwmon/ina2xx.c
@@ -34,6 +34,7 @@
  #include <linux/hwmon.h>
  #include <linux/hwmon-sysfs.h>
  #include <linux/jiffies.h>
+#include <linux/of_device.h>
  #include <linux/of.h>
  #include <linux/delay.h>
  #include <linux/util_macros.h>
@@ -424,13 +425,19 @@ static int ina2xx_probe(struct i2c_client *client,
         struct device *hwmon_dev;
         u32 val;
         int ret, group = 0;
+       enum ina2xx_ids chip;
+
+       if (client->dev.of_node)        /* instantiated from DT */
+               chip = (uintptr_t)of_device_get_match_data(&client->dev);
+       else
+               chip = id->driver_data;
  
         data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
         if (!data)
                 return -ENOMEM;
  
         /* set the device type */
-       data->config = &ina2xx_config[id->driver_data];
+       data->config = &ina2xx_config[chip];
  
         if (of_property_read_u32(dev->of_node, "shunt-resistor", &val) < 0) {
                 struct ina2xx_platform_data *pdata = dev_get_platdata(dev);
@@ -487,9 +494,35 @@ static const struct i2c_device_id ina2xx_id[] = {
  };
  MODULE_DEVICE_TABLE(i2c, ina2xx_id);
  
+static const struct of_device_id ina2xx_of_match[] = {
+       {
+               .compatible = "ti,ina219",
+               .data = (void *)ina219
+       },
+       {
+               .compatible = "ti,ina220",
+               .data = (void *)ina219
+       },
+       {
+               .compatible = "ti,ina226",
+               .data = (void *)ina226
+       },
+       {
+               .compatible = "ti,ina230",
+               .data = (void *)ina226
+       },
+       {
+               .compatible = "ti,ina231",
+               .data = (void *)ina226
+       },
+       { },
+};
+MODULE_DEVICE_TABLE(of, ina2xx_of_match);
+
  static struct i2c_driver ina2xx_driver = {
         .driver = {
                 .name   = "ina2xx",
+               .of_match_table = of_match_ptr(ina2xx_of_match),
         },
         .probe          = ina2xx_probe,
         .id_table       = ina2xx_id,
diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c

index efb01c247e2d90680f02980af76148799f3823fc..4dfc7238313ebd393e0e4dc0e17bb4414d1e8b68 100644 (file)
--- a/drivers/hwmon/it87.c
+++ b/drivers/hwmon/it87.c
@@ -3198,7 +3198,7 @@ static int __init sm_it87_init(void)
  {
         int sioaddr[2] = { REG_2E, REG_4E };
         struct it87_sio_data sio_data;
-       unsigned short isa_address;
+       unsigned short isa_address[2];
         bool found = false;
         int i, err;
  
@@ -3208,15 +3208,29 @@ static int __init sm_it87_init(void)
  
         for (i = 0; i < ARRAY_SIZE(sioaddr); i++) {
                 memset(&sio_data, 0, sizeof(struct it87_sio_data));
-               isa_address = 0;
-               err = it87_find(sioaddr[i], &isa_address, &sio_data);
-               if (err || isa_address == 0)
+               isa_address[i] = 0;
+               err = it87_find(sioaddr[i], &isa_address[i], &sio_data);
+               if (err || isa_address[i] == 0)
                         continue;
+               /*
+                * Don't register second chip if its ISA address matches
+                * the first chip's ISA address.
+                */
+               if (i && isa_address[i] == isa_address[0])
+                       break;
  
-               err = it87_device_add(i, isa_address, &sio_data);
+               err = it87_device_add(i, isa_address[i], &sio_data);
                 if (err)
                         goto exit_dev_unregister;
+
                 found = true;
+
+               /*
+                * IT8705F may respond on both SIO addresses.
+                * Stop probing after finding one.
+                */
+               if (sio_data.type == it87)
+                       break;
         }
  
         if (!found) {
diff --git a/drivers/hwmon/lm63.c b/drivers/hwmon/lm63.c

index 2e1948699114002b6f8b2ea7da30e324e43392b4..4c1770920d299229f4d0004250b1296e110762e5 100644 (file)
--- a/drivers/hwmon/lm63.c
+++ b/drivers/hwmon/lm63.c
@@ -46,6 +46,7 @@
  #include <linux/hwmon.h>
  #include <linux/err.h>
  #include <linux/mutex.h>
+#include <linux/of_device.h>
  #include <linux/sysfs.h>
  #include <linux/types.h>
  
@@ -1115,6 +1116,9 @@ static int lm63_probe(struct i2c_client *client,
         mutex_init(&data->update_lock);
  
         /* Set the device type */
-       data->kind = id->driver_data;
+       if (client->dev.of_node)
+               data->kind = (uintptr_t)of_device_get_match_data(&client->dev);
+       else
+               data->kind = id->driver_data;
         if (data->kind == lm64)
                 data->temp2_offset = 16000;
@@ -1149,10 +1153,28 @@ static const struct i2c_device_id lm63_id[] = {
  };
  MODULE_DEVICE_TABLE(i2c, lm63_id);
  
+static const struct of_device_id lm63_of_match[] = {
+       {
+               .compatible = "national,lm63",
+               .data = (void *)lm63
+       },
+       {
+               .compatible = "national,lm64",
+               .data = (void *)lm64
+       },
+       {
+               .compatible = "national,lm96163",
+               .data = (void *)lm96163
+       },
+       { },
+};
+MODULE_DEVICE_TABLE(of, lm63_of_match);
+
  static struct i2c_driver lm63_driver = {
         .class          = I2C_CLASS_HWMON,
         .driver = {
                 .name   = "lm63",
+               .of_match_table = of_match_ptr(lm63_of_match),
         },
         .probe          = lm63_probe,
         .id_table       = lm63_id,
diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c

index eff3b24d847370eeca210ee4b83f129cac241dd0..005ffb5ffa92dacd91aed181b5b6bd6b1e26d225 100644 (file)
--- a/drivers/hwmon/lm75.c
+++ b/drivers/hwmon/lm75.c
@@ -26,6 +26,7 @@
  #include <linux/hwmon.h>
  #include <linux/hwmon-sysfs.h>
  #include <linux/err.h>
+#include <linux/of_device.h>
  #include <linux/of.h>
  #include <linux/regmap.h>
  #include "lm75.h"
@@ -273,7 +274,12 @@ lm75_probe(struct i2c_client *client, const struct i2c_device_id *id)
         int status, err;
         u8 set_mask, clr_mask;
         int new;
-       enum lm75_type kind = id->driver_data;
+       enum lm75_type kind;
+
+       if (client->dev.of_node)        /* instantiated from DT */
+               kind = (uintptr_t)of_device_get_match_data(&client->dev);
+       else
+               kind = id->driver_data;
  
         if (!i2c_check_functionality(client->adapter,
                         I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA))
@@ -424,6 +430,95 @@ static const struct i2c_device_id lm75_ids[] = {
  };
  MODULE_DEVICE_TABLE(i2c, lm75_ids);
  
+static const struct of_device_id lm75_of_match[] = {
+       {
+               .compatible = "adi,adt75",
+               .data = (void *)adt75
+       },
+       {
+               .compatible = "dallas,ds1775",
+               .data = (void *)ds1775
+       },
+       {
+               .compatible = "dallas,ds75",
+               .data = (void *)ds75
+       },
+       {
+               .compatible = "dallas,ds7505",
+               .data = (void *)ds7505
+       },
+       {
+               .compatible = "gmt,g751",
+               .data = (void *)g751
+       },
+       {
+               .compatible = "national,lm75",
+               .data = (void *)lm75
+       },
+       {
+               .compatible = "national,lm75a",
+               .data = (void *)lm75a
+       },
+       {
+               .compatible = "national,lm75b",
+               .data = (void *)lm75b
+       },
+       {
+               .compatible = "maxim,max6625",
+               .data = (void *)max6625
+       },
+       {
+               .compatible = "maxim,max6626",
+               .data = (void *)max6626
+       },
+       {
+               .compatible = "maxim,mcp980x",
+               .data = (void *)mcp980x
+       },
+       {
+               .compatible = "st,stds75",
+               .data = (void *)stds75
+       },
+       {
+               .compatible = "microchip,tcn75",
+               .data = (void *)tcn75
+       },
+       {
+               .compatible = "ti,tmp100",
+               .data = (void *)tmp100
+       },
+       {
+               .compatible = "ti,tmp101",
+               .data = (void *)tmp101
+       },
+       {
+               .compatible = "ti,tmp105",
+               .data = (void *)tmp105
+       },
+       {
+               .compatible = "ti,tmp112",
+               .data = (void *)tmp112
+       },
+       {
+               .compatible = "ti,tmp175",
+               .data = (void *)tmp175
+       },
+       {
+               .compatible = "ti,tmp275",
+               .data = (void *)tmp275
+       },
+       {
+               .compatible = "ti,tmp75",
+               .data = (void *)tmp75
+       },
+       {
+               .compatible = "ti,tmp75c",
+               .data = (void *)tmp75c
+       },
+       { },
+};
+MODULE_DEVICE_TABLE(of, lm75_of_match);
+
  #define LM75A_ID 0xA1
  
  /* Return 0 if detection is successful, -ENODEV otherwise */
@@ -560,6 +655,7 @@ static struct i2c_driver lm75_driver = {
         .class          = I2C_CLASS_HWMON,
         .driver = {
                 .name   = "lm75",
+               .of_match_table = of_match_ptr(lm75_of_match),
                 .pm     = LM75_DEV_PM_OPS,
         },
         .probe          = lm75_probe,
diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c

index 691469ffa24ec73a73ac73deabd13871e18c7df3..0a325878e8f52c9ed216332a81104cd418aaa26d 100644 (file)
--- a/drivers/hwmon/lm85.c
+++ b/drivers/hwmon/lm85.c
@@ -25,6 +25,7 @@
   */
  
  #include <linux/module.h>
+#include <linux/of_device.h>
  #include <linux/init.h>
  #include <linux/slab.h>
  #include <linux/jiffies.h>
@@ -1552,7 +1553,10 @@ static int lm85_probe(struct i2c_client *client, const struct i2c_device_id *id)
                 return -ENOMEM;
  
         data->client = client;
-       data->type = id->driver_data;
+       if (client->dev.of_node)        /* instantiated from DT */
+               data->type = (uintptr_t)of_device_get_match_data(&client->dev);
+       else
+               data->type = id->driver_data;
         mutex_init(&data->update_lock);
  
         /* Fill in the chip specific driver values */
@@ -1623,10 +1627,60 @@ static const struct i2c_device_id lm85_id[] = {
  };
  MODULE_DEVICE_TABLE(i2c, lm85_id);
  
+static const struct of_device_id lm85_of_match[] = {
+       {
+               .compatible = "adi,adm1027",
+               .data = (void *)adm1027
+       },
+       {
+               .compatible = "adi,adt7463",
+               .data = (void *)adt7463
+       },
+       {
+               .compatible = "adi,adt7468",
+               .data = (void *)adt7468
+       },
+       {
+               .compatible = "national,lm85",
+               .data = (void *)lm85
+       },
+       {
+               .compatible = "national,lm85b",
+               .data = (void *)lm85
+       },
+       {
+               .compatible = "national,lm85c",
+               .data = (void *)lm85
+       },
+       {
+               .compatible = "smsc,emc6d100",
+               .data = (void *)emc6d100
+       },
+       {
+               .compatible = "smsc,emc6d101",
+               .data = (void *)emc6d100
+       },
+       {
+               .compatible = "smsc,emc6d102",
+               .data = (void *)emc6d102
+       },
+       {
+               .compatible = "smsc,emc6d103",
+               .data = (void *)emc6d103
+       },
+       {
+               .compatible = "smsc,emc6d103s",
+               .data = (void *)emc6d103s
+       },
+       { },
+};
+MODULE_DEVICE_TABLE(of, lm85_of_match);
+
  static struct i2c_driver lm85_driver = {
         .class          = I2C_CLASS_HWMON,
         .driver = {
                 .name   = "lm85",
+               .of_match_table = of_match_ptr(lm85_of_match),
         },
         .probe          = lm85_probe,
         .id_table       = lm85_id,
diff --git a/drivers/hwmon/lm87.c b/drivers/hwmon/lm87.c

index e06faf9d3f0f4f38375714e998689d2edc7725c4..b48d30760388ba2e9d095dd3ed1fa1c528a18bff 100644 (file)
--- a/drivers/hwmon/lm87.c
+++ b/drivers/hwmon/lm87.c
@@ -66,6 +66,7 @@
  #include <linux/hwmon-vid.h>
  #include <linux/err.h>
  #include <linux/mutex.h>
+#include <linux/regulator/consumer.h>
  
  /*
   * Addresses to scan
@@ -74,8 +75,6 @@
  
  static const unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END };
  
-enum chips { lm87, adm1024 };
-
  /*
   * The LM87 registers
   */
@@ -855,8 +854,26 @@ static int lm87_init_client(struct i2c_client *client)
  {
         struct lm87_data *data = i2c_get_clientdata(client);
         int rc;
-
-       if (dev_get_platdata(&client->dev)) {
+       struct device_node *of_node = client->dev.of_node;
+       u8 val = 0;
+       struct regulator *vcc = NULL;
+
+       if (of_node) {
+               if (of_property_read_bool(of_node, "has-temp3"))
+                       val |= CHAN_TEMP3;
+               if (of_property_read_bool(of_node, "has-in6"))
+                       val |= CHAN_NO_FAN(0);
+               if (of_property_read_bool(of_node, "has-in7"))
+                       val |= CHAN_NO_FAN(1);
+               vcc = devm_regulator_get_optional(&client->dev, "vcc");
+               if (!IS_ERR(vcc)) {
+                       if (regulator_get_voltage(vcc) == 5000000)
+                               val |= CHAN_VCC_5V;
+               }
+               data->channel = val;
+               lm87_write_value(client,
+                               LM87_REG_CHANNEL_MODE, data->channel);
+       } else if (dev_get_platdata(&client->dev)) {
                 data->channel = *(u8 *)dev_get_platdata(&client->dev);
                 lm87_write_value(client,
                                  LM87_REG_CHANNEL_MODE, data->channel);
@@ -962,16 +979,24 @@ static int lm87_probe(struct i2c_client *client, const struct i2c_device_id *id)
   */
  
  static const struct i2c_device_id lm87_id[] = {
-       { "lm87", lm87 },
-       { "adm1024", adm1024 },
+       { "lm87", 0 },
+       { "adm1024", 0 },
         { }
  };
  MODULE_DEVICE_TABLE(i2c, lm87_id);
  
+static const struct of_device_id lm87_of_match[] = {
+       { .compatible = "ti,lm87" },
+       { .compatible = "adi,adm1024" },
+       { },
+};
+MODULE_DEVICE_TABLE(of, lm87_of_match);
+
  static struct i2c_driver lm87_driver = {
         .class          = I2C_CLASS_HWMON,
         .driver = {
                 .name   = "lm87",
+               .of_match_table = lm87_of_match,
         },
         .probe          = lm87_probe,
         .id_table       = lm87_id,
diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c

index aff5297bc2bcdc60a578e3df5a40b552796fa30a..c2f411c290bf9af92eddb67b525a02522449b0e3 100644 (file)
--- a/drivers/hwmon/lm90.c
+++ b/drivers/hwmon/lm90.c
@@ -92,6 +92,7 @@
  #include <linux/hwmon.h>
  #include <linux/err.h>
  #include <linux/mutex.h>
+#include <linux/of_device.h>
  #include <linux/sysfs.h>
  #include <linux/interrupt.h>
  #include <linux/regulator/consumer.h>
@@ -235,6 +236,99 @@ static const struct i2c_device_id lm90_id[] = {
  };
  MODULE_DEVICE_TABLE(i2c, lm90_id);
  
+static const struct of_device_id lm90_of_match[] = {
+       {
+               .compatible = "adi,adm1032",
+               .data = (void *)adm1032
+       },
+       {
+               .compatible = "adi,adt7461",
+               .data = (void *)adt7461
+       },
+       {
+               .compatible = "adi,adt7461a",
+               .data = (void *)adt7461
+       },
+       {
+               .compatible = "gmt,g781",
+               .data = (void *)g781
+       },
+       {
+               .compatible = "national,lm90",
+               .data = (void *)lm90
+       },
+       {
+               .compatible = "national,lm86",
+               .data = (void *)lm86
+       },
+       {
+               .compatible = "national,lm89",
+               .data = (void *)lm86
+       },
+       {
+               .compatible = "national,lm99",
+               .data = (void *)lm99
+       },
+       {
+               .compatible = "dallas,max6646",
+               .data = (void *)max6646
+       },
+       {
+               .compatible = "dallas,max6647",
+               .data = (void *)max6646
+       },
+       {
+               .compatible = "dallas,max6649",
+               .data = (void *)max6646
+       },
+       {
+               .compatible = "dallas,max6657",
+               .data = (void *)max6657
+       },
+       {
+               .compatible = "dallas,max6658",
+               .data = (void *)max6657
+       },
+       {
+               .compatible = "dallas,max6659",
+               .data = (void *)max6659
+       },
+       {
+               .compatible = "dallas,max6680",
+               .data = (void *)max6680
+       },
+       {
+               .compatible = "dallas,max6681",
+               .data = (void *)max6680
+       },
+       {
+               .compatible = "dallas,max6695",
+               .data = (void *)max6696
+       },
+       {
+               .compatible = "dallas,max6696",
+               .data = (void *)max6696
+       },
+       {
+               .compatible = "onnn,nct1008",
+               .data = (void *)adt7461
+       },
+       {
+               .compatible = "winbond,w83l771",
+               .data = (void *)w83l771
+       },
+       {
+               .compatible = "nxp,sa56004",
+               .data = (void *)sa56004
+       },
+       {
+               .compatible = "ti,tmp451",
+               .data = (void *)tmp451
+       },
+       { },
+};
+MODULE_DEVICE_TABLE(of, lm90_of_match);
+
  /*
   * chip type specific parameters
   */
@@ -1677,7 +1771,10 @@ static int lm90_probe(struct i2c_client *client,
         mutex_init(&data->update_lock);
  
         /* Set the device type */
-       data->kind = id->driver_data;
+       if (client->dev.of_node)        /* instantiated from DT */
+               data->kind = (uintptr_t)of_device_get_match_data(&client->dev);
+       else
+               data->kind = id->driver_data;
         if (data->kind == adm1032) {
                 if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE))
                         client->flags &= ~I2C_CLIENT_PEC;
@@ -1816,6 +1913,7 @@ static struct i2c_driver lm90_driver = {
         .class          = I2C_CLASS_HWMON,
         .driver = {
                 .name   = "lm90",
+               .of_match_table = of_match_ptr(lm90_of_match),
         },
         .probe          = lm90_probe,
         .alert          = lm90_alert,
diff --git a/drivers/hwmon/lm95245.c b/drivers/hwmon/lm95245.c

index a3bfd88752ca253281bb15f5d3f55dafb3a92039..27cb06d65594680f483ccbe461b18a2441f30650 100644 (file)
--- a/drivers/hwmon/lm95245.c
+++ b/drivers/hwmon/lm95245.c
@@ -622,10 +622,18 @@ static const struct i2c_device_id lm95245_id[] = {
  };
  MODULE_DEVICE_TABLE(i2c, lm95245_id);
  
+static const struct of_device_id lm95245_of_match[] = {
+       { .compatible = "national,lm95235" },
+       { .compatible = "national,lm95245" },
+       { },
+};
+MODULE_DEVICE_TABLE(of, lm95245_of_match);
+
  static struct i2c_driver lm95245_driver = {
         .class          = I2C_CLASS_HWMON,
         .driver = {
                 .name   = "lm95245",
+               .of_match_table = of_match_ptr(lm95245_of_match),
         },
         .probe          = lm95245_probe,
         .id_table       = lm95245_id,
diff --git a/drivers/hwmon/max31790.c b/drivers/hwmon/max31790.c

index c1b9275978f9d9ee9172e99e569de7dca48b491d..281491cca5103ad4b82e2f2a52be6f2408262bdb 100644 (file)
--- a/drivers/hwmon/max31790.c
+++ b/drivers/hwmon/max31790.c
@@ -311,7 +311,7 @@ static int max31790_write_pwm(struct device *dev, u32 attr, int channel,
                 data->pwm[channel] = val << 8;
                 err = i2c_smbus_write_word_swapped(client,
                                                    MAX31790_REG_PWMOUT(channel),
-                                                  val);
+                                                  data->pwm[channel]);
                 break;
         case hwmon_pwm_enable:
                 fan_config = data->fan_config[channel];
diff --git a/drivers/hwmon/max6697.c b/drivers/hwmon/max6697.c

index f03a71722849acc85f76af700d44f6cc532dc11c..221fd149205760371a0c128710c5773588ad1dc1 100644 (file)
--- a/drivers/hwmon/max6697.c
+++ b/drivers/hwmon/max6697.c
@@ -24,6 +24,7 @@
  #include <linux/hwmon-sysfs.h>
  #include <linux/err.h>
  #include <linux/mutex.h>
+#include <linux/of_device.h>
  #include <linux/of.h>
  
  #include <linux/platform_data/max6697.h>
@@ -632,7 +633,10 @@ static int max6697_probe(struct i2c_client *client,
         if (!data)
                 return -ENOMEM;
  
-       data->type = id->driver_data;
+       if (client->dev.of_node)        /* instantiated from DT */
+               data->type = (uintptr_t)of_device_get_match_data(&client->dev);
+       else
+               data->type = id->driver_data;
         data->chip = &max6697_chip_data[data->type];
         data->client = client;
         mutex_init(&data->update_lock);
@@ -662,10 +666,56 @@ static const struct i2c_device_id max6697_id[] = {
  };
  MODULE_DEVICE_TABLE(i2c, max6697_id);
  
+static const struct of_device_id max6697_of_match[] = {
+       {
+               .compatible = "maxim,max6581",
+               .data = (void *)max6581
+       },
+       {
+               .compatible = "maxim,max6602",
+               .data = (void *)max6602
+       },
+       {
+               .compatible = "maxim,max6622",
+               .data = (void *)max6622
+       },
+       {
+               .compatible = "maxim,max6636",
+               .data = (void *)max6636
+       },
+       {
+               .compatible = "maxim,max6689",
+               .data = (void *)max6689
+       },
+       {
+               .compatible = "maxim,max6693",
+               .data = (void *)max6693
+       },
+       {
+               .compatible = "maxim,max6694",
+               .data = (void *)max6694
+       },
+       {
+               .compatible = "maxim,max6697",
+               .data = (void *)max6697
+       },
+       {
+               .compatible = "maxim,max6698",
+               .data = (void *)max6698
+       },
+       {
+               .compatible = "maxim,max6699",
+               .data = (void *)max6699
+       },
+       { },
+};
+MODULE_DEVICE_TABLE(of, max6697_of_match);
+
  static struct i2c_driver max6697_driver = {
         .class = I2C_CLASS_HWMON,
         .driver = {
                 .name   = "max6697",
+               .of_match_table = of_match_ptr(max6697_of_match),
         },
         .probe = max6697_probe,
         .id_table = max6697_id,
diff --git a/drivers/hwmon/pmbus/adm1275.c b/drivers/hwmon/pmbus/adm1275.c

index 4ab5293c7bf0d5589868efd15f6c596b783856a3..00d6995af4c23c50ea3e0de261bc641bd2ba88ee 100644 (file)
--- a/drivers/hwmon/pmbus/adm1275.c
+++ b/drivers/hwmon/pmbus/adm1275.c
@@ -101,8 +101,8 @@ static const struct coefficients adm1075_coefficients[] = {
         [0] = { 27169, 0, -1 },         /* voltage */
         [1] = { 806, 20475, -1 },       /* current, irange25 */
         [2] = { 404, 20475, -1 },       /* current, irange50 */
-       [3] = { 0, -1, 8549 },          /* power, irange25 */
-       [4] = { 0, -1, 4279 },          /* power, irange50 */
+       [3] = { 8549, 0, -1 },          /* power, irange25 */
+       [4] = { 4279, 0, -1 },          /* power, irange50 */
  };
  
  static const struct coefficients adm1275_coefficients[] = {
diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c

index 3e3aa950277f4340ee12451d764838a1341671c4..3518f0c0893447d07b18d1aeb0e21839bb6a954d 100644 (file)
--- a/drivers/hwmon/pmbus/ucd9000.c
+++ b/drivers/hwmon/pmbus/ucd9000.c
@@ -21,6 +21,7 @@
  
  #include <linux/kernel.h>
  #include <linux/module.h>
+#include <linux/of_device.h>
  #include <linux/init.h>
  #include <linux/err.h>
  #include <linux/slab.h>
@@ -119,6 +120,35 @@ static const struct i2c_device_id ucd9000_id[] = {
  };
  MODULE_DEVICE_TABLE(i2c, ucd9000_id);
  
+static const struct of_device_id ucd9000_of_match[] = {
+       {
+               .compatible = "ti,ucd9000",
+               .data = (void *)ucd9000
+       },
+       {
+               .compatible = "ti,ucd90120",
+               .data = (void *)ucd90120
+       },
+       {
+               .compatible = "ti,ucd90124",
+               .data = (void *)ucd90124
+       },
+       {
+               .compatible = "ti,ucd90160",
+               .data = (void *)ucd90160
+       },
+       {
+               .compatible = "ti,ucd9090",
+               .data = (void *)ucd9090
+       },
+       {
+               .compatible = "ti,ucd90910",
+               .data = (void *)ucd90910
+       },
+       { },
+};
+MODULE_DEVICE_TABLE(of, ucd9000_of_match);
+
  static int ucd9000_probe(struct i2c_client *client,
                          const struct i2c_device_id *id)
  {
@@ -126,6 +156,7 @@ static int ucd9000_probe(struct i2c_client *client,
         struct ucd9000_data *data;
         struct pmbus_driver_info *info;
         const struct i2c_device_id *mid;
+       enum chips chip;
         int i, ret;
  
         if (!i2c_check_functionality(client->adapter,
@@ -151,7 +182,12 @@ static int ucd9000_probe(struct i2c_client *client,
                 return -ENODEV;
         }
  
-       if (id->driver_data != ucd9000 && id->driver_data != mid->driver_data)
+       if (client->dev.of_node)        /* instantiated from DT */
+               chip = (uintptr_t)of_device_get_match_data(&client->dev);
+       else
+               chip = id->driver_data;
+
+       if (chip != ucd9000 && chip != mid->driver_data)
                 dev_notice(&client->dev,
                            "Device mismatch: Configured %s, detected %s\n",
                            id->name, mid->name);
@@ -234,6 +270,7 @@ static int ucd9000_probe(struct i2c_client *client,
  static struct i2c_driver ucd9000_driver = {
         .driver = {
                 .name = "ucd9000",
+               .of_match_table = of_match_ptr(ucd9000_of_match),
         },
         .probe = ucd9000_probe,
         .remove = pmbus_do_remove,
diff --git a/drivers/hwmon/pmbus/ucd9200.c b/drivers/hwmon/pmbus/ucd9200.c

index 033d6aca47d3c2d7194ec9e935bfaa135f7778db..a8712c5ded4e939843f0e1626525bfc6cee78627 100644 (file)
--- a/drivers/hwmon/pmbus/ucd9200.c
+++ b/drivers/hwmon/pmbus/ucd9200.c
@@ -20,6 +20,7 @@
  
  #include <linux/kernel.h>
  #include <linux/module.h>
+#include <linux/of_device.h>
  #include <linux/init.h>
  #include <linux/err.h>
  #include <linux/slab.h>
@@ -46,12 +47,50 @@ static const struct i2c_device_id ucd9200_id[] = {
  };
  MODULE_DEVICE_TABLE(i2c, ucd9200_id);
  
+static const struct of_device_id ucd9200_of_match[] = {
+       {
+               .compatible = "ti,ucd9200",
+               .data = (void *)ucd9200
+       },
+       {
+               .compatible = "ti,ucd9220",
+               .data = (void *)ucd9220
+       },
+       {
+               .compatible = "ti,ucd9222",
+               .data = (void *)ucd9222
+       },
+       {
+               .compatible = "ti,ucd9224",
+               .data = (void *)ucd9224
+       },
+       {
+               .compatible = "ti,ucd9240",
+               .data = (void *)ucd9240
+       },
+       {
+               .compatible = "ti,ucd9244",
+               .data = (void *)ucd9244
+       },
+       {
+               .compatible = "ti,ucd9246",
+               .data = (void *)ucd9246
+       },
+       {
+               .compatible = "ti,ucd9248",
+               .data = (void *)ucd9248
+       },
+       { },    /* sentinel */
+};
+MODULE_DEVICE_TABLE(of, ucd9200_of_match);
+
  static int ucd9200_probe(struct i2c_client *client,
                          const struct i2c_device_id *id)
  {
         u8 block_buffer[I2C_SMBUS_BLOCK_MAX + 1];
         struct pmbus_driver_info *info;
         const struct i2c_device_id *mid;
+       enum chips chip;
         int i, j, ret;
  
         if (!i2c_check_functionality(client->adapter,
@@ -76,7 +115,13 @@ static int ucd9200_probe(struct i2c_client *client,
                 dev_err(&client->dev, "Unsupported device\n");
                 return -ENODEV;
         }
-       if (id->driver_data != ucd9200 && id->driver_data != mid->driver_data)
+
+       if (client->dev.of_node)        /* instantiated from DT */
+               chip = (uintptr_t)of_device_get_match_data(&client->dev);
+       else
+               chip = id->driver_data;
+
+       if (chip != ucd9200 && chip != mid->driver_data)
                 dev_notice(&client->dev,
                            "Device mismatch: Configured %s, detected %s\n",
                            id->name, mid->name);
@@ -167,6 +212,7 @@ static int ucd9200_probe(struct i2c_client *client,
  static struct i2c_driver ucd9200_driver = {
         .driver = {
                 .name = "ucd9200",
+               .of_match_table = of_match_ptr(ucd9200_of_match),
         },
         .probe = ucd9200_probe,
         .remove = pmbus_do_remove,
diff --git a/drivers/hwmon/stts751.c b/drivers/hwmon/stts751.c

index 55450680fb583388679da4d9930fc95c4dad753f..d56251d6eec2e49fcf43bc74b20306896e1f0e73 100644 (file)
--- a/drivers/hwmon/stts751.c
+++ b/drivers/hwmon/stts751.c
@@ -85,6 +85,12 @@ static const struct i2c_device_id stts751_id[] = {
         { }
  };
  
+static const struct of_device_id stts751_of_match[] = {  /* device-tree match table, hooked up via .of_match_table */
+       { .compatible = "stts751" },  /* NOTE(review): no "vendor," prefix, unlike the other tables in this series -- confirm against the DT binding */
+       { },  /* empty terminating entry */
+};
+MODULE_DEVICE_TABLE(of, stts751_of_match);
+
  struct stts751_priv {
         struct device *dev;
         struct i2c_client *client;
@@ -819,6 +825,7 @@ static struct i2c_driver stts751_driver = {
         .class          = I2C_CLASS_HWMON,
         .driver = {
                 .name   = DEVNAME,
+               .of_match_table = of_match_ptr(stts751_of_match),
         },
         .probe          = stts751_probe,
         .id_table       = stts751_id,
diff --git a/drivers/hwmon/tmp102.c b/drivers/hwmon/tmp102.c

index 36bba2a816a41388c3b94233886ccc64af659439..5eafbaada7958d81b530637b5ab5c2a3a29066b0 100644 (file)
--- a/drivers/hwmon/tmp102.c
+++ b/drivers/hwmon/tmp102.c
@@ -323,8 +323,15 @@ static const struct i2c_device_id tmp102_id[] = {
  };
  MODULE_DEVICE_TABLE(i2c, tmp102_id);
  
+static const struct of_device_id tmp102_of_match[] = {  /* device-tree match table, referenced by tmp102_driver */
+       { .compatible = "ti,tmp102" },  /* single supported compatible; no per-chip .data */
+       { },  /* empty terminating entry */
+};
+MODULE_DEVICE_TABLE(of, tmp102_of_match);
+
  static struct i2c_driver tmp102_driver = {
         .driver.name    = DRIVER_NAME,
+       .driver.of_match_table = of_match_ptr(tmp102_of_match),
         .driver.pm      = &tmp102_dev_pm_ops,
         .probe          = tmp102_probe,
         .id_table       = tmp102_id,
diff --git a/drivers/hwmon/tmp103.c b/drivers/hwmon/tmp103.c

index ad571ec795a3354112c3592ecebef641d15b6186..7f85b14544df843b54cdc54b9fb3f3da2474ec94 100644 (file)
--- a/drivers/hwmon/tmp103.c
+++ b/drivers/hwmon/tmp103.c
@@ -150,8 +150,7 @@ static int tmp103_probe(struct i2c_client *client,
         return PTR_ERR_OR_ZERO(hwmon_dev);
  }
  
-#ifdef CONFIG_PM
-static int tmp103_suspend(struct device *dev)
+static int __maybe_unused tmp103_suspend(struct device *dev)
  {
         struct regmap *regmap = dev_get_drvdata(dev);
  
@@ -159,7 +158,7 @@ static int tmp103_suspend(struct device *dev)
                                   TMP103_CONF_SD_MASK, 0);
  }
  
-static int tmp103_resume(struct device *dev)
+static int __maybe_unused tmp103_resume(struct device *dev)
  {
         struct regmap *regmap = dev_get_drvdata(dev);
  
@@ -167,15 +166,7 @@ static int tmp103_resume(struct device *dev)
                                   TMP103_CONF_SD_MASK, TMP103_CONF_SD);
  }
  
-static const struct dev_pm_ops tmp103_dev_pm_ops = {
-       .suspend        = tmp103_suspend,
-       .resume         = tmp103_resume,
-};
-
-#define TMP103_DEV_PM_OPS (&tmp103_dev_pm_ops)
-#else
-#define        TMP103_DEV_PM_OPS NULL
-#endif /* CONFIG_PM */
+static SIMPLE_DEV_PM_OPS(tmp103_dev_pm_ops, tmp103_suspend, tmp103_resume);
  
  static const struct i2c_device_id tmp103_id[] = {
         { "tmp103", 0 },
@@ -183,10 +174,17 @@ static const struct i2c_device_id tmp103_id[] = {
  };
  MODULE_DEVICE_TABLE(i2c, tmp103_id);
  
+static const struct of_device_id tmp103_of_match[] = {  /* device-tree match table, referenced by tmp103_driver */
+       { .compatible = "ti,tmp103" },  /* single supported compatible; no per-chip .data */
+       { },  /* empty terminating entry */
+};
+MODULE_DEVICE_TABLE(of, tmp103_of_match);
+
  static struct i2c_driver tmp103_driver = {
         .driver = {
                 .name   = "tmp103",
-               .pm     = TMP103_DEV_PM_OPS,
+               .of_match_table = of_match_ptr(tmp103_of_match),
+               .pm     = &tmp103_dev_pm_ops,
         },
         .probe          = tmp103_probe,
         .id_table       = tmp103_id,
diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c

index bfb98b96c7813e4404cac497444d0d6e572a7aff..e36399213324d9ab99e6d59f1ac1621445e2a9e2 100644 (file)
--- a/drivers/hwmon/tmp421.c
+++ b/drivers/hwmon/tmp421.c
@@ -29,6 +29,7 @@
  #include <linux/hwmon-sysfs.h>
  #include <linux/err.h>
  #include <linux/mutex.h>
+#include <linux/of_device.h>
  #include <linux/sysfs.h>
  
  /* Addresses to scan */
@@ -69,6 +70,31 @@ static const struct i2c_device_id tmp421_id[] = {
  };
  MODULE_DEVICE_TABLE(i2c, tmp421_id);
  
+static const struct of_device_id tmp421_of_match[] = {  /* device-tree match table; .data is the channel count */
+       {
+               .compatible = "ti,tmp421",
+               .data = (void *)2  /* stored in tmp421_data.channels by tmp421_probe() */
+       },
+       {
+               .compatible = "ti,tmp422",
+               .data = (void *)3
+       },
+       {
+               .compatible = "ti,tmp423",
+               .data = (void *)4
+       },
+       {
+               .compatible = "ti,tmp441",
+               .data = (void *)2
+       },
+       {
+               .compatible = "ti,tmp442",  /* was a second "ti,tmp422", which left TMP442 without an OF match */
+               .data = (void *)3
+       },
+       { },  /* empty terminating entry */
+};
+MODULE_DEVICE_TABLE(of, tmp421_of_match);
+
  struct tmp421_data {
         struct i2c_client *client;
         struct mutex update_lock;
@@ -78,7 +104,7 @@ struct tmp421_data {
         struct hwmon_chip_info chip;
         char valid;
         unsigned long last_updated;
-       int channels;
+       unsigned long channels;
         u8 config;
         s16 temp[4];
  };
@@ -272,7 +298,11 @@ static int tmp421_probe(struct i2c_client *client,
                 return -ENOMEM;
  
         mutex_init(&data->update_lock);
-       data->channels = id->driver_data;
+       if (client->dev.of_node)
+               data->channels = (unsigned long)
+                       of_device_get_match_data(&client->dev);
+       else
+               data->channels = id->driver_data;
         data->client = client;
  
         err = tmp421_init_client(client);
@@ -301,6 +331,7 @@ static struct i2c_driver tmp421_driver = {
         .class = I2C_CLASS_HWMON,
         .driver = {
                 .name   = "tmp421",
+               .of_match_table = of_match_ptr(tmp421_of_match),
         },
         .probe = tmp421_probe,
         .id_table = tmp421_id,
diff --git a/drivers/hwmon/twl4030-madc-hwmon.c b/drivers/hwmon/twl4030-madc-hwmon.c

deleted file mode 100644 (file)

index b5caf7f..0000000
--- a/drivers/hwmon/twl4030-madc-hwmon.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- *
- * TWL4030 MADC Hwmon driver-This driver monitors the real time
- * conversion of analog signals like battery temperature,
- * battery type, battery level etc. User can ask for the conversion on a
- * particular channel using the sysfs nodes.
- *
- * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
- * J Keerthy <j-keerthy@ti.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
- *
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/i2c/twl.h>
-#include <linux/device.h>
-#include <linux/platform_device.h>
-#include <linux/i2c/twl4030-madc.h>
-#include <linux/hwmon.h>
-#include <linux/hwmon-sysfs.h>
-#include <linux/stddef.h>
-#include <linux/sysfs.h>
-#include <linux/err.h>
-#include <linux/types.h>
-
-/*
- * sysfs hook function
- */
-static ssize_t madc_read(struct device *dev,
-                        struct device_attribute *devattr, char *buf)
-{
-       struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
-       struct twl4030_madc_request req = {
-               .channels = 1 << attr->index,
-               .method = TWL4030_MADC_SW2,
-               .type = TWL4030_MADC_WAIT,
-       };
-       long val;
-
-       val = twl4030_madc_conversion(&req);
-       if (val < 0)
-               return val;
-
-       return sprintf(buf, "%d\n", req.rbuf[attr->index]);
-}
-
-/* sysfs nodes to read individual channels from user side */
-static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO, madc_read, NULL, 0);
-static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, madc_read, NULL, 1);
-static SENSOR_DEVICE_ATTR(in2_input, S_IRUGO, madc_read, NULL, 2);
-static SENSOR_DEVICE_ATTR(in3_input, S_IRUGO, madc_read, NULL, 3);
-static SENSOR_DEVICE_ATTR(in4_input, S_IRUGO, madc_read, NULL, 4);
-static SENSOR_DEVICE_ATTR(in5_input, S_IRUGO, madc_read, NULL, 5);
-static SENSOR_DEVICE_ATTR(in6_input, S_IRUGO, madc_read, NULL, 6);
-static SENSOR_DEVICE_ATTR(in7_input, S_IRUGO, madc_read, NULL, 7);
-static SENSOR_DEVICE_ATTR(in8_input, S_IRUGO, madc_read, NULL, 8);
-static SENSOR_DEVICE_ATTR(in9_input, S_IRUGO, madc_read, NULL, 9);
-static SENSOR_DEVICE_ATTR(curr10_input, S_IRUGO, madc_read, NULL, 10);
-static SENSOR_DEVICE_ATTR(in11_input, S_IRUGO, madc_read, NULL, 11);
-static SENSOR_DEVICE_ATTR(in12_input, S_IRUGO, madc_read, NULL, 12);
-static SENSOR_DEVICE_ATTR(in15_input, S_IRUGO, madc_read, NULL, 15);
-
-static struct attribute *twl4030_madc_attrs[] = {
-       &sensor_dev_attr_in0_input.dev_attr.attr,
-       &sensor_dev_attr_temp1_input.dev_attr.attr,
-       &sensor_dev_attr_in2_input.dev_attr.attr,
-       &sensor_dev_attr_in3_input.dev_attr.attr,
-       &sensor_dev_attr_in4_input.dev_attr.attr,
-       &sensor_dev_attr_in5_input.dev_attr.attr,
-       &sensor_dev_attr_in6_input.dev_attr.attr,
-       &sensor_dev_attr_in7_input.dev_attr.attr,
-       &sensor_dev_attr_in8_input.dev_attr.attr,
-       &sensor_dev_attr_in9_input.dev_attr.attr,
-       &sensor_dev_attr_curr10_input.dev_attr.attr,
-       &sensor_dev_attr_in11_input.dev_attr.attr,
-       &sensor_dev_attr_in12_input.dev_attr.attr,
-       &sensor_dev_attr_in15_input.dev_attr.attr,
-       NULL
-};
-ATTRIBUTE_GROUPS(twl4030_madc);
-
-static int twl4030_madc_hwmon_probe(struct platform_device *pdev)
-{
-       struct device *hwmon;
-
-       hwmon = devm_hwmon_device_register_with_groups(&pdev->dev,
-                                                      "twl4030_madc", NULL,
-                                                      twl4030_madc_groups);
-       return PTR_ERR_OR_ZERO(hwmon);
-}
-
-static struct platform_driver twl4030_madc_hwmon_driver = {
-       .probe = twl4030_madc_hwmon_probe,
-       .driver = {
-                  .name = "twl4030_madc_hwmon",
-                  },
-};
-
-module_platform_driver(twl4030_madc_hwmon_driver);
-
-MODULE_DESCRIPTION("TWL4030 ADC Hwmon driver");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("J Keerthy");
-MODULE_ALIAS("platform:twl4030_madc_hwmon");
diff --git a/drivers/hwmon/w83627ehf.c b/drivers/hwmon/w83627ehf.c

index ab346ed142debaa7f51fb73d3addad96aa89d01b..ad68b6d9ff17e8adeeac55124845ab86d0577374 100644 (file)
--- a/drivers/hwmon/w83627ehf.c
+++ b/drivers/hwmon/w83627ehf.c
@@ -135,11 +135,16 @@ superio_select(int ioreg, int ld)
         outb(ld, ioreg + 1);
  }
  
-static inline void
+static inline int  /* returns 0 on success, -EBUSY if the I/O region cannot be claimed */
  superio_enter(int ioreg)
  {
+       if (!request_muxed_region(ioreg, 2, DRVNAME))  /* claim 2 ports starting at ioreg; superio_exit() releases them */
+               return -EBUSY;
+
         outb(0x87, ioreg);  /* the same value is written twice to the same port */
         outb(0x87, ioreg);
+
+       return 0;
  }
  
  static inline void
@@ -148,6 +153,7 @@ superio_exit(int ioreg)
         outb(0xaa, ioreg);
         outb(0x02, ioreg);
         outb(0x02, ioreg + 1);
+       release_region(ioreg, 2);
  }
  
  /*
@@ -1970,8 +1976,6 @@ w83627ehf_check_fan_inputs(const struct w83627ehf_sio_data *sio_data,
                 return;
         }
  
-       superio_enter(sio_data->sioreg);
-
         /* fan4 and fan5 share some pins with the GPIO and serial flash */
         if (sio_data->kind == nct6775) {
                 /* On NCT6775, fan4 shares pins with the fdc interface */
@@ -2013,8 +2017,6 @@ w83627ehf_check_fan_inputs(const struct w83627ehf_sio_data *sio_data,
                 fan4min = fan4pin;
         }
  
-       superio_exit(sio_data->sioreg);
-
         data->has_fan = data->has_fan_min = 0x03; /* fan1 and fan2 */
         data->has_fan |= (fan3pin << 2);
         data->has_fan_min |= (fan3pin << 2);
@@ -2352,7 +2354,11 @@ static int w83627ehf_probe(struct platform_device *pdev)
         w83627ehf_init_device(data, sio_data->kind);
  
         data->vrm = vid_which_vrm();
-       superio_enter(sio_data->sioreg);
+
+       err = superio_enter(sio_data->sioreg);
+       if (err)
+               goto exit_release;
+
         /* Read VID value */
         if (sio_data->kind == w83667hg || sio_data->kind == w83667hg_b ||
             sio_data->kind == nct6775 || sio_data->kind == nct6776) {
@@ -2364,8 +2370,10 @@ static int w83627ehf_probe(struct platform_device *pdev)
                 superio_select(sio_data->sioreg, W83667HG_LD_VID);
                 data->vid = superio_inb(sio_data->sioreg, 0xe3);
                 err = device_create_file(dev, &dev_attr_cpu0_vid);
-               if (err)
+               if (err) {
+                       superio_exit(sio_data->sioreg);
                         goto exit_release;
+               }
         } else if (sio_data->kind != w83627uhg) {
                 superio_select(sio_data->sioreg, W83627EHF_LD_HWM);
                 if (superio_inb(sio_data->sioreg, SIO_REG_VID_CTRL) & 0x80) {
@@ -2401,8 +2409,10 @@ static int w83627ehf_probe(struct platform_device *pdev)
                                 data->vid &= 0x3f;
  
                         err = device_create_file(dev, &dev_attr_cpu0_vid);
-                       if (err)
+                       if (err) {
+                               superio_exit(sio_data->sioreg);
                                 goto exit_release;
+                       }
                 } else {
                         dev_info(dev,
                                  "VID pins in output mode, CPU VID not available\n");
@@ -2424,10 +2434,10 @@ static int w83627ehf_probe(struct platform_device *pdev)
                 pr_info("Enabled fan debounce for chip %s\n", data->name);
         }
  
-       superio_exit(sio_data->sioreg);
-
         w83627ehf_check_fan_inputs(sio_data, data);
  
+       superio_exit(sio_data->sioreg);
+
         /* Read fan clock dividers immediately */
         w83627ehf_update_fan_div_common(dev, data);
  
@@ -2712,8 +2722,11 @@ static int __init w83627ehf_find(int sioaddr, unsigned short *addr,
  
         u16 val;
         const char *sio_name;
+       int err;
  
-       superio_enter(sioaddr);
+       err = superio_enter(sioaddr);
+       if (err)
+               return err;
  
         if (force_id)
                 val = force_id;
diff --git a/drivers/hwtracing/intel_th/core.c b/drivers/hwtracing/intel_th/core.c

index cdd9b3b26195aa38f03a910d10e483bca61edcca..7563eceeaaeaa3a4e70855a6d9622e960b0ab6c0 100644 (file)
--- a/drivers/hwtracing/intel_th/core.c
+++ b/drivers/hwtracing/intel_th/core.c
@@ -221,8 +221,10 @@ static int intel_th_output_activate(struct intel_th_device *thdev)
         else
                 intel_th_trace_enable(thdev);
  
-       if (ret)
+       if (ret) {
                 pm_runtime_put(&thdev->dev);
+               module_put(thdrv->driver.owner);
+       }
  
         return ret;
  }
diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c

index 0bba3842336e6d6b9cb9a2b78ca91d7ff2712dcd..590cf90dd21a61a6855dd99885c99da7d2dbc33a 100644 (file)
--- a/drivers/hwtracing/intel_th/pci.c
+++ b/drivers/hwtracing/intel_th/pci.c
@@ -85,6 +85,16 @@ static const struct pci_device_id intel_th_pci_id_table[] = {
                 PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa2a6),
                 .driver_data = (kernel_ulong_t)0,
         },
+       {
+               /* Denverton */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x19e1),
+               .driver_data = (kernel_ulong_t)0,
+       },
+       {
+               /* Gemini Lake */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x318e),
+               .driver_data = (kernel_ulong_t)0,
+       },
         { 0 },
  };
  
diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c

index 0652281662a8b35b974d084c5d0636f8ac75e450..78792b4d6437c7cca6d84fd4977773f0c65781e2 100644 (file)
--- a/drivers/i2c/busses/i2c-brcmstb.c
+++ b/drivers/i2c/busses/i2c-brcmstb.c
@@ -465,6 +465,7 @@ static int brcmstb_i2c_xfer(struct i2c_adapter *adapter,
         u8 *tmp_buf;
         int len = 0;
         int xfersz = brcmstb_i2c_get_xfersz(dev);
+       u32 cond, cond_per_msg;
  
         if (dev->is_suspended)
                 return -EBUSY;
@@ -481,10 +482,11 @@ static int brcmstb_i2c_xfer(struct i2c_adapter *adapter,
                         pmsg->buf ? pmsg->buf[0] : '0', pmsg->len);
  
                 if (i < (num - 1) && (msgs[i + 1].flags & I2C_M_NOSTART))
-                       brcmstb_set_i2c_start_stop(dev, ~(COND_START_STOP));
+                       cond = ~COND_START_STOP;
                 else
-                       brcmstb_set_i2c_start_stop(dev,
-                                                  COND_RESTART | COND_NOSTOP);
+                       cond = COND_RESTART | COND_NOSTOP;
+
+               brcmstb_set_i2c_start_stop(dev, cond);
  
                 /* Send slave address */
                 if (!(pmsg->flags & I2C_M_NOSTART)) {
@@ -497,13 +499,24 @@ static int brcmstb_i2c_xfer(struct i2c_adapter *adapter,
                         }
                 }
  
+               cond_per_msg = cond;
+
                 /* Perform data transfer */
                 while (len) {
                         bytes_to_xfer = min(len, xfersz);
  
-                       if (len <= xfersz && i == (num - 1))
-                               brcmstb_set_i2c_start_stop(dev,
-                                                          ~(COND_START_STOP));
+                       if (len <= xfersz) {
+                               if (i == (num - 1))
+                                       cond_per_msg = cond_per_msg &
+                                               ~(COND_RESTART | COND_NOSTOP);
+                               else
+                                       cond_per_msg = cond;
+                       } else {
+                               cond_per_msg = (cond_per_msg & ~COND_RESTART) |
+                                       COND_NOSTOP;
+                       }
+
+                       brcmstb_set_i2c_start_stop(dev, cond_per_msg);
  
                         rc = brcmstb_i2c_xfer_bsc_data(dev, tmp_buf,
                                                        bytes_to_xfer, pmsg);
@@ -512,6 +525,8 @@ static int brcmstb_i2c_xfer(struct i2c_adapter *adapter,
  
                         len -=  bytes_to_xfer;
                         tmp_buf += bytes_to_xfer;
+
+                       cond_per_msg = COND_NOSTART | COND_NOSTOP;
                 }
         }
  
diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h

index c1db3a5a340f599b6bee5c0165112703e2e082fc..d9aaf1790e0eff58dc6b26ca9d4881058e890f79 100644 (file)
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h
@@ -88,6 +88,7 @@ struct dw_i2c_dev {
         void __iomem            *base;
         struct completion       cmd_complete;
         struct clk              *clk;
+       struct reset_control    *rst;
         u32                     (*get_clk_rate_khz) (struct dw_i2c_dev *dev);
         struct dw_pci_controller *controller;
         int                     cmd_err;
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c

index 6ce4313231257f8251b62e02f5aaa390a4619edf..79c4b4ea053969e46226dc749fb12a5b59c3ac2c 100644 (file)
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -38,6 +38,7 @@
  #include <linux/pm_runtime.h>
  #include <linux/property.h>
  #include <linux/io.h>
+#include <linux/reset.h>
  #include <linux/slab.h>
  #include <linux/acpi.h>
  #include <linux/platform_data/i2c-designware.h>
@@ -199,6 +200,14 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
         dev->irq = irq;
         platform_set_drvdata(pdev, dev);
  
+       dev->rst = devm_reset_control_get_optional_exclusive(&pdev->dev, NULL);
+       if (IS_ERR(dev->rst)) {
+               if (PTR_ERR(dev->rst) == -EPROBE_DEFER)
+                       return -EPROBE_DEFER;
+       } else {
+               reset_control_deassert(dev->rst);
+       }
+
         if (pdata) {
                 dev->clk_freq = pdata->i2c_scl_freq;
         } else {
@@ -235,12 +244,13 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
             && dev->clk_freq != 1000000 && dev->clk_freq != 3400000) {
                 dev_err(&pdev->dev,
                         "Only 100kHz, 400kHz, 1MHz and 3.4MHz supported");
-               return -EINVAL;
+               r = -EINVAL;
+               goto exit_reset;
         }
  
         r = i2c_dw_eval_lock_support(dev);
         if (r)
-               return r;
+               goto exit_reset;
  
         dev->functionality = I2C_FUNC_10BIT_ADDR | DW_IC_DEFAULT_FUNCTIONALITY;
  
@@ -286,10 +296,18 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
         }
  
         r = i2c_dw_probe(dev);
-       if (r && !dev->pm_runtime_disabled)
-               pm_runtime_disable(&pdev->dev);
+       if (r)
+               goto exit_probe;
  
         return r;
+
+exit_probe:
+       if (!dev->pm_runtime_disabled)
+               pm_runtime_disable(&pdev->dev);
+exit_reset:
+       if (!IS_ERR_OR_NULL(dev->rst))
+               reset_control_assert(dev->rst);
+       return r;
  }
  
  static int dw_i2c_plat_remove(struct platform_device *pdev)
@@ -306,6 +324,8 @@ static int dw_i2c_plat_remove(struct platform_device *pdev)
         pm_runtime_put_sync(&pdev->dev);
         if (!dev->pm_runtime_disabled)
                 pm_runtime_disable(&pdev->dev);
+       if (!IS_ERR_OR_NULL(dev->rst))
+               reset_control_assert(dev->rst);
  
         return 0;
  }
diff --git a/drivers/i2c/busses/i2c-exynos5.c b/drivers/i2c/busses/i2c-exynos5.c

index cbd93ce0661f225dd0492baef8e91a255079ac21..736a82472101733d7ada08b360c9f159c35371cb 100644 (file)
--- a/drivers/i2c/busses/i2c-exynos5.c
+++ b/drivers/i2c/busses/i2c-exynos5.c
@@ -457,7 +457,6 @@ static irqreturn_t exynos5_i2c_irq(int irqno, void *dev_id)
  
         int_status = readl(i2c->regs + HSI2C_INT_STATUS);
         writel(int_status, i2c->regs + HSI2C_INT_STATUS);
-       trans_status = readl(i2c->regs + HSI2C_TRANS_STATUS);
  
         /* handle interrupt related to the transfer status */
         if (i2c->variant->hw == HSI2C_EXYNOS7) {
@@ -482,11 +481,13 @@ static irqreturn_t exynos5_i2c_irq(int irqno, void *dev_id)
                         goto stop;
                 }
  
+               trans_status = readl(i2c->regs + HSI2C_TRANS_STATUS);
                 if ((trans_status & HSI2C_MASTER_ST_MASK) == HSI2C_MASTER_ST_LOSE) {
                         i2c->state = -EAGAIN;
                         goto stop;
                 }
         } else if (int_status & HSI2C_INT_I2C) {
+               trans_status = readl(i2c->regs + HSI2C_TRANS_STATUS);
                 if (trans_status & HSI2C_NO_DEV_ACK) {
                         dev_dbg(i2c->dev, "No ACK from device\n");
                         i2c->state = -ENXIO;
diff --git a/drivers/i2c/busses/i2c-meson.c b/drivers/i2c/busses/i2c-meson.c

index 2aa61bbbd307b9aead3730e137bbf5b95e29a366..73b97c71a484ee186fcb488c1b758cbb04178898 100644 (file)
--- a/drivers/i2c/busses/i2c-meson.c
+++ b/drivers/i2c/busses/i2c-meson.c
@@ -175,7 +175,7 @@ static void meson_i2c_put_data(struct meson_i2c *i2c, char *buf, int len)
                 wdata1 |= *buf++ << ((i - 4) * 8);
  
         writel(wdata0, i2c->regs + REG_TOK_WDATA0);
-       writel(wdata0, i2c->regs + REG_TOK_WDATA1);
+       writel(wdata1, i2c->regs + REG_TOK_WDATA1);
  
         dev_dbg(i2c->dev, "%s: data %08x %08x len %d\n", __func__,
                 wdata0, wdata1, len);
diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c

index 4a7d9bc2142ba31db7579ed140cfbe6ed5224c02..45d61714c81bd2cfdec86e7f95ffaaa11abc5565 100644 (file)
--- a/drivers/i2c/busses/i2c-mt65xx.c
+++ b/drivers/i2c/busses/i2c-mt65xx.c
@@ -172,14 +172,6 @@ static const struct i2c_adapter_quirks mt6577_i2c_quirks = {
         .max_comb_2nd_msg_len = 31,
  };
  
-static const struct i2c_adapter_quirks mt8173_i2c_quirks = {
-       .max_num_msgs = 65535,
-       .max_write_len = 65535,
-       .max_read_len = 65535,
-       .max_comb_1st_msg_len = 65535,
-       .max_comb_2nd_msg_len = 65535,
-};
-
  static const struct mtk_i2c_compatible mt6577_compat = {
         .quirks = &mt6577_i2c_quirks,
         .pmic_i2c = 0,
@@ -199,7 +191,6 @@ static const struct mtk_i2c_compatible mt6589_compat = {
  };
  
  static const struct mtk_i2c_compatible mt8173_compat = {
-       .quirks = &mt8173_i2c_quirks,
         .pmic_i2c = 0,
         .dcm = 1,
         .auto_restart = 1,
diff --git a/drivers/i2c/busses/i2c-riic.c b/drivers/i2c/busses/i2c-riic.c

index 8f11d347b3ec482815e37d3170fa6abef4537c31..c811af4c8d817bcf353068bf2e3f95f56953155b 100644 (file)
--- a/drivers/i2c/busses/i2c-riic.c
+++ b/drivers/i2c/busses/i2c-riic.c
@@ -218,8 +218,12 @@ static irqreturn_t riic_tend_isr(int irq, void *data)
         }
  
         if (riic->is_last || riic->err) {
-               riic_clear_set_bit(riic, 0, ICIER_SPIE, RIIC_ICIER);
+               riic_clear_set_bit(riic, ICIER_TEIE, ICIER_SPIE, RIIC_ICIER);
                 writeb(ICCR2_SP, riic->base + RIIC_ICCR2);
+       } else {
+               /* Transfer is complete, but do not send STOP */
+               riic_clear_set_bit(riic, ICIER_TEIE, 0, RIIC_ICIER);
+               complete(&riic->msg_done);
         }
  
         return IRQ_HANDLED;
diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c

index 83768e85a919cb5c6eb60af01ce5918027600962..2178266bca794825e948ce275d48a1b13064056c 100644 (file)
--- a/drivers/i2c/i2c-mux.c
+++ b/drivers/i2c/i2c-mux.c
@@ -429,6 +429,7 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc)
         while (muxc->num_adapters) {
                 struct i2c_adapter *adap = muxc->adapter[--muxc->num_adapters];
                 struct i2c_mux_priv *priv = adap->algo_data;
+               struct device_node *np = adap->dev.of_node;
  
                 muxc->adapter[muxc->num_adapters] = NULL;
  
@@ -438,6 +439,7 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc)
  
                 sysfs_remove_link(&priv->adap.dev.kobj, "mux_device");
                 i2c_del_adapter(adap);
+               of_node_put(np);
                 kfree(priv);
         }
  }
diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c

index dfc1c0e37c4022b66da0facee93af030c70c4e73..ad31d21da3165f73ecc8f7beb1feef4fc186cbe5 100644 (file)
--- a/drivers/i2c/muxes/i2c-mux-pca954x.c
+++ b/drivers/i2c/muxes/i2c-mux-pca954x.c
@@ -35,7 +35,6 @@
   * warranty of any kind, whether express or implied.
   */
  
-#include <linux/acpi.h>
  #include <linux/device.h>
  #include <linux/gpio/consumer.h>
  #include <linux/i2c.h>
@@ -117,6 +116,10 @@ static const struct chip_desc chips[] = {
                 .has_irq = 1,
                 .muxtype = pca954x_isswi,
         },
+       [pca_9546] = {
+               .nchans = 4,
+               .muxtype = pca954x_isswi,
+       },
         [pca_9547] = {
                 .nchans = 8,
                 .enable = 0x8,
@@ -134,28 +137,13 @@ static const struct i2c_device_id pca954x_id[] = {
         { "pca9543", pca_9543 },
         { "pca9544", pca_9544 },
         { "pca9545", pca_9545 },
-       { "pca9546", pca_9545 },
+       { "pca9546", pca_9546 },
         { "pca9547", pca_9547 },
         { "pca9548", pca_9548 },
         { }
  };
  MODULE_DEVICE_TABLE(i2c, pca954x_id);
  
-#ifdef CONFIG_ACPI
-static const struct acpi_device_id pca954x_acpi_ids[] = {
-       { .id = "PCA9540", .driver_data = pca_9540 },
-       { .id = "PCA9542", .driver_data = pca_9542 },
-       { .id = "PCA9543", .driver_data = pca_9543 },
-       { .id = "PCA9544", .driver_data = pca_9544 },
-       { .id = "PCA9545", .driver_data = pca_9545 },
-       { .id = "PCA9546", .driver_data = pca_9545 },
-       { .id = "PCA9547", .driver_data = pca_9547 },
-       { .id = "PCA9548", .driver_data = pca_9548 },
-       { }
-};
-MODULE_DEVICE_TABLE(acpi, pca954x_acpi_ids);
-#endif
-
  #ifdef CONFIG_OF
  static const struct of_device_id pca954x_of_match[] = {
         { .compatible = "nxp,pca9540", .data = &chips[pca_9540] },
@@ -393,17 +381,8 @@ static int pca954x_probe(struct i2c_client *client,
         match = of_match_device(of_match_ptr(pca954x_of_match), &client->dev);
         if (match)
                 data->chip = of_device_get_match_data(&client->dev);
-       else if (id)
+       else
                 data->chip = &chips[id->driver_data];
-       else {
-               const struct acpi_device_id *acpi_id;
-
-               acpi_id = acpi_match_device(ACPI_PTR(pca954x_acpi_ids),
-                                               &client->dev);
-               if (!acpi_id)
-                       return -ENODEV;
-               data->chip = &chips[acpi_id->driver_data];
-       }
  
         data->last_chan = 0;               /* force the first selection */
  
@@ -492,7 +471,6 @@ static struct i2c_driver pca954x_driver = {
                 .name   = "pca954x",
                 .pm     = &pca954x_pm,
                 .of_match_table = of_match_ptr(pca954x_of_match),
-               .acpi_match_table = ACPI_PTR(pca954x_acpi_ids),
         },
         .probe          = pca954x_probe,
         .remove         = pca954x_remove,
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c

index feb30061123bc07c14b7d1ae0b5b7ce19d533790..5901937284e70dcd8e67260feed755c537730e14 100644 (file)
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -107,7 +107,8 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
         memcpy(scsi_req(rq)->cmd, pc->c, 12);
         if (drive->media == ide_tape)
                 scsi_req(rq)->cmd[13] = REQ_IDETAPE_PC1;
-       error = blk_execute_rq(drive->queue, disk, rq, 0);
+       blk_execute_rq(drive->queue, disk, rq, 0);
+       error = scsi_req(rq)->result ? -EIO : 0;
  put_req:
         blk_put_request(rq);
         return error;
@@ -454,7 +455,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
                         debug_log("%s: I/O error\n", drive->name);
  
                         if (drive->media != ide_tape)
-                               pc->rq->errors++;
+                               scsi_req(pc->rq)->result++;
  
                         if (scsi_req(rq)->cmd[0] == REQUEST_SENSE) {
                                 printk(KERN_ERR PFX "%s: I/O error in request "
@@ -488,13 +489,13 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
                         drive->failed_pc = NULL;
  
                 if (ata_misc_request(rq)) {
-                       rq->errors = 0;
+                       scsi_req(rq)->result = 0;
                         error = 0;
                 } else {
  
                         if (blk_rq_is_passthrough(rq) && uptodate <= 0) {
-                               if (rq->errors == 0)
-                                       rq->errors = -EIO;
+                               if (scsi_req(rq)->result == 0)
+                                       scsi_req(rq)->result = -EIO;
                         }
  
                         error = uptodate ? 0 : -EIO;
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c

index 74f1b7dc03f73c444ec2a6d4728304134d88c92a..07e5ff3a64c330b7ef028cc3b85d4bc820393a28 100644 (file)
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -247,10 +247,10 @@ static int ide_cd_breathe(ide_drive_t *drive, struct request *rq)
  
         struct cdrom_info *info = drive->driver_data;
  
-       if (!rq->errors)
+       if (!scsi_req(rq)->result)
                 info->write_timeout = jiffies + ATAPI_WAIT_WRITE_BUSY;
  
-       rq->errors = 1;
+       scsi_req(rq)->result = 1;
  
         if (time_after(jiffies, info->write_timeout))
                 return 0;
@@ -294,8 +294,8 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
         }
  
         /* if we have an error, pass CHECK_CONDITION as the SCSI status byte */
-       if (blk_rq_is_scsi(rq) && !rq->errors)
-               rq->errors = SAM_STAT_CHECK_CONDITION;
+       if (blk_rq_is_scsi(rq) && !scsi_req(rq)->result)
+               scsi_req(rq)->result = SAM_STAT_CHECK_CONDITION;
  
         if (blk_noretry_request(rq))
                 do_end_request = 1;
@@ -325,7 +325,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
                  * Arrange to retry the request but be sure to give up if we've
                  * retried too many times.
                  */
-               if (++rq->errors > ERROR_MAX)
+               if (++scsi_req(rq)->result > ERROR_MAX)
                         do_end_request = 1;
                 break;
         case ILLEGAL_REQUEST:
@@ -372,7 +372,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
                         /* go to the default handler for other errors */
                         ide_error(drive, "cdrom_decode_status", stat);
                         return 1;
-               } else if (++rq->errors > ERROR_MAX)
+               } else if (++scsi_req(rq)->result > ERROR_MAX)
                         /* we've racked up too many retries, abort */
                         do_end_request = 1;
         }
@@ -452,7 +452,8 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
                         }
                 }
  
-               error = blk_execute_rq(drive->queue, info->disk, rq, 0);
+               blk_execute_rq(drive->queue, info->disk, rq, 0);
+               error = scsi_req(rq)->result ? -EIO : 0;
  
                 if (buffer)
                         *bufflen = scsi_req(rq)->resid_len;
@@ -683,8 +684,8 @@ out_end:
                         if (cmd->nleft == 0)
                                 uptodate = 1;
                 } else {
-                       if (uptodate <= 0 && rq->errors == 0)
-                               rq->errors = -EIO;
+                       if (uptodate <= 0 && scsi_req(rq)->result == 0)
+                               scsi_req(rq)->result = -EIO;
                 }
  
                 if (uptodate == 0 && rq->bio)
@@ -1379,7 +1380,7 @@ static int ide_cdrom_prep_pc(struct request *rq)
          * appropriate action
          */
         if (c[0] == MODE_SENSE || c[0] == MODE_SELECT) {
-               rq->errors = ILLEGAL_REQUEST;
+               scsi_req(rq)->result = ILLEGAL_REQUEST;
                 return BLKPREP_KILL;
         }
  
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c

index 9fcefbc8425e748cf27b4967f2a16fcc67605965..55cd736c39c6795c80c1c9ed96a03c24c17bd700 100644 (file)
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -307,7 +307,8 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi)
         scsi_req_init(rq);
         ide_req(rq)->type = ATA_PRIV_MISC;
         rq->rq_flags = RQF_QUIET;
-       ret = blk_execute_rq(drive->queue, cd->disk, rq, 0);
+       blk_execute_rq(drive->queue, cd->disk, rq, 0);
+       ret = scsi_req(rq)->result ? -EIO : 0;
         blk_put_request(rq);
         /*
          * A reset will unlock the door. If it was previously locked,
diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c

index a45dda5386e4403206ee73a18b8bb8b16b012e65..9b69c32ee560119bd3c74efe09ff0b48a139063a 100644 (file)
--- a/drivers/ide/ide-devsets.c
+++ b/drivers/ide/ide-devsets.c
@@ -173,8 +173,8 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
         *(int *)&scsi_req(rq)->cmd[1] = arg;
         rq->special = setting->set;
  
-       if (blk_execute_rq(q, NULL, rq, 0))
-               ret = rq->errors;
+       blk_execute_rq(q, NULL, rq, 0);
+       ret = scsi_req(rq)->result;
         blk_put_request(rq);
  
         return ret;
@@ -186,7 +186,7 @@ ide_startstop_t ide_do_devset(ide_drive_t *drive, struct request *rq)
  
         err = setfunc(drive, *(int *)&scsi_req(rq)->cmd[1]);
         if (err)
-               rq->errors = err;
-       ide_complete_rq(drive, err, blk_rq_bytes(rq));
+               scsi_req(rq)->result = err;
+       ide_complete_rq(drive, 0, blk_rq_bytes(rq));
         return ide_stopped;
  }
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c

index 186159715b71c6fd7a01eb11bc92239487c4dc7e..7c06237f3479521e083b76469392c8a489ff2dcc 100644 (file)
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -470,7 +470,6 @@ ide_devset_get(multcount, mult_count);
  static int set_multcount(ide_drive_t *drive, int arg)
  {
         struct request *rq;
-       int error;
  
         if (arg < 0 || arg > (drive->id[ATA_ID_MAX_MULTSECT] & 0xff))
                 return -EINVAL;
@@ -484,7 +483,7 @@ static int set_multcount(ide_drive_t *drive, int arg)
  
         drive->mult_req = arg;
         drive->special_flags |= IDE_SFLAG_SET_MULTMODE;
-       error = blk_execute_rq(drive->queue, NULL, rq, 0);
+       blk_execute_rq(drive->queue, NULL, rq, 0);
         blk_put_request(rq);
  
         return (drive->mult_count == arg) ? 0 : -EIO;
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c

index 17a65ac56491881117edbec9b28518e9cd95f12d..51c81223e56d07d16645c978f968872316222e06 100644 (file)
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -490,7 +490,7 @@ ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error)
          * make sure request is sane
          */
         if (hwif->rq)
-               hwif->rq->errors = 0;
+               scsi_req(hwif->rq)->result = 0;
         return ret;
  }
  
diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c

index cf3af68403689a62f9c237d7d1ced636a1aca6b5..4b7ffd7d158dc23852c0989055b26d61c71277c3 100644 (file)
--- a/drivers/ide/ide-eh.c
+++ b/drivers/ide/ide-eh.c
@@ -12,7 +12,7 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq,
         if ((stat & ATA_BUSY) ||
             ((stat & ATA_DF) && (drive->dev_flags & IDE_DFLAG_NOWERR) == 0)) {
                 /* other bits are useless when BUSY */
-               rq->errors |= ERROR_RESET;
+               scsi_req(rq)->result |= ERROR_RESET;
         } else if (stat & ATA_ERR) {
                 /* err has different meaning on cdrom and tape */
                 if (err == ATA_ABORTED) {
@@ -25,10 +25,10 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq,
                         drive->crc_count++;
                 } else if (err & (ATA_BBK | ATA_UNC)) {
                         /* retries won't help these */
-                       rq->errors = ERROR_MAX;
+                       scsi_req(rq)->result = ERROR_MAX;
                 } else if (err & ATA_TRK0NF) {
                         /* help it find track zero */
-                       rq->errors |= ERROR_RECAL;
+                       scsi_req(rq)->result |= ERROR_RECAL;
                 }
         }
  
@@ -39,23 +39,23 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq,
                 ide_pad_transfer(drive, READ, nsect * SECTOR_SIZE);
         }
  
-       if (rq->errors >= ERROR_MAX || blk_noretry_request(rq)) {
+       if (scsi_req(rq)->result >= ERROR_MAX || blk_noretry_request(rq)) {
                 ide_kill_rq(drive, rq);
                 return ide_stopped;
         }
  
         if (hwif->tp_ops->read_status(hwif) & (ATA_BUSY | ATA_DRQ))
-               rq->errors |= ERROR_RESET;
+               scsi_req(rq)->result |= ERROR_RESET;
  
-       if ((rq->errors & ERROR_RESET) == ERROR_RESET) {
-               ++rq->errors;
+       if ((scsi_req(rq)->result & ERROR_RESET) == ERROR_RESET) {
+               ++scsi_req(rq)->result;
                 return ide_do_reset(drive);
         }
  
-       if ((rq->errors & ERROR_RECAL) == ERROR_RECAL)
+       if ((scsi_req(rq)->result & ERROR_RECAL) == ERROR_RECAL)
                 drive->special_flags |= IDE_SFLAG_RECALIBRATE;
  
-       ++rq->errors;
+       ++scsi_req(rq)->result;
  
         return ide_stopped;
  }
@@ -68,7 +68,7 @@ static ide_startstop_t ide_atapi_error(ide_drive_t *drive, struct request *rq,
         if ((stat & ATA_BUSY) ||
             ((stat & ATA_DF) && (drive->dev_flags & IDE_DFLAG_NOWERR) == 0)) {
                 /* other bits are useless when BUSY */
-               rq->errors |= ERROR_RESET;
+               scsi_req(rq)->result |= ERROR_RESET;
         } else {
                 /* add decoding error stuff */
         }
@@ -77,14 +77,14 @@ static ide_startstop_t ide_atapi_error(ide_drive_t *drive, struct request *rq,
                 /* force an abort */
                 hwif->tp_ops->exec_command(hwif, ATA_CMD_IDLEIMMEDIATE);
  
-       if (rq->errors >= ERROR_MAX) {
+       if (scsi_req(rq)->result >= ERROR_MAX) {
                 ide_kill_rq(drive, rq);
         } else {
-               if ((rq->errors & ERROR_RESET) == ERROR_RESET) {
-                       ++rq->errors;
+               if ((scsi_req(rq)->result & ERROR_RESET) == ERROR_RESET) {
+                       ++scsi_req(rq)->result;
                         return ide_do_reset(drive);
                 }
-               ++rq->errors;
+               ++scsi_req(rq)->result;
         }
  
         return ide_stopped;
@@ -130,11 +130,11 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
                         if (cmd)
                                 ide_complete_cmd(drive, cmd, stat, err);
                 } else if (ata_pm_request(rq)) {
-                       rq->errors = 1;
+                       scsi_req(rq)->result = 1;
                         ide_complete_pm_rq(drive, rq);
                         return ide_stopped;
                 }
-               rq->errors = err;
+               scsi_req(rq)->result = err;
                 ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq));
                 return ide_stopped;
         }
@@ -149,8 +149,8 @@ static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
  
         if (rq && ata_misc_request(rq) &&
             scsi_req(rq)->cmd[0] == REQ_DRIVE_RESET) {
-               if (err <= 0 && rq->errors == 0)
-                       rq->errors = -EIO;
+               if (err <= 0 && scsi_req(rq)->result == 0)
+                       scsi_req(rq)->result = -EIO;
                 ide_complete_rq(drive, err ? err : 0, blk_rq_bytes(rq));
         }
  }
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c

index a69e8013f1dff9ff0e8b63fea7cee0ca3767a23f..8ac6048cd2df9145daa13f5ac1dd4eb4deb96f3a 100644 (file)
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -98,7 +98,7 @@ static int ide_floppy_callback(ide_drive_t *drive, int dsc)
         }
  
         if (ata_misc_request(rq))
-               rq->errors = uptodate ? 0 : IDE_DRV_ERROR_GENERAL;
+               scsi_req(rq)->result = uptodate ? 0 : IDE_DRV_ERROR_GENERAL;
  
         return uptodate;
  }
@@ -239,7 +239,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
                                         ? rq->rq_disk->disk_name
                                         : "dev?"));
  
-       if (rq->errors >= ERROR_MAX) {
+       if (scsi_req(rq)->result >= ERROR_MAX) {
                 if (drive->failed_pc) {
                         ide_floppy_report_error(floppy, drive->failed_pc);
                         drive->failed_pc = NULL;
@@ -247,7 +247,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
                         printk(KERN_ERR PFX "%s: I/O error\n", drive->name);
  
                 if (ata_misc_request(rq)) {
-                       rq->errors = 0;
+                       scsi_req(rq)->result = 0;
                         ide_complete_rq(drive, 0, blk_rq_bytes(rq));
                         return ide_stopped;
                 } else
@@ -301,8 +301,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
         return ide_floppy_issue_pc(drive, &cmd, pc);
  out_end:
         drive->failed_pc = NULL;
-       if (blk_rq_is_passthrough(rq) && rq->errors == 0)
-               rq->errors = -EIO;
+       if (blk_rq_is_passthrough(rq) && scsi_req(rq)->result == 0)
+               scsi_req(rq)->result = -EIO;
         ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
         return ide_stopped;
  }
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c

index 043b1fb963cb89982029dddad977c386f8e6bfde..45b3f41a43d4112a7c587c45f25b89b281bfcac3 100644 (file)
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -141,12 +141,12 @@ void ide_kill_rq(ide_drive_t *drive, struct request *rq)
         drive->failed_pc = NULL;
  
         if ((media == ide_floppy || media == ide_tape) && drv_req) {
-               rq->errors = 0;
+               scsi_req(rq)->result = 0;
         } else {
                 if (media == ide_tape)
-                       rq->errors = IDE_DRV_ERROR_GENERAL;
-               else if (blk_rq_is_passthrough(rq) && rq->errors == 0)
-                       rq->errors = -EIO;
+                       scsi_req(rq)->result = IDE_DRV_ERROR_GENERAL;
+               else if (blk_rq_is_passthrough(rq) && scsi_req(rq)->result == 0)
+                       scsi_req(rq)->result = -EIO;
         }
  
         ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
@@ -271,7 +271,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
  #ifdef DEBUG
         printk("%s: DRIVE_CMD (null)\n", drive->name);
  #endif
-       rq->errors = 0;
+       scsi_req(rq)->result = 0;
         ide_complete_rq(drive, 0, blk_rq_bytes(rq));
  
         return ide_stopped;
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c

index 248a3e0ceb468bdb57e896425a642375e77355be..8c0d17297a7a0ab0950edf4aa06321e85fd65098 100644 (file)
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -128,7 +128,8 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg)
                 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
                 scsi_req_init(rq);
                 ide_req(rq)->type = ATA_PRIV_TASKFILE;
-               err = blk_execute_rq(drive->queue, NULL, rq, 0);
+               blk_execute_rq(drive->queue, NULL, rq, 0);
+               err = scsi_req(rq)->result ? -EIO : 0;
                 blk_put_request(rq);
  
                 return err;
@@ -227,8 +228,8 @@ static int generic_drive_reset(ide_drive_t *drive)
         ide_req(rq)->type = ATA_PRIV_MISC;
         scsi_req(rq)->cmd_len = 1;
         scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET;
-       if (blk_execute_rq(drive->queue, NULL, rq, 1))
-               ret = rq->errors;
+       blk_execute_rq(drive->queue, NULL, rq, 1);
+       ret = scsi_req(rq)->result;
         blk_put_request(rq);
         return ret;
  }
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c

index 101aed9a61ca319439d825c6e433a53175e0ed64..94e3107f59b933fc3618f906dc8b1b4554027dd5 100644 (file)
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -37,7 +37,8 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
         scsi_req(rq)->cmd_len = 1;
         ide_req(rq)->type = ATA_PRIV_MISC;
         rq->special = &timeout;
-       rc = blk_execute_rq(q, NULL, rq, 1);
+       blk_execute_rq(q, NULL, rq, 1);
+       rc = scsi_req(rq)->result ? -EIO : 0;
         blk_put_request(rq);
         if (rc)
                 goto out;
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c

index ec951be4b0c8ab0950ca31dbeb070520c925230e..0977fc1f40ce431979163cd001f01961d79d49f2 100644 (file)
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -27,7 +27,8 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
                 mesg.event = PM_EVENT_FREEZE;
         rqpm.pm_state = mesg.event;
  
-       ret = blk_execute_rq(drive->queue, NULL, rq, 0);
+       blk_execute_rq(drive->queue, NULL, rq, 0);
+       ret = scsi_req(rq)->result ? -EIO : 0;
         blk_put_request(rq);
  
         if (ret == 0 && ide_port_acpi(hwif)) {
@@ -55,8 +56,8 @@ static int ide_pm_execute_rq(struct request *rq)
         spin_lock_irq(q->queue_lock);
         if (unlikely(blk_queue_dying(q))) {
                 rq->rq_flags |= RQF_QUIET;
-               rq->errors = -ENXIO;
-               __blk_end_request_all(rq, rq->errors);
+               scsi_req(rq)->result = -ENXIO;
+               __blk_end_request_all(rq, 0);
                 spin_unlock_irq(q->queue_lock);
                 return -ENXIO;
         }
@@ -66,7 +67,7 @@ static int ide_pm_execute_rq(struct request *rq)
  
         wait_for_completion_io(&wait);
  
-       return rq->errors ? -EIO : 0;
+       return scsi_req(rq)->result ? -EIO : 0;
  }
  
  int generic_ide_resume(struct device *dev)
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c

index d8a552b47718ef6a3c37b1da65819d9c0e67b261..a0651f948b76ec22e72ad64c74bec0cb39627a8c 100644 (file)
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -366,7 +366,7 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc)
                         err = pc->error;
                 }
         }
-       rq->errors = err;
+       scsi_req(rq)->result = err;
  
         return uptodate;
  }
@@ -879,7 +879,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
                 tape->valid = 0;
  
         ret = size;
-       if (rq->errors == IDE_DRV_ERROR_GENERAL)
+       if (scsi_req(rq)->result == IDE_DRV_ERROR_GENERAL)
                 ret = -EIO;
  out_put:
         blk_put_request(rq);
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c

index 4c0007cb74e378d7088a33261ecd0afd8cacb4ea..d71199d23c9ec02ce36cbb25e3bd87616256f48d 100644 (file)
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -287,7 +287,7 @@ static void ide_pio_datablock(ide_drive_t *drive, struct ide_cmd *cmd,
         u8 saved_io_32bit = drive->io_32bit;
  
         if (cmd->tf_flags & IDE_TFLAG_FS)
-               cmd->rq->errors = 0;
+               scsi_req(cmd->rq)->result = 0;
  
         if (cmd->tf_flags & IDE_TFLAG_IO_16BIT)
                 drive->io_32bit = 0;
@@ -329,7 +329,7 @@ void ide_finish_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat)
         u8 set_xfer = !!(cmd->tf_flags & IDE_TFLAG_SET_XFER);
  
         ide_complete_cmd(drive, cmd, stat, err);
-       rq->errors = err;
+       scsi_req(rq)->result = err;
  
         if (err == 0 && set_xfer) {
                 ide_set_xfer_rate(drive, nsect);
@@ -452,8 +452,8 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
         rq->special = cmd;
         cmd->rq = rq;
  
-       error = blk_execute_rq(drive->queue, NULL, rq, 0);
-
+       blk_execute_rq(drive->queue, NULL, rq, 0);
+       error = scsi_req(rq)->result ? -EIO : 0;
  put_req:
         blk_put_request(rq);
         return error;
diff --git a/drivers/iio/accel/hid-sensor-accel-3d.c b/drivers/iio/accel/hid-sensor-accel-3d.c

index ca5759c0c318eb807baf14cb09a1a6b5f3c1b748..43a6cb07819363e8409ed0c28e9f91ae0d49d06d 100644 (file)
--- a/drivers/iio/accel/hid-sensor-accel-3d.c
+++ b/drivers/iio/accel/hid-sensor-accel-3d.c
@@ -370,10 +370,12 @@ static int hid_accel_3d_probe(struct platform_device *pdev)
                 name = "accel_3d";
                 channel_spec = accel_3d_channels;
                 channel_size = sizeof(accel_3d_channels);
+               indio_dev->num_channels = ARRAY_SIZE(accel_3d_channels);
         } else {
                 name = "gravity";
                 channel_spec = gravity_channels;
                 channel_size = sizeof(gravity_channels);
+               indio_dev->num_channels = ARRAY_SIZE(gravity_channels);
         }
         ret = hid_sensor_parse_common_attributes(hsdev, hsdev->usage,
                                         &accel_state->common_attributes);
@@ -395,7 +397,6 @@ static int hid_accel_3d_probe(struct platform_device *pdev)
                 goto error_free_dev_mem;
         }
  
-       indio_dev->num_channels = ARRAY_SIZE(accel_3d_channels);
         indio_dev->dev.parent = &pdev->dev;
         indio_dev->info = &accel_3d_info;
         indio_dev->name = name;
diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c

index ad9dec30bb304ffbbb0f67f01b753f7bc88cd891..4282ceca3d8f9f417a1a511b3d608896a7ad4159 100644 (file)
--- a/drivers/iio/adc/ti_am335x_adc.c
+++ b/drivers/iio/adc/ti_am335x_adc.c
@@ -169,7 +169,9 @@ static irqreturn_t tiadc_irq_h(int irq, void *private)
  {
         struct iio_dev *indio_dev = private;
         struct tiadc_device *adc_dev = iio_priv(indio_dev);
-       unsigned int status, config;
+       unsigned int status, config, adc_fsm;
+       unsigned short count = 0;
+
         status = tiadc_readl(adc_dev, REG_IRQSTATUS);
  
         /*
@@ -183,6 +185,15 @@ static irqreturn_t tiadc_irq_h(int irq, void *private)
                 tiadc_writel(adc_dev, REG_CTRL, config);
                 tiadc_writel(adc_dev, REG_IRQSTATUS, IRQENB_FIFO1OVRRUN
                                 | IRQENB_FIFO1UNDRFLW | IRQENB_FIFO1THRES);
+
+               /* wait for idle state.
+                * ADC needs to finish the current conversion
+                * before disabling the module
+                */
+               do {
+                       adc_fsm = tiadc_readl(adc_dev, REG_ADCFSM);
+               } while (adc_fsm != 0x10 && count++ < 100);
+
                 tiadc_writel(adc_dev, REG_CTRL, (config | CNTRLREG_TSCSSENB));
                 return IRQ_HANDLED;
         } else if (status & IRQENB_FIFO1THRES) {
diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c

index d6c372bb433b4983bca0d99b448ccd5af4e3cdab..c17596f7ed2c30786f6765c091de121870be47d1 100644 (file)
--- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c
+++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c
@@ -61,7 +61,7 @@ static int cros_ec_sensors_read(struct iio_dev *indio_dev,
                 ret = st->core.read_ec_sensors_data(indio_dev, 1 << idx, &data);
                 if (ret < 0)
                         break;
-
+               ret = IIO_VAL_INT;
                 *val = data;
                 break;
         case IIO_CHAN_INFO_CALIBBIAS:
@@ -76,7 +76,7 @@ static int cros_ec_sensors_read(struct iio_dev *indio_dev,
                 for (i = CROS_EC_SENSOR_X; i < CROS_EC_SENSOR_MAX_AXIS; i++)
                         st->core.calib[i] =
                                 st->core.resp->sensor_offset.offset[i];
-
+               ret = IIO_VAL_INT;
                 *val = st->core.calib[idx];
                 break;
         case IIO_CHAN_INFO_SCALE:
diff --git a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c

index 7afdac42ed42d9d87d71c5627d85d0016b652f29..01e02b9926d410c566e1b598fcc392fc2594ff01 100644 (file)
--- a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
+++ b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
@@ -379,6 +379,8 @@ int hid_sensor_parse_common_attributes(struct hid_sensor_hub_device *hsdev,
  {
  
         struct hid_sensor_hub_attribute_info timestamp;
+       s32 value;
+       int ret;
  
         hid_sensor_get_reporting_interval(hsdev, usage_id, st);
  
@@ -417,6 +419,14 @@ int hid_sensor_parse_common_attributes(struct hid_sensor_hub_device *hsdev,
                 st->sensitivity.index, st->sensitivity.report_id,
                 timestamp.index, timestamp.report_id);
  
+       ret = sensor_hub_get_feature(hsdev,
+                               st->power_state.report_id,
+                               st->power_state.index, sizeof(value), &value);
+       if (ret < 0)
+               return ret;
+       if (value < 0)
+               return -EINVAL;
+
         return 0;
  }
  EXPORT_SYMBOL(hid_sensor_parse_common_attributes);
diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c

index a3cce3a38300796b6b0899aa7fe2190481c7137d..ecf592d69043ae82e2e85184096471bfebef95e2 100644 (file)
--- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
+++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
@@ -51,8 +51,6 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state)
                         st->report_state.report_id,
                         st->report_state.index,
                         HID_USAGE_SENSOR_PROP_REPORTING_STATE_ALL_EVENTS_ENUM);
-
-               poll_value = hid_sensor_read_poll_value(st);
         } else {
                 int val;
  
@@ -89,7 +87,9 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state)
         sensor_hub_get_feature(st->hsdev, st->power_state.report_id,
                                st->power_state.index,
                                sizeof(state_val), &state_val);
-       if (state && poll_value)
+       if (state)
+               poll_value = hid_sensor_read_poll_value(st);
+       if (poll_value > 0)
                 msleep_interruptible(poll_value * 2);
  
         return 0;
diff --git a/drivers/iio/gyro/bmg160_core.c b/drivers/iio/gyro/bmg160_core.c

index f7fcfa886f72181e36b89b38daa327dbf8d17cd0..821919dd245bc8597ce901fffe26001382a9a71c 100644 (file)
--- a/drivers/iio/gyro/bmg160_core.c
+++ b/drivers/iio/gyro/bmg160_core.c
@@ -27,6 +27,7 @@
  #include <linux/iio/trigger_consumer.h>
  #include <linux/iio/triggered_buffer.h>
  #include <linux/regmap.h>
+#include <linux/delay.h>
  #include "bmg160.h"
  
  #define BMG160_IRQ_NAME                "bmg160_event"
@@ -52,6 +53,9 @@
  #define BMG160_DEF_BW                  100
  #define BMG160_REG_PMU_BW_RES          BIT(7)
  
+#define BMG160_GYRO_REG_RESET          0x14
+#define BMG160_GYRO_RESET_VAL          0xb6
+
  #define BMG160_REG_INT_MAP_0           0x17
  #define BMG160_INT_MAP_0_BIT_ANY       BIT(1)
  
@@ -236,6 +240,14 @@ static int bmg160_chip_init(struct bmg160_data *data)
         int ret;
         unsigned int val;
  
+       /*
+        * Reset chip to get it in a known good state. A delay of 30ms after
+        * reset is required according to the datasheet.
+        */
+       regmap_write(data->regmap, BMG160_GYRO_REG_RESET,
+                    BMG160_GYRO_RESET_VAL);
+       usleep_range(30000, 30700);
+
         ret = regmap_read(data->regmap, BMG160_REG_CHIP_ID, &val);
         if (ret < 0) {
                 dev_err(dev, "Error reading reg_chip_id\n");
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c

index 78532ce074497985dfa22fb881b3639e480ae7ed..81b572d7699a89bb17baa21321744c7b518e5c0a 100644 (file)
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
@@ -193,8 +193,8 @@ int st_lsm6dsx_update_watermark(struct st_lsm6dsx_sensor *sensor, u16 watermark)
         if (err < 0)
                 goto out;
  
-       fifo_watermark = ((data & ~ST_LSM6DSX_FIFO_TH_MASK) << 8) |
-                         (fifo_watermark & ST_LSM6DSX_FIFO_TH_MASK);
+       fifo_watermark = ((data << 8) & ~ST_LSM6DSX_FIFO_TH_MASK) |
+                        (fifo_watermark & ST_LSM6DSX_FIFO_TH_MASK);
  
         wdata = cpu_to_le16(fifo_watermark);
         err = hw->tf->write(hw->dev, ST_LSM6DSX_REG_FIFO_THL_ADDR,
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c

index d18ded45bedd98893dde42f182924b671b490e64..3ff91e02fee346a244cfbce3c2143ce4c3fd2bf0 100644 (file)
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -610,10 +610,9 @@ static ssize_t __iio_format_value(char *buf, size_t len, unsigned int type,
                 tmp0 = (int)div_s64_rem(tmp, 1000000000, &tmp1);
                 return snprintf(buf, len, "%d.%09u", tmp0, abs(tmp1));
         case IIO_VAL_FRACTIONAL_LOG2:
-               tmp = (s64)vals[0] * 1000000000LL >> vals[1];
-               tmp1 = do_div(tmp, 1000000000LL);
-               tmp0 = tmp;
-               return snprintf(buf, len, "%d.%09u", tmp0, tmp1);
+               tmp = shift_right((s64)vals[0] * 1000000000LL, vals[1]);
+               tmp0 = (int)div_s64_rem(tmp, 1000000000LL, &tmp1);
+               return snprintf(buf, len, "%d.%09u", tmp0, abs(tmp1));
         case IIO_VAL_INT_MULTIPLE:
         {
                 int i;
diff --git a/drivers/iio/magnetometer/ak8974.c b/drivers/iio/magnetometer/ak8974.c

index 6dd8cbd7ce9531a5883173f34eb30edc45301083..e13370dc9b1cb424c4c2d43f0e92b9fadca78df9 100644 (file)
--- a/drivers/iio/magnetometer/ak8974.c
+++ b/drivers/iio/magnetometer/ak8974.c
@@ -763,7 +763,7 @@ power_off:
         return ret;
  }
  
-static int __exit ak8974_remove(struct i2c_client *i2c)
+static int ak8974_remove(struct i2c_client *i2c)
  {
         struct iio_dev *indio_dev = i2c_get_clientdata(i2c);
         struct ak8974 *ak8974 = iio_priv(indio_dev);
@@ -845,7 +845,7 @@ static struct i2c_driver ak8974_driver = {
                 .of_match_table = of_match_ptr(ak8974_of_match),
         },
         .probe    = ak8974_probe,
-       .remove   = __exit_p(ak8974_remove),
+       .remove   = ak8974_remove,
         .id_table = ak8974_id,
  };
  module_i2c_driver(ak8974_driver);
diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c

index 5f2680855552875e1c9569d9bf6c429466d00fde..fd0edca0e656001b6b4487dc754695da4a81297f 100644 (file)
--- a/drivers/iio/pressure/st_pressure_core.c
+++ b/drivers/iio/pressure/st_pressure_core.c
@@ -457,6 +457,7 @@ static const struct st_sensor_settings st_press_sensors_settings[] = {
                         .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
                 },
                 .multi_read_bit = true,
+               .bootime = 2,
         },
  };
  
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c

index e95510117a6dd7069f1251796eaebd1ea8283693..f2ae75fa3128b9101985bb92e882b596404c7ebc 100644 (file)
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -29,7 +29,13 @@ static int __ib_process_cq(struct ib_cq *cq, int budget)
  {
         int i, n, completed = 0;
  
-       while ((n = ib_poll_cq(cq, IB_POLL_BATCH, cq->wc)) > 0) {
+       /*
+        * budget might be (-1) if the caller does not
+        * want to bound this call, thus we need unsigned
+        * minimum here.
+        */
+       while ((n = ib_poll_cq(cq, min_t(u32, IB_POLL_BATCH,
+                       budget - completed), cq->wc)) > 0) {
                 for (i = 0; i < n; i++) {
                         struct ib_wc *wc = &cq->wc[i];
  
@@ -196,7 +202,7 @@ void ib_free_cq(struct ib_cq *cq)
                 irq_poll_disable(&cq->iop);
                 break;
         case IB_POLL_WORKQUEUE:
-               flush_work(&cq->work);
+               cancel_work_sync(&cq->work);
                 break;
         default:
                 WARN_ON_ONCE(1);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c

index 593d2ce6ec7cec115b58006a5cb13c49bec72d83..7c9e34d679d325d101f937e97ea25dfe76f79f80 100644 (file)
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -336,12 +336,26 @@ int ib_register_device(struct ib_device *device,
         struct device *parent = device->dev.parent;
  
         WARN_ON_ONCE(!parent);
-       if (!device->dev.dma_ops)
-               device->dev.dma_ops = parent->dma_ops;
-       if (!device->dev.dma_mask)
-               device->dev.dma_mask = parent->dma_mask;
-       if (!device->dev.coherent_dma_mask)
-               device->dev.coherent_dma_mask = parent->coherent_dma_mask;
+       WARN_ON_ONCE(device->dma_device);
+       if (device->dev.dma_ops) {
+               /*
+                * The caller provided custom DMA operations. Copy the
+                * DMA-related fields that are used by e.g. dma_alloc_coherent()
+                * into device->dev.
+                */
+               device->dma_device = &device->dev;
+               if (!device->dev.dma_mask)
+                       device->dev.dma_mask = parent->dma_mask;
+               if (!device->dev.coherent_dma_mask)
+                       device->dev.coherent_dma_mask =
+                               parent->coherent_dma_mask;
+       } else {
+               /*
+                * The caller did not provide custom DMA operations. Use the
+                * DMA mapping operations of the parent device.
+                */
+               device->dma_device = parent;
+       }
  
         mutex_lock(&device_mutex);
  
@@ -1015,8 +1029,7 @@ static int __init ib_core_init(void)
                 return -ENOMEM;
  
         ib_comp_wq = alloc_workqueue("ib-comp-wq",
-                       WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM,
-                       WQ_UNBOUND_MAX_ACTIVE);
+                       WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
         if (!ib_comp_wq) {
                 ret = -ENOMEM;
                 goto err;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c

index 0f5d43d1f5fc30d812a6a3988d90b30cdb459da0..70c3e9e795082b7152ea754a52fc68c412dee7ca 100644 (file)
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -160,6 +160,9 @@ int i40iw_inetaddr_event(struct notifier_block *notifier,
                 return NOTIFY_DONE;
  
         iwdev = &hdl->device;
+       if (iwdev->init_state < INET_NOTIFIER)
+               return NOTIFY_DONE;
+
         netdev = iwdev->ldev->netdev;
         upper_dev = netdev_master_upper_dev_get(netdev);
         if (netdev != event_netdev)
@@ -214,6 +217,9 @@ int i40iw_inet6addr_event(struct notifier_block *notifier,
                 return NOTIFY_DONE;
  
         iwdev = &hdl->device;
+       if (iwdev->init_state < INET_NOTIFIER)
+               return NOTIFY_DONE;
+
         netdev = iwdev->ldev->netdev;
         if (netdev != event_netdev)
                 return NOTIFY_DONE;
@@ -260,6 +266,8 @@ int i40iw_net_event(struct notifier_block *notifier, unsigned long event, void *
                 if (!iwhdl)
                         return NOTIFY_DONE;
                 iwdev = &iwhdl->device;
+               if (iwdev->init_state < INET_NOTIFIER)
+                       return NOTIFY_DONE;
                 p = (__be32 *)neigh->primary_key;
                 i40iw_copy_ip_ntohl(local_ipaddr, p);
                 if (neigh->nud_state & NUD_VALID) {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c

index bc9fb144e57b8e3a05863223156f25b7f17e6a34..c52edeafd616a3be52fec913430405e5adff13d2 100644 (file)
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -372,7 +372,7 @@ static int _ocrdma_pd_mgr_put_bitmap(struct ocrdma_dev *dev, u16 pd_id,
         return 0;
  }
  
-static u8 ocrdma_put_pd_num(struct ocrdma_dev *dev, u16 pd_id,
+static int ocrdma_put_pd_num(struct ocrdma_dev *dev, u16 pd_id,
                                    bool dpp_pool)
  {
         int status;
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c

index 12c4208fd7013b78c5e1bf736d82461c05c814c3..af9f596bb68b294cc587f13924a645ac70e6235c 100644 (file)
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -7068,7 +7068,7 @@ static void qib_7322_txchk_change(struct qib_devdata *dd, u32 start,
         unsigned long flags;
  
         while (wait) {
-               unsigned long shadow;
+               unsigned long shadow = 0;
                 int cstart, previ = -1;
  
                 /*
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h

index 3cd96c1b95029d2d365279d3ccfeebe038e3a2e4..9fbe22d3467b222988cf33fee7c04f1996a84928 100644 (file)
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -69,6 +69,9 @@
   */
  #define PCI_DEVICE_ID_VMWARE_PVRDMA    0x0820
  
+#define PVRDMA_NUM_RING_PAGES          4
+#define PVRDMA_QP_NUM_HEADER_PAGES     1
+
  struct pvrdma_dev;
  
  struct pvrdma_page_dir {
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h

index e69d6f3cae32b5018ea4a09efd299ec402aa4a4f..09078ccfaec719b8800ce87beaafbaa92b50cbff 100644 (file)
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
@@ -132,7 +132,7 @@ enum pvrdma_pci_resource {
  
  enum pvrdma_device_ctl {
         PVRDMA_DEVICE_CTL_ACTIVATE,     /* Activate device. */
-       PVRDMA_DEVICE_CTL_QUIESCE,      /* Quiesce device. */
+       PVRDMA_DEVICE_CTL_UNQUIESCE,    /* Unquiesce device. */
         PVRDMA_DEVICE_CTL_RESET,        /* Reset device. */
  };
  
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c

index 100bea5c42ffb74375552131ebb1fbd5cbdc3659..34ebc7615411d9e3574e50fc0bf083056ef756cf 100644 (file)
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -56,7 +56,7 @@
  #include "pvrdma.h"
  
  #define DRV_NAME       "vmw_pvrdma"
-#define DRV_VERSION    "1.0.0.0-k"
+#define DRV_VERSION    "1.0.1.0-k"
  
  static DEFINE_MUTEX(pvrdma_device_list_lock);
  static LIST_HEAD(pvrdma_device_list);
@@ -660,7 +660,16 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
                 pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
                 break;
         case NETDEV_UP:
-               pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
+               pvrdma_write_reg(dev, PVRDMA_REG_CTL,
+                                PVRDMA_DEVICE_CTL_UNQUIESCE);
+
+               mb();
+
+               if (pvrdma_read_reg(dev, PVRDMA_REG_ERR))
+                       dev_err(&dev->pdev->dev,
+                               "failed to activate device during link up\n");
+               else
+                       pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
                 break;
         default:
                 dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n",
@@ -858,7 +867,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
         dev->dsr->resp_slot_dma = (u64)slot_dma;
  
         /* Async event ring */
-       dev->dsr->async_ring_pages.num_pages = 4;
+       dev->dsr->async_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES;
         ret = pvrdma_page_dir_init(dev, &dev->async_pdir,
                                    dev->dsr->async_ring_pages.num_pages, true);
         if (ret)
@@ -867,7 +876,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
         dev->dsr->async_ring_pages.pdir_dma = dev->async_pdir.dir_dma;
  
         /* CQ notification ring */
-       dev->dsr->cq_ring_pages.num_pages = 4;
+       dev->dsr->cq_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES;
         ret = pvrdma_page_dir_init(dev, &dev->cq_pdir,
                                    dev->dsr->cq_ring_pages.num_pages, true);
         if (ret)
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c

index dbbfd35e7da7ade590bc0b13784e82ba9266f213..30062aad3af1a2cfa6d14ce9bd60250644b85ab8 100644 (file)
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -170,8 +170,9 @@ static int pvrdma_set_sq_size(struct pvrdma_dev *dev, struct ib_qp_cap *req_cap,
                                              sizeof(struct pvrdma_sge) *
                                              qp->sq.max_sg);
         /* Note: one extra page for the header. */
-       qp->npages_send = 1 + (qp->sq.wqe_cnt * qp->sq.wqe_size +
-                              PAGE_SIZE - 1) / PAGE_SIZE;
+       qp->npages_send = PVRDMA_QP_NUM_HEADER_PAGES +
+                         (qp->sq.wqe_cnt * qp->sq.wqe_size + PAGE_SIZE - 1) /
+                                                               PAGE_SIZE;
  
         return 0;
  }
@@ -288,7 +289,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
                         qp->npages = qp->npages_send + qp->npages_recv;
  
                         /* Skip header page. */
-                       qp->sq.offset = PAGE_SIZE;
+                       qp->sq.offset = PVRDMA_QP_NUM_HEADER_PAGES * PAGE_SIZE;
  
                         /* Recv queue pages are after send pages. */
                         qp->rq.offset = qp->npages_send * PAGE_SIZE;
@@ -341,7 +342,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
         cmd->qp_type = ib_qp_type_to_pvrdma(init_attr->qp_type);
         cmd->access_flags = IB_ACCESS_LOCAL_WRITE;
         cmd->total_chunks = qp->npages;
-       cmd->send_chunks = qp->npages_send - 1;
+       cmd->send_chunks = qp->npages_send - PVRDMA_QP_NUM_HEADER_PAGES;
         cmd->pdir_dma = qp->pdir.dir_dma;
  
         dev_dbg(&dev->pdev->dev, "create queuepair with %d, %d, %d, %d\n",
@@ -554,13 +555,13 @@ out:
         return ret;
  }
  
-static inline void *get_sq_wqe(struct pvrdma_qp *qp, int n)
+static inline void *get_sq_wqe(struct pvrdma_qp *qp, unsigned int n)
  {
         return pvrdma_page_dir_get_ptr(&qp->pdir,
                                        qp->sq.offset + n * qp->sq.wqe_size);
  }
  
-static inline void *get_rq_wqe(struct pvrdma_qp *qp, int n)
+static inline void *get_rq_wqe(struct pvrdma_qp *qp, unsigned int n)
  {
         return pvrdma_page_dir_get_ptr(&qp->pdir,
                                        qp->rq.offset + n * qp->rq.wqe_size);
@@ -598,9 +599,7 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
         unsigned long flags;
         struct pvrdma_sq_wqe_hdr *wqe_hdr;
         struct pvrdma_sge *sge;
-       int i, index;
-       int nreq;
-       int ret;
+       int i, ret;
  
         /*
          * In states lower than RTS, we can fail immediately. In other states,
@@ -613,9 +612,8 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
  
         spin_lock_irqsave(&qp->sq.lock, flags);
  
-       index = pvrdma_idx(&qp->sq.ring->prod_tail, qp->sq.wqe_cnt);
-       for (nreq = 0; wr; nreq++, wr = wr->next) {
-               unsigned int tail;
+       while (wr) {
+               unsigned int tail = 0;
  
                 if (unlikely(!pvrdma_idx_ring_has_space(
                                 qp->sq.ring, qp->sq.wqe_cnt, &tail))) {
@@ -680,7 +678,7 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                         }
                 }
  
-               wqe_hdr = (struct pvrdma_sq_wqe_hdr *)get_sq_wqe(qp, index);
+               wqe_hdr = (struct pvrdma_sq_wqe_hdr *)get_sq_wqe(qp, tail);
                 memset(wqe_hdr, 0, sizeof(*wqe_hdr));
                 wqe_hdr->wr_id = wr->wr_id;
                 wqe_hdr->num_sge = wr->num_sge;
@@ -771,12 +769,11 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                 /* Make sure wqe is written before index update */
                 smp_wmb();
  
-               index++;
-               if (unlikely(index >= qp->sq.wqe_cnt))
-                       index = 0;
                 /* Update shared sq ring */
                 pvrdma_idx_ring_inc(&qp->sq.ring->prod_tail,
                                     qp->sq.wqe_cnt);
+
+               wr = wr->next;
         }
  
         ret = 0;
@@ -806,7 +803,6 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
         struct pvrdma_qp *qp = to_vqp(ibqp);
         struct pvrdma_rq_wqe_hdr *wqe_hdr;
         struct pvrdma_sge *sge;
-       int index, nreq;
         int ret = 0;
         int i;
  
@@ -821,9 +817,8 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
  
         spin_lock_irqsave(&qp->rq.lock, flags);
  
-       index = pvrdma_idx(&qp->rq.ring->prod_tail, qp->rq.wqe_cnt);
-       for (nreq = 0; wr; nreq++, wr = wr->next) {
-               unsigned int tail;
+       while (wr) {
+               unsigned int tail = 0;
  
                 if (unlikely(wr->num_sge > qp->rq.max_sg ||
                              wr->num_sge < 0)) {
@@ -843,7 +838,7 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                         goto out;
                 }
  
-               wqe_hdr = (struct pvrdma_rq_wqe_hdr *)get_rq_wqe(qp, index);
+               wqe_hdr = (struct pvrdma_rq_wqe_hdr *)get_rq_wqe(qp, tail);
                 wqe_hdr->wr_id = wr->wr_id;
                 wqe_hdr->num_sge = wr->num_sge;
                 wqe_hdr->total_len = 0;
@@ -859,12 +854,11 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                 /* Make sure wqe is written before index update */
                 smp_wmb();
  
-               index++;
-               if (unlikely(index >= qp->rq.wqe_cnt))
-                       index = 0;
                 /* Update shared rq ring */
                 pvrdma_idx_ring_inc(&qp->rq.ring->prod_tail,
                                     qp->rq.wqe_cnt);
+
+               wr = wr->next;
         }
  
         spin_unlock_irqrestore(&qp->rq.lock, flags);
diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c

index e202b8142759f58e0c06317ce1e4f3d798c54f37..6b712eecbd37d9ca5a0bf312028dc7b23c1c2fd8 100644 (file)
--- a/drivers/infiniband/sw/rdmavt/mmap.c
+++ b/drivers/infiniband/sw/rdmavt/mmap.c
@@ -170,9 +170,9 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi,
  
         spin_lock_irq(&rdi->mmap_offset_lock);
         if (rdi->mmap_offset == 0)
-               rdi->mmap_offset = PAGE_SIZE;
+               rdi->mmap_offset = ALIGN(PAGE_SIZE, SHMLBA);
         ip->offset = rdi->mmap_offset;
-       rdi->mmap_offset += size;
+       rdi->mmap_offset += ALIGN(size, SHMLBA);
         spin_unlock_irq(&rdi->mmap_offset_lock);
  
         INIT_LIST_HEAD(&ip->pending_mmaps);
diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig

index 7d1ac27ed2516dad367354f36b15903cb3197ade..6332dedc11e8a3306697e494ab89996b34d453ab 100644 (file)
--- a/drivers/infiniband/sw/rxe/Kconfig
+++ b/drivers/infiniband/sw/rxe/Kconfig
@@ -22,4 +22,4 @@ config RDMA_RXE
         To configure and work with soft-RoCE driver please use the
         following wiki page under "configure Soft-RoCE (RXE)" section:
  
-       https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
+       https://github.com/linux-rdma/rdma-core/blob/master/Documentation/rxe.md
diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c

index c572a4c09359c9f91fcf7f55466fadff012fbc82..bd812e00988ed32f5517ce952a0e2186c4678f87 100644 (file)
--- a/drivers/infiniband/sw/rxe/rxe_mmap.c
+++ b/drivers/infiniband/sw/rxe/rxe_mmap.c
@@ -156,10 +156,10 @@ struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe,
         spin_lock_bh(&rxe->mmap_offset_lock);
  
         if (rxe->mmap_offset == 0)
-               rxe->mmap_offset = PAGE_SIZE;
+               rxe->mmap_offset = ALIGN(PAGE_SIZE, SHMLBA);
  
         ip->info.offset = rxe->mmap_offset;
-       rxe->mmap_offset += size;
+       rxe->mmap_offset += ALIGN(size, SHMLBA);
  
         spin_unlock_bh(&rxe->mmap_offset_lock);
  
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c

index dbfde0dc6ff7e7bcf3c6b182b5f49fc5daf65009..9f95f50b290904fe67797cacfa5af2f565e3188d 100644 (file)
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -729,11 +729,11 @@ next_wqe:
         ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb);
         if (ret) {
                 qp->need_req_skb = 1;
-               kfree_skb(skb);
  
                 rollback_state(wqe, qp, &rollback_wqe, rollback_psn);
  
                 if (ret == -EAGAIN) {
+                       kfree_skb(skb);
                         rxe_run_task(&qp->req.task, 1);
                         goto exit;
                 }
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c

index d404a8aba7afcaf8ab8addd38949ddf5adcd0847..c9dd385ce62e2c65b97e5bf16e5d8f6288b6f19c 100644 (file)
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -813,18 +813,17 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
                 WARN_ON_ONCE(1);
         }
  
-       /* We successfully processed this new request. */
-       qp->resp.msn++;
-
         /* next expected psn, read handles this separately */
         qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
  
         qp->resp.opcode = pkt->opcode;
         qp->resp.status = IB_WC_SUCCESS;
  
-       if (pkt->mask & RXE_COMP_MASK)
+       if (pkt->mask & RXE_COMP_MASK) {
+               /* We successfully processed this new request. */
+               qp->resp.msn++;
                 return RESPST_COMPLETE;
-       else if (qp_type(qp) == IB_QPT_RC)
+       } else if (qp_type(qp) == IB_QPT_RC)
                 return RESPST_ACKNOWLEDGE;
         else
                 return RESPST_CLEANUP;
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h

index 9d0b22ad58c15759c3b92472083da15af5c42bac..c1ae4aeae2f90e5a9c1376a296529d912d77d457 100644 (file)
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -430,6 +430,7 @@ struct iser_fr_desc {
         struct list_head                  list;
         struct iser_reg_resources         rsc;
         struct iser_pi_context           *pi_ctx;
+       struct list_head                  all_list;
  };
  
  /**
@@ -443,6 +444,7 @@ struct iser_fr_pool {
         struct list_head        list;
         spinlock_t              lock;
         int                     size;
+       struct list_head        all_list;
  };
  
  /**
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c

index 30b622f2ab7382ca04a8affbd26dfd01ff98716c..c538a38c91ce95acf8e00fcdf43fa28588ad2f49 100644 (file)
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -362,6 +362,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn,
         int i, ret;
  
         INIT_LIST_HEAD(&fr_pool->list);
+       INIT_LIST_HEAD(&fr_pool->all_list);
         spin_lock_init(&fr_pool->lock);
         fr_pool->size = 0;
         for (i = 0; i < cmds_max; i++) {
@@ -373,6 +374,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn,
                 }
  
                 list_add_tail(&desc->list, &fr_pool->list);
+               list_add_tail(&desc->all_list, &fr_pool->all_list);
                 fr_pool->size++;
         }
  
@@ -392,13 +394,13 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
         struct iser_fr_desc *desc, *tmp;
         int i = 0;
  
-       if (list_empty(&fr_pool->list))
+       if (list_empty(&fr_pool->all_list))
                 return;
  
         iser_info("freeing conn %p fr pool\n", ib_conn);
  
-       list_for_each_entry_safe(desc, tmp, &fr_pool->list, list) {
-               list_del(&desc->list);
+       list_for_each_entry_safe(desc, tmp, &fr_pool->all_list, all_list) {
+               list_del(&desc->all_list);
                 iser_free_reg_res(&desc->rsc);
                 if (desc->pi_ctx)
                         iser_free_pi_ctx(desc->pi_ctx);
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c

index 91cbe86b25c8ec2d693bea5452c04d0ffa73ba0b..fcbed35e95a824979bb48fba9f05614591e97491 100644 (file)
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -817,6 +817,7 @@ isert_post_recvm(struct isert_conn *isert_conn, u32 count)
                 rx_wr->sg_list = &rx_desc->rx_sg;
                 rx_wr->num_sge = 1;
                 rx_wr->next = rx_wr + 1;
+               rx_desc->in_use = false;
         }
         rx_wr--;
         rx_wr->next = NULL; /* mark end of work requests list */
@@ -835,6 +836,15 @@ isert_post_recv(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc)
         struct ib_recv_wr *rx_wr_failed, rx_wr;
         int ret;
  
+       if (!rx_desc->in_use) {
+               /*
+                * if the descriptor is not in-use we already reposted it
+                * for recv, so just silently return
+                */
+               return 0;
+       }
+
+       rx_desc->in_use = false;
         rx_wr.wr_cqe = &rx_desc->rx_cqe;
         rx_wr.sg_list = &rx_desc->rx_sg;
         rx_wr.num_sge = 1;
@@ -1397,6 +1407,8 @@ isert_recv_done(struct ib_cq *cq, struct ib_wc *wc)
                 return;
         }
  
+       rx_desc->in_use = true;
+
         ib_dma_sync_single_for_cpu(ib_dev, rx_desc->dma_addr,
                         ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
  
@@ -1659,10 +1671,23 @@ isert_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
         ret = isert_check_pi_status(cmd, isert_cmd->rw.sig->sig_mr);
         isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn);
  
-       if (ret)
-               transport_send_check_condition_and_sense(cmd, cmd->pi_err, 0);
-       else
-               isert_put_response(isert_conn->conn, isert_cmd->iscsi_cmd);
+       if (ret) {
+               /*
+                * transport_generic_request_failure() expects to have
+                * plus two references to handle queue-full, so re-add
+                * one here as target-core will have already dropped
+                * it after the first isert_put_datain() callback.
+                */
+               kref_get(&cmd->cmd_kref);
+               transport_generic_request_failure(cmd, cmd->pi_err);
+       } else {
+               /*
+                * XXX: isert_put_response() failure is not retried.
+                */
+               ret = isert_put_response(isert_conn->conn, isert_cmd->iscsi_cmd);
+               if (ret)
+                       pr_warn_ratelimited("isert_put_response() ret: %d\n", ret);
+       }
  }
  
  static void
@@ -1699,13 +1724,15 @@ isert_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
         cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT;
         spin_unlock_bh(&cmd->istate_lock);
  
-       if (ret) {
-               target_put_sess_cmd(se_cmd);
-               transport_send_check_condition_and_sense(se_cmd,
-                                                        se_cmd->pi_err, 0);
-       } else {
+       /*
+        * transport_generic_request_failure() will drop the extra
+        * se_cmd->cmd_kref reference after T10-PI error, and handle
+        * any non-zero ->queue_status() callback error retries.
+        */
+       if (ret)
+               transport_generic_request_failure(se_cmd, se_cmd->pi_err);
+       else
                 target_execute_cmd(se_cmd);
-       }
  }
  
  static void
@@ -2171,26 +2198,28 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
                 chain_wr = &isert_cmd->tx_desc.send_wr;
         }
  
-       isert_rdma_rw_ctx_post(isert_cmd, isert_conn, cqe, chain_wr);
-       isert_dbg("Cmd: %p posted RDMA_WRITE for iSER Data READ\n", isert_cmd);
-       return 1;
+       rc = isert_rdma_rw_ctx_post(isert_cmd, isert_conn, cqe, chain_wr);
+       isert_dbg("Cmd: %p posted RDMA_WRITE for iSER Data READ rc: %d\n",
+                 isert_cmd, rc);
+       return rc;
  }
  
  static int
  isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
  {
         struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
+       int ret;
  
         isert_dbg("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n",
                  isert_cmd, cmd->se_cmd.data_length, cmd->write_data_done);
  
         isert_cmd->tx_desc.tx_cqe.done = isert_rdma_read_done;
-       isert_rdma_rw_ctx_post(isert_cmd, conn->context,
-                       &isert_cmd->tx_desc.tx_cqe, NULL);
+       ret = isert_rdma_rw_ctx_post(isert_cmd, conn->context,
+                                    &isert_cmd->tx_desc.tx_cqe, NULL);
  
-       isert_dbg("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n",
-                isert_cmd);
-       return 0;
+       isert_dbg("Cmd: %p posted RDMA_READ memory for ISER Data WRITE rc: %d\n",
+                isert_cmd, ret);
+       return ret;
  }
  
  static int
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h

index c02ada57d7f5c4fa5b765a6401ac8e7bbd6096bb..87d994de8c910d998c12a4205c0e66eb1220b12d 100644 (file)
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -60,7 +60,7 @@
  
  #define ISER_RX_PAD_SIZE       (ISCSI_DEF_MAX_RECV_SEG_LEN + 4096 - \
                 (ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge) + \
-                sizeof(struct ib_cqe)))
+                sizeof(struct ib_cqe) + sizeof(bool)))
  
  #define ISCSI_ISER_SG_TABLESIZE                256
  
@@ -85,6 +85,7 @@ struct iser_rx_desc {
         u64             dma_addr;
         struct ib_sge   rx_sg;
         struct ib_cqe   rx_cqe;
+       bool            in_use;
         char            pad[ISER_RX_PAD_SIZE];
  } __packed;
  
diff --git a/drivers/input/joystick/iforce/iforce-usb.c b/drivers/input/joystick/iforce/iforce-usb.c

index d96aa27dfcdc9776260c72753b48b63691af1b04..db64adfbe1aff092b4418832c1c81cce0901e044 100644 (file)
--- a/drivers/input/joystick/iforce/iforce-usb.c
+++ b/drivers/input/joystick/iforce/iforce-usb.c
@@ -141,6 +141,9 @@ static int iforce_usb_probe(struct usb_interface *intf,
  
         interface = intf->cur_altsetting;
  
+       if (interface->desc.bNumEndpoints < 2)
+               return -ENODEV;
+
         epirq = &interface->endpoint[0].desc;
         epout = &interface->endpoint[1].desc;
  
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c

index 155fcb3b6230a01da6d1de3ab772e508b56fef78..153b1ee13e03eccd764d4a92a85bd579c0f89946 100644 (file)
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -202,6 +202,7 @@ static const struct xpad_device {
         { 0x1430, 0x8888, "TX6500+ Dance Pad (first generation)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX },
         { 0x146b, 0x0601, "BigBen Interactive XBOX 360 Controller", 0, XTYPE_XBOX360 },
         { 0x1532, 0x0037, "Razer Sabertooth", 0, XTYPE_XBOX360 },
+       { 0x1532, 0x0a03, "Razer Wildcat", 0, XTYPE_XBOXONE },
         { 0x15e4, 0x3f00, "Power A Mini Pro Elite", 0, XTYPE_XBOX360 },
         { 0x15e4, 0x3f0a, "Xbox Airflo wired controller", 0, XTYPE_XBOX360 },
         { 0x15e4, 0x3f10, "Batarang Xbox 360 controller", 0, XTYPE_XBOX360 },
@@ -326,6 +327,7 @@ static struct usb_device_id xpad_table[] = {
         XPAD_XBOX360_VENDOR(0x1430),            /* RedOctane X-Box 360 controllers */
         XPAD_XBOX360_VENDOR(0x146b),            /* BigBen Interactive Controllers */
         XPAD_XBOX360_VENDOR(0x1532),            /* Razer Sabertooth */
+       XPAD_XBOXONE_VENDOR(0x1532),            /* Razer Wildcat */
         XPAD_XBOX360_VENDOR(0x15e4),            /* Numark X-Box 360 controllers */
         XPAD_XBOX360_VENDOR(0x162e),            /* Joytech X-Box 360 controllers */
         XPAD_XBOX360_VENDOR(0x1689),            /* Razer Onza */
diff --git a/drivers/input/misc/cm109.c b/drivers/input/misc/cm109.c

index 9cc6d057c302a1753f33e9072ab5f5384f1eec40..23c191a2a0715abe9584760742327b25f058d53b 100644 (file)
--- a/drivers/input/misc/cm109.c
+++ b/drivers/input/misc/cm109.c
@@ -700,6 +700,10 @@ static int cm109_usb_probe(struct usb_interface *intf,
         int error = -ENOMEM;
  
         interface = intf->cur_altsetting;
+
+       if (interface->desc.bNumEndpoints < 1)
+               return -ENODEV;
+
         endpoint = &interface->endpoint[0].desc;
  
         if (!usb_endpoint_is_int_in(endpoint))
diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c

index 9c0ea36913b4a98293911f62a2d649dc08a855cd..f4e8fbec6a942a8ea7a48e268b1e96f9fe458369 100644 (file)
--- a/drivers/input/misc/ims-pcu.c
+++ b/drivers/input/misc/ims-pcu.c
@@ -1667,6 +1667,10 @@ static int ims_pcu_parse_cdc_data(struct usb_interface *intf, struct ims_pcu *pc
                 return -EINVAL;
  
         alt = pcu->ctrl_intf->cur_altsetting;
+
+       if (alt->desc.bNumEndpoints < 1)
+               return -ENODEV;
+
         pcu->ep_ctrl = &alt->endpoint[0].desc;
         pcu->max_ctrl_size = usb_endpoint_maxp(pcu->ep_ctrl);
  
diff --git a/drivers/input/misc/yealink.c b/drivers/input/misc/yealink.c

index 79c964c075f14029a8072a6ad441927a0a40cf04..6e7ff9561d9261f31f919720925e12d8f1720b98 100644 (file)
--- a/drivers/input/misc/yealink.c
+++ b/drivers/input/misc/yealink.c
@@ -875,6 +875,10 @@ static int usb_probe(struct usb_interface *intf, const struct usb_device_id *id)
         int ret, pipe, i;
  
         interface = intf->cur_altsetting;
+
+       if (interface->desc.bNumEndpoints < 1)
+               return -ENODEV;
+
         endpoint = &interface->endpoint[0].desc;
         if (!usb_endpoint_is_int_in(endpoint))
                 return -ENODEV;
diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c

index 72b28ebfe360030cbeeadec8a52385a9de09a7f0..f210e19ddba66b86312b09c6925c55ea9fc0fab7 100644 (file)
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -1282,10 +1282,8 @@ static int alps_decode_ss4_v2(struct alps_fields *f,
         /* handle buttons */
         if (pkt_id == SS4_PACKET_ID_STICK) {
                 f->ts_left = !!(SS4_BTN_V2(p) & 0x01);
-               if (!(priv->flags & ALPS_BUTTONPAD)) {
-                       f->ts_right = !!(SS4_BTN_V2(p) & 0x02);
-                       f->ts_middle = !!(SS4_BTN_V2(p) & 0x04);
-               }
+               f->ts_right = !!(SS4_BTN_V2(p) & 0x02);
+               f->ts_middle = !!(SS4_BTN_V2(p) & 0x04);
         } else {
                 f->left = !!(SS4_BTN_V2(p) & 0x01);
                 if (!(priv->flags & ALPS_BUTTONPAD)) {
@@ -2462,14 +2460,34 @@ static int alps_update_device_area_ss4_v2(unsigned char otp[][4],
         int num_y_electrode;
         int x_pitch, y_pitch, x_phys, y_phys;
  
-       num_x_electrode = SS4_NUMSENSOR_XOFFSET + (otp[1][0] & 0x0F);
-       num_y_electrode = SS4_NUMSENSOR_YOFFSET + ((otp[1][0] >> 4) & 0x0F);
+       if (IS_SS4PLUS_DEV(priv->dev_id)) {
+               num_x_electrode =
+                       SS4PLUS_NUMSENSOR_XOFFSET + (otp[0][2] & 0x0F);
+               num_y_electrode =
+                       SS4PLUS_NUMSENSOR_YOFFSET + ((otp[0][2] >> 4) & 0x0F);
+
+               priv->x_max =
+                       (num_x_electrode - 1) * SS4PLUS_COUNT_PER_ELECTRODE;
+               priv->y_max =
+                       (num_y_electrode - 1) * SS4PLUS_COUNT_PER_ELECTRODE;
  
-       priv->x_max = (num_x_electrode - 1) * SS4_COUNT_PER_ELECTRODE;
-       priv->y_max = (num_y_electrode - 1) * SS4_COUNT_PER_ELECTRODE;
+               x_pitch = (otp[0][1] & 0x0F) + SS4PLUS_MIN_PITCH_MM;
+               y_pitch = ((otp[0][1] >> 4) & 0x0F) + SS4PLUS_MIN_PITCH_MM;
  
-       x_pitch = ((otp[1][2] >> 2) & 0x07) + SS4_MIN_PITCH_MM;
-       y_pitch = ((otp[1][2] >> 5) & 0x07) + SS4_MIN_PITCH_MM;
+       } else {
+               num_x_electrode =
+                       SS4_NUMSENSOR_XOFFSET + (otp[1][0] & 0x0F);
+               num_y_electrode =
+                       SS4_NUMSENSOR_YOFFSET + ((otp[1][0] >> 4) & 0x0F);
+
+               priv->x_max =
+                       (num_x_electrode - 1) * SS4_COUNT_PER_ELECTRODE;
+               priv->y_max =
+                       (num_y_electrode - 1) * SS4_COUNT_PER_ELECTRODE;
+
+               x_pitch = ((otp[1][2] >> 2) & 0x07) + SS4_MIN_PITCH_MM;
+               y_pitch = ((otp[1][2] >> 5) & 0x07) + SS4_MIN_PITCH_MM;
+       }
  
         x_phys = x_pitch * (num_x_electrode - 1); /* In 0.1 mm units */
         y_phys = y_pitch * (num_y_electrode - 1); /* In 0.1 mm units */
@@ -2485,7 +2503,10 @@ static int alps_update_btn_info_ss4_v2(unsigned char otp[][4],
  {
         unsigned char is_btnless;
  
-       is_btnless = (otp[1][1] >> 3) & 0x01;
+       if (IS_SS4PLUS_DEV(priv->dev_id))
+               is_btnless = (otp[1][0] >> 1) & 0x01;
+       else
+               is_btnless = (otp[1][1] >> 3) & 0x01;
  
         if (is_btnless)
                 priv->flags |= ALPS_BUTTONPAD;
@@ -2493,6 +2514,21 @@ static int alps_update_btn_info_ss4_v2(unsigned char otp[][4],
         return 0;
  }
  
+static int alps_update_dual_info_ss4_v2(unsigned char otp[][4],
+                                      struct alps_data *priv)
+{
+       bool is_dual = false;
+
+       if (IS_SS4PLUS_DEV(priv->dev_id))
+               is_dual = (otp[0][0] >> 4) & 0x01;
+
+       if (is_dual)
+               priv->flags |= ALPS_DUALPOINT |
+                                       ALPS_DUALPOINT_WITH_PRESSURE;
+
+       return 0;
+}
+
  static int alps_set_defaults_ss4_v2(struct psmouse *psmouse,
                                     struct alps_data *priv)
  {
@@ -2508,6 +2544,8 @@ static int alps_set_defaults_ss4_v2(struct psmouse *psmouse,
  
         alps_update_btn_info_ss4_v2(otp, priv);
  
+       alps_update_dual_info_ss4_v2(otp, priv);
+
         return 0;
  }
  
@@ -2753,10 +2791,6 @@ static int alps_set_protocol(struct psmouse *psmouse,
                 if (alps_set_defaults_ss4_v2(psmouse, priv))
                         return -EIO;
  
-               if (priv->fw_ver[1] == 0x1)
-                       priv->flags |= ALPS_DUALPOINT |
-                                       ALPS_DUALPOINT_WITH_PRESSURE;
-
                 break;
         }
  
@@ -2827,10 +2861,7 @@ static int alps_identify(struct psmouse *psmouse, struct alps_data *priv)
                            ec[2] >= 0x90 && ec[2] <= 0x9d) {
                         protocol = &alps_v3_protocol_data;
                 } else if (e7[0] == 0x73 && e7[1] == 0x03 &&
-                          e7[2] == 0x14 && ec[1] == 0x02) {
-                       protocol = &alps_v8_protocol_data;
-               } else if (e7[0] == 0x73 && e7[1] == 0x03 &&
-                          e7[2] == 0x28 && ec[1] == 0x01) {
+                          (e7[2] == 0x14 || e7[2] == 0x28)) {
                         protocol = &alps_v8_protocol_data;
                 } else {
                         psmouse_dbg(psmouse,
@@ -2840,7 +2871,8 @@ static int alps_identify(struct psmouse *psmouse, struct alps_data *priv)
         }
  
         if (priv) {
-               /* Save the Firmware version */
+               /* Save Device ID and Firmware version */
+               memcpy(priv->dev_id, e7, 3);
                 memcpy(priv->fw_ver, ec, 3);
                 error = alps_set_protocol(psmouse, priv, protocol);
                 if (error)
diff --git a/drivers/input/mouse/alps.h b/drivers/input/mouse/alps.h

index 6d279aa27cb9a10d70a2e732fe9599297883cbc0..4334f2805d93c7a3e8454a8cf0d950a14999278c 100644 (file)
--- a/drivers/input/mouse/alps.h
+++ b/drivers/input/mouse/alps.h
@@ -54,6 +54,16 @@ enum SS4_PACKET_ID {
  
  #define SS4_MASK_NORMAL_BUTTONS                0x07
  
+#define SS4PLUS_COUNT_PER_ELECTRODE    128
+#define SS4PLUS_NUMSENSOR_XOFFSET      16
+#define SS4PLUS_NUMSENSOR_YOFFSET      5
+#define SS4PLUS_MIN_PITCH_MM           37
+
+#define IS_SS4PLUS_DEV(_b)     (((_b[0]) == 0x73) &&   \
+                                ((_b[1]) == 0x03) &&   \
+                                ((_b[2]) == 0x28)              \
+                               )
+
  #define SS4_IS_IDLE_V2(_b)     (((_b[0]) == 0x18) &&           \
                                  ((_b[1]) == 0x10) &&           \
                                  ((_b[2]) == 0x00) &&           \
@@ -283,6 +293,7 @@ struct alps_data {
         int addr_command;
         u16 proto_version;
         u8 byte0, mask0;
+       u8 dev_id[3];
         u8 fw_ver[3];
         int flags;
         int x_max;
diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c

index 352050e9031dc31ab87e7d3cdb2948b2ead7dee1..d5ab9ddef3e37eeb553307c7406adc1f2c011057 100644 (file)
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -218,17 +218,19 @@ static int elan_query_product(struct elan_tp_data *data)
  
  static int elan_check_ASUS_special_fw(struct elan_tp_data *data)
  {
-       if (data->ic_type != 0x0E)
-               return false;
-
-       switch (data->product_id) {
-       case 0x05 ... 0x07:
-       case 0x09:
-       case 0x13:
+       if (data->ic_type == 0x0E) {
+               switch (data->product_id) {
+               case 0x05 ... 0x07:
+               case 0x09:
+               case 0x13:
+                       return true;
+               }
+       } else if (data->ic_type == 0x08 && data->product_id == 0x26) {
+               /* ASUS EeeBook X205TA */
                 return true;
-       default:
-               return false;
         }
+
+       return false;
  }
  
  static int __elan_initialize(struct elan_tp_data *data)
diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c

index efc8ec3423514ad33dd2ebaf0c861d41bb960140..e73d968023f7ce7de418dcf1315b7c554773d604 100644 (file)
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -1118,6 +1118,7 @@ static int elantech_get_resolution_v4(struct psmouse *psmouse,
   * Asus UX32VD             0x361f02        00, 15, 0e      clickpad
   * Avatar AVIU-145A2       0x361f00        ?               clickpad
   * Fujitsu LIFEBOOK E544   0x470f00        d0, 12, 09      2 hw buttons
+ * Fujitsu LIFEBOOK E547   0x470f00        50, 12, 09      2 hw buttons
   * Fujitsu LIFEBOOK E554   0x570f01        40, 14, 0c      2 hw buttons
   * Fujitsu T725            0x470f01        05, 12, 09      2 hw buttons
   * Fujitsu H730            0x570f00        c0, 14, 0c      3 hw buttons (**)
@@ -1523,6 +1524,13 @@ static const struct dmi_system_id elantech_dmi_force_crc_enabled[] = {
                         DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E544"),
                 },
         },
+       {
+               /* Fujitsu LIFEBOOK E547 does not work with crc_enabled == 0 */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E547"),
+               },
+       },
         {
                 /* Fujitsu LIFEBOOK E554  does not work with crc_enabled == 0 */
                 .matches = {
diff --git a/drivers/input/rmi4/rmi_f30.c b/drivers/input/rmi4/rmi_f30.c

index 1986786133824d5b7cc815ec98efe61afd2624e2..34dfee555b201b0577e6e5fc0c58c63e7cc8cac7 100644 (file)
--- a/drivers/input/rmi4/rmi_f30.c
+++ b/drivers/input/rmi4/rmi_f30.c
@@ -170,6 +170,10 @@ static int rmi_f30_config(struct rmi_function *fn)
                                 rmi_get_platform_data(fn->rmi_dev);
         int error;
  
+       /* can happen if f30_data.disable is set */
+       if (!f30)
+               return 0;
+
         if (pdata->f30_data.trackstick_buttons) {
                 /* Try [re-]establish link to F03. */
                 f30->f03 = rmi_find_function(fn->rmi_dev, 0x03);
diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h

index 05afd16ea9c9efc0bb52efe49510444478e1e220..09720d950686c844b49f1d7f32710e160d21624a 100644 (file)
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -119,6 +119,13 @@ static const struct dmi_system_id __initconst i8042_dmi_noloop_table[] = {
                         DMI_MATCH(DMI_PRODUCT_VERSION, "DL760"),
                 },
         },
+       {
+               /* Dell Embedded Box PC 3000 */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Embedded Box PC 3000"),
+               },
+       },
         {
                 /* OQO Model 01 */
                 .matches = {
@@ -513,6 +520,13 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = {
                         DMI_MATCH(DMI_PRODUCT_NAME, "IC4I"),
                 },
         },
+       {
+               /* TUXEDO BU1406 */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "N24_25BU"),
+               },
+       },
         { }
  };
  
@@ -606,6 +620,13 @@ static const struct dmi_system_id __initconst i8042_dmi_reset_table[] = {
                         DMI_MATCH(DMI_PRODUCT_NAME, "20046"),
                 },
         },
+       {
+               /* Clevo P650RS, 650RP6, Sager NP8152-S, and others */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "P65xRP"),
+               },
+       },
         { }
  };
  
diff --git a/drivers/input/tablet/hanwang.c b/drivers/input/tablet/hanwang.c

index cd852059b99e81899f8436f31071c929eb470b83..df4bea96d7ed7d10e66478c929feb1094ff5f09c 100644 (file)
--- a/drivers/input/tablet/hanwang.c
+++ b/drivers/input/tablet/hanwang.c
@@ -340,6 +340,9 @@ static int hanwang_probe(struct usb_interface *intf, const struct usb_device_id
         int error;
         int i;
  
+       if (intf->cur_altsetting->desc.bNumEndpoints < 1)
+               return -ENODEV;
+
         hanwang = kzalloc(sizeof(struct hanwang), GFP_KERNEL);
         input_dev = input_allocate_device();
         if (!hanwang || !input_dev) {
diff --git a/drivers/input/tablet/kbtab.c b/drivers/input/tablet/kbtab.c

index e850d7e8afbc4d22afb1bc73b2e99fc3f1cf5e8f..4d9d64908b595f9828e8e82e47e57b6b2935c07f 100644 (file)
--- a/drivers/input/tablet/kbtab.c
+++ b/drivers/input/tablet/kbtab.c
@@ -122,6 +122,9 @@ static int kbtab_probe(struct usb_interface *intf, const struct usb_device_id *i
         struct input_dev *input_dev;
         int error = -ENOMEM;
  
+       if (intf->cur_altsetting->desc.bNumEndpoints < 1)
+               return -ENODEV;
+
         kbtab = kzalloc(sizeof(struct kbtab), GFP_KERNEL);
         input_dev = input_allocate_device();
         if (!kbtab || !input_dev)
diff --git a/drivers/input/touchscreen/sur40.c b/drivers/input/touchscreen/sur40.c

index aefb6e11f88a0838917b0a3b7a59dc9eae6b37f2..4c0eecae065c113a26400469d7def3c2b9bcc635 100644 (file)
--- a/drivers/input/touchscreen/sur40.c
+++ b/drivers/input/touchscreen/sur40.c
@@ -527,6 +527,9 @@ static int sur40_probe(struct usb_interface *interface,
         if (iface_desc->desc.bInterfaceClass != 0xFF)
                 return -ENODEV;
  
+       if (iface_desc->desc.bNumEndpoints < 5)
+               return -ENODEV;
+
         /* Use endpoint #4 (0x86). */
         endpoint = &iface_desc->endpoint[4].desc;
         if (endpoint->bEndpointAddress != TOUCH_ENDPOINT)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c

index 98940d1392cb0cd19d648b6a25f2a5ba36c052d4..b17536d6e69bdbf956a61d8c3dff06351a1a51df 100644 (file)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3202,7 +3202,7 @@ static void amd_iommu_get_resv_regions(struct device *dev,
  
         region = iommu_alloc_resv_region(MSI_RANGE_START,
                                          MSI_RANGE_END - MSI_RANGE_START + 1,
-                                        0, IOMMU_RESV_RESERVED);
+                                        0, IOMMU_RESV_MSI);
         if (!region)
                 return;
         list_add_tail(&region->list, head);
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c

index 5806a6acc94ecd7543c2435558a0907ec0934ff2..591bb96047c9765fd3e0fb536d3f26ca0a5f187f 100644 (file)
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1888,7 +1888,7 @@ static void arm_smmu_get_resv_regions(struct device *dev,
         int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
  
         region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
-                                        prot, IOMMU_RESV_MSI);
+                                        prot, IOMMU_RESV_SW_MSI);
         if (!region)
                 return;
  
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c

index abf6496843a617070289377ffad3fd1e119b0aa6..b493c99e17f74de338805167c3ddb104e3354b62 100644 (file)
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1608,7 +1608,7 @@ static void arm_smmu_get_resv_regions(struct device *dev,
         int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
  
         region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
-                                        prot, IOMMU_RESV_MSI);
+                                        prot, IOMMU_RESV_SW_MSI);
         if (!region)
                 return;
  
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c

index a7e0821c9967e490258921238e6640723e79375d..c01bfcdb238316c049ae0dd6b4bf2d43c6c61440 100644 (file)
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -512,7 +512,13 @@ static void sysmmu_tlb_invalidate_flpdcache(struct sysmmu_drvdata *data,
         spin_lock_irqsave(&data->lock, flags);
         if (data->active && data->version >= MAKE_MMU_VER(3, 3)) {
                 clk_enable(data->clk_master);
-               __sysmmu_tlb_invalidate_entry(data, iova, 1);
+               if (sysmmu_block(data)) {
+                       if (data->version >= MAKE_MMU_VER(5, 0))
+                               __sysmmu_tlb_invalidate(data);
+                       else
+                               __sysmmu_tlb_invalidate_entry(data, iova, 1);
+                       sysmmu_unblock(data);
+               }
                 clk_disable(data->clk_master);
         }
         spin_unlock_irqrestore(&data->lock, flags);
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c

index 238ad3447712d263ef9d67a109c2d86c03693a87..d412a313a37232997d406e53379e1466d10b93e7 100644 (file)
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -916,7 +916,7 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf
                                  * which we used for the IOMMU lookup. Strictly speaking
                                  * we could do this for all PCI devices; we only need to
                                  * get the BDF# from the scope table for ACPI matches. */
-                               if (pdev->is_virtfn)
+                               if (pdev && pdev->is_virtfn)
                                         goto got_pdev;
  
                                 *bus = drhd->devices[i].bus;
@@ -5249,7 +5249,7 @@ static void intel_iommu_get_resv_regions(struct device *device,
  
         reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
                                       IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
-                                     0, IOMMU_RESV_RESERVED);
+                                     0, IOMMU_RESV_MSI);
         if (!reg)
                 return;
         list_add_tail(&reg->list, head);
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c

index 1c049e2e12bf0ddacbc0e8ff9cbb09751996a549..8d6ca28c3e1f14a6c364aae89640ee6535f8f0d6 100644 (file)
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -422,8 +422,12 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova,
                         pte |= ARM_V7S_ATTR_NS_TABLE;
  
                 __arm_v7s_set_pte(ptep, pte, 1, cfg);
-       } else {
+       } else if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) {
                 cptep = iopte_deref(pte, lvl);
+       } else {
+               /* We require an unmap first */
+               WARN_ON(!selftest_running);
+               return -EEXIST;
         }
  
         /* Rinse, repeat */
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c

index feacc54bec683b535fcba37e47ecb46af014ef5a..f9bc6ebb8140b06c845355560fd6e5d113912073 100644 (file)
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -335,8 +335,12 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
                 if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
                         pte |= ARM_LPAE_PTE_NSTABLE;
                 __arm_lpae_set_pte(ptep, pte, cfg);
-       } else {
+       } else if (!iopte_leaf(pte, lvl)) {
                 cptep = iopte_deref(pte, data);
+       } else {
+               /* We require an unmap first */
+               WARN_ON(!selftest_running);
+               return -EEXIST;
         }
  
         /* Rinse, repeat */
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c

index 8ea14f41a979fd4e72e3a6093e5fa8d2a0eff24a..3b67144dead2e3811918af8fa44ec6e67a19c955 100644 (file)
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -72,6 +72,7 @@ static const char * const iommu_group_resv_type_string[] = {
         [IOMMU_RESV_DIRECT]     = "direct",
         [IOMMU_RESV_RESERVED]   = "reserved",
         [IOMMU_RESV_MSI]        = "msi",
+       [IOMMU_RESV_SW_MSI]     = "msi",
  };
  
  #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store)          \
@@ -1743,8 +1744,8 @@ void iommu_put_resv_regions(struct device *dev, struct list_head *list)
  }
  
  struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start,
-                                                 size_t length,
-                                                 int prot, int type)
+                                                 size_t length, int prot,
+                                                 enum iommu_resv_type type)
  {
         struct iommu_resv_region *region;
  
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig

index 125528f39e92c2377846f799a841e23ef1e98654..8162121bb1bcd766e06c74c7a8e6fc337a57a605 100644 (file)
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -262,6 +262,7 @@ config IRQ_MXS
  
  config MVEBU_ODMI
         bool
+       select GENERIC_MSI_IRQ_DOMAIN
  
  config MVEBU_PIC
         bool
diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c

index 1eef56a89b1fbff1ee348f08b4623f6fcf4f6851..f96601268f7194bb5aada8f0f07327f2871d1c76 100644 (file)
--- a/drivers/irqchip/irq-crossbar.c
+++ b/drivers/irqchip/irq-crossbar.c
@@ -198,7 +198,8 @@ static const struct irq_domain_ops crossbar_domain_ops = {
  
  static int __init crossbar_of_init(struct device_node *node)
  {
-       int i, size, max = 0, reserved = 0, entry;
+       u32 max = 0, entry, reg_size;
+       int i, size, reserved = 0;
         const __be32 *irqsr;
         int ret = -ENOMEM;
  
@@ -275,9 +276,9 @@ static int __init crossbar_of_init(struct device_node *node)
         if (!cb->register_offsets)
                 goto err_irq_map;
  
-       of_property_read_u32(node, "ti,reg-size", &size);
+       of_property_read_u32(node, "ti,reg-size", &reg_size);
  
-       switch (size) {
+       switch (reg_size) {
         case 1:
                 cb->write = crossbar_writeb;
                 break;
@@ -303,7 +304,7 @@ static int __init crossbar_of_init(struct device_node *node)
                         continue;
  
                 cb->register_offsets[i] = reserved;
-               reserved += size;
+               reserved += reg_size;
         }
  
         of_property_read_u32(node, "ti,irqs-safe-map", &cb->safe_map);
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c

index 23201004fd7a68e39055a69abbd41019aa66b12b..f77f840d2b5f7995ee0424445546a140079a5022 100644 (file)
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -1601,6 +1601,14 @@ static void __maybe_unused its_enable_quirk_cavium_23144(void *data)
         its->flags |= ITS_FLAGS_WORKAROUND_CAVIUM_23144;
  }
  
+static void __maybe_unused its_enable_quirk_qdf2400_e0065(void *data)
+{
+       struct its_node *its = data;
+
+       /* On QDF2400, the size of the ITE is 16Bytes */
+       its->ite_size = 16;
+}
+
  static const struct gic_quirk its_quirks[] = {
  #ifdef CONFIG_CAVIUM_ERRATUM_22375
         {
@@ -1617,6 +1625,14 @@ static const struct gic_quirk its_quirks[] = {
                 .mask   = 0xffff0fff,
                 .init   = its_enable_quirk_cavium_23144,
         },
+#endif
+#ifdef CONFIG_QCOM_QDF2400_ERRATUM_0065
+       {
+               .desc   = "ITS: QDF2400 erratum 0065",
+               .iidr   = 0x00001070, /* QDF2400 ITS rev 1.x */
+               .mask   = 0xffffffff,
+               .init   = its_enable_quirk_qdf2400_e0065,
+       },
  #endif
         {
         }
diff --git a/drivers/irqchip/irq-imx-gpcv2.c b/drivers/irqchip/irq-imx-gpcv2.c

index 15af9a9753e582b797a58de3c9713226800999f8..2d203b422129e53554d5be023595c6946877037c 100644 (file)
--- a/drivers/irqchip/irq-imx-gpcv2.c
+++ b/drivers/irqchip/irq-imx-gpcv2.c
@@ -230,6 +230,8 @@ static int __init imx_gpcv2_irqchip_init(struct device_node *node,
                 return -ENOMEM;
         }
  
+       raw_spin_lock_init(&cd->rlock);
+
         cd->gpc_base = of_iomap(node, 0);
         if (!cd->gpc_base) {
                 pr_err("fsl-gpcv2: unable to map gpc registers\n");
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c

index 11d12bccc4e7f10d72fa41f5464ed4d6b6f02a5f..cd20df12d63d98f0e1ae7315b135493699feca4e 100644 (file)
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -991,8 +991,12 @@ static void __init gic_map_single_int(struct device_node *node,
  
  static void __init gic_map_interrupts(struct device_node *node)
  {
+       gic_map_single_int(node, GIC_LOCAL_INT_WD);
+       gic_map_single_int(node, GIC_LOCAL_INT_COMPARE);
         gic_map_single_int(node, GIC_LOCAL_INT_TIMER);
         gic_map_single_int(node, GIC_LOCAL_INT_PERFCTR);
+       gic_map_single_int(node, GIC_LOCAL_INT_SWINT0);
+       gic_map_single_int(node, GIC_LOCAL_INT_SWINT1);
         gic_map_single_int(node, GIC_LOCAL_INT_FDC);
  }
  
diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c

index 1dfd1085a04f87a016a2405e68200f9c09a2263e..9ca691d6c13b4d31cdb5b29221214f377a7d490a 100644 (file)
--- a/drivers/isdn/capi/kcapi.c
+++ b/drivers/isdn/capi/kcapi.c
@@ -1032,6 +1032,7 @@ static int old_capi_manufacturer(unsigned int cmd, void __user *data)
                                                      sizeof(avmb1_carddef))))
                                 return -EFAULT;
                         cdef.cardtype = AVM_CARDTYPE_B1;
+                       cdef.cardnr = 0;
                 } else {
                         if ((retval = copy_from_user(&cdef, data,
                                                      sizeof(avmb1_extcarddef))))
diff --git a/drivers/isdn/gigaset/bas-gigaset.c b/drivers/isdn/gigaset/bas-gigaset.c

index 11e13c56126fba31fca9c59d66252ffeced55c8a..2da3ff650e1d550cc50fd8572156ddd1cabd7961 100644 (file)
--- a/drivers/isdn/gigaset/bas-gigaset.c
+++ b/drivers/isdn/gigaset/bas-gigaset.c
@@ -2317,6 +2317,9 @@ static int gigaset_probe(struct usb_interface *interface,
                 return -ENODEV;
         }
  
+       if (hostif->desc.bNumEndpoints < 1)
+               return -ENODEV;
+
         dev_info(&udev->dev,
                  "%s: Device matched (Vendor: 0x%x, Product: 0x%x)\n",
                  __func__, le16_to_cpu(udev->descriptor.idVendor),
diff --git a/drivers/isdn/hisax/st5481_b.c b/drivers/isdn/hisax/st5481_b.c

index 409849165838fba631eeb74983c7fc7e8e68b152..f64a36007800cf91132b015773a78436cf488227 100644 (file)
--- a/drivers/isdn/hisax/st5481_b.c
+++ b/drivers/isdn/hisax/st5481_b.c
@@ -239,7 +239,7 @@ static void st5481B_mode(struct st5481_bcs *bcs, int mode)
                         }
                 }
         } else {
-               // Disble B channel interrupts
+               // Disable B channel interrupts
                 st5481_usb_device_ctrl_msg(adapter, FFMSK_B1+(bcs->channel * 2), 0, NULL, NULL);
  
                 // Disable B channel FIFOs
diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig

index 052714106b7b8b8d5125e5199d1333d0e3cadc8d..ead61a93cb4eb78b3e005ac6833a90b898389a56 100644 (file)
--- a/drivers/lightnvm/Kconfig
+++ b/drivers/lightnvm/Kconfig
@@ -33,4 +33,13 @@ config NVM_RRPC
         host. The target is implemented using a linear mapping table and
         cost-based garbage collection. It is optimized for 4K IO sizes.
  
+config NVM_PBLK
+       tristate "Physical Block Device Open-Channel SSD target"
+       ---help---
+       Allows an open-channel SSD to be exposed as a block device to the
+       host. The target assumes the device exposes raw flash and must be
+       explicitly managed by the host.
+
+       Please note the disk format is considered EXPERIMENTAL for now.
+
  endif # NVM
diff --git a/drivers/lightnvm/Makefile b/drivers/lightnvm/Makefile

index b2a39e2d28952659932f44ef2f079b75ab0f01ff..82d1a117fb2753c7610ae8790f996eac694ec2dc 100644 (file)
--- a/drivers/lightnvm/Makefile
+++ b/drivers/lightnvm/Makefile
@@ -4,3 +4,8 @@
  
  obj-$(CONFIG_NVM)              := core.o
  obj-$(CONFIG_NVM_RRPC)         += rrpc.o
+obj-$(CONFIG_NVM_PBLK)         += pblk.o
+pblk-y                         := pblk-init.o pblk-core.o pblk-rb.o \
+                                  pblk-write.o pblk-cache.o pblk-read.o \
+                                  pblk-gc.o pblk-recovery.o pblk-map.o \
+                                  pblk-rl.o pblk-sysfs.o
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c

index 5262ba66a7a74c94d91d2c5815eb5bf19f2c62a2..54a06c3a2b8c7c1a31ed642eaf362013ebaa40cf 100644 (file)
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -89,7 +89,7 @@ static void nvm_release_luns_err(struct nvm_dev *dev, int lun_begin,
                 WARN_ON(!test_and_clear_bit(i, dev->lun_map));
  }
  
-static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev)
+static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev, int clear)
  {
         struct nvm_dev *dev = tgt_dev->parent;
         struct nvm_dev_map *dev_map = tgt_dev->map;
@@ -100,11 +100,14 @@ static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev)
                 int *lun_offs = ch_map->lun_offs;
                 int ch = i + ch_map->ch_off;
  
-               for (j = 0; j < ch_map->nr_luns; j++) {
-                       int lun = j + lun_offs[j];
-                       int lunid = (ch * dev->geo.luns_per_chnl) + lun;
+               if (clear) {
+                       for (j = 0; j < ch_map->nr_luns; j++) {
+                               int lun = j + lun_offs[j];
+                               int lunid = (ch * dev->geo.luns_per_chnl) + lun;
  
-                       WARN_ON(!test_and_clear_bit(lunid, dev->lun_map));
+                               WARN_ON(!test_and_clear_bit(lunid,
+                                                       dev->lun_map));
+                       }
                 }
  
                 kfree(ch_map->lun_offs);
@@ -232,6 +235,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
         struct nvm_target *t;
         struct nvm_tgt_dev *tgt_dev;
         void *targetdata;
+       int ret;
  
         tt = nvm_find_target_type(create->tgttype, 1);
         if (!tt) {
@@ -252,34 +256,43 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
                 return -ENOMEM;
  
         t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL);
-       if (!t)
+       if (!t) {
+               ret = -ENOMEM;
                 goto err_reserve;
+       }
  
         tgt_dev = nvm_create_tgt_dev(dev, s->lun_begin, s->lun_end);
         if (!tgt_dev) {
                 pr_err("nvm: could not create target device\n");
+               ret = -ENOMEM;
                 goto err_t;
         }
  
-       tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node);
-       if (!tqueue)
+       tdisk = alloc_disk(0);
+       if (!tdisk) {
+               ret = -ENOMEM;
                 goto err_dev;
-       blk_queue_make_request(tqueue, tt->make_rq);
+       }
  
-       tdisk = alloc_disk(0);
-       if (!tdisk)
-               goto err_queue;
+       tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node);
+       if (!tqueue) {
+               ret = -ENOMEM;
+               goto err_disk;
+       }
+       blk_queue_make_request(tqueue, tt->make_rq);
  
-       sprintf(tdisk->disk_name, "%s", create->tgtname);
+       strlcpy(tdisk->disk_name, create->tgtname, sizeof(tdisk->disk_name));
         tdisk->flags = GENHD_FL_EXT_DEVT;
         tdisk->major = 0;
         tdisk->first_minor = 0;
         tdisk->fops = &nvm_fops;
         tdisk->queue = tqueue;
  
-       targetdata = tt->init(tgt_dev, tdisk);
-       if (IS_ERR(targetdata))
+       targetdata = tt->init(tgt_dev, tdisk, create->flags);
+       if (IS_ERR(targetdata)) {
+               ret = PTR_ERR(targetdata);
                 goto err_init;
+       }
  
         tdisk->private_data = targetdata;
         tqueue->queuedata = targetdata;
@@ -289,8 +302,10 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
         set_capacity(tdisk, tt->capacity(targetdata));
         add_disk(tdisk);
  
-       if (tt->sysfs_init && tt->sysfs_init(tdisk))
+       if (tt->sysfs_init && tt->sysfs_init(tdisk)) {
+               ret = -ENOMEM;
                 goto err_sysfs;
+       }
  
         t->type = tt;
         t->disk = tdisk;
@@ -305,16 +320,17 @@ err_sysfs:
         if (tt->exit)
                 tt->exit(targetdata);
  err_init:
-       put_disk(tdisk);
-err_queue:
         blk_cleanup_queue(tqueue);
+       tdisk->queue = NULL;
+err_disk:
+       put_disk(tdisk);
  err_dev:
-       nvm_remove_tgt_dev(tgt_dev);
+       nvm_remove_tgt_dev(tgt_dev, 0);
  err_t:
         kfree(t);
  err_reserve:
         nvm_release_luns_err(dev, s->lun_begin, s->lun_end);
-       return -ENOMEM;
+       return ret;
  }
  
  static void __nvm_remove_target(struct nvm_target *t)
@@ -332,7 +348,7 @@ static void __nvm_remove_target(struct nvm_target *t)
         if (tt->exit)
                 tt->exit(tdisk->private_data);
  
-       nvm_remove_tgt_dev(t->dev);
+       nvm_remove_tgt_dev(t->dev, 1);
         put_disk(tdisk);
  
         list_del(&t->list);
@@ -411,6 +427,18 @@ err_rmap:
         return -ENOMEM;
  }
  
+static void nvm_unregister_map(struct nvm_dev *dev)
+{
+       struct nvm_dev_map *rmap = dev->rmap;
+       int i;
+
+       for (i = 0; i < dev->geo.nr_chnls; i++)
+               kfree(rmap->chnls[i].lun_offs);
+
+       kfree(rmap->chnls);
+       kfree(rmap);
+}
+
  static void nvm_map_to_dev(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p)
  {
         struct nvm_dev_map *dev_map = tgt_dev->map;
@@ -486,7 +514,6 @@ void nvm_part_to_tgt(struct nvm_dev *dev, sector_t *entries,
                 int *lun_roffs;
                 struct ppa_addr gaddr;
                 u64 pba = le64_to_cpu(entries[i]);
-               int off;
                 u64 diff;
  
                 if (!pba)
@@ -496,8 +523,6 @@ void nvm_part_to_tgt(struct nvm_dev *dev, sector_t *entries,
                 ch_rmap = &dev_rmap->chnls[gaddr.g.ch];
                 lun_roffs = ch_rmap->lun_offs;
  
-               off = gaddr.g.ch * geo->luns_per_chnl + gaddr.g.lun;
-
                 diff = ((ch_rmap->ch_off * geo->luns_per_chnl) +
                                 (lun_roffs[gaddr.g.lun])) * geo->sec_per_lun;
  
@@ -590,11 +615,11 @@ int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
  
         memset(&rqd, 0, sizeof(struct nvm_rq));
  
-       nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas, 1);
+       nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas, 1);
         nvm_rq_tgt_to_dev(tgt_dev, &rqd);
  
         ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type);
-       nvm_free_rqd_ppalist(dev, &rqd);
+       nvm_free_rqd_ppalist(tgt_dev, &rqd);
         if (ret) {
                 pr_err("nvm: failed bb mark\n");
                 return -EINVAL;
@@ -626,34 +651,45 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
  }
  EXPORT_SYMBOL(nvm_submit_io);
  
-int nvm_erase_blk(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, int flags)
+static void nvm_end_io_sync(struct nvm_rq *rqd)
  {
-       struct nvm_dev *dev = tgt_dev->parent;
-       struct nvm_rq rqd;
-       int ret;
+       struct completion *waiting = rqd->private;
  
-       if (!dev->ops->erase_block)
-               return 0;
+       complete(waiting);
+}
  
-       nvm_map_to_dev(tgt_dev, ppas);
+int nvm_erase_sync(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
+                                                               int nr_ppas)
+{
+       struct nvm_geo *geo = &tgt_dev->geo;
+       struct nvm_rq rqd;
+       int ret;
+       DECLARE_COMPLETION_ONSTACK(wait);
  
         memset(&rqd, 0, sizeof(struct nvm_rq));
  
-       ret = nvm_set_rqd_ppalist(dev, &rqd, ppas, 1, 1);
+       rqd.opcode = NVM_OP_ERASE;
+       rqd.end_io = nvm_end_io_sync;
+       rqd.private = &wait;
+       rqd.flags = geo->plane_mode >> 1;
+
+       ret = nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas, 1);
         if (ret)
                 return ret;
  
-       nvm_rq_tgt_to_dev(tgt_dev, &rqd);
-
-       rqd.flags = flags;
-
-       ret = dev->ops->erase_block(dev, &rqd);
+       ret = nvm_submit_io(tgt_dev, &rqd);
+       if (ret) {
+               pr_err("rrpr: erase I/O submission failed: %d\n", ret);
+               goto free_ppa_list;
+       }
+       wait_for_completion_io(&wait);
  
-       nvm_free_rqd_ppalist(dev, &rqd);
+free_ppa_list:
+       nvm_free_rqd_ppalist(tgt_dev, &rqd);
  
         return ret;
  }
-EXPORT_SYMBOL(nvm_erase_blk);
+EXPORT_SYMBOL(nvm_erase_sync);
  
  int nvm_get_l2p_tbl(struct nvm_tgt_dev *tgt_dev, u64 slba, u32 nlb,
                     nvm_l2p_update_fn *update_l2p, void *priv)
@@ -732,10 +768,11 @@ void nvm_put_area(struct nvm_tgt_dev *tgt_dev, sector_t begin)
  }
  EXPORT_SYMBOL(nvm_put_area);
  
-int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd,
+int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
                         const struct ppa_addr *ppas, int nr_ppas, int vblk)
  {
-       struct nvm_geo *geo = &dev->geo;
+       struct nvm_dev *dev = tgt_dev->parent;
+       struct nvm_geo *geo = &tgt_dev->geo;
         int i, plane_cnt, pl_idx;
         struct ppa_addr ppa;
  
@@ -773,12 +810,12 @@ int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd,
  }
  EXPORT_SYMBOL(nvm_set_rqd_ppalist);
  
-void nvm_free_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd)
+void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
  {
         if (!rqd->ppa_list)
                 return;
  
-       nvm_dev_dma_free(dev, rqd->ppa_list, rqd->dma_ppa_list);
+       nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
  }
  EXPORT_SYMBOL(nvm_free_rqd_ppalist);
  
@@ -972,7 +1009,7 @@ err_fmtype:
         return ret;
  }
  
-void nvm_free(struct nvm_dev *dev)
+static void nvm_free(struct nvm_dev *dev)
  {
         if (!dev)
                 return;
@@ -980,7 +1017,7 @@ void nvm_free(struct nvm_dev *dev)
         if (dev->dma_pool)
                 dev->ops->destroy_dma_pool(dev->dma_pool);
  
-       kfree(dev->rmap);
+       nvm_unregister_map(dev);
         kfree(dev->lptbl);
         kfree(dev->lun_map);
         kfree(dev);
@@ -1174,13 +1211,13 @@ static long nvm_ioctl_get_devices(struct file *file, void __user *arg)
         list_for_each_entry(dev, &nvm_devices, devices) {
                 struct nvm_ioctl_device_info *info = &devices->info[i];
  
-               sprintf(info->devname, "%s", dev->name);
+               strlcpy(info->devname, dev->name, sizeof(info->devname));
  
                 /* kept for compatibility */
                 info->bmversion[0] = 1;
                 info->bmversion[1] = 0;
                 info->bmversion[2] = 0;
-               sprintf(info->bmname, "%s", "gennvm");
+               strlcpy(info->bmname, "gennvm", sizeof(info->bmname));
                 i++;
  
                 if (i > 31) {
@@ -1217,8 +1254,16 @@ static long nvm_ioctl_dev_create(struct file *file, void __user *arg)
         create.tgtname[DISK_NAME_LEN - 1] = '\0';
  
         if (create.flags != 0) {
-               pr_err("nvm: no flags supported\n");
-               return -EINVAL;
+               __u32 flags = create.flags;
+
+               /* Check for valid flags */
+               if (flags & NVM_TARGET_FACTORY)
+                       flags &= ~NVM_TARGET_FACTORY;
+
+               if (flags) {
+                       pr_err("nvm: flag not supported\n");
+                       return -EINVAL;
+               }
         }
  
         return __nvm_configure_create(&create);
diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c

new file mode 100644 (file)

index 0000000..59bcea8
--- /dev/null
+++ b/drivers/lightnvm/pblk-cache.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2016 CNEX Labs
+ * Initial release: Javier Gonzalez <javier@cnexlabs.com>
+ *                  Matias Bjorling <matias@cnexlabs.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * pblk-cache.c - pblk's write cache
+ */
+
+#include "pblk.h"
+
+int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags)
+{
+       struct pblk_w_ctx w_ctx;
+       sector_t lba = pblk_get_lba(bio);
+       unsigned int bpos, pos;
+       int nr_entries = pblk_get_secs(bio);
+       int i, ret;
+
+       /* Update the write buffer head (mem) with the entries that we can
+        * write. The write in itself cannot fail, so there is no need to
+        * rollback from here on.
+        */
+retry:
+       ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos);
+       if (ret == NVM_IO_REQUEUE) {
+               io_schedule();
+               goto retry;
+       }
+
+       if (unlikely(!bio_has_data(bio)))
+               goto out;
+
+       w_ctx.flags = flags;
+       pblk_ppa_set_empty(&w_ctx.ppa);
+
+       for (i = 0; i < nr_entries; i++) {
+               void *data = bio_data(bio);
+
+               w_ctx.lba = lba + i;
+
+               pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + i);
+               pblk_rb_write_entry_user(&pblk->rwb, data, w_ctx, pos);
+
+               bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
+       }
+
+#ifdef CONFIG_NVM_DEBUG
+       atomic_long_add(nr_entries, &pblk->inflight_writes);
+       atomic_long_add(nr_entries, &pblk->req_writes);
+#endif
+
+out:
+       pblk_write_should_kick(pblk);
+       return ret;
+}
+
+/*
+ * On GC the incoming lbas are not necessarily sequential. Also, some of the
+ * lbas might not be valid entries, which are marked as empty by the GC thread
+ */
+int pblk_write_gc_to_cache(struct pblk *pblk, void *data, u64 *lba_list,
+                          unsigned int nr_entries, unsigned int nr_rec_entries,
+                          struct pblk_line *gc_line, unsigned long flags)
+{
+       struct pblk_w_ctx w_ctx;
+       unsigned int bpos, pos;
+       int i, valid_entries;
+
+       /* Update the write buffer head (mem) with the entries that we can
+        * write. The write in itself cannot fail, so there is no need to
+        * rollback from here on.
+        */
+retry:
+       if (!pblk_rb_may_write_gc(&pblk->rwb, nr_rec_entries, &bpos)) {
+               io_schedule();
+               goto retry;
+       }
+
+       w_ctx.flags = flags;
+       pblk_ppa_set_empty(&w_ctx.ppa);
+
+       for (i = 0, valid_entries = 0; i < nr_entries; i++) {
+               if (lba_list[i] == ADDR_EMPTY)
+                       continue;
+
+               w_ctx.lba = lba_list[i];
+
+               pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + valid_entries);
+               pblk_rb_write_entry_gc(&pblk->rwb, data, w_ctx, gc_line, pos);
+
+               data += PBLK_EXPOSED_PAGE_SIZE;
+               valid_entries++;
+       }
+
+       WARN_ONCE(nr_rec_entries != valid_entries,
+                                       "pblk: inconsistent GC write\n");
+
+#ifdef CONFIG_NVM_DEBUG
+       atomic_long_add(valid_entries, &pblk->inflight_writes);
+       atomic_long_add(valid_entries, &pblk->recov_gc_writes);
+#endif
+
+       pblk_write_should_kick(pblk);
+       return NVM_IO_OK;
+}
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c

new file mode 100644 (file)

index 0000000..5e44768
--- /dev/null
+++ b/drivers/lightnvm/pblk-core.c
@@ -0,0 +1,1667 @@
+/*
+ * Copyright (C) 2016 CNEX Labs
+ * Initial release: Javier Gonzalez <javier@cnexlabs.com>
+ *                  Matias Bjorling <matias@cnexlabs.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * pblk-core.c - pblk's core functionality
+ *
+ */
+
+#include "pblk.h"
+#include <linux/time.h>
+
+static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
+                        struct ppa_addr *ppa)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       int pos = pblk_dev_ppa_to_pos(geo, *ppa);
+
+       pr_debug("pblk: erase failed: line:%d, pos:%d\n", line->id, pos);
+       atomic_long_inc(&pblk->erase_failed);
+
+       atomic_dec(&line->blk_in_line);
+       if (test_and_set_bit(pos, line->blk_bitmap))
+               pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n",
+                                                       line->id, pos);
+
+       pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb);
+}
+
+static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
+{
+       struct pblk_line *line;
+
+       line = &pblk->lines[pblk_dev_ppa_to_line(rqd->ppa_addr)];
+       atomic_dec(&line->left_seblks);
+
+       if (rqd->error) {
+               struct ppa_addr *ppa;
+
+               ppa = kmalloc(sizeof(struct ppa_addr), GFP_ATOMIC);
+               if (!ppa)
+                       return;
+
+               *ppa = rqd->ppa_addr;
+               pblk_mark_bb(pblk, line, ppa);
+       }
+}
+
+/* Erase completion assumes that only one block is erased at a time */
+static void pblk_end_io_erase(struct nvm_rq *rqd)
+{
+       struct pblk *pblk = rqd->private;
+
+       up(&pblk->erase_sem);
+       __pblk_end_io_erase(pblk, rqd);
+       mempool_free(rqd, pblk->r_rq_pool);
+}
+
+static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
+                                 u64 paddr)
+{
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       struct list_head *move_list = NULL;
+
+       /* Lines being reclaimed (GC'ed) cannot be invalidated. Before the L2P
+        * table is modified with reclaimed sectors, a check is done to ensure
+        * that newer updates are not overwritten.
+        */
+       spin_lock(&line->lock);
+       if (line->state == PBLK_LINESTATE_GC ||
+                                       line->state == PBLK_LINESTATE_FREE) {
+               spin_unlock(&line->lock);
+               return;
+       }
+
+       if (test_and_set_bit(paddr, line->invalid_bitmap)) {
+               WARN_ONCE(1, "pblk: double invalidate\n");
+               spin_unlock(&line->lock);
+               return;
+       }
+       line->vsc--;
+
+       if (line->state == PBLK_LINESTATE_CLOSED)
+               move_list = pblk_line_gc_list(pblk, line);
+       spin_unlock(&line->lock);
+
+       if (move_list) {
+               spin_lock(&l_mg->gc_lock);
+               spin_lock(&line->lock);
+               /* Prevent moving a line that has just been chosen for GC */
+               if (line->state == PBLK_LINESTATE_GC ||
+                                       line->state == PBLK_LINESTATE_FREE) {
+                       spin_unlock(&line->lock);
+                       spin_unlock(&l_mg->gc_lock);
+                       return;
+               }
+               spin_unlock(&line->lock);
+
+               list_move_tail(&line->list, move_list);
+               spin_unlock(&l_mg->gc_lock);
+       }
+}
+
+void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa)
+{
+       struct pblk_line *line;
+       u64 paddr;
+       int line_id;
+
+#ifdef CONFIG_NVM_DEBUG
+       /* Callers must ensure that the ppa points to a device address */
+       BUG_ON(pblk_addr_in_cache(ppa));
+       BUG_ON(pblk_ppa_empty(ppa));
+#endif
+
+       line_id = pblk_tgt_ppa_to_line(ppa);
+       line = &pblk->lines[line_id];
+       paddr = pblk_dev_ppa_to_line_addr(pblk, ppa);
+
+       __pblk_map_invalidate(pblk, line, paddr);
+}
+
+void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
+                            u64 paddr)
+{
+       __pblk_map_invalidate(pblk, line, paddr);
+
+       pblk_rb_sync_init(&pblk->rwb, NULL);
+       line->left_ssecs--;
+       if (!line->left_ssecs)
+               pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
+       pblk_rb_sync_end(&pblk->rwb, NULL);
+}
+
+static void pblk_invalidate_range(struct pblk *pblk, sector_t slba,
+                                 unsigned int nr_secs)
+{
+       sector_t lba;
+
+       spin_lock(&pblk->trans_lock);
+       for (lba = slba; lba < slba + nr_secs; lba++) {
+               struct ppa_addr ppa;
+
+               ppa = pblk_trans_map_get(pblk, lba);
+
+               if (!pblk_addr_in_cache(ppa) && !pblk_ppa_empty(ppa))
+                       pblk_map_invalidate(pblk, ppa);
+
+               pblk_ppa_set_empty(&ppa);
+               pblk_trans_map_set(pblk, lba, ppa);
+       }
+       spin_unlock(&pblk->trans_lock);
+}
+
+struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw)
+{
+       mempool_t *pool;
+       struct nvm_rq *rqd;
+       int rq_size;
+
+       if (rw == WRITE) {
+               pool = pblk->w_rq_pool;
+               rq_size = pblk_w_rq_size;
+       } else {
+               pool = pblk->r_rq_pool;
+               rq_size = pblk_r_rq_size;
+       }
+
+       rqd = mempool_alloc(pool, GFP_KERNEL);
+       memset(rqd, 0, rq_size);
+
+       return rqd;
+}
+
+void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw)
+{
+       mempool_t *pool;
+
+       if (rw == WRITE)
+               pool = pblk->w_rq_pool;
+       else
+               pool = pblk->r_rq_pool;
+
+       mempool_free(rqd, pool);
+}
+
+void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
+                        int nr_pages)
+{
+       struct bio_vec bv;
+       int i;
+
+       WARN_ON(off + nr_pages != bio->bi_vcnt);
+
+       bio_advance(bio, off * PBLK_EXPOSED_PAGE_SIZE);
+       for (i = off; i < nr_pages + off; i++) {
+               bv = bio->bi_io_vec[i];
+               mempool_free(bv.bv_page, pblk->page_pool);
+       }
+}
+
+int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
+                      int nr_pages)
+{
+       struct request_queue *q = pblk->dev->q;
+       struct page *page;
+       int i, ret;
+
+       for (i = 0; i < nr_pages; i++) {
+               page = mempool_alloc(pblk->page_pool, flags);
+               if (!page)
+                       goto err;
+
+               ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0);
+               if (ret != PBLK_EXPOSED_PAGE_SIZE) {
+                       pr_err("pblk: could not add page to bio\n");
+                       mempool_free(page, pblk->page_pool);
+                       goto err;
+               }
+       }
+
+       return 0;
+err:
+       pblk_bio_free_pages(pblk, bio, 0, i - 1);
+       return -1;
+}
+
+static void pblk_write_kick(struct pblk *pblk)
+{
+       wake_up_process(pblk->writer_ts);
+       mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(1000));
+}
+
+void pblk_write_timer_fn(unsigned long data)
+{
+       struct pblk *pblk = (struct pblk *)data;
+
+       /* kick the write thread every tick to flush outstanding data */
+       pblk_write_kick(pblk);
+}
+
+void pblk_write_should_kick(struct pblk *pblk)
+{
+       unsigned int secs_avail = pblk_rb_read_count(&pblk->rwb);
+
+       if (secs_avail >= pblk->min_write_pgs)
+               pblk_write_kick(pblk);
+}
+
+void pblk_end_bio_sync(struct bio *bio)
+{
+       struct completion *waiting = bio->bi_private;
+
+       complete(waiting);
+}
+
+void pblk_end_io_sync(struct nvm_rq *rqd)
+{
+       struct completion *waiting = rqd->private;
+
+       complete(waiting);
+}
+
+void pblk_flush_writer(struct pblk *pblk)
+{
+       struct bio *bio;
+       int ret;
+       DECLARE_COMPLETION_ONSTACK(wait);
+
+       bio = bio_alloc(GFP_KERNEL, 1);
+       if (!bio)
+               return;
+
+       bio->bi_iter.bi_sector = 0; /* internal bio */
+       bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_OP_FLUSH);
+       bio->bi_private = &wait;
+       bio->bi_end_io = pblk_end_bio_sync;
+
+       ret = pblk_write_to_cache(pblk, bio, 0);
+       if (ret == NVM_IO_OK) {
+               if (!wait_for_completion_io_timeout(&wait,
+                               msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+                       pr_err("pblk: flush cache timed out\n");
+               }
+       } else if (ret != NVM_IO_DONE) {
+               pr_err("pblk: tear down bio failed\n");
+       }
+
+       if (bio->bi_error)
+               pr_err("pblk: flush sync write failed (%u)\n", bio->bi_error);
+
+       bio_put(bio);
+}
+
+struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
+{
+       struct pblk_line_meta *lm = &pblk->lm;
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       struct list_head *move_list = NULL;
+
+       if (!line->vsc) {
+               if (line->gc_group != PBLK_LINEGC_FULL) {
+                       line->gc_group = PBLK_LINEGC_FULL;
+                       move_list = &l_mg->gc_full_list;
+               }
+       } else if (line->vsc < lm->mid_thrs) {
+               if (line->gc_group != PBLK_LINEGC_HIGH) {
+                       line->gc_group = PBLK_LINEGC_HIGH;
+                       move_list = &l_mg->gc_high_list;
+               }
+       } else if (line->vsc < lm->high_thrs) {
+               if (line->gc_group != PBLK_LINEGC_MID) {
+                       line->gc_group = PBLK_LINEGC_MID;
+                       move_list = &l_mg->gc_mid_list;
+               }
+       } else if (line->vsc < line->sec_in_line) {
+               if (line->gc_group != PBLK_LINEGC_LOW) {
+                       line->gc_group = PBLK_LINEGC_LOW;
+                       move_list = &l_mg->gc_low_list;
+               }
+       } else if (line->vsc == line->sec_in_line) {
+               if (line->gc_group != PBLK_LINEGC_EMPTY) {
+                       line->gc_group = PBLK_LINEGC_EMPTY;
+                       move_list = &l_mg->gc_empty_list;
+               }
+       } else {
+               line->state = PBLK_LINESTATE_CORRUPT;
+               line->gc_group = PBLK_LINEGC_NONE;
+               move_list =  &l_mg->corrupt_list;
+               pr_err("pblk: corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n",
+                                               line->id, line->vsc,
+                                               line->sec_in_line,
+                                               lm->high_thrs, lm->mid_thrs);
+       }
+
+       return move_list;
+}
+
+void pblk_discard(struct pblk *pblk, struct bio *bio)
+{
+       sector_t slba = pblk_get_lba(bio);
+       sector_t nr_secs = pblk_get_secs(bio);
+
+       pblk_invalidate_range(pblk, slba, nr_secs);
+}
+
+struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba)
+{
+       struct ppa_addr ppa;
+
+       spin_lock(&pblk->trans_lock);
+       ppa = pblk_trans_map_get(pblk, lba);
+       spin_unlock(&pblk->trans_lock);
+
+       return ppa;
+}
+
+void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd)
+{
+       atomic_long_inc(&pblk->write_failed);
+#ifdef CONFIG_NVM_DEBUG
+       pblk_print_failed_rqd(pblk, rqd, rqd->error);
+#endif
+}
+
+void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd)
+{
+       /* Empty page read is not necessarily an error (e.g., L2P recovery) */
+       if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) {
+               atomic_long_inc(&pblk->read_empty);
+               return;
+       }
+
+       switch (rqd->error) {
+       case NVM_RSP_WARN_HIGHECC:
+               atomic_long_inc(&pblk->read_high_ecc);
+               break;
+       case NVM_RSP_ERR_FAILECC:
+       case NVM_RSP_ERR_FAILCRC:
+               atomic_long_inc(&pblk->read_failed);
+               break;
+       default:
+               pr_err("pblk: unknown read error:%d\n", rqd->error);
+       }
+#ifdef CONFIG_NVM_DEBUG
+       pblk_print_failed_rqd(pblk, rqd, rqd->error);
+#endif
+}
+
+int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+
+#ifdef CONFIG_NVM_DEBUG
+       struct ppa_addr *ppa_list;
+
+       ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
+       if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) {
+               WARN_ON(1);
+               return -EINVAL;
+       }
+
+       if (rqd->opcode == NVM_OP_PWRITE) {
+               struct pblk_line *line;
+               struct ppa_addr ppa;
+               int i;
+
+               for (i = 0; i < rqd->nr_ppas; i++) {
+                       ppa = ppa_list[i];
+                       line = &pblk->lines[pblk_dev_ppa_to_line(ppa)];
+
+                       spin_lock(&line->lock);
+                       if (line->state != PBLK_LINESTATE_OPEN) {
+                               pr_err("pblk: bad ppa: line:%d,state:%d\n",
+                                                       line->id, line->state);
+                               WARN_ON(1);
+                               spin_unlock(&line->lock);
+                               return -EINVAL;
+                       }
+                       spin_unlock(&line->lock);
+               }
+       }
+#endif
+       return nvm_submit_io(dev, rqd);
+}
+
+struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
+                             unsigned int nr_secs, unsigned int len,
+                             gfp_t gfp_mask)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       void *kaddr = data;
+       struct page *page;
+       struct bio *bio;
+       int i, ret;
+
+       if (l_mg->emeta_alloc_type == PBLK_KMALLOC_META)
+               return bio_map_kern(dev->q, kaddr, len, gfp_mask);
+
+       bio = bio_kmalloc(gfp_mask, nr_secs);
+       if (!bio)
+               return ERR_PTR(-ENOMEM);
+
+       for (i = 0; i < nr_secs; i++) {
+               page = vmalloc_to_page(kaddr);
+               if (!page) {
+                       pr_err("pblk: could not map vmalloc bio\n");
+                       bio_put(bio);
+                       bio = ERR_PTR(-ENOMEM);
+                       goto out;
+               }
+
+               ret = bio_add_pc_page(dev->q, bio, page, PAGE_SIZE, 0);
+               if (ret != PAGE_SIZE) {
+                       pr_err("pblk: could not add page to bio\n");
+                       bio_put(bio);
+                       bio = ERR_PTR(-ENOMEM);
+                       goto out;
+               }
+
+               kaddr += PAGE_SIZE;
+       }
+out:
+       return bio;
+}
+
+int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
+                  unsigned long secs_to_flush)
+{
+       int max = pblk->max_write_pgs;
+       int min = pblk->min_write_pgs;
+       int secs_to_sync = 0;
+
+       if (secs_avail >= max)
+               secs_to_sync = max;
+       else if (secs_avail >= min)
+               secs_to_sync = min * (secs_avail / min);
+       else if (secs_to_flush)
+               secs_to_sync = min;
+
+       return secs_to_sync;
+}
+
+static u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line,
+                            int nr_secs)
+{
+       u64 addr;
+       int i;
+
+       /* logic error: ppa out-of-bounds. Prevent generating bad address */
+       if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) {
+               WARN(1, "pblk: page allocation out of bounds\n");
+               nr_secs = pblk->lm.sec_per_line - line->cur_sec;
+       }
+
+       line->cur_sec = addr = find_next_zero_bit(line->map_bitmap,
+                                       pblk->lm.sec_per_line, line->cur_sec);
+       for (i = 0; i < nr_secs; i++, line->cur_sec++)
+               WARN_ON(test_and_set_bit(line->cur_sec, line->map_bitmap));
+
+       return addr;
+}
+
+u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
+{
+       u64 addr;
+
+       /* Lock needed in case a write fails and a recovery needs to remap
+        * failed write buffer entries
+        */
+       spin_lock(&line->lock);
+       addr = __pblk_alloc_page(pblk, line, nr_secs);
+       line->left_msecs -= nr_secs;
+       WARN(line->left_msecs < 0, "pblk: page allocation out of bounds\n");
+       spin_unlock(&line->lock);
+
+       return addr;
+}
+
+/*
+ * Submit emeta to one LUN in the raid line at a time to avoid a deadlock when
+ * taking the per LUN semaphore.
+ */
+static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
+                                    u64 paddr, int dir)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct pblk_line_meta *lm = &pblk->lm;
+       struct bio *bio;
+       struct nvm_rq rqd;
+       struct ppa_addr *ppa_list;
+       dma_addr_t dma_ppa_list;
+       void *emeta = line->emeta;
+       int min = pblk->min_write_pgs;
+       int left_ppas = lm->emeta_sec;
+       int id = line->id;
+       int rq_ppas, rq_len;
+       int cmd_op, bio_op;
+       int flags;
+       int i, j;
+       int ret;
+       DECLARE_COMPLETION_ONSTACK(wait);
+
+       if (dir == WRITE) {
+               bio_op = REQ_OP_WRITE;
+               cmd_op = NVM_OP_PWRITE;
+               flags = pblk_set_progr_mode(pblk, WRITE);
+       } else if (dir == READ) {
+               bio_op = REQ_OP_READ;
+               cmd_op = NVM_OP_PREAD;
+               flags = pblk_set_read_mode(pblk);
+       } else
+               return -EINVAL;
+
+       ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_ppa_list);
+       if (!ppa_list)
+               return -ENOMEM;
+
+next_rq:
+       memset(&rqd, 0, sizeof(struct nvm_rq));
+
+       rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+       rq_len = rq_ppas * geo->sec_size;
+
+       bio = pblk_bio_map_addr(pblk, emeta, rq_ppas, rq_len, GFP_KERNEL);
+       if (IS_ERR(bio)) {
+               ret = PTR_ERR(bio);
+               goto free_rqd_dma;
+       }
+
+       bio->bi_iter.bi_sector = 0; /* internal bio */
+       bio_set_op_attrs(bio, bio_op, 0);
+
+       rqd.bio = bio;
+       rqd.opcode = cmd_op;
+       rqd.flags = flags;
+       rqd.nr_ppas = rq_ppas;
+       rqd.ppa_list = ppa_list;
+       rqd.dma_ppa_list = dma_ppa_list;
+       rqd.end_io = pblk_end_io_sync;
+       rqd.private = &wait;
+
+       if (dir == WRITE) {
+               for (i = 0; i < rqd.nr_ppas; ) {
+                       spin_lock(&line->lock);
+                       paddr = __pblk_alloc_page(pblk, line, min);
+                       spin_unlock(&line->lock);
+                       for (j = 0; j < min; j++, i++, paddr++)
+                               rqd.ppa_list[i] =
+                                       addr_to_gen_ppa(pblk, paddr, id);
+               }
+       } else {
+               for (i = 0; i < rqd.nr_ppas; ) {
+                       struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id);
+                       int pos = pblk_dev_ppa_to_pos(geo, ppa);
+
+                       while (test_bit(pos, line->blk_bitmap)) {
+                               paddr += min;
+                               if (pblk_boundary_paddr_checks(pblk, paddr)) {
+                                       pr_err("pblk: corrupt emeta line:%d\n",
+                                                               line->id);
+                                       bio_put(bio);
+                                       ret = -EINTR;
+                                       goto free_rqd_dma;
+                               }
+
+                               ppa = addr_to_gen_ppa(pblk, paddr, id);
+                               pos = pblk_dev_ppa_to_pos(geo, ppa);
+                       }
+
+                       if (pblk_boundary_paddr_checks(pblk, paddr + min)) {
+                               pr_err("pblk: corrupt emeta line:%d\n",
+                                                               line->id);
+                               bio_put(bio);
+                               ret = -EINTR;
+                               goto free_rqd_dma;
+                       }
+
+                       for (j = 0; j < min; j++, i++, paddr++)
+                               rqd.ppa_list[i] =
+                                       addr_to_gen_ppa(pblk, paddr, line->id);
+               }
+       }
+
+       ret = pblk_submit_io(pblk, &rqd);
+       if (ret) {
+               pr_err("pblk: emeta I/O submission failed: %d\n", ret);
+               bio_put(bio);
+               goto free_rqd_dma;
+       }
+
+       if (!wait_for_completion_io_timeout(&wait,
+                               msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+               pr_err("pblk: emeta I/O timed out\n");
+       }
+       reinit_completion(&wait);
+
+       bio_put(bio);
+
+       if (rqd.error) {
+               if (dir == WRITE)
+                       pblk_log_write_err(pblk, &rqd);
+               else
+                       pblk_log_read_err(pblk, &rqd);
+       }
+
+       emeta += rq_len;
+       left_ppas -= rq_ppas;
+       if (left_ppas)
+               goto next_rq;
+free_rqd_dma:
+       nvm_dev_dma_free(dev->parent, ppa_list, dma_ppa_list);
+       return ret;
+}
+
+u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct pblk_line_meta *lm = &pblk->lm;
+       int bit;
+
+       /* This usually only happens on bad lines */
+       bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
+       if (bit >= lm->blk_per_line)
+               return -1;
+
+       return bit * geo->sec_per_pl;
+}
+
+/*
+ * Synchronously read or write a line's start metadata (smeta).
+ *
+ * @paddr is the address of the first smeta sector inside @line and @dir is
+ * READ or WRITE; any other direction returns -EINVAL. The request covers
+ * lm->smeta_sec sectors and is backed by the line->smeta buffer. For writes,
+ * the emeta lba_list entry of every smeta sector is set to ADDR_EMPTY.
+ *
+ * Returns -ENOMEM if the ppa list cannot be allocated, the error from
+ * bio_map_kern() or pblk_submit_io() if one of them fails, and 0 otherwise.
+ *
+ * NOTE(review): a completion timeout or a non-zero rqd.error is only logged;
+ * ret is still 0 in those cases, so callers do not see device-level
+ * failures. Confirm this is intended.
+ */
+static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
+                                    u64 paddr, int dir)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct pblk_line_meta *lm = &pblk->lm;
+       struct bio *bio;
+       struct nvm_rq rqd;
+       __le64 *lba_list = NULL;
+       int i, ret;
+       int cmd_op, bio_op;
+       int flags;
+       DECLARE_COMPLETION_ONSTACK(wait);
+
+       /* Translate the direction into bio/command opcodes and flags */
+       if (dir == WRITE) {
+               bio_op = REQ_OP_WRITE;
+               cmd_op = NVM_OP_PWRITE;
+               flags = pblk_set_progr_mode(pblk, WRITE);
+               lba_list = pblk_line_emeta_to_lbas(line->emeta);
+       } else if (dir == READ) {
+               bio_op = REQ_OP_READ;
+               cmd_op = NVM_OP_PREAD;
+               flags = pblk_set_read_mode(pblk);
+       } else
+               return -EINVAL;
+
+       memset(&rqd, 0, sizeof(struct nvm_rq));
+
+       rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+                                                       &rqd.dma_ppa_list);
+       if (!rqd.ppa_list)
+               return -ENOMEM;
+
+       bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
+       if (IS_ERR(bio)) {
+               ret = PTR_ERR(bio);
+               goto free_ppa_list;
+       }
+
+       bio->bi_iter.bi_sector = 0; /* internal bio */
+       bio_set_op_attrs(bio, bio_op, 0);
+
+       /* The request is on the stack; completion is signalled through wait */
+       rqd.bio = bio;
+       rqd.opcode = cmd_op;
+       rqd.flags = flags;
+       rqd.nr_ppas = lm->smeta_sec;
+       rqd.end_io = pblk_end_io_sync;
+       rqd.private = &wait;
+
+       /* One ppa per smeta sector, starting at paddr */
+       for (i = 0; i < lm->smeta_sec; i++, paddr++) {
+               rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
+               if (dir == WRITE)
+                       lba_list[paddr] = cpu_to_le64(ADDR_EMPTY);
+       }
+
+       /*
+        * This I/O is sent by the write thread when a line is replaced. Since
+        * the write thread is the only one sending write and erase commands,
+        * there is no need to take the LUN semaphore.
+        */
+       ret = pblk_submit_io(pblk, &rqd);
+       if (ret) {
+               pr_err("pblk: smeta I/O submission failed: %d\n", ret);
+               bio_put(bio);
+               goto free_ppa_list;
+       }
+
+       if (!wait_for_completion_io_timeout(&wait,
+                               msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+               pr_err("pblk: smeta I/O timed out\n");
+       }
+
+       if (rqd.error) {
+               if (dir == WRITE)
+                       pblk_log_write_err(pblk, &rqd);
+               else
+                       pblk_log_read_err(pblk, &rqd);
+       }
+
+free_ppa_list:
+       nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+
+       return ret;
+}
+
+/*
+ * Read @line's smeta from the address reported by pblk_line_smeta_start().
+ * Returns the result of pblk_line_submit_smeta_io().
+ */
+int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line)
+{
+       return pblk_line_submit_smeta_io(pblk, line,
+                               pblk_line_smeta_start(pblk, line), READ);
+}
+
+/*
+ * Read @line's end metadata (emeta), starting at line->emeta_ssec.
+ * Returns the result of pblk_line_submit_emeta_io().
+ */
+int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line)
+{
+       return pblk_line_submit_emeta_io(pblk, line, line->emeta_ssec, READ);
+}
+
+/*
+ * Fill @rqd as an erase command for the single address @ppa. No bio is
+ * attached to the request.
+ */
+static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
+                           struct ppa_addr ppa)
+{
+       rqd->bio = NULL;
+       rqd->nr_ppas = 1;
+       rqd->ppa_addr = ppa;
+       rqd->opcode = NVM_OP_ERASE;
+       rqd->flags = pblk_set_progr_mode(pblk, ERASE);
+}
+
+/*
+ * Erase the block addressed by @ppa and wait for the command to complete.
+ *
+ * The request lives on the stack and completion is signalled through
+ * pblk_end_io_sync(). Whether or not submission succeeded, the request is
+ * finally handed to __pblk_end_io_erase() with rqd.private pointing at
+ * @pblk; on submission failure rqd.error carries the error code.
+ *
+ * Returns 0 if the erase was submitted, or the pblk_submit_io() error
+ * otherwise so that callers can detect the failure.
+ */
+static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
+{
+       struct nvm_rq rqd;
+       int ret;
+       DECLARE_COMPLETION_ONSTACK(wait);
+
+       memset(&rqd, 0, sizeof(struct nvm_rq));
+
+       pblk_setup_e_rq(pblk, &rqd, ppa);
+
+       rqd.end_io = pblk_end_io_sync;
+       rqd.private = &wait;
+
+       /* The write thread schedules erases so that it minimizes disturbances
+        * with writes. Thus, there is no need to take the LUN semaphore.
+        */
+       ret = pblk_submit_io(pblk, &rqd);
+       if (ret) {
+               struct nvm_tgt_dev *dev = pblk->dev;
+               struct nvm_geo *geo = &dev->geo;
+
+               pr_err("pblk: could not sync erase line:%d,blk:%d\n",
+                                       pblk_dev_ppa_to_line(ppa),
+                                       pblk_dev_ppa_to_pos(geo, ppa));
+
+               rqd.error = ret;
+               goto out;
+       }
+
+       if (!wait_for_completion_io_timeout(&wait,
+                               msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+               pr_err("pblk: sync erase timed out\n");
+       }
+
+out:
+       rqd.private = pblk;
+       __pblk_end_io_erase(pblk, &rqd);
+
+       /* Propagate a submission failure instead of always reporting success */
+       return ret;
+}
+
+/*
+ * Synchronously erase every block of @line whose bit is still clear in
+ * line->erase_bitmap. Each block is claimed under line->lock (its bit is set
+ * and left_eblks is decremented) before the erase is issued with the lock
+ * dropped.
+ *
+ * Returns 0 when no block is left to erase, or the error returned by
+ * pblk_blk_erase_sync() for the first block that fails.
+ */
+int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
+{
+       struct pblk_line_meta *lm = &pblk->lm;
+       struct ppa_addr ppa;
+       int bit = -1;
+       int ret;
+
+       /* Erase only good blocks, one at a time */
+       do {
+               spin_lock(&line->lock);
+               bit = find_next_zero_bit(line->erase_bitmap, lm->blk_per_line,
+                                                               bit + 1);
+               if (bit >= lm->blk_per_line) {
+                       spin_unlock(&line->lock);
+                       break;
+               }
+
+               ppa = pblk->luns[bit].bppa; /* set ch and lun */
+               ppa.g.blk = line->id;
+
+               atomic_dec(&line->left_eblks);
+               WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
+               spin_unlock(&line->lock);
+
+               /* Report the real failure rather than a made-up -ENOMEM */
+               ret = pblk_blk_erase_sync(pblk, ppa);
+               if (ret) {
+                       pr_err("pblk: failed to erase line %d\n", line->id);
+                       return ret;
+               }
+       } while (1);
+
+       return 0;
+}
+
+/*
+ * Fill in the start (smeta) and end (emeta) metadata of @line. @cur, when
+ * not NULL, is the line being replaced: the two lines are chained through
+ * smeta->prev_id and cur->emeta->next_id, and @cur's lun bitmap is copied.
+ *
+ * Returns 1 on success. Returns 0 if the line has fewer than
+ * lm->min_blk_line blocks not marked in blk_bitmap; the line is then set to
+ * PBLK_LINESTATE_BAD and added to the bad list.
+ *
+ * For now lines are always assumed full lines. Thus, smeta former and current
+ * lun bitmaps are omitted.
+ */
+static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line,
+                                 struct pblk_line *cur)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct pblk_line_meta *lm = &pblk->lm;
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       struct line_smeta *smeta = line->smeta;
+       struct line_emeta *emeta = line->emeta;
+       int nr_blk_line;
+
+       /* After erasing the line, new bad blocks might appear and we risk
+        * having an invalid line
+        */
+       nr_blk_line = lm->blk_per_line -
+                       bitmap_weight(line->blk_bitmap, lm->blk_per_line);
+       if (nr_blk_line < lm->min_blk_line) {
+               spin_lock(&l_mg->free_lock);
+               spin_lock(&line->lock);
+               line->state = PBLK_LINESTATE_BAD;
+               spin_unlock(&line->lock);
+
+               list_add_tail(&line->list, &l_mg->bad_list);
+               spin_unlock(&l_mg->free_lock);
+
+               pr_debug("pblk: line %d is bad\n", line->id);
+
+               return 0;
+       }
+
+       /* Run-time metadata */
+       /* The lun bitmap is stored right after the smeta structure */
+       line->lun_bitmap = ((void *)(smeta)) + sizeof(struct line_smeta);
+
+       /* Mark LUNs allocated in this line (all for now) */
+       /* NOTE(review): lun_bitmap_len is passed as a bit count here but as a
+        * byte count to memcpy() below - confirm the intended units
+        */
+       bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len);
+
+       smeta->header.identifier = cpu_to_le32(PBLK_MAGIC);
+       memcpy(smeta->header.uuid, pblk->instance_uuid, 16);
+       smeta->header.id = cpu_to_le32(line->id);
+       smeta->header.type = cpu_to_le16(line->type);
+       smeta->header.version = cpu_to_le16(1);
+
+       /* Start metadata */
+       smeta->seq_nr = cpu_to_le64(line->seq_nr);
+       smeta->window_wr_lun = cpu_to_le32(geo->nr_luns);
+
+       /* Fill metadata among lines */
+       if (cur) {
+               memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len);
+               smeta->prev_id = cpu_to_le32(cur->id);
+               cur->emeta->next_id = cpu_to_le32(line->id);
+       } else {
+               smeta->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
+       }
+
+       /* All smeta must be set at this point */
+       smeta->header.crc = cpu_to_le32(pblk_calc_meta_header_crc(pblk, smeta));
+       smeta->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta));
+
+       /* End metadata */
+       /* emeta shares the smeta header; its crc field is set to 0 here */
+       memcpy(&emeta->header, &smeta->header, sizeof(struct line_header));
+       emeta->seq_nr = cpu_to_le64(line->seq_nr);
+       emeta->nr_lbas = cpu_to_le64(line->sec_in_line);
+       emeta->nr_valid_lbas = cpu_to_le64(0);
+       emeta->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
+       emeta->crc = cpu_to_le32(0);
+       emeta->prev_id = smeta->prev_id;
+
+       return 1;
+}
+
+/*
+ * Initialise the sector bitmaps and sector counters of @line from its block
+ * bitmap and reserve the sectors used by smeta and emeta. When @init is
+ * non-zero, smeta is also written to the device.
+ *
+ * Returns 1 on success. Returns 0 if the number of unavailable sectors does
+ * not match the weight of invalid_bitmap; the line is then set to
+ * PBLK_LINESTATE_BAD and added to the bad list.
+ *
+ * For now lines are always assumed full lines. Thus, smeta former and current
+ * lun bitmaps are omitted.
+ */
+static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
+                            int init)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct pblk_line_meta *lm = &pblk->lm;
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       int nr_bb = 0;
+       u64 off;
+       int bit = -1;
+
+       line->sec_in_line = lm->sec_per_line;
+
+       /* Capture bad block information on line mapping bitmaps */
+       /* For each set bit in blk_bitmap, OR a shifted copy of bb_template
+        * into map_bitmap; nr_bb counts the set bits at or above emeta_bb
+        */
+       while ((bit = find_next_bit(line->blk_bitmap, lm->blk_per_line,
+                                       bit + 1)) < lm->blk_per_line) {
+               off = bit * geo->sec_per_pl;
+               bitmap_shift_left(l_mg->bb_aux, l_mg->bb_template, off,
+                                                       lm->sec_per_line);
+               bitmap_or(line->map_bitmap, line->map_bitmap, l_mg->bb_aux,
+                                                       lm->sec_per_line);
+               line->sec_in_line -= geo->sec_per_blk;
+               if (bit >= lm->emeta_bb)
+                       nr_bb++;
+       }
+
+       /* Mark smeta metadata sectors as bad sectors */
+       bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
+       off = bit * geo->sec_per_pl;
+       /* NOTE(review): every retry advances off by sec_per_pl and subtracts
+        * smeta_sec from sec_in_line again, and the number of retries is not
+        * bounded - confirm this is intended
+        */
+retry_smeta:
+       bitmap_set(line->map_bitmap, off, lm->smeta_sec);
+       line->sec_in_line -= lm->smeta_sec;
+       line->smeta_ssec = off;
+       line->cur_sec = off + lm->smeta_sec;
+
+       if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) {
+               pr_debug("pblk: line smeta I/O failed. Retry\n");
+               off += geo->sec_per_pl;
+               goto retry_smeta;
+       }
+
+       bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);
+
+       /* Mark emeta metadata sectors as bad sectors. We need to consider bad
+        * blocks to make sure that there are enough sectors to store emeta
+        */
+       /* The value assigned to bit here is not read again in this function */
+       bit = lm->sec_per_line;
+       off = lm->sec_per_line - lm->emeta_sec;
+       bitmap_set(line->invalid_bitmap, off, lm->emeta_sec);
+       /* Walk backwards in sec_per_pl steps, reserving one more group of
+        * sectors for every bad block counted in nr_bb
+        */
+       while (nr_bb) {
+               off -= geo->sec_per_pl;
+               if (!test_bit(off, line->invalid_bitmap)) {
+                       bitmap_set(line->invalid_bitmap, off, geo->sec_per_pl);
+                       nr_bb--;
+               }
+       }
+
+       line->sec_in_line -= lm->emeta_sec;
+       line->emeta_ssec = off;
+       line->vsc = line->left_ssecs = line->left_msecs = line->sec_in_line;
+
+       /* Sanity check: sectors removed from the line must equal the number
+        * of bits set in invalid_bitmap
+        */
+       if (lm->sec_per_line - line->sec_in_line !=
+               bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) {
+               spin_lock(&line->lock);
+               line->state = PBLK_LINESTATE_BAD;
+               spin_unlock(&line->lock);
+
+               /* NOTE(review): bad_list is modified without taking
+                * l_mg->free_lock in this function, unlike in
+                * pblk_line_set_metadata() - verify the callers' locking
+                */
+               list_add_tail(&line->list, &l_mg->bad_list);
+               pr_err("pblk: unexpected line %d is bad\n", line->id);
+
+               return 0;
+       }
+
+       return 1;
+}
+
+/*
+ * Move a free line to PBLK_LINESTATE_OPEN: allocate its map and invalid
+ * bitmaps from pblk->line_meta_pool, reset the erase counters, seed
+ * erase_bitmap from blk_bitmap and initialise the line's kref.
+ *
+ * The bitmaps are only attached to @line once its state has been verified,
+ * so a failure neither leaks pool elements nor touches the bitmaps of a line
+ * that turns out to be in use.
+ *
+ * Returns 0 on success, -ENOMEM if a bitmap cannot be allocated, or -EINTR
+ * if the line is not in PBLK_LINESTATE_FREE.
+ */
+static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
+{
+       struct pblk_line_meta *lm = &pblk->lm;
+       int blk_in_line = atomic_read(&line->blk_in_line);
+       void *map_bitmap, *invalid_bitmap;
+
+       map_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC);
+       if (!map_bitmap)
+               return -ENOMEM;
+       memset(map_bitmap, 0, lm->sec_bitmap_len);
+
+       /* invalid_bitmap is special since it is used when line is closed. No
+        * need to zero it; it will be initialized using bb info from
+        * map_bitmap
+        */
+       invalid_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC);
+       if (!invalid_bitmap) {
+               mempool_free(map_bitmap, pblk->line_meta_pool);
+               return -ENOMEM;
+       }
+
+       spin_lock(&line->lock);
+       if (line->state != PBLK_LINESTATE_FREE) {
+               spin_unlock(&line->lock);
+               /* Give both bitmaps back instead of leaking them */
+               mempool_free(invalid_bitmap, pblk->line_meta_pool);
+               mempool_free(map_bitmap, pblk->line_meta_pool);
+               WARN(1, "pblk: corrupted line state\n");
+               return -EINTR;
+       }
+       line->state = PBLK_LINESTATE_OPEN;
+       line->map_bitmap = map_bitmap;
+       line->invalid_bitmap = invalid_bitmap;
+
+       atomic_set(&line->left_eblks, blk_in_line);
+       atomic_set(&line->left_seblks, blk_in_line);
+       spin_unlock(&line->lock);
+
+       /* Bad blocks do not need to be erased */
+       bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line);
+
+       kref_init(&line->ref);
+
+       return 0;
+}
+
+/*
+ * Take @line off its current list, make it the current data line and prepare
+ * it without writing smeta (pblk_line_init_bb() is called with init == 0).
+ *
+ * Returns 0 on success, the pblk_line_prepare() error if preparation fails
+ * (the line is put back on the free list), or -EINTR if pblk_line_init_bb()
+ * rejects the line.
+ *
+ * NOTE(review): when pblk_line_init_bb() returns 0 it has already added the
+ * line to l_mg->bad_list; the list_add() to free_list below then links the
+ * same list node a second time, and does so without l_mg->free_lock. Also,
+ * l_mg->data_line keeps pointing at @line on both error paths. Confirm.
+ */
+int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line)
+{
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       int ret;
+
+       spin_lock(&l_mg->free_lock);
+       l_mg->data_line = line;
+       list_del(&line->list);
+
+       ret = pblk_line_prepare(pblk, line);
+       if (ret) {
+               list_add(&line->list, &l_mg->free_list);
+               spin_unlock(&l_mg->free_lock);
+               return ret;
+       }
+       spin_unlock(&l_mg->free_lock);
+
+       pblk_rl_free_lines_dec(&pblk->rl, line);
+
+       if (!pblk_line_init_bb(pblk, line, 0)) {
+               list_add(&line->list, &l_mg->free_list);
+               return -EINTR;
+       }
+
+       return 0;
+}
+
+/*
+ * Release @line's map bitmap to pblk->line_meta_pool and clear the line's
+ * map_bitmap, smeta and emeta pointers. invalid_bitmap is left untouched.
+ */
+void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line)
+{
+       line->emeta = NULL;
+       line->smeta = NULL;
+
+       mempool_free(line->map_bitmap, pblk->line_meta_pool);
+       line->map_bitmap = NULL;
+}
+
+/*
+ * Take the first usable line off the free list and prepare it for use.
+ * Lines whose block bitmap has no zero bit are set to PBLK_LINESTATE_BAD,
+ * moved to the bad list and skipped.
+ *
+ * Must be called with l_mg->free_lock held.
+ *
+ * Returns the prepared line, or NULL if the free list is empty or
+ * pblk_line_prepare() fails. In the latter case the line is put back on the
+ * free list and the free-line counter is restored.
+ */
+struct pblk_line *pblk_line_get(struct pblk *pblk)
+{
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       struct pblk_line_meta *lm = &pblk->lm;
+       struct pblk_line *line = NULL;
+       int bit;
+
+       lockdep_assert_held(&l_mg->free_lock);
+
+retry_get:
+       if (list_empty(&l_mg->free_list)) {
+               pr_err("pblk: no free lines\n");
+               goto out;
+       }
+
+       line = list_first_entry(&l_mg->free_list, struct pblk_line, list);
+       list_del(&line->list);
+       l_mg->nr_free_lines--;
+
+       bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
+       if (unlikely(bit >= lm->blk_per_line)) {
+               spin_lock(&line->lock);
+               line->state = PBLK_LINESTATE_BAD;
+               spin_unlock(&line->lock);
+
+               list_add_tail(&line->list, &l_mg->bad_list);
+
+               pr_debug("pblk: line %d is bad\n", line->id);
+               goto retry_get;
+       }
+
+       if (pblk_line_prepare(pblk, line)) {
+               pr_err("pblk: failed to prepare line %d\n", line->id);
+               /* The line is free again: undo the accounting done above */
+               list_add(&line->list, &l_mg->free_list);
+               l_mg->nr_free_lines++;
+               return NULL;
+       }
+
+out:
+       return line;
+}
+
+/*
+ * Replace @line, which could not be set up, with a fresh line from the free
+ * list. The metadata buffers (smeta, emeta, meta_line) are handed over to
+ * the new line, @line's bitmaps are released through pblk_line_free() and
+ * the new line is erased.
+ *
+ * Returns the new data line, or NULL if no line could be obtained or the
+ * erase fails; l_mg->data_line is set to NULL in both failure cases.
+ */
+static struct pblk_line *pblk_line_retry(struct pblk *pblk,
+                                        struct pblk_line *line)
+{
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       struct pblk_line *retry_line;
+
+       spin_lock(&l_mg->free_lock);
+       retry_line = pblk_line_get(pblk);
+       if (!retry_line) {
+               l_mg->data_line = NULL;
+               spin_unlock(&l_mg->free_lock);
+               return NULL;
+       }
+
+       /* Hand the metadata slot over before pblk_line_free() clears it */
+       retry_line->smeta = line->smeta;
+       retry_line->emeta = line->emeta;
+       retry_line->meta_line = line->meta_line;
+
+       pblk_line_free(pblk, line);
+       l_mg->data_line = retry_line;
+       spin_unlock(&l_mg->free_lock);
+
+       if (pblk_line_erase(pblk, retry_line)) {
+               spin_lock(&l_mg->free_lock);
+               l_mg->data_line = NULL;
+               spin_unlock(&l_mg->free_lock);
+               return NULL;
+       }
+
+       pblk_rl_free_lines_dec(&pblk->rl, retry_line);
+
+       return retry_line;
+}
+
+/*
+ * Allocate and set up the first data line: pick a free line, give it a
+ * sequence number and a metadata slot, pre-allocate the next data line,
+ * erase the line and initialise its metadata and bitmaps. A line that fails
+ * metadata or bitmap set-up is swapped for another one via
+ * pblk_line_retry().
+ *
+ * Returns the ready line, or NULL on failure.
+ *
+ * NOTE(review): the result of find_first_zero_bit() on meta_bitmap is not
+ * checked against PBLK_DATA_LINES here, unlike in pblk_line_replace_data();
+ * presumably the bitmap is known to have a free slot at this point -
+ * confirm.
+ */
+struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
+{
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       struct pblk_line *line;
+       int meta_line;
+       int is_next = 0;
+
+       spin_lock(&l_mg->free_lock);
+       line = pblk_line_get(pblk);
+       if (!line) {
+               spin_unlock(&l_mg->free_lock);
+               return NULL;
+       }
+
+       line->seq_nr = l_mg->d_seq_nr++;
+       line->type = PBLK_LINETYPE_DATA;
+       l_mg->data_line = line;
+
+       /* Claim a metadata slot and attach its smeta/emeta buffers */
+       meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
+       set_bit(meta_line, &l_mg->meta_bitmap);
+       line->smeta = l_mg->sline_meta[meta_line].meta;
+       line->emeta = l_mg->eline_meta[meta_line].meta;
+       line->meta_line = meta_line;
+
+       /* Allocate next line for preparation */
+       l_mg->data_next = pblk_line_get(pblk);
+       if (l_mg->data_next) {
+               l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
+               l_mg->data_next->type = PBLK_LINETYPE_DATA;
+               is_next = 1;
+       }
+       spin_unlock(&l_mg->free_lock);
+
+       /* Rate-limiter accounting is done outside free_lock */
+       pblk_rl_free_lines_dec(&pblk->rl, line);
+       if (is_next)
+               pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
+
+       if (pblk_line_erase(pblk, line))
+               return NULL;
+
+retry_setup:
+       if (!pblk_line_set_metadata(pblk, line, NULL)) {
+               line = pblk_line_retry(pblk, line);
+               if (!line)
+                       return NULL;
+
+               goto retry_setup;
+       }
+
+       if (!pblk_line_init_bb(pblk, line, 1)) {
+               line = pblk_line_retry(pblk, line);
+               if (!line)
+                       return NULL;
+
+               goto retry_setup;
+       }
+
+       return line;
+}
+
+/*
+ * Switch from the current data line to the pre-allocated next one. Loops
+ * until the new line's left_seblks counter reaches zero (erasing the line
+ * here if left_eblks is non-zero, yielding otherwise), allocates the
+ * following line, claims a metadata slot and initialises the new line's
+ * metadata, chaining it to the current line.
+ *
+ * Returns the new data line, or NULL if there is no next line or an erase
+ * or a set-up retry fails.
+ */
+struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
+{
+       struct pblk_line_meta *lm = &pblk->lm;
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       struct pblk_line *cur, *new;
+       unsigned int left_seblks;
+       int meta_line;
+       int is_next = 0;
+
+       cur = l_mg->data_line;
+       new = l_mg->data_next;
+       if (!new)
+               return NULL;
+       l_mg->data_line = new;
+
+retry_line:
+       left_seblks = atomic_read(&new->left_seblks);
+       if (left_seblks) {
+               /* If line is not fully erased, erase it */
+               if (atomic_read(&new->left_eblks)) {
+                       if (pblk_line_erase(pblk, new))
+                               return NULL;
+               } else {
+                       io_schedule();
+               }
+               goto retry_line;
+       }
+
+       spin_lock(&l_mg->free_lock);
+       /* Allocate next line for preparation */
+       l_mg->data_next = pblk_line_get(pblk);
+       if (l_mg->data_next) {
+               l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
+               l_mg->data_next->type = PBLK_LINETYPE_DATA;
+               is_next = 1;
+       }
+
+       /* Wait for a free metadata slot, dropping free_lock while yielding */
+retry_meta:
+       meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
+       if (meta_line == PBLK_DATA_LINES) {
+               spin_unlock(&l_mg->free_lock);
+               io_schedule();
+               spin_lock(&l_mg->free_lock);
+               goto retry_meta;
+       }
+
+       set_bit(meta_line, &l_mg->meta_bitmap);
+       new->smeta = l_mg->sline_meta[meta_line].meta;
+       new->emeta = l_mg->eline_meta[meta_line].meta;
+       new->meta_line = meta_line;
+
+       /* The slot is reused, so start from zeroed metadata buffers */
+       memset(new->smeta, 0, lm->smeta_len);
+       memset(new->emeta, 0, lm->emeta_len);
+       spin_unlock(&l_mg->free_lock);
+
+       if (is_next)
+               pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
+
+retry_setup:
+       if (!pblk_line_set_metadata(pblk, new, cur)) {
+               new = pblk_line_retry(pblk, new);
+               if (!new)
+                       return NULL;
+
+               goto retry_setup;
+       }
+
+       if (!pblk_line_init_bb(pblk, new, 1)) {
+               new = pblk_line_retry(pblk, new);
+               if (!new)
+                       return NULL;
+
+               goto retry_setup;
+       }
+
+       return new;
+}
+
+/*
+ * Return @line's map and invalid bitmaps, when present, to
+ * pblk->line_meta_pool and clear the line's bitmap and metadata pointers.
+ */
+void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
+{
+       if (line->map_bitmap) {
+               mempool_free(line->map_bitmap, pblk->line_meta_pool);
+               line->map_bitmap = NULL;
+       }
+
+       if (line->invalid_bitmap) {
+               mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
+               line->invalid_bitmap = NULL;
+       }
+
+       line->smeta = NULL;
+       line->emeta = NULL;
+}
+
+/*
+ * kref release callback for a line. Resets the line to PBLK_LINESTATE_FREE
+ * with no GC group, releases its bitmaps through pblk_line_free(), appends
+ * it to the free list and updates the free-line accounting. The line is
+ * expected to be in PBLK_LINESTATE_GC; any other state triggers a warning.
+ */
+void pblk_line_put(struct kref *ref)
+{
+       struct pblk_line *line = container_of(ref, struct pblk_line, ref);
+       struct pblk *pblk = line->pblk;
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+
+       spin_lock(&line->lock);
+       WARN_ON(line->state != PBLK_LINESTATE_GC);
+       line->state = PBLK_LINESTATE_FREE;
+       line->gc_group = PBLK_LINEGC_NONE;
+       pblk_line_free(pblk, line);
+       spin_unlock(&line->lock);
+
+       spin_lock(&l_mg->free_lock);
+       list_add_tail(&line->list, &l_mg->free_list);
+       l_mg->nr_free_lines++;
+       spin_unlock(&l_mg->free_lock);
+
+       pblk_rl_free_lines_inc(&pblk->rl, line);
+}
+
+/*
+ * Submit an asynchronous erase for the block addressed by @ppa. The request
+ * is allocated from pblk->r_rq_pool and its end_io callback is
+ * pblk_end_io_erase().
+ *
+ * Returns the result of pblk_submit_io().
+ *
+ * NOTE(review): on submission failure the request is not returned to the
+ * mempool in this function - confirm whether the caller or the completion
+ * path is expected to release it.
+ */
+int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
+{
+       struct nvm_rq *rqd;
+       int err;
+
+       rqd = mempool_alloc(pblk->r_rq_pool, GFP_KERNEL);
+       memset(rqd, 0, pblk_r_rq_size);
+
+       pblk_setup_e_rq(pblk, rqd, ppa);
+
+       rqd->end_io = pblk_end_io_erase;
+       rqd->private = pblk;
+
+       /* The write thread schedules erases so that it minimizes disturbances
+        * with writes. Thus, there is no need to take the LUN semaphore.
+        */
+       err = pblk_submit_io(pblk, rqd);
+       if (err) {
+               struct nvm_tgt_dev *dev = pblk->dev;
+               struct nvm_geo *geo = &dev->geo;
+
+               pr_err("pblk: could not async erase line:%d,blk:%d\n",
+                                       pblk_dev_ppa_to_line(ppa),
+                                       pblk_dev_ppa_to_pos(geo, ppa));
+       }
+
+       return err;
+}
+
+/* Return the current data line (l_mg.data_line); no lock is taken here */
+struct pblk_line *pblk_line_get_data(struct pblk *pblk)
+{
+       return pblk->l_mg.data_line;
+}
+
+/* Return the pre-allocated next data line (l_mg.data_next), possibly NULL */
+struct pblk_line *pblk_line_get_data_next(struct pblk *pblk)
+{
+       return pblk->l_mg.data_next;
+}
+
+/* Return 1 when @line's left_msecs counter is zero, 0 otherwise */
+int pblk_line_is_full(struct pblk_line *line)
+{
+       return !line->left_msecs;
+}
+
+/*
+ * Close @line: compute the emeta CRC, write emeta to the device starting at
+ * line->cur_sec, release the line's metadata slot, set the line to
+ * PBLK_LINESTATE_CLOSED and queue it on the list returned by
+ * pblk_line_gc_list(). map_bitmap is returned to its mempool and the
+ * smeta/emeta pointers are cleared; invalid_bitmap is kept.
+ *
+ * A failed emeta write is only logged; the line is closed regardless.
+ */
+void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
+{
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       struct list_head *move_list;
+
+       line->emeta->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, line->emeta));
+
+       if (pblk_line_submit_emeta_io(pblk, line, line->cur_sec, WRITE))
+               pr_err("pblk: line %d close I/O failed\n", line->id);
+
+       WARN(!bitmap_full(line->map_bitmap, line->sec_in_line),
+                               "pblk: corrupt closed line %d\n", line->id);
+
+       /* Give the metadata slot back; it must have been marked in use */
+       spin_lock(&l_mg->free_lock);
+       WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap));
+       spin_unlock(&l_mg->free_lock);
+
+       spin_lock(&l_mg->gc_lock);
+       spin_lock(&line->lock);
+       WARN_ON(line->state != PBLK_LINESTATE_OPEN);
+       line->state = PBLK_LINESTATE_CLOSED;
+       move_list = pblk_line_gc_list(pblk, line);
+
+       list_add_tail(&line->list, move_list);
+
+       mempool_free(line->map_bitmap, pblk->line_meta_pool);
+       line->map_bitmap = NULL;
+       line->smeta = NULL;
+       line->emeta = NULL;
+
+       spin_unlock(&line->lock);
+       spin_unlock(&l_mg->gc_lock);
+}
+
+/*
+ * Work item wrapper around pblk_line_close(): closes the line recorded in
+ * the pblk_line_ws descriptor and returns the descriptor to its mempool.
+ */
+void pblk_line_close_ws(struct work_struct *work)
+{
+       struct pblk_line_ws *line_ws;
+       struct pblk *pblk;
+
+       line_ws = container_of(work, struct pblk_line_ws, ws);
+       pblk = line_ws->pblk;
+
+       pblk_line_close(pblk, line_ws->line);
+       mempool_free(line_ws, pblk->line_ws_pool);
+}
+
+/*
+ * Work item that marks the block at the address stored in line_ws->priv as
+ * NVM_BLK_T_GRWN_BAD through nvm_set_tgt_bb_tbl(). A failure is only
+ * logged. The address buffer is kfree()d and the work descriptor is
+ * returned to its mempool in all cases.
+ */
+void pblk_line_mark_bb(struct work_struct *work)
+{
+       struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
+                                                                       ws);
+       struct pblk *pblk = line_ws->pblk;
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct ppa_addr *ppa = line_ws->priv;
+       int ret;
+
+       ret = nvm_set_tgt_bb_tbl(dev, ppa, 1, NVM_BLK_T_GRWN_BAD);
+       if (ret) {
+               struct pblk_line *line;
+               int pos;
+
+               /* Resolve line and position only for the error message */
+               line = &pblk->lines[pblk_dev_ppa_to_line(*ppa)];
+               pos = pblk_dev_ppa_to_pos(&dev->geo, *ppa);
+
+               pr_err("pblk: failed to mark bb, line:%d, pos:%d\n",
+                               line->id, pos);
+       }
+
+       kfree(ppa);
+       mempool_free(line_ws, pblk->line_ws_pool);
+}
+
+/*
+ * Queue @work on pblk->kw_wq with a pblk_line_ws descriptor carrying @pblk,
+ * @line and @priv. The descriptor is allocated with GFP_ATOMIC.
+ *
+ * NOTE(review): if the descriptor cannot be allocated the work is silently
+ * dropped and @priv is not released here - confirm callers can tolerate
+ * that.
+ */
+void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
+                     void (*work)(struct work_struct *))
+{
+       struct pblk_line_ws *line_ws;
+
+       line_ws = mempool_alloc(pblk->line_ws_pool, GFP_ATOMIC);
+       if (!line_ws)
+               return;
+
+       line_ws->pblk = pblk;
+       line_ws->line = line;
+       line_ws->priv = priv;
+
+       INIT_WORK(&line_ws->ws, work);
+       queue_work(pblk->kw_wq, &line_ws->ws);
+}
+
+/*
+ * Take the write semaphore of the LUN addressed by the first entry of
+ * @ppa_list, unless @lun_bitmap shows that this request already holds it.
+ * The LUN's bit is set in @lun_bitmap before the semaphore is taken.
+ *
+ * The wait is bounded to 5000 ms. A failure to take the semaphore is only
+ * logged; the function returns nothing and the bit stays set.
+ */
+void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
+                 unsigned long *lun_bitmap)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct pblk_lun *rlun;
+       int lun_id = ppa_list[0].g.ch * geo->luns_per_chnl + ppa_list[0].g.lun;
+       int ret;
+
+       /*
+        * Only send one inflight I/O per LUN. Since we map at a page
+        * granularity, all ppas in the I/O will map to the same LUN
+        */
+#ifdef CONFIG_NVM_DEBUG
+       int i;
+
+       for (i = 1; i < nr_ppas; i++)
+               WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
+                               ppa_list[0].g.ch != ppa_list[i].g.ch);
+#endif
+       /* If the LUN has been locked for this same request, do no attempt to
+        * lock it again
+        */
+       if (test_and_set_bit(lun_id, lun_bitmap))
+               return;
+
+       rlun = &pblk->luns[lun_id];
+       ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
+
+       /* Report each failure mode distinctly instead of as a timeout */
+       switch (ret) {
+       case 0:
+               break;
+       case -ETIME:
+               pr_err("pblk: lun semaphore timed out\n");
+               break;
+       case -EINTR:
+               pr_err("pblk: lun semaphore interrupted\n");
+               break;
+       default:
+               pr_err("pblk: lun semaphore error: %d\n", ret);
+               break;
+       }
+}
+
+/*
+ * Release the write semaphore of every LUN whose bit is set in @lun_bitmap
+ * (the first geo->nr_luns bits are scanned) and kfree() the bitmap.
+ * @ppa_list and @nr_ppas are not used.
+ */
+void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
+               unsigned long *lun_bitmap)
+{
+       int nr_luns = pblk->dev->geo.nr_luns;
+       int lun;
+
+       for (lun = find_next_bit(lun_bitmap, nr_luns, 0); lun < nr_luns;
+            lun = find_next_bit(lun_bitmap, nr_luns, lun + 1))
+               up(&pblk->luns[lun].wr_sem);
+
+       kfree(lun_bitmap);
+}
+
+/*
+ * Point the L2P entry of @lba at @ppa under pblk->trans_lock. If the entry
+ * previously held an address that is neither in the cache nor empty, that
+ * address is invalidated first. An out-of-bounds @lba triggers a warning
+ * and is ignored.
+ */
+void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
+{
+       struct ppa_addr old_ppa;
+
+       /* logic error: lba out-of-bounds. Ignore update */
+       if (lba >= pblk->rl.nr_secs) {
+               WARN(1, "pblk: corrupted L2P map request\n");
+               return;
+       }
+
+       spin_lock(&pblk->trans_lock);
+
+       old_ppa = pblk_trans_map_get(pblk, lba);
+       if (!(pblk_addr_in_cache(old_ppa) || pblk_ppa_empty(old_ppa)))
+               pblk_map_invalidate(pblk, old_ppa);
+
+       pblk_trans_map_set(pblk, lba, ppa);
+
+       spin_unlock(&pblk->trans_lock);
+}
+
+/*
+ * Update the L2P entry of @lba with the cache address @ppa. This is
+ * pblk_update_map() plus, under CONFIG_NVM_DEBUG, checks that @ppa really
+ * is a cache address within the write buffer.
+ */
+void pblk_update_map_cache(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
+{
+#ifdef CONFIG_NVM_DEBUG
+       /* Callers must ensure that the ppa points to a cache address */
+       BUG_ON(!pblk_addr_in_cache(ppa));
+       BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa)));
+#endif
+
+       pblk_update_map(pblk, lba, ppa);
+}
+
+/*
+ * Point the L2P entry of @lba at the cache address @ppa on behalf of GC.
+ * The entry is only rewritten if it currently holds a device address that
+ * belongs to @gc_line.
+ *
+ * Returns 1 if the entry was updated, 0 if it was left alone or @lba is
+ * out of bounds.
+ */
+int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
+                      struct pblk_line *gc_line)
+{
+       struct ppa_addr cur_ppa;
+       int updated = 0;
+
+#ifdef CONFIG_NVM_DEBUG
+       /* Callers must ensure that the ppa points to a cache address */
+       BUG_ON(!pblk_addr_in_cache(ppa));
+       BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa)));
+#endif
+
+       /* logic error: lba out-of-bounds. Ignore update */
+       if (lba >= pblk->rl.nr_secs) {
+               WARN(1, "pblk: corrupted L2P map request\n");
+               return 0;
+       }
+
+       spin_lock(&pblk->trans_lock);
+       cur_ppa = pblk_trans_map_get(pblk, lba);
+
+       /* Prevent updated entries to be overwritten by GC: only remap an
+        * entry that still points into the line being collected
+        */
+       if (!pblk_addr_in_cache(cur_ppa) && !pblk_ppa_empty(cur_ppa) &&
+                       pblk_tgt_ppa_to_line(cur_ppa) == gc_line->id) {
+               pblk_trans_map_set(pblk, lba, ppa);
+               updated = 1;
+       }
+
+       spin_unlock(&pblk->trans_lock);
+       return updated;
+}
+
+/*
+ * Point the L2P entry of @lba at the device address @ppa. The update only
+ * happens if the entry still equals @entry_line; otherwise @ppa is
+ * invalidated (unless it is empty) and the entry is left as is.
+ *
+ * An @lba of ADDR_EMPTY denotes a padded entry: @ppa is invalidated and no
+ * mapping is touched. An out-of-bounds @lba triggers a warning and is
+ * ignored.
+ */
+void pblk_update_map_dev(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
+                        struct ppa_addr entry_line)
+{
+       struct ppa_addr l2p_line;
+
+#ifdef CONFIG_NVM_DEBUG
+       /* Callers must ensure that the ppa points to a device address */
+       BUG_ON(pblk_addr_in_cache(ppa));
+#endif
+       /* Invalidate and discard padded entries */
+       if (lba == ADDR_EMPTY) {
+#ifdef CONFIG_NVM_DEBUG
+               atomic_long_inc(&pblk->padded_wb);
+#endif
+               pblk_map_invalidate(pblk, ppa);
+               return;
+       }
+
+       /* logic error: lba out-of-bounds. Ignore update */
+       if (!(lba < pblk->rl.nr_secs)) {
+               WARN(1, "pblk: corrupted L2P map request\n");
+               return;
+       }
+
+       spin_lock(&pblk->trans_lock);
+       l2p_line = pblk_trans_map_get(pblk, lba);
+
+       /* Do not update L2P if the cacheline has been updated. In this case,
+        * the mapped ppa must be invalidated
+        */
+       if (l2p_line.ppa != entry_line.ppa) {
+               if (!pblk_ppa_empty(ppa))
+                       pblk_map_invalidate(pblk, ppa);
+               goto out;
+       }
+
+#ifdef CONFIG_NVM_DEBUG
+       WARN_ON(!pblk_addr_in_cache(l2p_line) && !pblk_ppa_empty(l2p_line));
+#endif
+
+       pblk_trans_map_set(pblk, lba, ppa);
+out:
+       spin_unlock(&pblk->trans_lock);
+}
+
+/*
+ * Look up the L2P entries of the @nr_secs consecutive lbas starting at
+ * @blba and store them in @ppas. The whole lookup runs under
+ * pblk->trans_lock.
+ */
+void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
+                        sector_t blba, int nr_secs)
+{
+       int idx = 0;
+
+       spin_lock(&pblk->trans_lock);
+       while (idx < nr_secs) {
+               ppas[idx] = pblk_trans_map_get(pblk, blba + idx);
+               idx++;
+       }
+       spin_unlock(&pblk->trans_lock);
+}
+
+/*
+ * Look up the L2P entries of the @nr_secs lbas in @lba_list and store them
+ * in @ppas, under pblk->trans_lock. An lba of ADDR_EMPTY yields a ppa of
+ * ADDR_EMPTY.
+ *
+ * NOTE(review): for an out-of-bounds lba the loop warns and continues
+ * without writing ppas[i], so that slot keeps whatever the caller put there
+ * - confirm callers initialise @ppas.
+ */
+void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
+                         u64 *lba_list, int nr_secs)
+{
+       sector_t lba;
+       int i;
+
+       spin_lock(&pblk->trans_lock);
+       for (i = 0; i < nr_secs; i++) {
+               lba = lba_list[i];
+               if (lba == ADDR_EMPTY) {
+                       ppas[i].ppa = ADDR_EMPTY;
+               } else {
+                       /* logic error: lba out-of-bounds. Ignore update */
+                       if (!(lba < pblk->rl.nr_secs)) {
+                               WARN(1, "pblk: corrupted L2P map request\n");
+                               continue;
+                       }
+                       ppas[i] = pblk_trans_map_get(pblk, lba);
+               }
+       }
+       spin_unlock(&pblk->trans_lock);
+}
diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c

new file mode 100644 (file)

index 0000000..eaf479c
--- /dev/null
+++ b/drivers/lightnvm/pblk-gc.c
@@ -0,0 +1,555 @@
+/*
+ * Copyright (C) 2016 CNEX Labs
+ * Initial release: Javier Gonzalez <javier@cnexlabs.com>
+ *                  Matias Bjorling <matias@cnexlabs.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * pblk-gc.c - pblk's garbage collector
+ */
+
+#include "pblk.h"
+#include <linux/delay.h>
+
+/* Free a GC request together with the data buffer and lba list it owns. */
+static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
+{
+       kfree(gc_rq->data);
+       kfree(gc_rq->lba_list);
+       kfree(gc_rq);
+}
+
+/*
+ * Drain the GC write list. All pending requests are moved to a private list
+ * under gc->w_lock; each one is then handed to pblk_write_gc_to_cache(), the
+ * line reference held by the request is dropped and the request is freed.
+ *
+ * Returns 1 if there was nothing queued, 0 if at least one request was
+ * processed.
+ */
+static int pblk_gc_write(struct pblk *pblk)
+{
+       struct pblk_gc *gc = &pblk->gc;
+       struct pblk_gc_rq *gc_rq, *tgc_rq;
+       LIST_HEAD(w_list);
+
+       spin_lock(&gc->w_lock);
+       if (list_empty(&gc->w_list)) {
+               spin_unlock(&gc->w_lock);
+               return 1;
+       }
+
+       /* Steal every queued request so the lock is not held while writing */
+       list_for_each_entry_safe(gc_rq, tgc_rq, &gc->w_list, list) {
+               list_move_tail(&gc_rq->list, &w_list);
+               gc->w_entries--;
+       }
+       spin_unlock(&gc->w_lock);
+
+       list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
+               pblk_write_gc_to_cache(pblk, gc_rq->data, gc_rq->lba_list,
+                               gc_rq->nr_secs, gc_rq->secs_to_gc,
+                               gc_rq->line, PBLK_IOTYPE_GC);
+
+               /* Pairs with the kref_get() taken when the request was queued */
+               kref_put(&gc_rq->line->ref, pblk_line_put);
+
+               list_del(&gc_rq->list);
+               pblk_gc_free_gc_rq(gc_rq);
+       }
+
+       return 0;
+}
+
+/* Wake up the GC writer kthread. */
+static void pblk_gc_writer_kick(struct pblk_gc *gc)
+{
+       wake_up_process(gc->gc_writer_ts);
+}
+
+/*
+ * Read the sectors listed in lba_list from a GC victim line and queue them
+ * for the GC writer thread.
+ *
+ * Responsible for managing all memory related to a gc request, also in case
+ * of failure: lba_list is freed here on every path that does not queue a
+ * request; once a request is queued, data and lba_list are referenced by it.
+ *
+ * Returns NVM_IO_OK on success (including when the read reports no sectors
+ * to move) and NVM_IO_ERR on allocation or read failure.
+ */
+static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_line *line,
+                                  u64 *lba_list, unsigned int nr_secs)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct pblk_gc *gc = &pblk->gc;
+       struct pblk_gc_rq *gc_rq;
+       void *data;
+       unsigned int secs_to_gc;
+       int ret = NVM_IO_OK;
+
+       data = kmalloc(nr_secs * geo->sec_size, GFP_KERNEL);
+       if (!data) {
+               ret = NVM_IO_ERR;
+               goto free_lba_list;
+       }
+
+       /* Read from GC victim block */
+       if (pblk_submit_read_gc(pblk, lba_list, data, nr_secs,
+                                                       &secs_to_gc, line)) {
+               ret = NVM_IO_ERR;
+               goto free_data;
+       }
+
+       /* Nothing to move: release the buffers and report success */
+       if (!secs_to_gc)
+               goto free_data;
+
+       gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
+       if (!gc_rq) {
+               ret = NVM_IO_ERR;
+               goto free_data;
+       }
+
+       gc_rq->line = line;
+       gc_rq->data = data;
+       gc_rq->lba_list = lba_list;
+       gc_rq->nr_secs = nr_secs;
+       gc_rq->secs_to_gc = secs_to_gc;
+
+       /* Hold the line while the request is pending; dropped by the writer */
+       kref_get(&line->ref);
+
+       /* Throttle: sleep and retry while more than 256 requests are queued */
+retry:
+       spin_lock(&gc->w_lock);
+       if (gc->w_entries > 256) {
+               spin_unlock(&gc->w_lock);
+               usleep_range(256, 1024);
+               goto retry;
+       }
+       gc->w_entries++;
+       list_add_tail(&gc_rq->list, &gc->w_list);
+       spin_unlock(&gc->w_lock);
+
+       pblk_gc_writer_kick(&pblk->gc);
+
+       return NVM_IO_OK;
+
+free_data:
+       kfree(data);
+free_lba_list:
+       kfree(lba_list);
+
+       return ret;
+}
+
+/*
+ * Undo the selection of a line for GC: switch its state from GC back to
+ * CLOSED and, if pblk_line_gc_list() returns a list for it, append the line
+ * to that list under l_mg->gc_lock.
+ */
+static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
+{
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       struct list_head *move_list;
+
+       spin_lock(&line->lock);
+       WARN_ON(line->state != PBLK_LINESTATE_GC);
+       line->state = PBLK_LINESTATE_CLOSED;
+       move_list = pblk_line_gc_list(pblk, line);
+       spin_unlock(&line->lock);
+
+       if (move_list) {
+               spin_lock(&l_mg->gc_lock);
+               list_add_tail(&line->list, move_list);
+               spin_unlock(&l_mg->gc_lock);
+       }
+}
+
+/*
+ * Workqueue handler that garbage collects one line. It walks the zero bits
+ * of line->invalid_bitmap, collects the matching entries of the lba list
+ * passed in line_ws->priv in batches of up to pblk->max_write_pgs, and hands
+ * each batch to pblk_gc_move_valid_secs().
+ *
+ * On exit it always frees line->emeta and the work item and decrements
+ * gc.inflight_gc. The line reference is dropped unless the line was handed
+ * back through pblk_put_line_back() after an error.
+ */
+static void pblk_gc_line_ws(struct work_struct *work)
+{
+       struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
+                                                                       ws);
+       struct pblk *pblk = line_ws->pblk;
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       struct pblk_line *line = line_ws->line;
+       struct pblk_line_meta *lm = &pblk->lm;
+       __le64 *lba_list = line_ws->priv;
+       u64 *gc_list;
+       int sec_left;
+       int nr_ppas, bit;
+       int put_line = 1;
+
+       pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
+
+       /* Snapshot the valid sector count; it bounds the work done below */
+       spin_lock(&line->lock);
+       sec_left = line->vsc;
+       if (!sec_left) {
+               /* Lines are erased before being used (l_mg->data_/log_next) */
+               spin_unlock(&line->lock);
+               goto out;
+       }
+       spin_unlock(&line->lock);
+
+       if (sec_left < 0) {
+               pr_err("pblk: corrupted GC line (%d)\n", line->id);
+               put_line = 0;
+               pblk_put_line_back(pblk, line);
+               goto out;
+       }
+
+       bit = -1;
+next_rq:
+       /* A fresh list per batch: pblk_gc_move_valid_secs() frees or queues it */
+       gc_list = kmalloc_array(pblk->max_write_pgs, sizeof(u64), GFP_KERNEL);
+       if (!gc_list) {
+               put_line = 0;
+               pblk_put_line_back(pblk, line);
+               goto out;
+       }
+
+       nr_ppas = 0;
+       do {
+               bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line,
+                                                               bit + 1);
+               /* Stop once the scan moves past line->emeta_ssec */
+               if (bit > line->emeta_ssec)
+                       break;
+
+               gc_list[nr_ppas++] = le64_to_cpu(lba_list[bit]);
+       } while (nr_ppas < pblk->max_write_pgs);
+
+       if (unlikely(!nr_ppas)) {
+               kfree(gc_list);
+               goto out;
+       }
+
+       if (pblk_gc_move_valid_secs(pblk, line, gc_list, nr_ppas)) {
+               pr_err("pblk: could not GC all sectors: line:%d (%d/%d/%d)\n",
+                                               line->id, line->vsc,
+                                               nr_ppas, nr_ppas);
+               put_line = 0;
+               pblk_put_line_back(pblk, line);
+               goto out;
+       }
+
+       sec_left -= nr_ppas;
+       if (sec_left > 0)
+               goto next_rq;
+
+out:
+       pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
+       mempool_free(line_ws, pblk->line_ws_pool);
+       atomic_dec(&pblk->gc.inflight_gc);
+       if (put_line)
+               kref_put(&line->ref, pblk_line_put);
+}
+
+/*
+ * Prepare a line for garbage collection and queue the GC worker for it.
+ *
+ * Allocates a work item and the line's emeta buffer, reads emeta from the
+ * device and extracts the lba list from it, then queues pblk_gc_line_ws() on
+ * the GC reader workqueue. The worker takes over the emeta buffer and the
+ * work item.
+ *
+ * On failure everything allocated here is released, the line is put back
+ * through pblk_put_line_back() and the in-flight GC counter is decremented,
+ * since no worker will run to do so.
+ *
+ * Returns 0 when the worker has been queued, 1 on failure.
+ */
+static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
+{
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       struct pblk_line_meta *lm = &pblk->lm;
+       struct pblk_line_ws *line_ws;
+       __le64 *lba_list;
+       int ret;
+
+       line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
+       line->emeta = pblk_malloc(lm->emeta_len, l_mg->emeta_alloc_type,
+                                                               GFP_KERNEL);
+       if (!line->emeta) {
+               pr_err("pblk: cannot use GC emeta\n");
+               goto fail_free_ws;
+       }
+
+       ret = pblk_line_read_emeta(pblk, line);
+       if (ret) {
+               pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
+               goto fail_free_emeta;
+       }
+
+       /* If this read fails, it means that emeta is corrupted. For now, leave
+        * the line untouched. TODO: Implement a recovery routine that scans and
+        * moves all sectors on the line.
+        */
+       lba_list = pblk_recov_get_lba_list(pblk, line->emeta);
+       if (!lba_list) {
+               pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
+               goto fail_free_emeta;
+       }
+
+       line_ws->pblk = pblk;
+       line_ws->line = line;
+       line_ws->priv = lba_list;
+
+       INIT_WORK(&line_ws->ws, pblk_gc_line_ws);
+       queue_work(pblk->gc.gc_reader_wq, &line_ws->ws);
+
+       return 0;
+
+fail_free_emeta:
+       pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
+fail_free_ws:
+       mempool_free(line_ws, pblk->line_ws_pool);
+       pblk_put_line_back(pblk, line);
+       /* The worker normally drops the in-flight count; it never runs here */
+       atomic_dec(&pblk->gc.inflight_gc);
+
+       return 1;
+}
+
+/*
+ * Start garbage collection on every line of gc_list, emptying the list.
+ *
+ * Each line is unlinked from gc_list before it is handed to pblk_gc_line().
+ * Once pblk_gc_line() has been called, line->list may be reused: its failure
+ * path and the queued worker both call pblk_put_line_back(), which adds the
+ * line to a GC list. Unlinking afterwards would remove the line from that
+ * list instead of from gc_list.
+ */
+static void pblk_gc_lines(struct pblk *pblk, struct list_head *gc_list)
+{
+       struct pblk_line *line, *tline;
+
+       list_for_each_entry_safe(line, tline, gc_list, list) {
+               list_del(&line->list);
+               if (pblk_gc_line(pblk, line))
+                       pr_err("pblk: failed to GC line %d\n", line->id);
+       }
+}
+
+/*
+ * Lines with no valid sectors will be returned to the free list immediately. If
+ * GC is activated - either because the free block count is under the determined
+ * threshold, or because it is being forced from user space - only lines with a
+ * high count of invalid sectors will be recycled.
+ *
+ * One pass of the GC thread: every line on l_mg->gc_full_list is released
+ * first; then, while GC is required, lines are taken from the head of
+ * l_mg->gc_lists[] groups, marked as being collected and handed to
+ * pblk_gc_lines().
+ */
+static void pblk_gc_run(struct pblk *pblk)
+{
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       struct pblk_gc *gc = &pblk->gc;
+       struct pblk_line *line, *tline;
+       unsigned int nr_blocks_free, nr_blocks_need;
+       struct list_head *group_list;
+       int run_gc, gc_group = 0;
+       int prev_gc = 0;
+       int inflight_gc = atomic_read(&gc->inflight_gc);
+       LIST_HEAD(gc_list);
+
+       /* Lines on the full list need no data movement: just drop them */
+       spin_lock(&l_mg->gc_lock);
+       list_for_each_entry_safe(line, tline, &l_mg->gc_full_list, list) {
+               spin_lock(&line->lock);
+               WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
+               line->state = PBLK_LINESTATE_GC;
+               spin_unlock(&line->lock);
+
+               list_del(&line->list);
+               kref_put(&line->ref, pblk_line_put);
+       }
+       spin_unlock(&l_mg->gc_lock);
+
+       /* GC runs when free blocks are below the threshold or it is forced */
+       nr_blocks_need = pblk_rl_gc_thrs(&pblk->rl);
+       nr_blocks_free = pblk_rl_nr_free_blks(&pblk->rl);
+       run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
+
+next_gc_group:
+       group_list = l_mg->gc_lists[gc_group++];
+       spin_lock(&l_mg->gc_lock);
+       while (run_gc && !list_empty(group_list)) {
+               /* No need to queue up more GC lines than we can handle */
+               if (!run_gc || inflight_gc > gc->gc_jobs_active) {
+                       spin_unlock(&l_mg->gc_lock);
+                       pblk_gc_lines(pblk, &gc_list);
+                       return;
+               }
+
+               line = list_first_entry(group_list, struct pblk_line, list);
+               /* Count this line's blocks as free when re-evaluating run_gc */
+               nr_blocks_free += atomic_read(&line->blk_in_line);
+
+               spin_lock(&line->lock);
+               WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
+               line->state = PBLK_LINESTATE_GC;
+               list_move_tail(&line->list, &gc_list);
+               atomic_inc(&gc->inflight_gc);
+               inflight_gc++;
+               spin_unlock(&line->lock);
+
+               prev_gc = 1;
+               run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
+       }
+       spin_unlock(&l_mg->gc_lock);
+
+       pblk_gc_lines(pblk, &gc_list);
+
+       /* Nothing was selected so far: try the next group if rb_state allows */
+       if (!prev_gc && pblk->rl.rb_state > gc_group &&
+                                               gc_group < PBLK_NR_GC_LISTS)
+               goto next_gc_group;
+}
+
+
+/*
+ * Wake both GC kthreads (main and writer) and re-arm the GC timer to fire
+ * again GC_TIME_MSECS from now.
+ */
+static void pblk_gc_kick(struct pblk *pblk)
+{
+       struct pblk_gc *gc = &pblk->gc;
+
+       wake_up_process(gc->gc_ts);
+       pblk_gc_writer_kick(gc);
+       mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
+}
+
+/* GC timer callback; data is the struct pblk pointer cast to unsigned long. */
+static void pblk_gc_timer(unsigned long data)
+{
+       struct pblk *pblk = (struct pblk *)data;
+
+       pblk_gc_kick(pblk);
+}
+
+/*
+ * Main GC kthread: runs one GC pass, then sleeps until it is woken up again
+ * (see pblk_gc_kick()) or asked to stop.
+ *
+ * The task state is set to TASK_INTERRUPTIBLE before the stop request is
+ * re-checked. A kthread_stop() that arrives after the loop condition was
+ * evaluated is therefore either seen by the re-check or its wake-up puts the
+ * task back to running, so the thread cannot sleep through it.
+ */
+static int pblk_gc_ts(void *data)
+{
+       struct pblk *pblk = data;
+
+       while (!kthread_should_stop()) {
+               pblk_gc_run(pblk);
+
+               set_current_state(TASK_INTERRUPTIBLE);
+               if (kthread_should_stop()) {
+                       __set_current_state(TASK_RUNNING);
+                       break;
+               }
+               io_schedule();
+       }
+
+       return 0;
+}
+
+/*
+ * GC writer kthread: keeps draining the GC write list while there is work,
+ * and sleeps when pblk_gc_write() reports that the list was empty.
+ *
+ * The task state is set to TASK_INTERRUPTIBLE before the stop request is
+ * re-checked, so a kthread_stop() issued after the loop condition was
+ * evaluated cannot be slept through.
+ */
+static int pblk_gc_writer_ts(void *data)
+{
+       struct pblk *pblk = data;
+
+       while (!kthread_should_stop()) {
+               /* Work was done: look for more before going to sleep */
+               if (!pblk_gc_write(pblk))
+                       continue;
+
+               set_current_state(TASK_INTERRUPTIBLE);
+               if (kthread_should_stop()) {
+                       __set_current_state(TASK_RUNNING);
+                       break;
+               }
+               io_schedule();
+       }
+
+       return 0;
+}
+
+/*
+ * Mark GC as active. No lock is taken here; the only caller in this file,
+ * __pblk_gc_should_start(), asserts that gc->lock is held.
+ */
+static void pblk_gc_start(struct pblk *pblk)
+{
+       pblk->gc.gc_active = 1;
+
+       pr_debug("pblk: gc start\n");
+}
+
+/* Return the current value of gc_active, sampled under gc->lock. */
+int pblk_gc_status(struct pblk *pblk)
+{
+       int active;
+
+       spin_lock(&pblk->gc.lock);
+       active = pblk->gc.gc_active;
+       spin_unlock(&pblk->gc.lock);
+
+       return active;
+}
+
+/*
+ * Activate GC if it is enabled and not already active. The caller must hold
+ * gc->lock.
+ */
+static void __pblk_gc_should_start(struct pblk *pblk)
+{
+       struct pblk_gc *gc = &pblk->gc;
+
+       lockdep_assert_held(&gc->lock);
+
+       if (gc->gc_enabled && !gc->gc_active)
+               pblk_gc_start(pblk);
+}
+
+/* Locked wrapper: take gc->lock and activate GC if enabled and inactive. */
+void pblk_gc_should_start(struct pblk *pblk)
+{
+       struct pblk_gc *gc = &pblk->gc;
+
+       spin_lock(&gc->lock);
+       __pblk_gc_should_start(pblk);
+       spin_unlock(&gc->lock);
+}
+
+/*
+ * Mark GC as inactive, under gc->lock.
+ *
+ * If flush_wq == 1 then no lock should be held by the caller since
+ * flush_workqueue can sleep
+ *
+ * NOTE(review): flush_wq is not used in the body and no workqueue is flushed
+ * here; confirm whether the flush was meant to be implemented.
+ */
+static void pblk_gc_stop(struct pblk *pblk, int flush_wq)
+{
+       spin_lock(&pblk->gc.lock);
+       pblk->gc.gc_active = 0;
+       spin_unlock(&pblk->gc.lock);
+
+       pr_debug("pblk: gc stop\n");
+}
+
+/*
+ * Deactivate GC if it is currently active and not being forced. The flags
+ * are read without taking gc->lock.
+ */
+void pblk_gc_should_stop(struct pblk *pblk)
+{
+       struct pblk_gc *gc = &pblk->gc;
+
+       if (!gc->gc_active || gc->gc_forced)
+               return;
+
+       pblk_gc_stop(pblk, 0);
+}
+
+/*
+ * Report the GC state: copies gc_enabled and gc_active into the two output
+ * parameters as one consistent snapshot taken under gc->lock.
+ */
+void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
+                             int *gc_active)
+{
+       struct pblk_gc *gc = &pblk->gc;
+
+       spin_lock(&gc->lock);
+       *gc_enabled = gc->gc_enabled;
+       *gc_active = gc->gc_active;
+       spin_unlock(&gc->lock);
+}
+
+/*
+ * Set or clear forced GC. A non-zero force also enables GC and passes a
+ * value of 64 to pblk_rl_set_gc_rsc(); otherwise 0 is passed. GC is then
+ * activated if it is enabled and not yet active. All of it runs under
+ * gc->lock.
+ */
+void pblk_gc_sysfs_force(struct pblk *pblk, int force)
+{
+       struct pblk_gc *gc = &pblk->gc;
+       int reserve = force ? 64 : 0;
+
+       spin_lock(&gc->lock);
+       if (force)
+               gc->gc_enabled = 1;
+       pblk_rl_set_gc_rsc(&pblk->rl, reserve);
+       gc->gc_forced = force;
+       __pblk_gc_should_start(pblk);
+       spin_unlock(&gc->lock);
+}
+
+/*
+ * Set up the garbage collector: create the main and writer kthreads, reset
+ * the GC state, initialise the locks and the write list, allocate the reader
+ * workqueue and finally arm the GC timer.
+ *
+ * The kthreads are only created here, not started; they first run when the
+ * timer fires and pblk_gc_kick() wakes them. The timer is therefore armed
+ * last, once everything the threads touch is initialised and nothing can
+ * fail any more, so no error path can leave an armed timer pointing at
+ * stopped threads.
+ *
+ * Returns 0 on success or a negative errno; on failure the kthreads created
+ * so far are stopped.
+ */
+int pblk_gc_init(struct pblk *pblk)
+{
+       struct pblk_gc *gc = &pblk->gc;
+       int ret;
+
+       gc->gc_ts = kthread_create(pblk_gc_ts, pblk, "pblk-gc-ts");
+       if (IS_ERR(gc->gc_ts)) {
+               pr_err("pblk: could not allocate GC main kthread\n");
+               return PTR_ERR(gc->gc_ts);
+       }
+
+       gc->gc_writer_ts = kthread_create(pblk_gc_writer_ts, pblk,
+                                                       "pblk-gc-writer-ts");
+       if (IS_ERR(gc->gc_writer_ts)) {
+               pr_err("pblk: could not allocate GC writer kthread\n");
+               ret = PTR_ERR(gc->gc_writer_ts);
+               goto fail_free_main_kthread;
+       }
+
+       gc->gc_active = 0;
+       gc->gc_forced = 0;
+       gc->gc_enabled = 1;
+       gc->gc_jobs_active = 8;
+       gc->w_entries = 0;
+       atomic_set(&gc->inflight_gc, 0);
+
+       spin_lock_init(&gc->lock);
+       spin_lock_init(&gc->w_lock);
+       INIT_LIST_HEAD(&gc->w_list);
+
+       gc->gc_reader_wq = alloc_workqueue("pblk-gc-reader-wq",
+                       WQ_MEM_RECLAIM | WQ_UNBOUND, gc->gc_jobs_active);
+       if (!gc->gc_reader_wq) {
+               pr_err("pblk: could not allocate GC reader workqueue\n");
+               ret = -ENOMEM;
+               goto fail_free_writer_kthread;
+       }
+
+       /* Last step: from here on the timer may wake the GC threads */
+       setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk);
+       mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
+
+       return 0;
+
+fail_free_writer_kthread:
+       kthread_stop(gc->gc_writer_ts);
+fail_free_main_kthread:
+       kthread_stop(gc->gc_ts);
+
+       return ret;
+}
+
+/*
+ * Tear down the garbage collector: flush pending GC reader work, stop the
+ * timer, mark GC inactive, then stop the kthreads and destroy the workqueue.
+ *
+ * The timer is removed with del_timer_sync() because its handler wakes the
+ * GC kthreads and re-arms itself; a handler still running on another CPU
+ * must have finished before the kthreads are stopped. Must be called from a
+ * context that may sleep.
+ */
+void pblk_gc_exit(struct pblk *pblk)
+{
+       struct pblk_gc *gc = &pblk->gc;
+
+       if (gc->gc_reader_wq)
+               flush_workqueue(gc->gc_reader_wq);
+
+       del_timer_sync(&gc->gc_timer);
+       pblk_gc_stop(pblk, 1);
+
+       if (gc->gc_ts)
+               kthread_stop(gc->gc_ts);
+
+       if (gc->gc_reader_wq)
+               destroy_workqueue(gc->gc_reader_wq);
+
+       if (gc->gc_writer_ts)
+               kthread_stop(gc->gc_writer_ts);
+}
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c

new file mode 100644 (file)

index 0000000..ae8cd6d
--- /dev/null
+++ b/drivers/lightnvm/pblk-init.c
@@ -0,0 +1,962 @@
+/*
+ * Copyright (C) 2015 IT University of Copenhagen (rrpc.c)
+ * Copyright (C) 2016 CNEX Labs
+ * Initial release: Javier Gonzalez <javier@cnexlabs.com>
+ *                  Matias Bjorling <matias@cnexlabs.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * Implementation of a physical block-device target for Open-channel SSDs.
+ *
+ * pblk-init.c - pblk's initialization.
+ */
+
+#include "pblk.h"
+
+static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_r_rq_cache,
+                                       *pblk_w_rq_cache, *pblk_line_meta_cache;
+static DECLARE_RWSEM(pblk_lock);
+
+/*
+ * Route a bio to the read path or to the write cache.
+ *
+ * Reads are always passed through blk_queue_split() before submission.
+ * Writes are only split when their sector count reaches the value returned
+ * by pblk_rl_sysfs_rate_show().
+ *
+ * Returns the status from pblk_submit_read() or pblk_write_to_cache().
+ */
+static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
+                         struct bio *bio)
+{
+       int ret;
+
+       /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap
+        * constraint. Writes can be of arbitrary size.
+        */
+       if (bio_data_dir(bio) == READ) {
+               blk_queue_split(q, &bio, q->bio_split);
+               ret = pblk_submit_read(pblk, bio);
+               /* A completed read on a cloned bio: drop the reference here */
+               if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
+                       bio_put(bio);
+
+               return ret;
+       }
+
+       /* Prevent deadlock in the case of a modest LUN configuration and large
+        * user I/Os. Unless stalled, the rate limiter leaves at least 256KB
+        * available for user I/O.
+        */
+       if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl)))
+               blk_queue_split(q, &bio, q->bio_split);
+
+       return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
+}
+
+/*
+ * make_request entry point for a pblk target.
+ *
+ * Discards are handled by pblk_discard(); unless REQ_PREFLUSH is also set
+ * the bio is completed right away. Every other bio (and a discard carrying
+ * a preflush) goes through pblk_rw_io(), and is completed here when that
+ * returns NVM_IO_ERR or NVM_IO_DONE. Always returns BLK_QC_T_NONE.
+ */
+static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
+{
+       struct pblk *pblk = q->queuedata;
+       int status;
+
+       if (bio_op(bio) == REQ_OP_DISCARD) {
+               pblk_discard(pblk, bio);
+               if (!(bio->bi_opf & REQ_PREFLUSH)) {
+                       bio_endio(bio);
+                       return BLK_QC_T_NONE;
+               }
+       }
+
+       status = pblk_rw_io(q, pblk, bio);
+       if (status == NVM_IO_ERR)
+               bio_io_error(bio);
+       else if (status == NVM_IO_DONE)
+               bio_endio(bio);
+
+       return BLK_QC_T_NONE;
+}
+
+/* Free the logical-to-physical translation map allocated with vmalloc(). */
+static void pblk_l2p_free(struct pblk *pblk)
+{
+       vfree(pblk->trans_map);
+}
+
+/*
+ * Allocate the logical-to-physical translation map and set every entry to
+ * the empty address. Entries take 4 bytes when the address format fits in
+ * fewer than 32 bits and 8 bytes otherwise; the map has one entry per sector
+ * in pblk->rl.nr_secs.
+ *
+ * Returns 0 on success, -ENOMEM if the map cannot be allocated.
+ */
+static int pblk_l2p_init(struct pblk *pblk)
+{
+       int entry_size = (pblk->ppaf_bitsize < 32) ? 4 : 8;
+       struct ppa_addr empty;
+       sector_t lba;
+
+       pblk->trans_map = vmalloc(entry_size * pblk->rl.nr_secs);
+       if (!pblk->trans_map)
+               return -ENOMEM;
+
+       pblk_ppa_set_empty(&empty);
+
+       for (lba = 0; lba < pblk->rl.nr_secs; lba++)
+               pblk_trans_map_set(pblk, lba, empty);
+
+       return 0;
+}
+
+/*
+ * Release the write buffer: log an error if the tear-down check fails, then
+ * free the buffer data and the entries array.
+ */
+static void pblk_rwb_free(struct pblk *pblk)
+{
+       if (pblk_rb_tear_down_check(&pblk->rwb))
+               pr_err("pblk: write buffer error on tear down\n");
+
+       pblk_rb_data_free(&pblk->rwb);
+       vfree(pblk_rb_entries_ref(&pblk->rwb));
+}
+
+/*
+ * Allocate and initialise the write buffer. The number of entries is derived
+ * from pblk->pgs_in_buffer by pblk_rb_calculate_size(); the buffer size and
+ * the segment size are passed to pblk_rb_init() as orders (log2).
+ *
+ * Returns -ENOMEM if the entries array cannot be allocated, otherwise the
+ * result of pblk_rb_init().
+ *
+ * NOTE(review): entries is not freed here if pblk_rb_init() fails; confirm
+ * whether pblk_rb_init() releases it on its error path.
+ */
+static int pblk_rwb_init(struct pblk *pblk)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct pblk_rb_entry *entries;
+       unsigned long nr_entries;
+       unsigned int power_size, power_seg_sz;
+
+       nr_entries = pblk_rb_calculate_size(pblk->pgs_in_buffer);
+
+       entries = vzalloc(nr_entries * sizeof(struct pblk_rb_entry));
+       if (!entries)
+               return -ENOMEM;
+
+       power_size = get_count_order(nr_entries);
+       power_seg_sz = get_count_order(geo->sec_size);
+
+       return pblk_rb_init(&pblk->rwb, entries, power_size, power_seg_sz);
+}
+
+/* Minimum pages needed within a lun */
+#define PAGE_POOL_SIZE 16
+#define ADDR_POOL_SIZE 64
+
+/*
+ * Build pblk's physical address format from the device geometry.
+ *
+ * The channel and LUN field widths are recomputed from the configured number
+ * of channels and LUNs per channel, both of which must be powers of two. The
+ * fields are then packed from bit 0 upwards in the order: sector, plane,
+ * channel, LUN, page, block. The total width is stored in
+ * pblk->ppaf_bitsize.
+ *
+ * Returns 0 on success, -EINVAL for a non power-of-two configuration.
+ */
+static int pblk_set_ppaf(struct pblk *pblk)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct nvm_addr_format ppaf = geo->ppaf;
+       int power_len;
+
+       /* Re-calculate channel and lun format to adapt to configuration */
+       power_len = get_count_order(geo->nr_chnls);
+       if (1 << power_len != geo->nr_chnls) {
+               pr_err("pblk: supports only power-of-two channel config.\n");
+               return -EINVAL;
+       }
+       ppaf.ch_len = power_len;
+
+       power_len = get_count_order(geo->luns_per_chnl);
+       if (1 << power_len != geo->luns_per_chnl) {
+               pr_err("pblk: supports only power-of-two LUN config.\n");
+               return -EINVAL;
+       }
+       ppaf.lun_len = power_len;
+
+       /* Each field starts where the previous one ends */
+       pblk->ppaf.sec_offset = 0;
+       pblk->ppaf.pln_offset = ppaf.sect_len;
+       pblk->ppaf.ch_offset = pblk->ppaf.pln_offset + ppaf.pln_len;
+       pblk->ppaf.lun_offset = pblk->ppaf.ch_offset + ppaf.ch_len;
+       pblk->ppaf.pg_offset = pblk->ppaf.lun_offset + ppaf.lun_len;
+       pblk->ppaf.blk_offset = pblk->ppaf.pg_offset + ppaf.pg_len;
+       /* Masks: <field length> one-bits shifted to the field's offset */
+       pblk->ppaf.sec_mask = (1ULL << ppaf.sect_len) - 1;
+       pblk->ppaf.pln_mask = ((1ULL << ppaf.pln_len) - 1) <<
+                                                       pblk->ppaf.pln_offset;
+       pblk->ppaf.ch_mask = ((1ULL << ppaf.ch_len) - 1) <<
+                                                       pblk->ppaf.ch_offset;
+       pblk->ppaf.lun_mask = ((1ULL << ppaf.lun_len) - 1) <<
+                                                       pblk->ppaf.lun_offset;
+       pblk->ppaf.pg_mask = ((1ULL << ppaf.pg_len) - 1) <<
+                                                       pblk->ppaf.pg_offset;
+       pblk->ppaf.blk_mask = ((1ULL << ppaf.blk_len) - 1) <<
+                                                       pblk->ppaf.blk_offset;
+
+       pblk->ppaf_bitsize = pblk->ppaf.blk_offset + ppaf.blk_len;
+
+       return 0;
+}
+
+/*
+ * Create the slab caches used by pblk, under pblk_lock held for writing. The
+ * cache pointers are file-scope statics. The line metadata cache is named
+ * after the instance's disk and sized by pblk->lm.sec_bitmap_len.
+ *
+ * If any creation fails, the caches created before it are destroyed and
+ * -ENOMEM is returned. Returns 0 on success.
+ *
+ * NOTE(review): the static pointers are overwritten on every call; confirm
+ * how multiple pblk instances are expected to share these caches.
+ */
+static int pblk_init_global_caches(struct pblk *pblk)
+{
+       char cache_name[PBLK_CACHE_NAME_LEN];
+
+       down_write(&pblk_lock);
+       pblk_blk_ws_cache = kmem_cache_create("pblk_blk_ws",
+                               sizeof(struct pblk_line_ws), 0, 0, NULL);
+       if (!pblk_blk_ws_cache) {
+               up_write(&pblk_lock);
+               return -ENOMEM;
+       }
+
+       pblk_rec_cache = kmem_cache_create("pblk_rec",
+                               sizeof(struct pblk_rec_ctx), 0, 0, NULL);
+       if (!pblk_rec_cache) {
+               kmem_cache_destroy(pblk_blk_ws_cache);
+               up_write(&pblk_lock);
+               return -ENOMEM;
+       }
+
+       pblk_r_rq_cache = kmem_cache_create("pblk_r_rq", pblk_r_rq_size,
+                               0, 0, NULL);
+       if (!pblk_r_rq_cache) {
+               kmem_cache_destroy(pblk_blk_ws_cache);
+               kmem_cache_destroy(pblk_rec_cache);
+               up_write(&pblk_lock);
+               return -ENOMEM;
+       }
+
+       pblk_w_rq_cache = kmem_cache_create("pblk_w_rq", pblk_w_rq_size,
+                               0, 0, NULL);
+       if (!pblk_w_rq_cache) {
+               kmem_cache_destroy(pblk_blk_ws_cache);
+               kmem_cache_destroy(pblk_rec_cache);
+               kmem_cache_destroy(pblk_r_rq_cache);
+               up_write(&pblk_lock);
+               return -ENOMEM;
+       }
+
+       /* Per-disk name for the line metadata cache */
+       snprintf(cache_name, sizeof(cache_name), "pblk_line_m_%s",
+                                                       pblk->disk->disk_name);
+       pblk_line_meta_cache = kmem_cache_create(cache_name,
+                               pblk->lm.sec_bitmap_len, 0, 0, NULL);
+       if (!pblk_line_meta_cache) {
+               kmem_cache_destroy(pblk_blk_ws_cache);
+               kmem_cache_destroy(pblk_rec_cache);
+               kmem_cache_destroy(pblk_r_rq_cache);
+               kmem_cache_destroy(pblk_w_rq_cache);
+               up_write(&pblk_lock);
+               return -ENOMEM;
+       }
+       up_write(&pblk_lock);
+
+       return 0;
+}
+
+/*
+ * Initialise pblk's core resources: write size limits derived from the
+ * device geometry, the slab caches, the mempools built on top of them, the
+ * auxiliary workqueue, the address format and the write buffer.
+ *
+ * Returns 0 on success, -EINVAL if the geometry cannot be supported and
+ * -ENOMEM for any later failure. On failure everything set up here is
+ * released again, including the slab caches; the mempools are destroyed
+ * before the caches that back them. The caller must not call
+ * pblk_core_free() after a failed init.
+ */
+static int pblk_core_init(struct pblk *pblk)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       int max_write_ppas;
+       int mod;
+
+       pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
+       max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
+       pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
+                               max_write_ppas : nvm_max_phys_sects(dev);
+       pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg *
+                                               geo->nr_planes * geo->nr_luns;
+
+       if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
+               pr_err("pblk: cannot support device max_phys_sect\n");
+               return -EINVAL;
+       }
+
+       /* A block must hold a whole number of minimum write units */
+       div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
+       if (mod) {
+               pr_err("pblk: bad configuration of sectors/pages\n");
+               return -EINVAL;
+       }
+
+       if (pblk_init_global_caches(pblk))
+               return -ENOMEM;
+
+       pblk->page_pool = mempool_create_page_pool(PAGE_POOL_SIZE, 0);
+       if (!pblk->page_pool)
+               goto free_global_caches;
+
+       pblk->line_ws_pool = mempool_create_slab_pool(geo->nr_luns,
+                                                       pblk_blk_ws_cache);
+       if (!pblk->line_ws_pool)
+               goto free_page_pool;
+
+       pblk->rec_pool = mempool_create_slab_pool(geo->nr_luns, pblk_rec_cache);
+       if (!pblk->rec_pool)
+               goto free_blk_ws_pool;
+
+       pblk->r_rq_pool = mempool_create_slab_pool(64, pblk_r_rq_cache);
+       if (!pblk->r_rq_pool)
+               goto free_rec_pool;
+
+       pblk->w_rq_pool = mempool_create_slab_pool(64, pblk_w_rq_cache);
+       if (!pblk->w_rq_pool)
+               goto free_r_rq_pool;
+
+       pblk->line_meta_pool =
+                       mempool_create_slab_pool(16, pblk_line_meta_cache);
+       if (!pblk->line_meta_pool)
+               goto free_w_rq_pool;
+
+       pblk->kw_wq = alloc_workqueue("pblk-aux-wq",
+                                       WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
+       if (!pblk->kw_wq)
+               goto free_line_meta_pool;
+
+       if (pblk_set_ppaf(pblk))
+               goto free_kw_wq;
+
+       if (pblk_rwb_init(pblk))
+               goto free_kw_wq;
+
+       INIT_LIST_HEAD(&pblk->compl_list);
+       return 0;
+
+free_kw_wq:
+       destroy_workqueue(pblk->kw_wq);
+free_line_meta_pool:
+       mempool_destroy(pblk->line_meta_pool);
+free_w_rq_pool:
+       mempool_destroy(pblk->w_rq_pool);
+free_r_rq_pool:
+       mempool_destroy(pblk->r_rq_pool);
+free_rec_pool:
+       mempool_destroy(pblk->rec_pool);
+free_blk_ws_pool:
+       mempool_destroy(pblk->line_ws_pool);
+free_page_pool:
+       mempool_destroy(pblk->page_pool);
+free_global_caches:
+       /* Created by pblk_init_global_caches(); nothing else releases them */
+       kmem_cache_destroy(pblk_blk_ws_cache);
+       kmem_cache_destroy(pblk_rec_cache);
+       kmem_cache_destroy(pblk_r_rq_cache);
+       kmem_cache_destroy(pblk_w_rq_cache);
+       kmem_cache_destroy(pblk_line_meta_cache);
+       return -ENOMEM;
+}
+
+/*
+ * Release the resources set up by pblk_core_init(): the auxiliary workqueue
+ * (if allocated), all mempools, and then the slab caches that back them.
+ */
+static void pblk_core_free(struct pblk *pblk)
+{
+       if (pblk->kw_wq)
+               destroy_workqueue(pblk->kw_wq);
+
+       mempool_destroy(pblk->page_pool);
+       mempool_destroy(pblk->line_ws_pool);
+       mempool_destroy(pblk->rec_pool);
+       mempool_destroy(pblk->r_rq_pool);
+       mempool_destroy(pblk->w_rq_pool);
+       mempool_destroy(pblk->line_meta_pool);
+
+       /* Caches go last: the slab mempools above allocate from them */
+       kmem_cache_destroy(pblk_blk_ws_cache);
+       kmem_cache_destroy(pblk_rec_cache);
+       kmem_cache_destroy(pblk_r_rq_cache);
+       kmem_cache_destroy(pblk_w_rq_cache);
+       kmem_cache_destroy(pblk_line_meta_cache);
+}
+
+/*
+ * Free the LUN array. The per-LUN bb_list buffers are not freed here.
+ */
+static void pblk_luns_free(struct pblk *pblk)
+{
+       kfree(pblk->luns);
+}
+
+/*
+ * For every line, call pblk_line_free() and free the line's block and erase
+ * bitmaps. The whole walk runs under l_mg->free_lock.
+ */
+static void pblk_lines_free(struct pblk *pblk)
+{
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       struct pblk_line *line;
+       int i;
+
+       spin_lock(&l_mg->free_lock);
+       for (i = 0; i < l_mg->nr_lines; i++) {
+               line = &pblk->lines[i];
+
+               pblk_line_free(pblk, line);
+               kfree(line->blk_bitmap);
+               kfree(line->erase_bitmap);
+       }
+       spin_unlock(&l_mg->free_lock);
+}
+
+/*
+ * Free the line management metadata: the bad block template and auxiliary
+ * bitmaps, the smeta/emeta buffers of each of the PBLK_DATA_LINES slots, and
+ * the lines array itself.
+ */
+static void pblk_line_meta_free(struct pblk *pblk)
+{
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       int i;
+
+       kfree(l_mg->bb_template);
+       kfree(l_mg->bb_aux);
+
+       for (i = 0; i < PBLK_DATA_LINES; i++) {
+               pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
+               pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
+       }
+
+       kfree(pblk->lines);
+}
+
+/*
+ * Read the bad block table of one LUN and store it in rlun->bb_list.
+ *
+ * A buffer of blks_per_lun * plane_mode bytes is allocated, filled through
+ * nvm_get_tgt_bb_tbl() and folded with nvm_bb_tbl_fold(). On success the
+ * buffer is handed over to rlun->bb_list; on failure it is freed.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the error reported
+ * by the table read or fold.
+ */
+static int pblk_bb_discovery(struct nvm_tgt_dev *dev, struct pblk_lun *rlun)
+{
+       struct nvm_geo *geo = &dev->geo;
+       struct ppa_addr ppa;
+       u8 *blks;
+       int nr_blks, ret;
+
+       nr_blks = geo->blks_per_lun * geo->plane_mode;
+       blks = kmalloc(nr_blks, GFP_KERNEL);
+       if (!blks)
+               return -ENOMEM;
+
+       /* Address the LUN only: channel and lun set, all other fields zero */
+       ppa.ppa = 0;
+       ppa.g.ch = rlun->bppa.g.ch;
+       ppa.g.lun = rlun->bppa.g.lun;
+
+       ret = nvm_get_tgt_bb_tbl(dev, ppa, blks);
+       if (ret)
+               goto out;
+
+       /* A negative result from the fold is an error code */
+       nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks);
+       if (nr_blks < 0) {
+               ret = nr_blks;
+               goto out;
+       }
+
+       rlun->bb_list = blks;
+
+       return 0;
+out:
+       kfree(blks);
+       return ret;
+}
+
+/*
+ * Allocate the block and erase bitmaps of a line and mark in blk_bitmap
+ * every block position whose LUN does not report the line's block as
+ * NVM_BLK_T_FREE in its bad block list.
+ *
+ * Returns the number of blocks marked, or -ENOMEM if a bitmap cannot be
+ * allocated.
+ */
+static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line)
+{
+       struct pblk_line_meta *lm = &pblk->lm;
+       int nr_bad = 0;
+       int blk;
+
+       line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
+       if (!line->blk_bitmap)
+               return -ENOMEM;
+
+       line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
+       if (!line->erase_bitmap) {
+               kfree(line->blk_bitmap);
+               return -ENOMEM;
+       }
+
+       for (blk = 0; blk < lm->blk_per_line; blk++) {
+               if (pblk->luns[blk].bb_list[line->id] != NVM_BLK_T_FREE) {
+                       set_bit(blk, line->blk_bitmap);
+                       nr_bad++;
+               }
+       }
+
+       return nr_bad;
+}
+
+/*
+ * Allocate pblk's LUN array and initialise each entry from the luns[] table
+ * passed in: base address, write semaphore and bad block list.
+ *
+ * LUNs are ordered so that consecutive entries fall on consecutive channels.
+ *
+ * Returns 0 on success, -EINVAL or -ENOMEM on setup failure, or the error
+ * from pblk_bb_discovery(). When discovery fails, the bb_list buffers of the
+ * LUNs already processed are freed.
+ *
+ * NOTE(review): pblk->luns itself is not freed on the discovery error path;
+ * confirm the caller releases it.
+ */
+static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct pblk_lun *rlun;
+       int i, ret;
+
+       /* TODO: Implement unbalanced LUN support */
+       if (geo->luns_per_chnl < 0) {
+               pr_err("pblk: unbalanced LUN config.\n");
+               return -EINVAL;
+       }
+
+       pblk->luns = kcalloc(geo->nr_luns, sizeof(struct pblk_lun), GFP_KERNEL);
+       if (!pblk->luns)
+               return -ENOMEM;
+
+       for (i = 0; i < geo->nr_luns; i++) {
+               /* Stripe across channels */
+               int ch = i % geo->nr_chnls;
+               int lun_raw = i / geo->nr_chnls;
+               int lunid = lun_raw + ch * geo->luns_per_chnl;
+
+               rlun = &pblk->luns[i];
+               rlun->bppa = luns[lunid];
+
+               sema_init(&rlun->wr_sem, 1);
+
+               ret = pblk_bb_discovery(dev, rlun);
+               if (ret) {
+                       /* Unwind the bad block lists obtained so far */
+                       while (--i >= 0)
+                               kfree(pblk->luns[i].bb_list);
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Select the line that will receive user data. Unless NVM_TARGET_FACTORY is
+ * set in @flags, the L2P table is recovered first and the line it returns, if
+ * any, is used; otherwise the first data line is requested.
+ *
+ * Returns 0 on success or -EFAULT if recovery fails or no line is available.
+ */
+static int pblk_lines_configure(struct pblk *pblk, int flags)
+{
+       struct pblk_line *line = NULL;
+
+       if (!(flags & NVM_TARGET_FACTORY)) {
+               line = pblk_recov_l2p(pblk);
+               if (IS_ERR(line)) {
+                       pr_err("pblk: could not recover l2p table\n");
+                       return -EFAULT;
+               }
+       }
+
+       /* Recovery already produced a line to continue on */
+       if (line)
+               return 0;
+
+       /* Configure next line for user data */
+       line = pblk_line_get_first_data(pblk);
+       if (!line) {
+               pr_err("pblk: line list corrupted\n");
+               return -EFAULT;
+       }
+
+       return 0;
+}
+
+/*
+ * Length in bytes of the end-of-line metadata: the line_emeta header, one u64
+ * for each sector of the line outside emeta, one u32 for each line known to
+ * the line manager, and the block bitmap. See comment over struct line_emeta
+ * definition.
+ */
+static unsigned int calc_emeta_len(struct pblk *pblk, struct pblk_line_meta *lm)
+{
+       size_t len = sizeof(struct line_emeta);
+
+       len += (lm->sec_per_line - lm->emeta_sec) * sizeof(u64);
+       len += pblk->l_mg.nr_lines * sizeof(u32);
+       len += lm->blk_bitmap_len;
+
+       return len;
+}
+
+/*
+ * Derive capacity and rate-limiter block counts from the number of free
+ * blocks. Over-provisioning is fixed at 20%: the exported capacity covers the
+ * remaining share of @nr_free_blks, while the rate limiter accounts for all
+ * of them.
+ */
+static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
+{
+       struct nvm_geo *geo = &pblk->dev->geo;
+       sector_t provisioned = nr_free_blks;
+
+       pblk->over_pct = 20;
+
+       provisioned *= (100 - pblk->over_pct);
+       sector_div(provisioned, 100);
+
+       /* User-visible capacity only considers the provisioned blocks... */
+       pblk->capacity = provisioned * geo->sec_per_blk;
+
+       /* ...while internally pblk manages every free block */
+       pblk->rl.total_blocks = nr_free_blks;
+       pblk->rl.nr_secs = nr_free_blks * geo->sec_per_blk;
+       atomic_set(&pblk->rl.free_blocks, nr_free_blks);
+}
+
+/*
+ * Compute the line geometry (sectors/blocks per line, bitmap lengths, smeta
+ * and emeta sizes), allocate the per-data-line metadata buffers and the bad
+ * block templates, and build the array of lines, sorting each line into the
+ * free or bad list according to its bad block count.
+ *
+ * The per-LUN bad block lists are consumed here and freed on both the success
+ * and the failure path.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or -EINVAL if a line
+ * reports more bad blocks than it has blocks. On failure everything allocated
+ * by this function is released.
+ */
+static int pblk_lines_init(struct pblk *pblk)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       struct pblk_line_meta *lm = &pblk->lm;
+       struct pblk_line *line;
+       unsigned int smeta_len, emeta_len;
+       long nr_bad_blks, nr_meta_blks, nr_free_blks;
+       int bb_distance;
+       int i;
+       int ret;
+
+       /* calc_emeta_len() reads l_mg->nr_lines, so set it before sizing emeta */
+       l_mg->nr_lines = geo->blks_per_lun;
+
+       lm->sec_per_line = geo->sec_per_blk * geo->nr_luns;
+       lm->blk_per_line = geo->nr_luns;
+       lm->blk_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
+       lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long);
+       lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
+       lm->high_thrs = lm->sec_per_line / 2;
+       lm->mid_thrs = lm->sec_per_line / 4;
+
+       /* Calculate necessary pages for smeta. See comment over struct
+        * line_smeta definition
+        */
+       i = 1;
+add_smeta_page:
+       lm->smeta_sec = i * geo->sec_per_pl;
+       lm->smeta_len = lm->smeta_sec * geo->sec_size;
+
+       smeta_len = sizeof(struct line_smeta) +
+                               PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
+       if (smeta_len > lm->smeta_len) {
+               i++;
+               goto add_smeta_page;
+       }
+
+       /* Calculate necessary pages for emeta. See comment over struct
+        * line_emeta definition
+        */
+       i = 1;
+add_emeta_page:
+       lm->emeta_sec = i * geo->sec_per_pl;
+       lm->emeta_len = lm->emeta_sec * geo->sec_size;
+
+       emeta_len = calc_emeta_len(pblk, lm);
+       if (emeta_len > lm->emeta_len) {
+               i++;
+               goto add_emeta_page;
+       }
+       lm->emeta_bb = geo->nr_luns - i;
+
+       nr_meta_blks = (lm->smeta_sec + lm->emeta_sec +
+                               (geo->sec_per_blk / 2)) / geo->sec_per_blk;
+       lm->min_blk_line = nr_meta_blks + 1;
+
+       l_mg->log_line = l_mg->data_line = NULL;
+       l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
+       l_mg->nr_free_lines = 0;
+       bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
+
+       /* smeta is always small enough to fit on a kmalloc memory allocation,
+        * emeta depends on the number of LUNs allocated to the pblk instance
+        */
+       l_mg->smeta_alloc_type = PBLK_KMALLOC_META;
+       for (i = 0; i < PBLK_DATA_LINES; i++) {
+               l_mg->sline_meta[i].meta = kmalloc(lm->smeta_len, GFP_KERNEL);
+               if (!l_mg->sline_meta[i].meta) {
+                       ret = -ENOMEM;
+                       goto fail_free_smeta;
+               }
+       }
+
+       if (lm->emeta_len > KMALLOC_MAX_CACHE_SIZE) {
+               l_mg->emeta_alloc_type = PBLK_VMALLOC_META;
+
+               for (i = 0; i < PBLK_DATA_LINES; i++) {
+                       l_mg->eline_meta[i].meta = vmalloc(lm->emeta_len);
+                       if (!l_mg->eline_meta[i].meta) {
+                               ret = -ENOMEM;
+                               goto fail_free_emeta;
+                       }
+               }
+       } else {
+               l_mg->emeta_alloc_type = PBLK_KMALLOC_META;
+
+               for (i = 0; i < PBLK_DATA_LINES; i++) {
+                       l_mg->eline_meta[i].meta =
+                                       kmalloc(lm->emeta_len, GFP_KERNEL);
+                       if (!l_mg->eline_meta[i].meta) {
+                               ret = -ENOMEM;
+                               goto fail_free_emeta;
+                       }
+               }
+       }
+
+       l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
+       if (!l_mg->bb_template) {
+               ret = -ENOMEM;
+               goto fail_free_meta;
+       }
+
+       l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
+       if (!l_mg->bb_aux) {
+               ret = -ENOMEM;
+               goto fail_free_bb_template;
+       }
+
+       bb_distance = (geo->nr_luns) * geo->sec_per_pl;
+       for (i = 0; i < lm->sec_per_line; i += bb_distance)
+               bitmap_set(l_mg->bb_template, i, geo->sec_per_pl);
+
+       INIT_LIST_HEAD(&l_mg->free_list);
+       INIT_LIST_HEAD(&l_mg->corrupt_list);
+       INIT_LIST_HEAD(&l_mg->bad_list);
+       INIT_LIST_HEAD(&l_mg->gc_full_list);
+       INIT_LIST_HEAD(&l_mg->gc_high_list);
+       INIT_LIST_HEAD(&l_mg->gc_mid_list);
+       INIT_LIST_HEAD(&l_mg->gc_low_list);
+       INIT_LIST_HEAD(&l_mg->gc_empty_list);
+
+       l_mg->gc_lists[0] = &l_mg->gc_high_list;
+       l_mg->gc_lists[1] = &l_mg->gc_mid_list;
+       l_mg->gc_lists[2] = &l_mg->gc_low_list;
+
+       spin_lock_init(&l_mg->free_lock);
+       spin_lock_init(&l_mg->gc_lock);
+
+       pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
+                                                               GFP_KERNEL);
+       if (!pblk->lines) {
+               ret = -ENOMEM;
+               goto fail_free_bb_aux;
+       }
+
+       nr_free_blks = 0;
+       for (i = 0; i < l_mg->nr_lines; i++) {
+               int blk_in_line;
+
+               line = &pblk->lines[i];
+
+               line->pblk = pblk;
+               line->id = i;
+               line->type = PBLK_LINETYPE_FREE;
+               line->state = PBLK_LINESTATE_FREE;
+               line->gc_group = PBLK_LINEGC_NONE;
+               spin_lock_init(&line->lock);
+
+               nr_bad_blks = pblk_bb_line(pblk, line);
+               if (nr_bad_blks < 0) {
+                       /* pblk_bb_line() already released what it allocated */
+                       ret = nr_bad_blks;
+                       goto fail_free_lines;
+               }
+               if (nr_bad_blks > lm->blk_per_line) {
+                       /* Line i is not covered by the unwind loop below */
+                       kfree(line->blk_bitmap);
+                       kfree(line->erase_bitmap);
+                       ret = -EINVAL;
+                       goto fail_free_lines;
+               }
+
+               blk_in_line = lm->blk_per_line - nr_bad_blks;
+               if (blk_in_line < lm->min_blk_line) {
+                       line->state = PBLK_LINESTATE_BAD;
+                       list_add_tail(&line->list, &l_mg->bad_list);
+                       continue;
+               }
+
+               nr_free_blks += blk_in_line;
+               atomic_set(&line->blk_in_line, blk_in_line);
+
+               l_mg->nr_free_lines++;
+               list_add_tail(&line->list, &l_mg->free_list);
+       }
+
+       pblk_set_provision(pblk, nr_free_blks);
+
+       sema_init(&pblk->erase_sem, 1);
+
+       /* Cleanup per-LUN bad block lists - managed within lines on run-time */
+       for (i = 0; i < geo->nr_luns; i++)
+               kfree(pblk->luns[i].bb_list);
+
+       return 0;
+fail_free_lines:
+       /* Lines [0, i) went through pblk_bb_line() and own both bitmaps */
+       while (--i >= 0) {
+               kfree(pblk->lines[i].blk_bitmap);
+               kfree(pblk->lines[i].erase_bitmap);
+       }
+       kfree(pblk->lines);
+fail_free_bb_aux:
+       kfree(l_mg->bb_aux);
+fail_free_bb_template:
+       kfree(l_mg->bb_template);
+fail_free_meta:
+       for (i = 0; i < PBLK_DATA_LINES; i++) {
+               pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
+               pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
+       }
+       goto fail;
+fail_free_emeta:
+       /* Only eline_meta entries [0, i) were allocated */
+       while (--i >= 0)
+               pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
+       /* All smeta buffers exist at this point; release every one of them */
+       i = PBLK_DATA_LINES;
+fail_free_smeta:
+       /* Only sline_meta entries [0, i) were allocated */
+       while (--i >= 0)
+               kfree(l_mg->sline_meta[i].meta);
+fail:
+       for (i = 0; i < geo->nr_luns; i++)
+               kfree(pblk->luns[i].bb_list);
+
+       return ret;
+}
+
+/*
+ * Create the writer kthread and arm the write timer to fire 100 ms from now.
+ * The thread is only created here; pblk_init() wakes it once the rest of the
+ * instance is set up.
+ *
+ * The thread is created before the timer is armed: when this function fails,
+ * pblk_init() unwinds without calling pblk_writer_stop(), so no timer may be
+ * left pending on an instance that is about to be freed.
+ *
+ * Returns 0 on success or the error encoded by kthread_create().
+ */
+static int pblk_writer_init(struct pblk *pblk)
+{
+       pblk->writer_ts = kthread_create(pblk_write_ts, pblk, "pblk-writer-t");
+       if (IS_ERR(pblk->writer_ts)) {
+               pr_err("pblk: could not allocate writer kthread\n");
+               return PTR_ERR(pblk->writer_ts);
+       }
+
+       setup_timer(&pblk->wtimer, pblk_write_timer_fn, (unsigned long)pblk);
+       mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(100));
+
+       return 0;
+}
+
+/*
+ * Stop the writer kthread, if pblk->writer_ts is set, and then delete the
+ * write timer.
+ * NOTE(review): del_timer() does not wait for a handler that is already
+ * running on another CPU - confirm whether del_timer_sync() is required
+ * before the pblk instance is freed.
+ */
+static void pblk_writer_stop(struct pblk *pblk)
+{
+       if (pblk->writer_ts)
+               kthread_stop(pblk->writer_ts);
+       del_timer(&pblk->wtimer);
+}
+
+/*
+ * Release the resources of a pblk instance by calling, in order, the LUN,
+ * line, line metadata, core and L2P free helpers, and finally free the pblk
+ * structure itself. @pblk must not be used after this returns.
+ */
+static void pblk_free(struct pblk *pblk)
+{
+       pblk_luns_free(pblk);
+       pblk_lines_free(pblk);
+       pblk_line_meta_free(pblk);
+       pblk_core_free(pblk);
+       pblk_l2p_free(pblk);
+
+       kfree(pblk);
+}
+
+/*
+ * Bring the write path to a stop before the instance is freed. In order:
+ * flush the writer, stop the writer thread and timer, sync the L2P entries
+ * still pointing into the write buffer, run pblk_recov_pad(), and free the
+ * write buffer and the rate limiter.
+ */
+static void pblk_tear_down(struct pblk *pblk)
+{
+       pblk_flush_writer(pblk);
+       pblk_writer_stop(pblk);
+       pblk_rb_sync_l2p(&pblk->rwb);
+       pblk_recov_pad(pblk);
+       pblk_rwb_free(pblk);
+       pblk_rl_free(&pblk->rl);
+
+       pr_debug("pblk: consistent tear down\n");
+}
+
+/*
+ * Target exit callback (installed as tt_pblk.exit). @private is the pblk
+ * instance returned by pblk_init(). With pblk_lock held for writing, GC is
+ * shut down, the instance is torn down and then freed.
+ */
+static void pblk_exit(void *private)
+{
+       struct pblk *pblk = private;
+
+       down_write(&pblk_lock);
+       pblk_gc_exit(pblk);
+       pblk_tear_down(pblk);
+       pblk_free(pblk);
+       up_write(&pblk_lock);
+}
+
+/*
+ * Target capacity callback (installed as tt_pblk.capacity). Returns
+ * pblk->capacity, as computed by pblk_set_provision(), multiplied by
+ * NR_PHY_IN_LOG.
+ */
+static sector_t pblk_capacity(void *private)
+{
+       struct pblk *pblk = private;
+
+       return pblk->capacity * NR_PHY_IN_LOG;
+}
+
+static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
+                      int flags)
+{
+       struct nvm_geo *geo = &dev->geo;
+       struct request_queue *bqueue = dev->q;
+       struct request_queue *tqueue = tdisk->queue;
+       struct pblk *pblk;
+       int ret;
+
+       if (dev->identity.dom & NVM_RSP_L2P) {
+               pr_err("pblk: device-side L2P table not supported. (%x)\n",
+                                                       dev->identity.dom);
+               return ERR_PTR(-EINVAL);
+       }
+
+       pblk = kzalloc(sizeof(struct pblk), GFP_KERNEL);
+       if (!pblk)
+               return ERR_PTR(-ENOMEM);
+
+       pblk->dev = dev;
+       pblk->disk = tdisk;
+
+       spin_lock_init(&pblk->trans_lock);
+       spin_lock_init(&pblk->lock);
+
+       if (flags & NVM_TARGET_FACTORY)
+               pblk_setup_uuid(pblk);
+
+#ifdef CONFIG_NVM_DEBUG
+       atomic_long_set(&pblk->inflight_writes, 0);
+       atomic_long_set(&pblk->padded_writes, 0);
+       atomic_long_set(&pblk->padded_wb, 0);
+       atomic_long_set(&pblk->nr_flush, 0);
+       atomic_long_set(&pblk->req_writes, 0);
+       atomic_long_set(&pblk->sub_writes, 0);
+       atomic_long_set(&pblk->sync_writes, 0);
+       atomic_long_set(&pblk->compl_writes, 0);
+       atomic_long_set(&pblk->inflight_reads, 0);
+       atomic_long_set(&pblk->sync_reads, 0);
+       atomic_long_set(&pblk->recov_writes, 0);
+       atomic_long_set(&pblk->recov_writes, 0);
+       atomic_long_set(&pblk->recov_gc_writes, 0);
+#endif
+
+       atomic_long_set(&pblk->read_failed, 0);
+       atomic_long_set(&pblk->read_empty, 0);
+       atomic_long_set(&pblk->read_high_ecc, 0);
+       atomic_long_set(&pblk->read_failed_gc, 0);
+       atomic_long_set(&pblk->write_failed, 0);
+       atomic_long_set(&pblk->erase_failed, 0);
+
+       ret = pblk_luns_init(pblk, dev->luns);
+       if (ret) {
+               pr_err("pblk: could not initialize luns\n");
+               goto fail;
+       }
+
+       ret = pblk_lines_init(pblk);
+       if (ret) {
+               pr_err("pblk: could not initialize lines\n");
+               goto fail_free_luns;
+       }
+
+       ret = pblk_core_init(pblk);
+       if (ret) {
+               pr_err("pblk: could not initialize core\n");
+               goto fail_free_line_meta;
+       }
+
+       ret = pblk_l2p_init(pblk);
+       if (ret) {
+               pr_err("pblk: could not initialize maps\n");
+               goto fail_free_core;
+       }
+
+       ret = pblk_lines_configure(pblk, flags);
+       if (ret) {
+               pr_err("pblk: could not configure lines\n");
+               goto fail_free_l2p;
+       }
+
+       ret = pblk_writer_init(pblk);
+       if (ret) {
+               pr_err("pblk: could not initialize write thread\n");
+               goto fail_free_lines;
+       }
+
+       ret = pblk_gc_init(pblk);
+       if (ret) {
+               pr_err("pblk: could not initialize gc\n");
+               goto fail_stop_writer;
+       }
+
+       /* inherit the size from the underlying device */
+       blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue));
+       blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue));
+
+       blk_queue_write_cache(tqueue, true, false);
+
+       tqueue->limits.discard_granularity = geo->pgs_per_blk * geo->pfpg_size;
+       tqueue->limits.discard_alignment = 0;
+       blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9);
+       queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, tqueue);
+
+       pr_info("pblk init: luns:%u, lines:%d, secs:%llu, buf entries:%u\n",
+                       geo->nr_luns, pblk->l_mg.nr_lines,
+                       (unsigned long long)pblk->rl.nr_secs,
+                       pblk->rwb.nr_entries);
+
+       wake_up_process(pblk->writer_ts);
+       return pblk;
+
+fail_stop_writer:
+       pblk_writer_stop(pblk);
+fail_free_lines:
+       pblk_lines_free(pblk);
+fail_free_l2p:
+       pblk_l2p_free(pblk);
+fail_free_core:
+       pblk_core_free(pblk);
+fail_free_line_meta:
+       pblk_line_meta_free(pblk);
+fail_free_luns:
+       pblk_luns_free(pblk);
+fail:
+       kfree(pblk);
+       return ERR_PTR(ret);
+}
+
+/*
+ * physical block device target
+ *
+ * Target type descriptor registered with the lightnvm core by
+ * pblk_module_init(). It names the target "pblk", version 1.0.0, and wires
+ * the request, capacity, init/exit and sysfs init/exit callbacks.
+ */
+static struct nvm_tgt_type tt_pblk = {
+       .name           = "pblk",
+       .version        = {1, 0, 0},
+
+       .make_rq        = pblk_make_rq,
+       .capacity       = pblk_capacity,
+
+       .init           = pblk_init,
+       .exit           = pblk_exit,
+
+       .sysfs_init     = pblk_sysfs_init,
+       .sysfs_exit     = pblk_sysfs_exit,
+};
+
+/* Module entry point: register the pblk target type with the lightnvm core. */
+static int __init pblk_module_init(void)
+{
+       int ret = nvm_register_tgt_type(&tt_pblk);
+
+       return ret;
+}
+
+/*
+ * Module exit point: unregister the pblk target type. Marked __exit, the
+ * counterpart of __init on pblk_module_init(), so the function can be
+ * discarded when the driver is built in.
+ */
+static void __exit pblk_module_exit(void)
+{
+       nvm_unregister_tgt_type(&tt_pblk);
+}
+
+/* Module entry/exit hooks and module metadata */
+module_init(pblk_module_init);
+module_exit(pblk_module_exit);
+MODULE_AUTHOR("Javier Gonzalez <javier@cnexlabs.com>");
+MODULE_AUTHOR("Matias Bjorling <matias@cnexlabs.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Physical Block-Device for Open-Channel SSDs");
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c

new file mode 100644 (file)

index 0000000..17c1695
--- /dev/null
+++ b/drivers/lightnvm/pblk-map.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2016 CNEX Labs
+ * Initial release: Javier Gonzalez <javier@cnexlabs.com>
+ *                  Matias Bjorling <matias@cnexlabs.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * pblk-map.c - pblk's lba-ppa mapping strategy
+ *
+ */
+
+#include "pblk.h"
+
+/*
+ * Map pblk->min_write_pgs consecutive sectors of the current data line.
+ *
+ * @sentry:     write buffer position of the first entry being mapped
+ * @ppa_list:   output, one device address per mapped sector
+ * @lun_bitmap: passed through to pblk_down_rq()
+ * @meta_list:  output, per-sector metadata; receives the lba of each sector
+ * @valid_secs: number of leading sectors that carry write buffer data; the
+ *              remaining ones are recorded as ADDR_EMPTY padding
+ *
+ * If the line is full after the allocation it is replaced; when no
+ * replacement line is returned the function exits without calling
+ * pblk_down_rq().
+ */
+static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
+                              struct ppa_addr *ppa_list,
+                              unsigned long *lun_bitmap,
+                              struct pblk_sec_meta *meta_list,
+                              unsigned int valid_secs)
+{
+       struct pblk_line *line = pblk_line_get_data(pblk);
+       struct line_emeta *emeta = line->emeta;
+       struct pblk_w_ctx *w_ctx;
+       __le64 *lba_list = pblk_line_emeta_to_lbas(emeta);
+       u64 paddr;
+       int nr_secs = pblk->min_write_pgs;
+       int i;
+
+       /* First address of the nr_secs sectors reserved in the line */
+       paddr = pblk_alloc_page(pblk, line, nr_secs);
+
+       for (i = 0; i < nr_secs; i++, paddr++) {
+               /* ppa to be sent to the device */
+               ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
+
+               /* Write context for target bio completion on write buffer. Note
+                * that the write buffer is protected by the sync backpointer,
+                * and a single writer thread have access to each specific entry
+                * at a time. Thus, it is safe to modify the context for the
+                * entry we are setting up for submission without taking any
+                * lock or memory barrier.
+                */
+               if (i < valid_secs) {
+                       /* Each mapped entry holds a line reference, dropped
+                        * when its L2P entry is updated from the write buffer
+                        */
+                       kref_get(&line->ref);
+                       w_ctx = pblk_rb_w_ctx(&pblk->rwb, sentry + i);
+                       w_ctx->ppa = ppa_list[i];
+                       meta_list[i].lba = cpu_to_le64(w_ctx->lba);
+                       lba_list[paddr] = cpu_to_le64(w_ctx->lba);
+                       le64_add_cpu(&line->emeta->nr_valid_lbas, 1);
+               } else {
+                       /* Padding sector: no lba, invalidated right away */
+                       meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
+                       lba_list[paddr] = cpu_to_le64(ADDR_EMPTY);
+                       pblk_map_pad_invalidate(pblk, line, paddr);
+               }
+       }
+
+       if (pblk_line_is_full(line)) {
+               line = pblk_line_replace_data(pblk);
+               if (!line)
+                       return;
+       }
+
+       pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap);
+}
+
+/*
+ * Map the sectors of @rqd, starting at index @off, in chunks of
+ * pblk->min_write_pgs. A chunk that would run past @valid_secs is mapped with
+ * only the remainder (valid_secs % min_write_pgs) marked as valid.
+ */
+void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
+                unsigned long *lun_bitmap, unsigned int valid_secs,
+                unsigned int off)
+{
+       struct pblk_sec_meta *meta_list = rqd->meta_list;
+       int min = pblk->min_write_pgs;
+       int i = off;
+
+       while (i < rqd->nr_ppas) {
+               unsigned int map_secs;
+
+               if (i + min > valid_secs)
+                       map_secs = valid_secs % min;
+               else
+                       map_secs = min;
+
+               pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
+                                       lun_bitmap, &meta_list[i], map_secs);
+               i += min;
+       }
+}
+
+/*
+ * Map the sectors of @rqd like pblk_map_rq() and, along the way, try to pick
+ * one block of the next data line to be erased.
+ *
+ * After each chunk is mapped, the LUN it landed on is checked against the
+ * next line's erase bitmap. For the first LUN not yet marked there, the erase
+ * semaphore is tried; if it is obtained, @erase_ppa is set to that LUN with
+ * the next line's id as block, and the rest of the request is mapped through
+ * pblk_map_rq(). If the loop ends with @erase_ppa still empty, the first
+ * unmarked position of the erase bitmap is used instead.
+ *
+ * only if erase_ppa is set, acquire erase semaphore
+ *
+ * NOTE(review): e_line is dereferenced without a NULL check - confirm that
+ * pblk_line_get_data_next() cannot return NULL here.
+ */
+void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
+                      unsigned int sentry, unsigned long *lun_bitmap,
+                      unsigned int valid_secs, struct ppa_addr *erase_ppa)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct pblk_line *e_line = pblk_line_get_data_next(pblk);
+       struct pblk_sec_meta *meta_list = rqd->meta_list;
+       unsigned int map_secs;
+       int min = pblk->min_write_pgs;
+       int i, erase_lun;
+
+       for (i = 0; i < rqd->nr_ppas; i += min) {
+               map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
+               pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
+                                       lun_bitmap, &meta_list[i], map_secs);
+
+               /* Position of this chunk's LUN in the line's erase bitmap */
+               erase_lun = rqd->ppa_list[i].g.lun * geo->nr_chnls +
+                                                       rqd->ppa_list[i].g.ch;
+
+               if (!test_bit(erase_lun, e_line->erase_bitmap)) {
+                       /* Semaphore busy: keep mapping, retry on a later chunk */
+                       if (down_trylock(&pblk->erase_sem))
+                               continue;
+
+                       set_bit(erase_lun, e_line->erase_bitmap);
+                       atomic_dec(&e_line->left_eblks);
+                       *erase_ppa = rqd->ppa_list[i];
+                       erase_ppa->g.blk = e_line->id;
+
+                       /* Avoid evaluating e_line->left_eblks */
+                       return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
+                                                       valid_secs, i + min);
+               }
+       }
+
+       /* Erase blocks that are bad in this line but might not be in next */
+       if (unlikely(ppa_empty(*erase_ppa))) {
+               struct pblk_line_meta *lm = &pblk->lm;
+
+               i = find_first_zero_bit(e_line->erase_bitmap, lm->blk_per_line);
+               if (i == lm->blk_per_line)
+                       return;
+
+               set_bit(i, e_line->erase_bitmap);
+               atomic_dec(&e_line->left_eblks);
+               *erase_ppa = pblk->luns[i].bppa; /* set ch and lun */
+               erase_ppa->g.blk = e_line->id;
+       }
+}
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c

new file mode 100644 (file)

index 0000000..045384d
--- /dev/null
+++ b/drivers/lightnvm/pblk-rb.c
@@ -0,0 +1,852 @@
+/*
+ * Copyright (C) 2016 CNEX Labs
+ * Initial release: Javier Gonzalez <javier@cnexlabs.com>
+ *
+ * Based upon the circular ringbuffer.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * pblk-rb.c - pblk's write buffer
+ */
+
+#include <linux/circ_buf.h>
+
+#include "pblk.h"
+
+static DECLARE_RWSEM(pblk_rb_lock);
+
+/*
+ * Free every page set on rb->pages together with its descriptor, leaving the
+ * list empty. Takes pblk_rb_lock for writing, so it must not be called with
+ * that lock already held.
+ */
+void pblk_rb_data_free(struct pblk_rb *rb)
+{
+       struct pblk_rb_pages *page_set, *next;
+
+       down_write(&pblk_rb_lock);
+       list_for_each_entry_safe(page_set, next, &rb->pages, list) {
+               unsigned long addr;
+
+               addr = (unsigned long)page_address(page_set->pages);
+               list_del(&page_set->list);
+               free_pages(addr, page_set->order);
+               kfree(page_set);
+       }
+       up_write(&pblk_rb_lock);
+}
+
+/*
+ * Initialize ring buffer. The data and metadata buffers must be previously
+ * allocated and their size must be a power of two
+ * (Documentation/circular-buffers.txt)
+ */
+int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base,
+                unsigned int power_size, unsigned int power_seg_sz)
+{
+       struct pblk *pblk = container_of(rb, struct pblk, rwb);
+       unsigned int init_entry = 0;
+       unsigned int alloc_order = power_size;
+       unsigned int max_order = MAX_ORDER - 1;
+       unsigned int order, iter;
+
+       down_write(&pblk_rb_lock);
+       rb->entries = rb_entry_base;
+       rb->seg_size = (1 << power_seg_sz);
+       rb->nr_entries = (1 << power_size);
+       rb->mem = rb->subm = rb->sync = rb->l2p_update = 0;
+       rb->sync_point = EMPTY_ENTRY;
+
+       spin_lock_init(&rb->w_lock);
+       spin_lock_init(&rb->s_lock);
+
+       INIT_LIST_HEAD(&rb->pages);
+
+       if (alloc_order >= max_order) {
+               order = max_order;
+               iter = (1 << (alloc_order - max_order));
+       } else {
+               order = alloc_order;
+               iter = 1;
+       }
+
+       do {
+               struct pblk_rb_entry *entry;
+               struct pblk_rb_pages *page_set;
+               void *kaddr;
+               unsigned long set_size;
+               int i;
+
+               page_set = kmalloc(sizeof(struct pblk_rb_pages), GFP_KERNEL);
+               if (!page_set) {
+                       up_write(&pblk_rb_lock);
+                       return -ENOMEM;
+               }
+
+               page_set->order = order;
+               page_set->pages = alloc_pages(GFP_KERNEL, order);
+               if (!page_set->pages) {
+                       kfree(page_set);
+                       pblk_rb_data_free(rb);
+                       up_write(&pblk_rb_lock);
+                       return -ENOMEM;
+               }
+               kaddr = page_address(page_set->pages);
+
+               entry = &rb->entries[init_entry];
+               entry->data = kaddr;
+               entry->cacheline = pblk_cacheline_to_addr(init_entry++);
+               entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;
+
+               set_size = (1 << order);
+               for (i = 1; i < set_size; i++) {
+                       entry = &rb->entries[init_entry];
+                       entry->cacheline = pblk_cacheline_to_addr(init_entry++);
+                       entry->data = kaddr + (i * rb->seg_size);
+                       entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;
+                       bio_list_init(&entry->w_ctx.bios);
+               }
+
+               list_add_tail(&page_set->list, &rb->pages);
+               iter--;
+       } while (iter > 0);
+       up_write(&pblk_rb_lock);
+
+#ifdef CONFIG_NVM_DEBUG
+       atomic_set(&rb->inflight_sync_point, 0);
+#endif
+
+       /*
+        * Initialize rate-limiter, which controls access to the write buffer
+        * but user and GC I/O
+        */
+       pblk_rl_init(&pblk->rl, rb->nr_entries);
+
+       return 0;
+}
+
+/*
+ * pblk_rb_calculate_size -- calculate the size of the write buffer
+ *
+ * The result is 1 << get_count_order(@nr_entries), with a lower bound of
+ * 128 entries.
+ */
+unsigned int pblk_rb_calculate_size(unsigned int nr_entries)
+{
+       int order = get_count_order(nr_entries);
+
+       /* Alloc a write buffer that can at least fit 128 entries */
+       if (order < 7)
+               order = 7;
+
+       return (1 << order);
+}
+
+/* Return the entry array that was handed to pblk_rb_init(). */
+void *pblk_rb_entries_ref(struct pblk_rb *rb)
+{
+       void *entries = rb->entries;
+
+       return entries;
+}
+
+/*
+ * Return a write context to the writable state. Spins until the context is
+ * flagged PBLK_SUBMITTED_ENTRY, then resets its flags to PBLK_WRITABLE_ENTRY
+ * and clears its ppa.
+ */
+static void clean_wctx(struct pblk_w_ctx *w_ctx)
+{
+       int flags;
+
+       /* Wait for the entry to be marked as submitted */
+       do {
+               flags = READ_ONCE(w_ctx->flags);
+       } while (!(flags & PBLK_SUBMITTED_ENTRY));
+
+       /* Release flags on context. Protect from writes and reads */
+       smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
+       pblk_ppa_set_empty(&w_ctx->ppa);
+}
+
+/*
+ * Thin wrappers over CIRC_CNT()/CIRC_SPACE() from <linux/circ_buf.h>. The rb
+ * argument of pblk_rb_ring_space() is not used by the expansion.
+ */
+#define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size)
+#define pblk_rb_ring_space(rb, head, tail, size) \
+                                       (CIRC_SPACE(head, tail, size))
+
+/*
+ * Free space in the buffer, measured between the write pointer (mem) and the
+ * sync back pointer, i.e. the entries already synchronized to the media.
+ */
+static unsigned int pblk_rb_space(struct pblk_rb *rb)
+{
+       unsigned int head = READ_ONCE(rb->mem);
+       unsigned int tail = READ_ONCE(rb->sync);
+       unsigned int space;
+
+       space = pblk_rb_ring_space(rb, head, tail, rb->nr_entries);
+
+       return space;
+}
+
+/*
+ * Number of entries between the submission pointer and the write pointer,
+ * i.e. the entries that are available to be sent to the media.
+ */
+unsigned int pblk_rb_read_count(struct pblk_rb *rb)
+{
+       unsigned int head = READ_ONCE(rb->mem);
+       unsigned int tail = READ_ONCE(rb->subm);
+       unsigned int count;
+
+       count = pblk_rb_ring_count(head, tail, rb->nr_entries);
+
+       return count;
+}
+
+/*
+ * Advance the submission pointer by @nr_entries, wrapping at the buffer size.
+ * Returns the value the pointer had before being advanced.
+ */
+unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
+{
+       unsigned int subm = READ_ONCE(rb->subm);
+
+       /* Commit read means updating submission pointer */
+       smp_store_release(&rb->subm,
+                               (subm + nr_entries) & (rb->nr_entries - 1));
+
+       return subm;
+}
+
+/*
+ * Move @to_update entries, starting at *@l2p_upd, out of the write buffer
+ * mapping: point their L2P entry at the device address, drop the line
+ * reference held for the entry, reset the write context and advance
+ * *@l2p_upd (wrapping at the buffer size). Always returns 0.
+ */
+static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
+                               unsigned int to_update)
+{
+       struct pblk *pblk = container_of(rb, struct pblk, rwb);
+       unsigned int done;
+
+       for (done = 0; done < to_update; done++) {
+               struct pblk_rb_entry *entry = &rb->entries[*l2p_upd];
+               struct pblk_w_ctx *w_ctx = &entry->w_ctx;
+               struct pblk_line *line;
+
+               pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
+                                                       entry->cacheline);
+
+               /* The reference was taken when the entry was mapped */
+               line = &pblk->lines[pblk_tgt_ppa_to_line(w_ctx->ppa)];
+               kref_put(&line->ref, pblk_line_put);
+
+               clean_wctx(w_ctx);
+               *l2p_upd = (*l2p_upd + 1) & (rb->nr_entries - 1);
+       }
+
+       return 0;
+}
+
+/*
+ * When we move the l2p_update pointer, we update the l2p table - lookups will
+ * point to the physical address instead of to the cacheline in the write buffer
+ * from this moment on.
+ *
+ * Only the entries that the next @nr_entries writes are about to overwrite
+ * are updated; nothing is done while there is more space than that ahead of
+ * the l2p_update pointer. Must be called with rb->w_lock held.
+ */
+static int pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int nr_entries,
+                             unsigned int mem, unsigned int sync)
+{
+       unsigned int space;
+
+       lockdep_assert_held(&rb->w_lock);
+
+       /* Update l2p only as buffer entries are being overwritten */
+       space = pblk_rb_ring_space(rb, mem, rb->l2p_update, rb->nr_entries);
+       if (space > nr_entries)
+               return 0;
+
+       /* l2p_update used exclusively under rb->w_lock */
+       return __pblk_rb_update_l2p(rb, &rb->l2p_update, nr_entries - space);
+}
+
+/*
+ * Update the l2p entry for all sectors stored on the write buffer. This means
+ * that all future lookups to the l2p table will point to a device address, not
+ * to the cacheline in the write buffer.
+ *
+ * Covers the entries between the l2p_update pointer and the sync pointer,
+ * under rb->w_lock.
+ */
+void pblk_rb_sync_l2p(struct pblk_rb *rb)
+{
+       unsigned int sync_pos;
+       unsigned int pending;
+
+       spin_lock(&rb->w_lock);
+
+       /* Protect from reads and writes */
+       sync_pos = smp_load_acquire(&rb->sync);
+
+       pending = pblk_rb_ring_count(sync_pos, rb->l2p_update, rb->nr_entries);
+       __pblk_rb_update_l2p(rb, &rb->l2p_update, pending);
+
+       spin_unlock(&rb->w_lock);
+}
+
+/*
+ * Fill one ring buffer entry: copy rb->seg_size bytes from @data into the
+ * entry's data segment and record the lba and ppa of @w_ctx in the entry's
+ * write context. The entry's flags are left for the caller to publish.
+ */
+static void __pblk_rb_write_entry(struct pblk_rb *rb, void *data,
+                                 struct pblk_w_ctx w_ctx,
+                                 struct pblk_rb_entry *entry)
+{
+       struct pblk_w_ctx *dst = &entry->w_ctx;
+
+       memcpy(entry->data, data, rb->seg_size);
+
+       dst->lba = w_ctx.lba;
+       dst->ppa = w_ctx.ppa;
+}
+
+void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data,
+                             struct pblk_w_ctx w_ctx, unsigned int ring_pos)
+{
+       struct pblk *pblk = container_of(rb, struct pblk, rwb);
+       struct pblk_rb_entry *entry;
+       int flags;
+
+       entry = &rb->entries[ring_pos];
+       flags = READ_ONCE(entry->w_ctx.flags);
+#ifdef CONFIG_NVM_DEBUG
+       /* Caller must guarantee that the entry is free */
+       BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
+#endif
+
+       __pblk_rb_write_entry(rb, data, w_ctx, entry);
+
+       pblk_update_map_cache(pblk, w_ctx.lba, entry->cacheline);
+       flags = w_ctx.flags | PBLK_WRITTEN_DATA;
+
+       /* Release flags on write context. Protect from writes */
+       smp_store_release(&entry->w_ctx.flags, flags);
+}
+
+void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
+                           struct pblk_w_ctx w_ctx, struct pblk_line *gc_line,
+                           unsigned int ring_pos)
+{
+       struct pblk *pblk = container_of(rb, struct pblk, rwb);
+       struct pblk_rb_entry *entry;
+       int flags;
+
+       entry = &rb->entries[ring_pos];
+       flags = READ_ONCE(entry->w_ctx.flags);
+#ifdef CONFIG_NVM_DEBUG
+       /* Caller must guarantee that the entry is free */
+       BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
+#endif
+
+       __pblk_rb_write_entry(rb, data, w_ctx, entry);
+
+       if (!pblk_update_map_gc(pblk, w_ctx.lba, entry->cacheline, gc_line))
+               entry->w_ctx.lba = ADDR_EMPTY;
+
+       flags = w_ctx.flags | PBLK_WRITTEN_DATA;
+
+       /* Release flags on write context. Protect from writes */
+       smp_store_release(&entry->w_ctx.flags, flags);
+}
+
+static int pblk_rb_sync_point_set(struct pblk_rb *rb, struct bio *bio,
+                                 unsigned int pos)
+{
+       struct pblk_rb_entry *entry;
+       unsigned int subm, sync_point;
+       int flags;
+
+       subm = READ_ONCE(rb->subm);
+
+#ifdef CONFIG_NVM_DEBUG
+       atomic_inc(&rb->inflight_sync_point);
+#endif
+
+       if (pos == subm)
+               return 0;
+
+       sync_point = (pos == 0) ? (rb->nr_entries - 1) : (pos - 1);
+       entry = &rb->entries[sync_point];
+
+       flags = READ_ONCE(entry->w_ctx.flags);
+       flags |= PBLK_FLUSH_ENTRY;
+
+       /* Release flags on context. Protect from writes */
+       smp_store_release(&entry->w_ctx.flags, flags);
+
+       /* Protect syncs */
+       smp_store_release(&rb->sync_point, sync_point);
+
+       spin_lock_irq(&rb->s_lock);
+       bio_list_add(&entry->w_ctx.bios, bio);
+       spin_unlock_irq(&rb->s_lock);
+
+       return 1;
+}
+
+/*
+ * Check whether @nr_entries can be written at the current mem pointer.
+ *
+ * Returns 0 if the ring does not have enough free space or if
+ * pblk_rb_update_l2p() reports a non-zero result; otherwise stores the current
+ * mem pointer in @pos and returns 1. rb->mem itself is not advanced here.
+ */
+static int __pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
+                              unsigned int *pos)
+{
+       unsigned int sync = READ_ONCE(rb->sync);
+       unsigned int mem = READ_ONCE(rb->mem);
+
+       if (pblk_rb_ring_space(rb, mem, sync, rb->nr_entries) < nr_entries ||
+           pblk_rb_update_l2p(rb, nr_entries, mem, sync))
+               return 0;
+
+       *pos = mem;
+
+       return 1;
+}
+
+/*
+ * Reserve @nr_entries in the ring buffer. On success the start position is
+ * returned through @pos, rb->mem is advanced past the reservation (wrapping
+ * with the nr_entries - 1 mask) and 1 is returned; 0 otherwise.
+ */
+static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
+                            unsigned int *pos)
+{
+       unsigned int next_mem;
+
+       if (!__pblk_rb_may_write(rb, nr_entries, pos))
+               return 0;
+
+       next_mem = (*pos + nr_entries) & (rb->nr_entries - 1);
+
+       /* Protect from read count */
+       smp_store_release(&rb->mem, next_mem);
+       return 1;
+}
+
+static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
+                                  unsigned int *pos, struct bio *bio,
+                                  int *io_ret)
+{
+       unsigned int mem;
+
+       if (!__pblk_rb_may_write(rb, nr_entries, pos))
+               return 0;
+
+       mem = (*pos + nr_entries) & (rb->nr_entries - 1);
+       *io_ret = NVM_IO_DONE;
+
+       if (bio->bi_opf & REQ_PREFLUSH) {
+               struct pblk *pblk = container_of(rb, struct pblk, rwb);
+
+#ifdef CONFIG_NVM_DEBUG
+               atomic_long_inc(&pblk->nr_flush);
+#endif
+               if (pblk_rb_sync_point_set(&pblk->rwb, bio, mem))
+                       *io_ret = NVM_IO_OK;
+       }
+
+       /* Protect from read count */
+       smp_store_release(&rb->mem, mem);
+       return 1;
+}
+
+/*
+ * Under rb->w_lock, check that (i) the rate-limiter grants the user I/O enough
+ * budget in the write buffer and (ii) the write buffer has space for the
+ * incoming I/O. Both checks and the resulting reservation happen within the
+ * same critical section.
+ *
+ * Returns NVM_IO_REQUEUE if either check fails; otherwise the I/O status
+ * produced by pblk_rb_may_write_flush() (NVM_IO_OK or NVM_IO_DONE), with the
+ * reserved start position stored in @pos.
+ */
+int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
+                          unsigned int nr_entries, unsigned int *pos)
+{
+       struct pblk *pblk = container_of(rb, struct pblk, rwb);
+       int ret = NVM_IO_REQUEUE;
+       int flush_done;
+
+       spin_lock(&rb->w_lock);
+
+       if (!pblk_rl_user_may_insert(&pblk->rl, nr_entries))
+               goto unlock;
+
+       if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &flush_done))
+               goto unlock;
+
+       pblk_rl_user_in(&pblk->rl, nr_entries);
+       ret = flush_done;
+
+unlock:
+       spin_unlock(&rb->w_lock);
+       return ret;
+}
+
+/*
+ * GC counterpart of the user reservation path: under rb->w_lock, check the GC
+ * rate-limiter budget and the available buffer space, then reserve
+ * @nr_entries.
+ *
+ * Returns 1 with the start position in @pos on success, 0 otherwise.
+ */
+int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries,
+                        unsigned int *pos)
+{
+       struct pblk *pblk = container_of(rb, struct pblk, rwb);
+       int reserved = 0;
+
+       spin_lock(&rb->w_lock);
+
+       if (!pblk_rl_gc_may_insert(&pblk->rl, nr_entries))
+               goto unlock;
+
+       if (!pblk_rb_may_write(rb, nr_entries, pos))
+               goto unlock;
+
+       pblk_rl_gc_in(&pblk->rl, nr_entries);
+       reserved = 1;
+
+unlock:
+       spin_unlock(&rb->w_lock);
+       return reserved;
+}
+
+/*
+ * Add the data page of every entry on @list to @bio, unlinking each entry from
+ * the list once its page has been added. Processing stops at the first error.
+ *
+ * Returns the number of entries added to the bio.
+ *
+ * The caller of this function must ensure that the backpointer will not
+ * overwrite the entries passed on the list.
+ */
+unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
+                                     struct list_head *list,
+                                     unsigned int max)
+{
+       struct pblk_rb_entry *entry, *next;
+       struct page *page;
+       unsigned int read = 0;
+
+       list_for_each_entry_safe(entry, next, list, index) {
+               if (read > max) {
+                       pr_err("pblk: too many entries on list\n");
+                       break;
+               }
+
+               page = virt_to_page(entry->data);
+               if (!page) {
+                       pr_err("pblk: could not allocate write bio page\n");
+                       break;
+               }
+
+               if (bio_add_page(bio, page, rb->seg_size, 0) != rb->seg_size) {
+                       pr_err("pblk: could not add page to write bio\n");
+                       break;
+               }
+
+               list_del(&entry->index);
+               read++;
+       }
+
+       return read;
+}
+
+/*
+ * Read available entries on rb and add them to the given bio. To avoid a memory
+ * copy, a page reference to the write buffer is used to be added to the bio.
+ *
+ * This function is used by the write thread to form the write bio that will
+ * persist data on the write buffer to the media.
+ *
+ * @pos is the first ring position to read, @nr_entries the number of sectors
+ * the request is sized for and @count the number of entries actually taken
+ * from the buffer. When @count is smaller than @nr_entries, the difference is
+ * recorded in @c_ctx as padding.
+ *
+ * Returns the number of entries read (min(@count, @nr_entries)) on success,
+ * or 0 if a page could not be added to the bio. On that error path the
+ * rate-limiter is not updated, since pblk_rl_out() is only reached after the
+ * loop completes.
+ */
+unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
+                                struct pblk_c_ctx *c_ctx,
+                                unsigned int pos,
+                                unsigned int nr_entries,
+                                unsigned int count)
+{
+       struct pblk *pblk = container_of(rb, struct pblk, rwb);
+       struct pblk_rb_entry *entry;
+       struct page *page;
+       unsigned int pad = 0, read = 0, to_read = nr_entries;
+       unsigned int user_io = 0, gc_io = 0;
+       unsigned int i;
+       int flags;
+       int ret;
+
+       /* Fewer entries available than requested: the remainder is padding */
+       if (count < nr_entries) {
+               pad = nr_entries - count;
+               to_read = count;
+       }
+
+       /* Record where the request starts and how it splits into data/pad */
+       c_ctx->sentry = pos;
+       c_ctx->nr_valid = to_read;
+       c_ctx->nr_padded = pad;
+
+       for (i = 0; i < to_read; i++) {
+               entry = &rb->entries[pos];
+
+               /* A write has been allowed into the buffer, but data is still
+                * being copied to it. It is ok to busy wait.
+                */
+try:
+               /* Spin until the writer publishes PBLK_WRITTEN_DATA */
+               flags = READ_ONCE(entry->w_ctx.flags);
+               if (!(flags & PBLK_WRITTEN_DATA))
+                       goto try;
+
+               /* Account the entry per I/O type for the rate-limiter */
+               if (flags & PBLK_IOTYPE_USER)
+                       user_io++;
+               else if (flags & PBLK_IOTYPE_GC)
+                       gc_io++;
+               else
+                       WARN(1, "pblk: unknown IO type\n");
+
+               /* NOTE(review): confirm whether virt_to_page() can actually
+                * return NULL for a write buffer address.
+                */
+               page = virt_to_page(entry->data);
+               if (!page) {
+                       pr_err("pblk: could not allocate write bio page\n");
+                       flags &= ~PBLK_WRITTEN_DATA;
+                       flags |= PBLK_SUBMITTED_ENTRY;
+                       /* Release flags on context. Protect from writes */
+                       smp_store_release(&entry->w_ctx.flags, flags);
+                       goto out;
+               }
+
+               ret = bio_add_page(bio, page, rb->seg_size, 0);
+               if (ret != rb->seg_size) {
+                       pr_err("pblk: could not add page to write bio\n");
+                       flags &= ~PBLK_WRITTEN_DATA;
+                       flags |= PBLK_SUBMITTED_ENTRY;
+                       /* Release flags on context. Protect from writes */
+                       smp_store_release(&entry->w_ctx.flags, flags);
+                       goto out;
+               }
+
+               /* Entry carries a flush: clear the buffer-wide sync point if it
+                * refers to this position, and drop the flag from the entry.
+                */
+               if (flags & PBLK_FLUSH_ENTRY) {
+                       unsigned int sync_point;
+
+                       sync_point = READ_ONCE(rb->sync_point);
+                       if (sync_point == pos) {
+                               /* Protect syncs */
+                               smp_store_release(&rb->sync_point, EMPTY_ENTRY);
+                       }
+
+                       flags &= ~PBLK_FLUSH_ENTRY;
+#ifdef CONFIG_NVM_DEBUG
+                       atomic_dec(&rb->inflight_sync_point);
+#endif
+               }
+
+               /* Transition the entry from written to submitted */
+               flags &= ~PBLK_WRITTEN_DATA;
+               flags |= PBLK_SUBMITTED_ENTRY;
+
+               /* Release flags on context. Protect from writes */
+               smp_store_release(&entry->w_ctx.flags, flags);
+
+               /* Advance to the next ring position, wrapping with the mask */
+               pos = (pos + 1) & (rb->nr_entries - 1);
+       }
+
+       /* Only a fully formed bio reports a non-zero read count */
+       read = to_read;
+       pblk_rl_out(&pblk->rl, user_io, gc_io);
+#ifdef CONFIG_NVM_DEBUG
+       atomic_long_add(pad, &((struct pblk *)
+                       (container_of(rb, struct pblk, rwb)))->padded_writes);
+#endif
+out:
+       return read;
+}
+
+/*
+ * Copy to bio only if the lba matches the one on the given cache entry.
+ * Otherwise, it means that the entry has been overwritten, and the bio should
+ * be directed to disk.
+ */
+int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
+                       u64 pos, int bio_iter)
+{
+       struct pblk_rb_entry *entry;
+       struct pblk_w_ctx *w_ctx;
+       void *data;
+       int flags;
+       int ret = 1;
+
+       spin_lock(&rb->w_lock);
+
+#ifdef CONFIG_NVM_DEBUG
+       /* Caller must ensure that the access will not cause an overflow */
+       BUG_ON(pos >= rb->nr_entries);
+#endif
+       entry = &rb->entries[pos];
+       w_ctx = &entry->w_ctx;
+       flags = READ_ONCE(w_ctx->flags);
+
+       /* Check if the entry has been overwritten or is scheduled to be */
+       if (w_ctx->lba != lba || flags & PBLK_WRITABLE_ENTRY) {
+               ret = 0;
+               goto out;
+       }
+
+       /* Only advance the bio if it hasn't been advanced already. If advanced,
+        * this bio is at least a partial bio (i.e., it has partially been
+        * filled with data from the cache). If part of the data resides on the
+        * media, we will read later on
+        */
+       if (unlikely(!bio->bi_iter.bi_idx))
+               bio_advance(bio, bio_iter * PBLK_EXPOSED_PAGE_SIZE);
+
+       data = bio_data(bio);
+       memcpy(data, entry->data, rb->seg_size);
+
+out:
+       spin_unlock(&rb->w_lock);
+       return ret;
+}
+
+/*
+ * Return the write context of the entry at @pos, after wrapping @pos into the
+ * ring with the nr_entries - 1 mask.
+ */
+struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos)
+{
+       unsigned int idx = pos & (rb->nr_entries - 1);
+       struct pblk_rb_entry *entry = &rb->entries[idx];
+
+       return &entry->w_ctx;
+}
+
+/*
+ * Take rb->s_lock and return the current sync pointer. When @flags is
+ * non-NULL the irqsave variant is used and the saved state is stored in
+ * *@flags; otherwise the plain irq-disabling variant is used.
+ */
+unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags)
+       __acquires(&rb->s_lock)
+{
+       if (!flags)
+               spin_lock_irq(&rb->s_lock);
+       else
+               spin_lock_irqsave(&rb->s_lock, *flags);
+
+       return rb->sync;
+}
+
+/*
+ * Release rb->s_lock taken by pblk_rb_sync_init(). @flags must match what was
+ * passed there: non-NULL restores the saved irq state, NULL re-enables irqs.
+ */
+void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags)
+       __releases(&rb->s_lock)
+{
+       lockdep_assert_held(&rb->s_lock);
+
+       if (!flags)
+               spin_unlock_irq(&rb->s_lock);
+       else
+               spin_unlock_irqrestore(&rb->s_lock, *flags);
+}
+
+/*
+ * Move the sync pointer forward by @nr_entries positions, wrapping with the
+ * nr_entries - 1 mask, and publish the new value. Must be called with
+ * rb->s_lock held. Returns the new sync pointer.
+ */
+unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries)
+{
+       unsigned int sync;
+       unsigned int left = nr_entries;
+
+       lockdep_assert_held(&rb->s_lock);
+
+       sync = READ_ONCE(rb->sync);
+
+       while (left--)
+               sync = (sync + 1) & (rb->nr_entries - 1);
+
+       /* Protect from counts */
+       smp_store_release(&rb->sync, sync);
+
+       return sync;
+}
+
+/*
+ * Return the number of entries from the subm pointer up to and including the
+ * current sync point, or 0 when no sync point is set (EMPTY_ENTRY).
+ */
+unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb)
+{
+       unsigned int sync_point;
+       unsigned int subm;
+
+       /* Protect syncs */
+       sync_point = smp_load_acquire(&rb->sync_point);
+       if (sync_point == EMPTY_ENTRY)
+               return 0;
+
+       subm = READ_ONCE(rb->subm);
+
+       /* The sync point itself counts as a sector to sync */
+       return pblk_rb_ring_count(sync_point, subm, rb->nr_entries) + 1;
+}
+
+/*
+ * Intended to scan from the current position of the sync pointer to find the
+ * entry that corresponds to the given ppa, which is necessary since write
+ * requests can be completed out of order. The assumption is that the ppa is
+ * close to the sync pointer, thus the search will not take long.
+ *
+ * The caller of this function must guarantee that the sync pointer will not
+ * reach the entry while it is using the metadata associated with it. With this
+ * assumption in mind, there is no need to take the sync lock.
+ *
+ * As written, the body only walks the positions between sync and subm without
+ * inspecting @ppa or any entry, and always returns NULL.
+ */
+struct pblk_rb_entry *pblk_rb_sync_scan_entry(struct pblk_rb *rb,
+                                             struct ppa_addr *ppa)
+{
+       unsigned int pos = READ_ONCE(rb->sync);
+       unsigned int subm = READ_ONCE(rb->subm);
+       unsigned int remaining = pblk_rb_ring_count(subm, pos, rb->nr_entries);
+
+       while (remaining--)
+               pos = (pos + 1) & (rb->nr_entries - 1);
+
+       return NULL;
+}
+
+/*
+ * Sanity check used before tearing the ring buffer down.
+ *
+ * Returns 0 when all ring pointers (mem, subm, sync, l2p_update) coincide and
+ * no sync point is pending. If they do not, returns 1 when the entries array
+ * or any entry's data pointer is NULL, and 0 otherwise.
+ *
+ * Both rb->w_lock and rb->s_lock are held for the duration of the check.
+ */
+int pblk_rb_tear_down_check(struct pblk_rb *rb)
+{
+       struct pblk_rb_entry *entry;
+       int i;
+       int ret = 0;
+
+       spin_lock(&rb->w_lock);
+       spin_lock_irq(&rb->s_lock);
+
+       if ((rb->mem == rb->subm) && (rb->subm == rb->sync) &&
+                               (rb->sync == rb->l2p_update) &&
+                               (rb->sync_point == EMPTY_ENTRY)) {
+               goto out;
+       }
+
+       if (!rb->entries) {
+               ret = 1;
+               goto out;
+       }
+
+       for (i = 0; i < rb->nr_entries; i++) {
+               entry = &rb->entries[i];
+
+               if (!entry->data) {
+                       ret = 1;
+                       goto out;
+               }
+       }
+
+out:
+       /* Release in reverse acquisition order: s_lock (re-enabling irqs)
+        * first, then the outer w_lock.
+        */
+       spin_unlock_irq(&rb->s_lock);
+       spin_unlock(&rb->w_lock);
+
+       return ret;
+}
+
+/* Wrap @pos into the ring using the nr_entries - 1 mask. */
+unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos)
+{
+       unsigned int mask = rb->nr_entries - 1;
+
+       return pos & mask;
+}
+
+/* Return non-zero when @pos lies outside the ring (pos >= nr_entries). */
+int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos)
+{
+       if (pos >= rb->nr_entries)
+               return 1;
+
+       return 0;
+}
+
+ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf)
+{
+       struct pblk *pblk = container_of(rb, struct pblk, rwb);
+       struct pblk_c_ctx *c;
+       ssize_t offset;
+       int queued_entries = 0;
+
+       spin_lock_irq(&rb->s_lock);
+       list_for_each_entry(c, &pblk->compl_list, list)
+               queued_entries++;
+       spin_unlock_irq(&rb->s_lock);
+
+       if (rb->sync_point != EMPTY_ENTRY)
+               offset = scnprintf(buf, PAGE_SIZE,
+                       "%u\t%u\t%u\t%u\t%u\t%u\t%u - %u/%u/%u - %d\n",
+                       rb->nr_entries,
+                       rb->mem,
+                       rb->subm,
+                       rb->sync,
+                       rb->l2p_update,
+#ifdef CONFIG_NVM_DEBUG
+                       atomic_read(&rb->inflight_sync_point),
+#else
+                       0,
+#endif
+                       rb->sync_point,
+                       pblk_rb_read_count(rb),
+                       pblk_rb_space(rb),
+                       pblk_rb_sync_point_count(rb),
+                       queued_entries);
+       else
+               offset = scnprintf(buf, PAGE_SIZE,
+                       "%u\t%u\t%u\t%u\t%u\t%u\tNULL - %u/%u/%u - %d\n",
+                       rb->nr_entries,
+                       rb->mem,
+                       rb->subm,
+                       rb->sync,
+                       rb->l2p_update,
+#ifdef CONFIG_NVM_DEBUG
+                       atomic_read(&rb->inflight_sync_point),
+#else
+                       0,
+#endif
+                       pblk_rb_read_count(rb),
+                       pblk_rb_space(rb),
+                       pblk_rb_sync_point_count(rb),
+                       queued_entries);
+
+       return offset;
+}
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c

new file mode 100644 (file)

index 0000000..4a12f14
--- /dev/null
+++ b/drivers/lightnvm/pblk-read.c
@@ -0,0 +1,529 @@
+/*
+ * Copyright (C) 2016 CNEX Labs
+ * Initial release: Javier Gonzalez <javier@cnexlabs.com>
+ *                  Matias Bjorling <matias@cnexlabs.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * pblk-read.c - pblk's read path
+ */
+
+#include "pblk.h"
+
+/*
+ * There is no guarantee that the value read from cache has not been updated and
+ * resides at another location in the cache. We guarantee though that if the
+ * value is read from the cache, it belongs to the mapped lba. In order to
+ * guarantee and order between writes and reads are ordered, a flush must be
+ * issued.
+ */
+static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
+                               sector_t lba, struct ppa_addr ppa,
+                               int bio_iter)
+{
+#ifdef CONFIG_NVM_DEBUG
+       /* Callers must ensure that the ppa points to a cache address */
+       BUG_ON(pblk_ppa_empty(ppa));
+       BUG_ON(!pblk_addr_in_cache(ppa));
+#endif
+
+       return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba,
+                                       pblk_addr_to_cacheline(ppa), bio_iter);
+}
+
+static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
+                                unsigned long *read_bitmap)
+{
+       struct bio *bio = rqd->bio;
+       struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS];
+       sector_t blba = pblk_get_lba(bio);
+       int nr_secs = rqd->nr_ppas;
+       int advanced_bio = 0;
+       int i, j = 0;
+
+       /* logic error: lba out-of-bounds. Ignore read request */
+       if (blba + nr_secs >= pblk->rl.nr_secs) {
+               WARN(1, "pblk: read lbas out of bounds\n");
+               return;
+       }
+
+       pblk_lookup_l2p_seq(pblk, ppas, blba, nr_secs);
+
+       for (i = 0; i < nr_secs; i++) {
+               struct ppa_addr p = ppas[i];
+               sector_t lba = blba + i;
+
+retry:
+               if (pblk_ppa_empty(p)) {
+                       WARN_ON(test_and_set_bit(i, read_bitmap));
+                       continue;
+               }
+
+               /* Try to read from write buffer. The address is later checked
+                * on the write buffer to prevent retrieving overwritten data.
+                */
+               if (pblk_addr_in_cache(p)) {
+                       if (!pblk_read_from_cache(pblk, bio, lba, p, i)) {
+                               pblk_lookup_l2p_seq(pblk, &p, lba, 1);
+                               goto retry;
+                       }
+                       WARN_ON(test_and_set_bit(i, read_bitmap));
+                       advanced_bio = 1;
+               } else {
+                       /* Read from media non-cached sectors */
+                       rqd->ppa_list[j++] = p;
+               }
+
+               if (advanced_bio)
+                       bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
+       }
+
+#ifdef CONFIG_NVM_DEBUG
+       atomic_long_add(nr_secs, &pblk->inflight_reads);
+#endif
+}
+
+/*
+ * Set the read mode flags on @rqd and submit it. Any submission error is
+ * folded into NVM_IO_ERR; success is reported as NVM_IO_OK.
+ */
+static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd)
+{
+       rqd->flags = pblk_set_read_mode(pblk);
+
+       if (pblk_submit_io(pblk, rqd))
+               return NVM_IO_ERR;
+
+       return NVM_IO_OK;
+}
+
+/*
+ * Completion handler for read requests. Logs read errors, releases the DMA
+ * ppa list (for multi-sector requests), drops the reference on the request's
+ * bio, completes and releases the original bio when the request was issued on
+ * a clone (r_ctx->orig_bio set), and finally frees the request itself.
+ */
+static void pblk_end_io_read(struct nvm_rq *rqd)
+{
+       struct pblk *pblk = rqd->private;
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd);
+       struct bio *bio = rqd->bio;
+
+       /* With CONFIG_NVM_DEBUG, a bio error without a request error is
+        * reported as an inconsistency.
+        */
+       if (rqd->error)
+               pblk_log_read_err(pblk, rqd);
+#ifdef CONFIG_NVM_DEBUG
+       else
+               WARN_ONCE(bio->bi_error, "pblk: corrupted read error\n");
+#endif
+
+       /* The ppa list is only freed for requests with more than one ppa */
+       if (rqd->nr_ppas > 1)
+               nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
+
+       bio_put(bio);
+       /* Request ran on an internal clone: complete the original bio too */
+       if (r_ctx->orig_bio) {
+#ifdef CONFIG_NVM_DEBUG
+               WARN_ONCE(r_ctx->orig_bio->bi_error,
+                                               "pblk: corrupted read bio\n");
+#endif
+               bio_endio(r_ctx->orig_bio);
+               bio_put(r_ctx->orig_bio);
+       }
+
+#ifdef CONFIG_NVM_DEBUG
+       atomic_long_add(rqd->nr_ppas, &pblk->sync_reads);
+       atomic_long_sub(rqd->nr_ppas, &pblk->inflight_reads);
+#endif
+
+       pblk_free_rqd(pblk, rqd, READ);
+}
+
+static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
+                                     unsigned int bio_init_idx,
+                                     unsigned long *read_bitmap)
+{
+       struct bio *new_bio, *bio = rqd->bio;
+       struct bio_vec src_bv, dst_bv;
+       void *ppa_ptr = NULL;
+       void *src_p, *dst_p;
+       dma_addr_t dma_ppa_list = 0;
+       int nr_secs = rqd->nr_ppas;
+       int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
+       int i, ret, hole;
+       DECLARE_COMPLETION_ONSTACK(wait);
+
+       new_bio = bio_alloc(GFP_KERNEL, nr_holes);
+       if (!new_bio) {
+               pr_err("pblk: could not alloc read bio\n");
+               return NVM_IO_ERR;
+       }
+
+       if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes))
+               goto err;
+
+       if (nr_holes != new_bio->bi_vcnt) {
+               pr_err("pblk: malformed bio\n");
+               goto err;
+       }
+
+       new_bio->bi_iter.bi_sector = 0; /* internal bio */
+       bio_set_op_attrs(new_bio, REQ_OP_READ, 0);
+       new_bio->bi_private = &wait;
+       new_bio->bi_end_io = pblk_end_bio_sync;
+
+       rqd->bio = new_bio;
+       rqd->nr_ppas = nr_holes;
+       rqd->end_io = NULL;
+
+       if (unlikely(nr_secs > 1 && nr_holes == 1)) {
+               ppa_ptr = rqd->ppa_list;
+               dma_ppa_list = rqd->dma_ppa_list;
+               rqd->ppa_addr = rqd->ppa_list[0];
+       }
+
+       ret = pblk_submit_read_io(pblk, rqd);
+       if (ret) {
+               bio_put(rqd->bio);
+               pr_err("pblk: read IO submission failed\n");
+               goto err;
+       }
+
+       if (!wait_for_completion_io_timeout(&wait,
+                               msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+               pr_err("pblk: partial read I/O timed out\n");
+       }
+
+       if (rqd->error) {
+               atomic_long_inc(&pblk->read_failed);
+#ifdef CONFIG_NVM_DEBUG
+               pblk_print_failed_rqd(pblk, rqd, rqd->error);
+#endif
+       }
+
+       if (unlikely(nr_secs > 1 && nr_holes == 1)) {
+               rqd->ppa_list = ppa_ptr;
+               rqd->dma_ppa_list = dma_ppa_list;
+       }
+
+       /* Fill the holes in the original bio */
+       i = 0;
+       hole = find_first_zero_bit(read_bitmap, nr_secs);
+       do {
+               src_bv = new_bio->bi_io_vec[i++];
+               dst_bv = bio->bi_io_vec[bio_init_idx + hole];
+
+               src_p = kmap_atomic(src_bv.bv_page);
+               dst_p = kmap_atomic(dst_bv.bv_page);
+
+               memcpy(dst_p + dst_bv.bv_offset,
+                       src_p + src_bv.bv_offset,
+                       PBLK_EXPOSED_PAGE_SIZE);
+
+               kunmap_atomic(src_p);
+               kunmap_atomic(dst_p);
+
+               mempool_free(src_bv.bv_page, pblk->page_pool);
+
+               hole = find_next_zero_bit(read_bitmap, nr_secs, hole + 1);
+       } while (hole < nr_secs);
+
+       bio_put(new_bio);
+
+       /* Complete the original bio and associated request */
+       rqd->bio = bio;
+       rqd->nr_ppas = nr_secs;
+       rqd->private = pblk;
+
+       bio_endio(bio);
+       pblk_end_io_read(rqd);
+       return NVM_IO_OK;
+
+err:
+       /* Free allocated pages in new bio */
+       pblk_bio_free_pages(pblk, bio, 0, new_bio->bi_vcnt);
+       rqd->private = pblk;
+       pblk_end_io_read(rqd);
+       return NVM_IO_ERR;
+}
+
+static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd,
+                        unsigned long *read_bitmap)
+{
+       struct bio *bio = rqd->bio;
+       struct ppa_addr ppa;
+       sector_t lba = pblk_get_lba(bio);
+
+       /* logic error: lba out-of-bounds. Ignore read request */
+       if (lba >= pblk->rl.nr_secs) {
+               WARN(1, "pblk: read lba out of bounds\n");
+               return;
+       }
+
+       pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
+
+#ifdef CONFIG_NVM_DEBUG
+       atomic_long_inc(&pblk->inflight_reads);
+#endif
+
+retry:
+       if (pblk_ppa_empty(ppa)) {
+               WARN_ON(test_and_set_bit(0, read_bitmap));
+               return;
+       }
+
+       /* Try to read from write buffer. The address is later checked on the
+        * write buffer to prevent retrieving overwritten data.
+        */
+       if (pblk_addr_in_cache(ppa)) {
+               if (!pblk_read_from_cache(pblk, bio, lba, ppa, 0)) {
+                       pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
+                       goto retry;
+               }
+               WARN_ON(test_and_set_bit(0, read_bitmap));
+       } else {
+               rqd->ppa_addr = ppa;
+       }
+}
+
+/*
+ * Entry point of the read path for a user bio.
+ *
+ * Every sector is first resolved against the l2p table and the write buffer.
+ * Then, depending on how many sectors could be served from the cache:
+ *  - all of them: the bio is completed immediately;
+ *  - none of them: the bio is cloned and the clone is submitted to the device,
+ *    the original bio being completed from the request's end_io;
+ *  - some of them: the missing sectors are read synchronously through
+ *    pblk_fill_partial_read_bio().
+ *
+ * Returns NVM_IO_OK on success and NVM_IO_ERR on failure. On the failure
+ * paths handled in this function, the extra bio reference, the ppa list and
+ * the request are released before returning; the bio itself is not ended.
+ */
+int pblk_submit_read(struct pblk *pblk, struct bio *bio)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       unsigned int nr_secs = pblk_get_secs(bio);
+       struct nvm_rq *rqd;
+       unsigned long read_bitmap; /* Max 64 ppas per request */
+       unsigned int bio_init_idx;
+       int ret = NVM_IO_ERR;
+
+       if (nr_secs > PBLK_MAX_REQ_ADDRS)
+               return NVM_IO_ERR;
+
+       bitmap_zero(&read_bitmap, nr_secs);
+
+       rqd = pblk_alloc_rqd(pblk, READ);
+       if (IS_ERR(rqd)) {
+               pr_err_ratelimited("pblk: not able to alloc rqd");
+               return NVM_IO_ERR;
+       }
+
+       rqd->opcode = NVM_OP_PREAD;
+       rqd->bio = bio;
+       rqd->nr_ppas = nr_secs;
+       rqd->private = pblk;
+       rqd->end_io = pblk_end_io_read;
+
+       /* Save the index for this bio's start. This is needed in case
+        * we need to fill a partial read.
+        */
+       bio_init_idx = pblk_get_bi_idx(bio);
+
+       if (nr_secs > 1) {
+               rqd->ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+                                               &rqd->dma_ppa_list);
+               if (!rqd->ppa_list) {
+                       pr_err("pblk: not able to allocate ppa list\n");
+                       goto fail_rqd_free;
+               }
+
+               pblk_read_ppalist_rq(pblk, rqd, &read_bitmap);
+       } else {
+               pblk_read_rq(pblk, rqd, &read_bitmap);
+       }
+
+       /* Extra reference, dropped by pblk_end_io_read() on completion */
+       bio_get(bio);
+       if (bitmap_full(&read_bitmap, nr_secs)) {
+               bio_endio(bio);
+               pblk_end_io_read(rqd);
+               return NVM_IO_OK;
+       }
+
+       /* All sectors are to be read from the device */
+       if (bitmap_empty(&read_bitmap, rqd->nr_ppas)) {
+               struct bio *int_bio;
+               struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd);
+
+               /* Clone read bio to deal with read errors internally */
+               int_bio = bio_clone_bioset(bio, GFP_KERNEL, fs_bio_set);
+               if (!int_bio) {
+                       pr_err("pblk: could not clone read bio\n");
+                       goto fail_bio_put;
+               }
+
+               rqd->bio = int_bio;
+               r_ctx->orig_bio = bio;
+
+               ret = pblk_submit_read_io(pblk, rqd);
+               if (ret) {
+                       pr_err("pblk: read IO submission failed\n");
+                       bio_put(int_bio);
+                       goto fail_bio_put;
+               }
+
+               return NVM_IO_OK;
+       }
+
+       /* The read bio request could be partially filled by the write buffer,
+        * but there are some holes that need to be read from the drive.
+        */
+       ret = pblk_fill_partial_read_bio(pblk, rqd, bio_init_idx, &read_bitmap);
+       if (ret) {
+               pr_err("pblk: failed to perform partial read\n");
+               return ret;
+       }
+
+       return NVM_IO_OK;
+
+fail_bio_put:
+       /* The request never reached the device, so its completion handler
+        * will not run: undo bio_get() and release the ppa list here.
+        */
+       bio_put(bio);
+       if (nr_secs > 1)
+               nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
+fail_rqd_free:
+       pblk_free_rqd(pblk, rqd, READ);
+       return ret;
+}
+
+static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
+                             struct pblk_line *line, u64 *lba_list,
+                             unsigned int nr_secs)
+{
+       struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS];
+       int valid_secs = 0;
+       int i;
+
+       pblk_lookup_l2p_rand(pblk, ppas, lba_list, nr_secs);
+
+       for (i = 0; i < nr_secs; i++) {
+               if (pblk_addr_in_cache(ppas[i]) || ppas[i].g.blk != line->id ||
+                                               pblk_ppa_empty(ppas[i])) {
+                       lba_list[i] = ADDR_EMPTY;
+                       continue;
+               }
+
+               rqd->ppa_list[valid_secs++] = ppas[i];
+       }
+
+#ifdef CONFIG_NVM_DEBUG
+       atomic_long_add(valid_secs, &pblk->inflight_reads);
+#endif
+       return valid_secs;
+}
+
+static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
+                     struct pblk_line *line, sector_t lba)
+{
+       struct ppa_addr ppa;
+       int valid_secs = 0;
+
+       if (lba == ADDR_EMPTY)
+               goto out;
+
+       /* logic error: lba out-of-bounds */
+       if (lba >= pblk->rl.nr_secs) {
+               WARN(1, "pblk: read lba out of bounds\n");
+               goto out;
+       }
+
+       spin_lock(&pblk->trans_lock);
+       ppa = pblk_trans_map_get(pblk, lba);
+       spin_unlock(&pblk->trans_lock);
+
+       /* Ignore updated values until the moment */
+       if (pblk_addr_in_cache(ppa) || ppa.g.blk != line->id ||
+                                                       pblk_ppa_empty(ppa))
+               goto out;
+
+       rqd->ppa_addr = ppa;
+       valid_secs = 1;
+
+#ifdef CONFIG_NVM_DEBUG
+       atomic_long_inc(&pblk->inflight_reads);
+#endif
+
+out:
+       return valid_secs;
+}
+
+int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
+                       unsigned int nr_secs, unsigned int *secs_to_gc,
+                       struct pblk_line *line)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct request_queue *q = dev->q;
+       struct bio *bio;
+       struct nvm_rq rqd;
+       int ret, data_len;
+       DECLARE_COMPLETION_ONSTACK(wait);
+
+       memset(&rqd, 0, sizeof(struct nvm_rq));
+
+       if (nr_secs > 1) {
+               rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+                                                       &rqd.dma_ppa_list);
+               if (!rqd.ppa_list)
+                       return NVM_IO_ERR;
+
+               *secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, line, lba_list,
+                                                               nr_secs);
+               if (*secs_to_gc == 1) {
+                       struct ppa_addr ppa;
+
+                       ppa = rqd.ppa_list[0];
+                       nvm_dev_dma_free(dev->parent, rqd.ppa_list,
+                                                       rqd.dma_ppa_list);
+                       rqd.ppa_addr = ppa;
+               }
+       } else {
+               *secs_to_gc = read_rq_gc(pblk, &rqd, line, lba_list[0]);
+       }
+
+       if (!(*secs_to_gc))
+               goto out;
+
+       data_len = (*secs_to_gc) * geo->sec_size;
+       bio = bio_map_kern(q, data, data_len, GFP_KERNEL);
+       if (IS_ERR(bio)) {
+               pr_err("pblk: could not allocate GC bio (%lu)\n", PTR_ERR(bio));
+               goto err_free_dma;
+       }
+
+       bio->bi_iter.bi_sector = 0; /* internal bio */
+       bio_set_op_attrs(bio, REQ_OP_READ, 0);
+
+       rqd.opcode = NVM_OP_PREAD;
+       rqd.end_io = pblk_end_io_sync;
+       rqd.private = &wait;
+       rqd.nr_ppas = *secs_to_gc;
+       rqd.bio = bio;
+
+       ret = pblk_submit_read_io(pblk, &rqd);
+       if (ret) {
+               bio_endio(bio);
+               pr_err("pblk: GC read request failed\n");
+               goto err_free_dma;
+       }
+
+       if (!wait_for_completion_io_timeout(&wait,
+                               msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+               pr_err("pblk: GC read I/O timed out\n");
+       }
+
+       if (rqd.error) {
+               atomic_long_inc(&pblk->read_failed_gc);
+#ifdef CONFIG_NVM_DEBUG
+               pblk_print_failed_rqd(pblk, &rqd, rqd.error);
+#endif
+       }
+
+#ifdef CONFIG_NVM_DEBUG
+       atomic_long_add(*secs_to_gc, &pblk->sync_reads);
+       atomic_long_add(*secs_to_gc, &pblk->recov_gc_reads);
+       atomic_long_sub(*secs_to_gc, &pblk->inflight_reads);
+#endif
+
+out:
+       if (rqd.nr_ppas > 1)
+               nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+       return NVM_IO_OK;
+
+err_free_dma:
+       if (rqd.nr_ppas > 1)
+               nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+       return NVM_IO_ERR;
+}
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c

new file mode 100644 (file)

index 0000000..f8f8508
--- /dev/null
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -0,0 +1,998 @@
+/*
+ * Copyright (C) 2016 CNEX Labs
+ * Initial: Javier Gonzalez <javier@cnexlabs.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * pblk-recovery.c - pblk's recovery path
+ */
+
+#include "pblk.h"
+
+/*
+ * Work handler that resubmits the sectors of a write request that need to be
+ * written again.
+ *
+ * The recovery context is obtained from @work through container_of(). The
+ * number of sectors to rewrite is the number of bits set in rqd->ppa_status,
+ * counted over nvm_max_phys_sects() bits. A write bio of that size is
+ * allocated, pblk_rb_read_to_bio_list() is asked to fill it from
+ * recovery->failed, and the request is set up and submitted.
+ *
+ * On success the recovery context is returned to pblk->rec_pool. On any
+ * failure after the bio was allocated, the bio and the request are released.
+ *
+ * NOTE(review): the failure paths do not give @recovery back to rec_pool, and
+ * the early return on bio allocation failure releases neither rqd nor
+ * recovery - confirm whether another owner cleans these up.
+ */
+void pblk_submit_rec(struct work_struct *work)
+{
+       struct pblk_rec_ctx *recovery =
+                       container_of(work, struct pblk_rec_ctx, ws_rec);
+       struct pblk *pblk = recovery->pblk;
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_rq *rqd = recovery->rqd;
+       struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+       int max_secs = nvm_max_phys_sects(dev);
+       struct bio *bio;
+       unsigned int nr_rec_secs;
+       unsigned int pgs_read;
+       int ret;
+
+       /* Each bit set in ppa_status stands for one sector to rewrite */
+       nr_rec_secs = bitmap_weight((unsigned long int *)&rqd->ppa_status,
+                                                               max_secs);
+
+       bio = bio_alloc(GFP_KERNEL, nr_rec_secs);
+       if (!bio) {
+               pr_err("pblk: not able to create recovery bio\n");
+               return;
+       }
+
+       bio->bi_iter.bi_sector = 0;
+       bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+       rqd->bio = bio;
+       rqd->nr_ppas = nr_rec_secs;
+
+       /* Anything other than a complete read of the entries is an error */
+       pgs_read = pblk_rb_read_to_bio_list(&pblk->rwb, bio, &recovery->failed,
+                                                               nr_rec_secs);
+       if (pgs_read != nr_rec_secs) {
+               pr_err("pblk: could not read recovery entries\n");
+               goto err;
+       }
+
+       if (pblk_setup_w_rec_rq(pblk, rqd, c_ctx)) {
+               pr_err("pblk: could not setup recovery request\n");
+               goto err;
+       }
+
+#ifdef CONFIG_NVM_DEBUG
+       atomic_long_add(nr_rec_secs, &pblk->recov_writes);
+#endif
+
+       ret = pblk_submit_io(pblk, rqd);
+       if (ret) {
+               pr_err("pblk: I/O submission failed: %d\n", ret);
+               goto err;
+       }
+
+       /* The request is in flight; only the recovery context is freed here */
+       mempool_free(recovery, pblk->rec_pool);
+       return;
+
+err:
+       bio_put(bio);
+       pblk_free_rqd(pblk, rqd, WRITE);
+}
+
+/*
+ * Allocate and describe the write request that will carry the entries of
+ * @c_ctx that still have to be written.
+ *
+ * @comp is the number of leading entries of the original request that are
+ * treated as completed and @comp_bits is its completion bitmap. The new
+ * request receives the bitmap shifted right by @comp, and the valid/padded
+ * counters are split between the original context (completed part) and the
+ * new one (remaining part). The new request and @pblk are stored in
+ * @recovery.
+ *
+ * Returns 0 on success, -ENOMEM if the request could not be allocated.
+ */
+int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
+                       struct pblk_rec_ctx *recovery, u64 *comp_bits,
+                       unsigned int comp)
+{
+       int nr_entries = c_ctx->nr_valid + c_ctx->nr_padded;
+       int max_secs = nvm_max_phys_sects(pblk->dev);
+       struct pblk_c_ctx *rec_ctx;
+       struct nvm_rq *rec_rqd;
+
+       rec_rqd = pblk_alloc_rqd(pblk, WRITE);
+       if (IS_ERR(rec_rqd)) {
+               pr_err("pblk: could not create recovery req.\n");
+               return -ENOMEM;
+       }
+       rec_ctx = nvm_rq_to_pdu(rec_rqd);
+
+       /* Drop the first comp bits so the bitmap starts at the new sentry */
+       bitmap_shift_right((unsigned long int *)&rec_rqd->ppa_status,
+                               (unsigned long int *)comp_bits,
+                               comp, max_secs);
+
+       rec_ctx->sentry = pblk_rb_wrap_pos(&pblk->rwb, c_ctx->sentry + comp);
+
+       if (comp < c_ctx->nr_valid) {
+               /* Part of the valid entries still has to be written */
+               rec_ctx->nr_valid = c_ctx->nr_valid - comp;
+               rec_ctx->nr_padded = c_ctx->nr_padded;
+
+               c_ctx->nr_valid = comp;
+               c_ctx->nr_padded = 0;
+       } else {
+               /* All valid entries completed; only padding is left */
+               rec_ctx->nr_valid = 0;
+               rec_ctx->nr_padded = nr_entries - comp;
+
+               c_ctx->nr_padded = comp - c_ctx->nr_valid;
+       }
+
+       recovery->pblk = pblk;
+       recovery->rqd = rec_rqd;
+
+       return 0;
+}
+
+/*
+ * Validate @emeta and return its lba list.
+ *
+ * The stored CRC must match the one computed by pblk_calc_emeta_crc() and the
+ * header identifier must be PBLK_MAGIC. Returns NULL if either check fails.
+ */
+__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta)
+{
+       u32 expected_crc = pblk_calc_emeta_crc(pblk, emeta);
+
+       if (le32_to_cpu(emeta->crc) != expected_crc ||
+           le32_to_cpu(emeta->header.identifier) != PBLK_MAGIC)
+               return NULL;
+
+       return pblk_line_emeta_to_lbas(emeta);
+}
+
+/*
+ * Rebuild the L2P entries of @line from the lba list stored in its emeta.
+ *
+ * Walks the sectors from the first one after smeta up to the emeta area, or
+ * until emeta->nr_valid_lbas lbas have been mapped. Sectors on blocks marked
+ * in line->blk_bitmap are skipped. A sector whose stored lba is ADDR_EMPTY is
+ * marked in line->invalid_bitmap and accounted in line->vsc; every other
+ * sector is mapped through pblk_update_map().
+ *
+ * Returns 0 when the lba list was usable, 1 when pblk_recov_get_lba_list()
+ * rejected the emeta.
+ */
+static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct pblk_line_meta *lm = &pblk->lm;
+       struct line_emeta *emeta = line->emeta;
+       __le64 *lba_list;
+       int data_start;
+       int nr_data_lbas, nr_valid_lbas, nr_lbas = 0;
+       int i;
+
+       lba_list = pblk_recov_get_lba_list(pblk, emeta);
+       if (!lba_list)
+               return 1;
+
+       data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
+       nr_data_lbas = lm->sec_per_line - lm->emeta_sec;
+       nr_valid_lbas = le64_to_cpu(emeta->nr_valid_lbas);
+
+       for (i = data_start; i < nr_data_lbas && nr_lbas < nr_valid_lbas; i++) {
+               struct ppa_addr ppa;
+               int pos;
+
+               ppa = addr_to_pblk_ppa(pblk, i, line->id);
+               pos = pblk_ppa_to_pos(geo, ppa);
+
+               /* Do not update bad blocks */
+               if (test_bit(pos, line->blk_bitmap))
+                       continue;
+
+               if (le64_to_cpu(lba_list[i]) == ADDR_EMPTY) {
+                       /* Sector holds no user data: account it as invalid */
+                       spin_lock(&line->lock);
+                       if (test_and_set_bit(i, line->invalid_bitmap))
+                               WARN_ONCE(1, "pblk: rec. double invalidate:\n");
+                       else
+                               line->vsc--;
+                       spin_unlock(&line->lock);
+
+                       continue;
+               }
+
+               pblk_update_map(pblk, le64_to_cpu(lba_list[i]), ppa);
+               nr_lbas++;
+       }
+
+       /*
+        * Report the CPU-endian count; the emeta field itself is stored
+        * little-endian and must not be printed raw.
+        */
+       if (nr_valid_lbas != nr_lbas)
+               pr_err("pblk: line %d - inconsistent lba list(%d/%d)\n",
+                               line->id, nr_valid_lbas, nr_lbas);
+
+       line->left_msecs = 0;
+
+       return 0;
+}
+
+/*
+ * Number of sectors in @line left after subtracting the smeta and emeta
+ * sectors and all sectors of the blocks set in line->blk_bitmap.
+ */
+static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line)
+{
+       struct pblk_line_meta *lm = &pblk->lm;
+       struct nvm_geo *geo = &pblk->dev->geo;
+       int bad_blks;
+
+       bad_blks = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
+
+       return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec -
+                               bad_blks * geo->sec_per_blk;
+}
+
+/*
+ * Resources shared by the OOB recovery helpers in this file. The structure is
+ * filled once by the caller and handed to the helpers by value; each helper
+ * installs these members into the request it builds.
+ */
+struct pblk_recov_alloc {
+       /* Address list installed as rqd->ppa_list */
+       struct ppa_addr *ppa_list;
+       /* Per-sector metadata buffer installed as rqd->meta_list */
+       struct pblk_sec_meta *meta_list;
+       /* Request descriptor that is zeroed and reused for every I/O */
+       struct nvm_rq *rqd;
+       /* Kernel buffer mapped into the bio of every I/O */
+       void *data;
+       /* DMA addresses matching ppa_list and meta_list */
+       dma_addr_t dma_ppa_list;
+       dma_addr_t dma_meta_list;
+};
+
+/*
+ * Read sectors of @line starting at @r_ptr and map the lbas found in their
+ * per-sector metadata.
+ *
+ * The amount to read is line->cur_sec - @r_ptr sectors, issued in requests
+ * sized by pblk_calc_secs() (or pblk->min_write_pgs when that returns 0).
+ * Addresses on blocks marked in line->blk_bitmap are stepped over. Each
+ * request is submitted and waited for synchronously.
+ *
+ * Returns 0 on success (or when there is nothing to read), the error from
+ * bio_map_kern() or pblk_submit_io(), or -EINTR when a request times out or
+ * completes with an error.
+ */
+static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line,
+                              struct pblk_recov_alloc p, u64 r_ptr)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct ppa_addr *ppa_list;
+       struct pblk_sec_meta *meta_list;
+       struct nvm_rq *rqd;
+       struct bio *bio;
+       void *data;
+       dma_addr_t dma_ppa_list, dma_meta_list;
+       u64 r_ptr_int;
+       int left_ppas;
+       int rq_ppas, rq_len;
+       int i, j;
+       int ret = 0;
+       DECLARE_COMPLETION_ONSTACK(wait);
+
+       ppa_list = p.ppa_list;
+       meta_list = p.meta_list;
+       rqd = p.rqd;
+       data = p.data;
+       dma_ppa_list = p.dma_ppa_list;
+       dma_meta_list = p.dma_meta_list;
+
+       left_ppas = line->cur_sec - r_ptr;
+       if (!left_ppas)
+               return 0;
+
+       r_ptr_int = r_ptr;
+
+next_read_rq:
+       memset(rqd, 0, pblk_r_rq_size);
+
+       rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+       if (!rq_ppas)
+               rq_ppas = pblk->min_write_pgs;
+       rq_len = rq_ppas * geo->sec_size;
+
+       bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
+       if (IS_ERR(bio))
+               return PTR_ERR(bio);
+
+       bio->bi_iter.bi_sector = 0; /* internal bio */
+       bio_set_op_attrs(bio, REQ_OP_READ, 0);
+
+       rqd->bio = bio;
+       rqd->opcode = NVM_OP_PREAD;
+       rqd->flags = pblk_set_read_mode(pblk);
+       rqd->meta_list = meta_list;
+       rqd->nr_ppas = rq_ppas;
+       rqd->ppa_list = ppa_list;
+       rqd->dma_ppa_list = dma_ppa_list;
+       rqd->dma_meta_list = dma_meta_list;
+       rqd->end_io = pblk_end_io_sync;
+       rqd->private = &wait;
+
+       /* Fill the address list in groups of min_write_pgs sectors */
+       for (i = 0; i < rqd->nr_ppas; ) {
+               struct ppa_addr ppa;
+               int pos;
+
+               ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id);
+               pos = pblk_dev_ppa_to_pos(geo, ppa);
+
+               /* Step over addresses that fall on bad blocks */
+               while (test_bit(pos, line->blk_bitmap)) {
+                       r_ptr_int += pblk->min_write_pgs;
+                       ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id);
+                       pos = pblk_dev_ppa_to_pos(geo, ppa);
+               }
+
+               for (j = 0; j < pblk->min_write_pgs; j++, i++, r_ptr_int++)
+                       rqd->ppa_list[i] =
+                               addr_to_gen_ppa(pblk, r_ptr_int, line->id);
+       }
+
+       /* If read fails, more padding is needed */
+       ret = pblk_submit_io(pblk, rqd);
+       if (ret) {
+               pr_err("pblk: I/O submission failed: %d\n", ret);
+               /* Drop the mapped bio, as pblk_recov_scan_oob() does */
+               bio_put(bio);
+               return ret;
+       }
+
+       if (!wait_for_completion_io_timeout(&wait,
+                               msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+               pr_err("pblk: L2P recovery read timed out\n");
+               return -EINTR;
+       }
+
+       /* The on-stack completion is reused by the next request */
+       reinit_completion(&wait);
+
+       /* At this point, the read should not fail. If it does, it is a problem
+        * we cannot recover from here. Need FTL log.
+        */
+       if (rqd->error) {
+               pr_err("pblk: L2P recovery failed (%d)\n", rqd->error);
+               return -EINTR;
+       }
+
+       for (i = 0; i < rqd->nr_ppas; i++) {
+               u64 lba = le64_to_cpu(meta_list[i].lba);
+
+               /* Ignore sectors without an lba or with one out of range */
+               if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
+                       continue;
+
+               pblk_update_map(pblk, lba, rqd->ppa_list[i]);
+       }
+
+       left_ppas -= rq_ppas;
+       if (left_ppas > 0)
+               goto next_read_rq;
+
+       return 0;
+}
+
+/*
+ * Write @left_ppas padding sectors to @line.
+ *
+ * Sectors are allocated from the line with pblk_alloc_page() in groups of
+ * pblk->min_write_pgs, stepping over blocks marked in line->blk_bitmap. Every
+ * padded sector is invalidated through pblk_map_invalidate() and recorded as
+ * ADDR_EMPTY both in the request metadata and in the emeta lba list. Each
+ * request is submitted and waited for synchronously; a timeout is only
+ * logged.
+ *
+ * Padding stops when @left_ppas is consumed or when the count taken from
+ * line->left_msecs at entry reaches zero.
+ *
+ * Returns 0 on success, or the error from bio_map_kern() or
+ * pblk_submit_io().
+ */
+static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
+                             struct pblk_recov_alloc p, int left_ppas)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct ppa_addr *ppa_list;
+       struct pblk_sec_meta *meta_list;
+       struct nvm_rq *rqd;
+       struct bio *bio;
+       void *data;
+       dma_addr_t dma_ppa_list, dma_meta_list;
+       __le64 *lba_list = pblk_line_emeta_to_lbas(line->emeta);
+       u64 w_ptr = line->cur_sec;
+       int left_line_ppas = line->left_msecs;
+       int rq_ppas, rq_len;
+       int i, j;
+       int ret = 0;
+       DECLARE_COMPLETION_ONSTACK(wait);
+
+       ppa_list = p.ppa_list;
+       meta_list = p.meta_list;
+       rqd = p.rqd;
+       data = p.data;
+       dma_ppa_list = p.dma_ppa_list;
+       dma_meta_list = p.dma_meta_list;
+
+next_pad_rq:
+       rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+       if (!rq_ppas)
+               rq_ppas = pblk->min_write_pgs;
+       rq_len = rq_ppas * geo->sec_size;
+
+       bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
+       if (IS_ERR(bio))
+               return PTR_ERR(bio);
+
+       bio->bi_iter.bi_sector = 0; /* internal bio */
+       bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+
+       memset(rqd, 0, pblk_r_rq_size);
+
+       rqd->bio = bio;
+       rqd->opcode = NVM_OP_PWRITE;
+       rqd->flags = pblk_set_progr_mode(pblk, WRITE);
+       rqd->meta_list = meta_list;
+       rqd->nr_ppas = rq_ppas;
+       rqd->ppa_list = ppa_list;
+       rqd->dma_ppa_list = dma_ppa_list;
+       rqd->dma_meta_list = dma_meta_list;
+       rqd->end_io = pblk_end_io_sync;
+       rqd->private = &wait;
+
+       /* Fill the address list in groups of min_write_pgs sectors */
+       for (i = 0; i < rqd->nr_ppas; ) {
+               struct ppa_addr ppa;
+               int pos;
+
+               w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
+               ppa = addr_to_pblk_ppa(pblk, w_ptr, line->id);
+               pos = pblk_ppa_to_pos(geo, ppa);
+
+               /* Step over addresses that fall on bad blocks */
+               while (test_bit(pos, line->blk_bitmap)) {
+                       w_ptr += pblk->min_write_pgs;
+                       ppa = addr_to_pblk_ppa(pblk, w_ptr, line->id);
+                       pos = pblk_ppa_to_pos(geo, ppa);
+               }
+
+               for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) {
+                       struct ppa_addr dev_ppa;
+
+                       dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
+
+                       /* Padding carries no lba: record it as empty */
+                       pblk_map_invalidate(pblk, dev_ppa);
+                       meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
+                       lba_list[w_ptr] = cpu_to_le64(ADDR_EMPTY);
+                       rqd->ppa_list[i] = dev_ppa;
+               }
+       }
+
+       ret = pblk_submit_io(pblk, rqd);
+       if (ret) {
+               pr_err("pblk: I/O submission failed: %d\n", ret);
+               /* Drop the mapped bio, as pblk_recov_scan_oob() does */
+               bio_put(bio);
+               return ret;
+       }
+
+       if (!wait_for_completion_io_timeout(&wait,
+                               msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+               pr_err("pblk: L2P recovery write timed out\n");
+       }
+       /* The on-stack completion is reused by the next request */
+       reinit_completion(&wait);
+
+       left_line_ppas -= rq_ppas;
+       left_ppas -= rq_ppas;
+       if (left_ppas > 0 && left_line_ppas)
+               goto next_pad_rq;
+
+       return 0;
+}
+
+/* When this function is called, it means that not all upper pages have been
+ * written in a page that contains valid data. In order to recover this data, we
+ * first find the write pointer on the device, then we pad all necessary
+ * sectors, and finally attempt to read the valid data
+ *
+ * Reads are issued from the line's current write pointer onwards. Requests
+ * that complete without error before the first failing one have their lbas
+ * mapped directly. When a request reports NVM_RSP_ERR_EMPTYPAGE, the failed
+ * sectors are rolled back, up to pblk_pad_distance() sectors are padded with
+ * pblk_recov_pad_oob(), and the data is read back with pblk_recov_read_oob()
+ * starting at the sector where this function started reading.
+ *
+ * Returns the error from bio_map_kern() or pblk_submit_io(), otherwise the
+ * result of the last submission (0). Failures of the padding and read-back
+ * steps are only logged: they are stored in a block-local 'ret' that shadows
+ * the function-level one.
+ */
+static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line,
+                                  struct pblk_recov_alloc p)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct ppa_addr *ppa_list;
+       struct pblk_sec_meta *meta_list;
+       struct nvm_rq *rqd;
+       struct bio *bio;
+       void *data;
+       dma_addr_t dma_ppa_list, dma_meta_list;
+       u64 w_ptr = 0, r_ptr;
+       int rq_ppas, rq_len;
+       int i, j;
+       int ret = 0;
+       int rec_round;
+       int left_ppas = pblk_calc_sec_in_line(pblk, line) - line->cur_sec;
+       DECLARE_COMPLETION_ONSTACK(wait);
+
+       ppa_list = p.ppa_list;
+       meta_list = p.meta_list;
+       rqd = p.rqd;
+       data = p.data;
+       dma_ppa_list = p.dma_ppa_list;
+       dma_meta_list = p.dma_meta_list;
+
+       /* we could recover up until the line write pointer */
+       r_ptr = line->cur_sec;
+       rec_round = 0;
+
+next_rq:
+       memset(rqd, 0, pblk_r_rq_size);
+
+       rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+       if (!rq_ppas)
+               rq_ppas = pblk->min_write_pgs;
+       rq_len = rq_ppas * geo->sec_size;
+
+       bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
+       if (IS_ERR(bio))
+               return PTR_ERR(bio);
+
+       bio->bi_iter.bi_sector = 0; /* internal bio */
+       bio_set_op_attrs(bio, REQ_OP_READ, 0);
+
+       rqd->bio = bio;
+       rqd->opcode = NVM_OP_PREAD;
+       rqd->flags = pblk_set_read_mode(pblk);
+       rqd->meta_list = meta_list;
+       rqd->nr_ppas = rq_ppas;
+       rqd->ppa_list = ppa_list;
+       rqd->dma_ppa_list = dma_ppa_list;
+       rqd->dma_meta_list = dma_meta_list;
+       rqd->end_io = pblk_end_io_sync;
+       rqd->private = &wait;
+
+       /* Fill the address list in groups of min_write_pgs sectors */
+       for (i = 0; i < rqd->nr_ppas; ) {
+               struct ppa_addr ppa;
+               int pos;
+
+               w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
+               ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
+               pos = pblk_dev_ppa_to_pos(geo, ppa);
+
+               /* Step over addresses that fall on bad blocks */
+               while (test_bit(pos, line->blk_bitmap)) {
+                       w_ptr += pblk->min_write_pgs;
+                       ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
+                       pos = pblk_dev_ppa_to_pos(geo, ppa);
+               }
+
+               for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++)
+                       rqd->ppa_list[i] =
+                               addr_to_gen_ppa(pblk, w_ptr, line->id);
+       }
+
+       ret = pblk_submit_io(pblk, rqd);
+       if (ret) {
+               pr_err("pblk: I/O submission failed: %d\n", ret);
+               /* Drop the mapped bio, as pblk_recov_scan_oob() does */
+               bio_put(bio);
+               return ret;
+       }
+
+       if (!wait_for_completion_io_timeout(&wait,
+                               msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+               pr_err("pblk: L2P recovery read timed out\n");
+       }
+       reinit_completion(&wait);
+
+       /* This should not happen since the read failed during normal recovery,
+        * but the media works funny sometimes...
+        *
+        * rec_round is reset only after an error-free request, so once a
+        * request fails no later request is mapped by this branch.
+        */
+       if (!rec_round++ && !rqd->error) {
+               rec_round = 0;
+               for (i = 0; i < rqd->nr_ppas; i++, r_ptr++) {
+                       u64 lba = le64_to_cpu(meta_list[i].lba);
+
+                       if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
+                               continue;
+
+                       pblk_update_map(pblk, lba, rqd->ppa_list[i]);
+               }
+       }
+
+       /* Reached the end of the written line */
+       if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) {
+               int pad_secs, nr_error_bits, bit;
+               /* NOTE(review): shadows the outer 'ret'; errors below are
+                * logged but not returned - confirm this is intended.
+                */
+               int ret;
+
+               /* Everything from the first flagged sector on is undone */
+               bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas);
+               nr_error_bits = rqd->nr_ppas - bit;
+
+               /* Roll back failed sectors */
+               line->cur_sec -= nr_error_bits;
+               line->left_msecs += nr_error_bits;
+               bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);
+
+               pad_secs = pblk_pad_distance(pblk);
+               if (pad_secs > line->left_msecs)
+                       pad_secs = line->left_msecs;
+
+               ret = pblk_recov_pad_oob(pblk, line, p, pad_secs);
+               if (ret)
+                       pr_err("pblk: OOB padding failed (err:%d)\n", ret);
+
+               ret = pblk_recov_read_oob(pblk, line, p, r_ptr);
+               if (ret)
+                       pr_err("pblk: OOB read failed (err:%d)\n", ret);
+
+               line->left_ssecs = line->left_msecs;
+               /* Forces the loop below to terminate */
+               left_ppas = 0;
+       }
+
+       left_ppas -= rq_ppas;
+       if (left_ppas > 0)
+               goto next_rq;
+
+       return ret;
+}
+
+/*
+ * Scan the sectors of @line and rebuild L2P entries from the lba stored in
+ * each sector's metadata.
+ *
+ * Reads are issued in requests sized by pblk_calc_secs() (or
+ * pblk->min_write_pgs when that returns 0), with addresses taken from
+ * pblk_alloc_page() and blocks marked in line->blk_bitmap stepped over. When
+ * a request completes with an error, the sectors from the first flagged one
+ * onwards are rolled back in the line accounting, the sectors before it are
+ * still mapped, and scanning stops.
+ *
+ * @done is set to 1 on entry and cleared only if a request fails with an
+ * error other than NVM_RSP_ERR_EMPTYPAGE.
+ *
+ * Returns the error from bio_map_kern() or pblk_submit_io(), otherwise the
+ * result of the last submission (0).
+ */
+static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
+                              struct pblk_recov_alloc p, int *done)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct ppa_addr *ppa_list;
+       struct pblk_sec_meta *meta_list;
+       struct nvm_rq *rqd;
+       struct bio *bio;
+       void *data;
+       dma_addr_t dma_ppa_list, dma_meta_list;
+       u64 paddr;
+       int rq_ppas, rq_len;
+       int i, j;
+       int ret = 0;
+       int left_ppas = pblk_calc_sec_in_line(pblk, line);
+       DECLARE_COMPLETION_ONSTACK(wait);
+
+       ppa_list = p.ppa_list;
+       meta_list = p.meta_list;
+       rqd = p.rqd;
+       data = p.data;
+       dma_ppa_list = p.dma_ppa_list;
+       dma_meta_list = p.dma_meta_list;
+
+       *done = 1;
+
+next_rq:
+       memset(rqd, 0, pblk_r_rq_size);
+
+       rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+       if (!rq_ppas)
+               rq_ppas = pblk->min_write_pgs;
+       rq_len = rq_ppas * geo->sec_size;
+
+       bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
+       if (IS_ERR(bio))
+               return PTR_ERR(bio);
+
+       bio->bi_iter.bi_sector = 0; /* internal bio */
+       bio_set_op_attrs(bio, REQ_OP_READ, 0);
+
+       rqd->bio = bio;
+       rqd->opcode = NVM_OP_PREAD;
+       rqd->flags = pblk_set_read_mode(pblk);
+       rqd->meta_list = meta_list;
+       rqd->nr_ppas = rq_ppas;
+       rqd->ppa_list = ppa_list;
+       rqd->dma_ppa_list = dma_ppa_list;
+       rqd->dma_meta_list = dma_meta_list;
+       rqd->end_io = pblk_end_io_sync;
+       rqd->private = &wait;
+
+       /* Fill the address list in groups of min_write_pgs sectors */
+       for (i = 0; i < rqd->nr_ppas; ) {
+               struct ppa_addr ppa;
+               int pos;
+
+               paddr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
+               ppa = addr_to_gen_ppa(pblk, paddr, line->id);
+               pos = pblk_dev_ppa_to_pos(geo, ppa);
+
+               /* Step over addresses that fall on bad blocks */
+               while (test_bit(pos, line->blk_bitmap)) {
+                       paddr += pblk->min_write_pgs;
+                       ppa = addr_to_gen_ppa(pblk, paddr, line->id);
+                       pos = pblk_dev_ppa_to_pos(geo, ppa);
+               }
+
+               for (j = 0; j < pblk->min_write_pgs; j++, i++, paddr++)
+                       rqd->ppa_list[i] =
+                               addr_to_gen_ppa(pblk, paddr, line->id);
+       }
+
+       ret = pblk_submit_io(pblk, rqd);
+       if (ret) {
+               pr_err("pblk: I/O submission failed: %d\n", ret);
+               bio_put(bio);
+               return ret;
+       }
+
+       /* A timeout is only logged; processing continues below */
+       if (!wait_for_completion_io_timeout(&wait,
+                               msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+               pr_err("pblk: L2P recovery read timed out\n");
+       }
+       /* The on-stack completion is reused by the next request */
+       reinit_completion(&wait);
+
+       /* Reached the end of the written line */
+       if (rqd->error) {
+               int nr_error_bits, bit;
+
+               /* Everything from the first flagged sector on is undone */
+               bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas);
+               nr_error_bits = rqd->nr_ppas - bit;
+
+               /* Roll back failed sectors */
+               line->cur_sec -= nr_error_bits;
+               line->left_msecs += nr_error_bits;
+               line->left_ssecs = line->left_msecs;
+               bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);
+
+               /* Stop after this request and map only the sectors before
+                * the first flagged one.
+                */
+               left_ppas = 0;
+               rqd->nr_ppas = bit;
+
+               /* Any error other than an empty page needs the full scan */
+               if (rqd->error != NVM_RSP_ERR_EMPTYPAGE)
+                       *done = 0;
+       }
+
+       for (i = 0; i < rqd->nr_ppas; i++) {
+               u64 lba = le64_to_cpu(meta_list[i].lba);
+
+               /* Ignore sectors without an lba or with one out of range */
+               if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
+                       continue;
+
+               pblk_update_map(pblk, lba, rqd->ppa_list[i]);
+       }
+
+       left_ppas -= rq_ppas;
+       if (left_ppas > 0)
+               goto next_rq;
+
+       return ret;
+}
+
+/*
+ * Scan a line for lbas stored in the out-of-band (OOB) area.
+ *
+ * Allocates the request, DMA buffer and data buffer shared by the scan
+ * helpers, runs pblk_recov_scan_oob() and, if that reports the scan as not
+ * done, pblk_recov_scan_all_oob(). A line that ends up full is closed with
+ * pblk_line_recov_close(). All resources are released before returning.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct pblk_recov_alloc p;
+       int done, ret;
+
+       p.rqd = pblk_alloc_rqd(pblk, READ);
+       if (IS_ERR(p.rqd))
+               return PTR_ERR(p.rqd);
+
+       p.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+                                       &p.dma_meta_list);
+       if (!p.meta_list) {
+               ret = -ENOMEM;
+               goto free_rqd;
+       }
+
+       /* The ppa list sits in the same allocation, after the metadata */
+       p.ppa_list = (void *)(p.meta_list) + pblk_dma_meta_size;
+       p.dma_ppa_list = p.dma_meta_list + pblk_dma_meta_size;
+
+       p.data = kcalloc(pblk->max_write_pgs, dev->geo.sec_size, GFP_KERNEL);
+       if (!p.data) {
+               ret = -ENOMEM;
+               goto free_meta_list;
+       }
+
+       /* Fall back to the full scan only if the first pass was clean but
+        * did not finish.
+        */
+       ret = pblk_recov_scan_oob(pblk, line, p, &done);
+       if (!ret && !done)
+               ret = pblk_recov_scan_all_oob(pblk, line, p);
+       if (ret) {
+               pr_err("pblk: could not recover L2P from OOB\n");
+               goto free_data;
+       }
+
+       if (pblk_line_is_full(line))
+               pblk_line_recov_close(pblk, line);
+
+free_data:
+       kfree(p.data);
+free_meta_list:
+       nvm_dev_dma_free(dev->parent, p.meta_list, p.dma_meta_list);
+free_rqd:
+       pblk_free_rqd(pblk, p.rqd, READ);
+
+       return ret;
+}
+
+/*
+ * Insert @line into the list at @head, keeping the list sorted by ascending
+ * sequence number (seq_nr). The line goes in front of the first entry with a
+ * larger seq_nr, or at the tail if there is none.
+ */
+static void pblk_recov_line_add_ordered(struct list_head *head,
+                                       struct pblk_line *line)
+{
+       struct pblk_line *cur;
+
+       list_for_each_entry(cur, head, list) {
+               if (cur->seq_nr > line->seq_nr) {
+                       list_add_tail(&line->list, &cur->list);
+                       return;
+               }
+       }
+
+       list_add_tail(&line->list, head);
+}
+
+/*
+ * Scan-based recovery of the L2P table.
+ *
+ * Phase 1 reads the smeta of every line. Lines whose smeta cannot be read,
+ * fails the CRC or magic check, or carries a different instance uuid are
+ * ignored. The remaining lines are put on a local list ordered by sequence
+ * number.
+ * Phase 2 walks that list and recovers each line's mappings from emeta,
+ * falling back to the OOB scan when emeta cannot be read or is rejected.
+ * Full lines are closed and moved to a GC list; a line that is not full is
+ * remembered as the open data line.
+ * Finally either the data line is replaced (no open line found) or the next
+ * data line is prepared and erased.
+ *
+ * Returns the open data line, NULL when none was found, or
+ * ERR_PTR(-EINVAL) when a line with an incompatible version is encountered.
+ *
+ * NOTE(review): the ERR_PTR return and the 'goto out' after a failed
+ * pblk_line_recov_alloc() leave the meta_line bit set in l_mg->meta_bitmap -
+ * confirm that callers handle this.
+ */
+struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct pblk_line_meta *lm = &pblk->lm;
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       struct pblk_line *line, *tline, *data_line = NULL;
+       struct line_smeta *smeta;
+       struct line_emeta *emeta;
+       int found_lines = 0, recovered_lines = 0, open_lines = 0;
+       int is_next = 0;
+       int meta_line;
+       int i, valid_uuid = 0;
+       LIST_HEAD(recov_list);
+
+       /* TODO: Implement FTL snapshot */
+
+       /* Scan recovery - takes place when FTL snapshot fails */
+       spin_lock(&l_mg->free_lock);
+       meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
+       set_bit(meta_line, &l_mg->meta_bitmap);
+       smeta = l_mg->sline_meta[meta_line].meta;
+       emeta = l_mg->eline_meta[meta_line].meta;
+       spin_unlock(&l_mg->free_lock);
+
+       /* Order data lines using their sequence number */
+       for (i = 0; i < l_mg->nr_lines; i++) {
+               u32 crc;
+
+               line = &pblk->lines[i];
+
+               /* The same smeta buffer is reused for every line */
+               memset(smeta, 0, lm->smeta_len);
+               line->smeta = smeta;
+               line->lun_bitmap = ((void *)(smeta)) +
+                                               sizeof(struct line_smeta);
+
+               /* Lines that cannot be read are assumed as not written here */
+               if (pblk_line_read_smeta(pblk, line))
+                       continue;
+
+               crc = pblk_calc_smeta_crc(pblk, smeta);
+               if (le32_to_cpu(smeta->crc) != crc)
+                       continue;
+
+               if (le32_to_cpu(smeta->header.identifier) != PBLK_MAGIC)
+                       continue;
+
+               if (le16_to_cpu(smeta->header.version) != 1) {
+                       /* The field is little-endian: convert before printing */
+                       pr_err("pblk: found incompatible line version %u\n",
+                                       le16_to_cpu(smeta->header.version));
+                       return ERR_PTR(-EINVAL);
+               }
+
+               /* The first valid instance uuid is used for initialization */
+               if (!valid_uuid) {
+                       memcpy(pblk->instance_uuid, smeta->header.uuid, 16);
+                       valid_uuid = 1;
+               }
+
+               if (memcmp(pblk->instance_uuid, smeta->header.uuid, 16)) {
+                       pr_debug("pblk: ignore line %u due to uuid mismatch\n",
+                                       i);
+                       continue;
+               }
+
+               /* Update line metadata */
+               spin_lock(&line->lock);
+               line->id = le32_to_cpu(line->smeta->header.id);
+               line->type = le16_to_cpu(line->smeta->header.type);
+               line->seq_nr = le64_to_cpu(line->smeta->seq_nr);
+               spin_unlock(&line->lock);
+
+               /* Update general metadata */
+               spin_lock(&l_mg->free_lock);
+               if (line->seq_nr >= l_mg->d_seq_nr)
+                       l_mg->d_seq_nr = line->seq_nr + 1;
+               l_mg->nr_free_lines--;
+               spin_unlock(&l_mg->free_lock);
+
+               if (pblk_line_recov_alloc(pblk, line))
+                       goto out;
+
+               pblk_recov_line_add_ordered(&recov_list, line);
+               found_lines++;
+               /* seq_nr is little-endian in smeta: convert before printing */
+               pr_debug("pblk: recovering data line %d, seq:%llu\n",
+                               line->id, le64_to_cpu(smeta->seq_nr));
+       }
+
+       if (!found_lines) {
+               /* Nothing to recover: start a fresh instance */
+               pblk_setup_uuid(pblk);
+
+               spin_lock(&l_mg->free_lock);
+               WARN_ON_ONCE(!test_and_clear_bit(meta_line,
+                                                       &l_mg->meta_bitmap));
+               spin_unlock(&l_mg->free_lock);
+
+               goto out;
+       }
+
+       /* Verify closed blocks and recover this portion of L2P table*/
+       list_for_each_entry_safe(line, tline, &recov_list, list) {
+               int off, nr_bb;
+
+               recovered_lines++;
+               /* Calculate where emeta starts based on the line bb */
+               off = lm->sec_per_line - lm->emeta_sec;
+               nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
+               off -= nr_bb * geo->sec_per_pl;
+
+               /* The same emeta buffer is reused for every line */
+               memset(emeta, 0, lm->emeta_len);
+               line->emeta = emeta;
+               line->emeta_ssec = off;
+
+               if (pblk_line_read_emeta(pblk, line)) {
+                       pblk_recov_l2p_from_oob(pblk, line);
+                       goto next;
+               }
+
+               if (pblk_recov_l2p_from_emeta(pblk, line))
+                       pblk_recov_l2p_from_oob(pblk, line);
+
+next:
+               if (pblk_line_is_full(line)) {
+                       struct list_head *move_list;
+
+                       spin_lock(&line->lock);
+                       line->state = PBLK_LINESTATE_CLOSED;
+                       move_list = pblk_line_gc_list(pblk, line);
+                       spin_unlock(&line->lock);
+
+                       spin_lock(&l_mg->gc_lock);
+                       list_move_tail(&line->list, move_list);
+                       spin_unlock(&l_mg->gc_lock);
+
+                       mempool_free(line->map_bitmap, pblk->line_meta_pool);
+                       line->map_bitmap = NULL;
+                       line->smeta = NULL;
+                       line->emeta = NULL;
+               } else {
+                       /* The check runs before the increment below */
+                       if (open_lines > 1)
+                               pr_err("pblk: failed to recover L2P\n");
+
+                       open_lines++;
+                       line->meta_line = meta_line;
+                       data_line = line;
+               }
+       }
+
+       spin_lock(&l_mg->free_lock);
+       if (!open_lines) {
+               /* No open line is using the metadata slot: release it */
+               WARN_ON_ONCE(!test_and_clear_bit(meta_line,
+                                                       &l_mg->meta_bitmap));
+               pblk_line_replace_data(pblk);
+       } else {
+               /* Allocate next line for preparation */
+               l_mg->data_next = pblk_line_get(pblk);
+               if (l_mg->data_next) {
+                       l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
+                       l_mg->data_next->type = PBLK_LINETYPE_DATA;
+                       is_next = 1;
+               }
+       }
+       spin_unlock(&l_mg->free_lock);
+
+       /* The erase is issued after free_lock has been dropped */
+       if (is_next) {
+               pblk_line_erase(pblk, l_mg->data_next);
+               pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
+       }
+
+out:
+       if (found_lines != recovered_lines)
+               pr_err("pblk: failed to recover all found lines %d/%d\n",
+                                               found_lines, recovered_lines);
+
+       return data_line;
+}
+
+/*
+ * Pad until smeta can be read on current data line
+ *
+ * Pads the sectors still left (line->left_msecs) on the current data line
+ * with pblk_recov_pad_oob() and, if that succeeds, closes the line. All
+ * temporary resources are released before returning; allocation failures
+ * make the function return without padding.
+ */
+void pblk_recov_pad(struct pblk *pblk)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       struct pblk_recov_alloc p;
+       struct pblk_line *line;
+
+       spin_lock(&l_mg->free_lock);
+       line = l_mg->data_line;
+       spin_unlock(&l_mg->free_lock);
+
+       p.rqd = pblk_alloc_rqd(pblk, READ);
+       if (IS_ERR(p.rqd))
+               return;
+
+       p.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+                                       &p.dma_meta_list);
+       if (!p.meta_list)
+               goto free_rqd;
+
+       /* The ppa list sits in the same allocation, after the metadata */
+       p.ppa_list = (void *)(p.meta_list) + pblk_dma_meta_size;
+       p.dma_ppa_list = p.dma_meta_list + pblk_dma_meta_size;
+
+       p.data = kcalloc(pblk->max_write_pgs, dev->geo.sec_size, GFP_KERNEL);
+       if (!p.data)
+               goto free_meta_list;
+
+       /* The line is closed only when padding succeeded */
+       if (pblk_recov_pad_oob(pblk, line, p, line->left_msecs))
+               pr_err("pblk: Tear down padding failed\n");
+       else
+               pblk_line_close(pblk, line);
+
+       kfree(p.data);
+free_meta_list:
+       nvm_dev_dma_free(dev->parent, p.meta_list, p.dma_meta_list);
+free_rqd:
+       pblk_free_rqd(pblk, p.rqd, READ);
+}
diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c

new file mode 100644 (file)

index 0000000..ab7cbb1
--- /dev/null
+++ b/drivers/lightnvm/pblk-rl.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (C) 2016 CNEX Labs
+ * Initial release: Javier Gonzalez <javier@cnexlabs.com>
+ *                  Matias Bjorling <matias@cnexlabs.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * pblk-rl.c - pblk's rate limiter for user I/O
+ *
+ */
+
+#include "pblk.h"
+
+/* (Re)arm the user-activity timer so that it expires 5000 ms from now. */
+static void pblk_rl_kick_u_timer(struct pblk_rl *rl)
+{
+       unsigned long expires = jiffies + msecs_to_jiffies(5000);
+
+       mod_timer(&rl->u_timer, expires);
+}
+
+/*
+ * Return non-zero when nr_entries additional user entries still fit within
+ * the current user budget (rl->rb_user_max), zero otherwise.
+ */
+int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries)
+{
+       int in_use = atomic_read(&rl->rb_user_cnt);
+
+       return in_use + nr_entries <= rl->rb_user_max;
+}
+
+/*
+ * Return non-zero when GC may insert nr_entries entries. GC is refused only
+ * when the insertion would exceed rl->rb_gc_max while user I/O is flagged as
+ * active.
+ */
+int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
+{
+       int gc_cnt = atomic_read(&rl->rb_gc_cnt);
+       int user_active = READ_ONCE(rl->rb_user_active);
+
+       /* If there is no user I/O let GC take over space on the write buffer */
+       if (!user_active)
+               return 1;
+
+       return gc_cnt + nr_entries <= rl->rb_gc_max;
+}
+
+/*
+ * Account nr_entries new user entries, flag user I/O as active and re-arm
+ * the timer that clears that flag again (see pblk_rl_u_timer()).
+ */
+void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
+{
+       atomic_add(nr_entries, &rl->rb_user_cnt);
+
+       /* Release user I/O state. Protect from GC */
+       smp_store_release(&rl->rb_user_active, 1);
+       pblk_rl_kick_u_timer(rl);
+}
+
+/* Account nr_entries new GC entries in rl->rb_gc_cnt. */
+void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries)
+{
+       atomic_add(nr_entries, &rl->rb_gc_cnt);
+}
+
+/*
+ * Drop accounting for entries that left the buffer: nr_user is subtracted
+ * from the user entry count and nr_gc from the GC entry count.
+ */
+void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc)
+{
+       atomic_sub(nr_user, &rl->rb_user_cnt);
+       atomic_sub(nr_gc, &rl->rb_gc_cnt);
+}
+
+/* Return the current value of the rl->free_blocks counter. */
+unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl)
+{
+       return atomic_read(&rl->free_blocks);
+}
+
+/*
+ * Recompute the user and GC write-buffer budgets from the current number of
+ * free blocks and return the resulting rate-limiter state.
+ *
+ * @max is the total buffer budget to split between user I/O and GC.
+ *
+ * With free_blocks >= rl->high, GC is limited to its reservation and user
+ * I/O gets the rest. Below that, the user budget is scaled down with the
+ * number of free blocks and GC gets whatever remains, but never less than
+ * its reservation; the state is MID while free_blocks > rl->low and LOW
+ * otherwise.
+ */
+static int pblk_rl_update_rates(struct pblk_rl *rl, unsigned long max)
+{
+       unsigned long free_blocks = pblk_rl_nr_free_blks(rl);
+
+       if (free_blocks >= rl->high) {
+               rl->rb_user_max = max - rl->rb_gc_rsv;
+               rl->rb_gc_max = rl->rb_gc_rsv;
+               rl->rb_state = PBLK_RL_HIGH;
+       } else {
+               int shift = rl->high_pw - rl->rb_windows_pw;
+               int user_windows = free_blocks >> shift;
+               int gc_max;
+
+               rl->rb_user_max = user_windows << PBLK_MAX_REQ_ADDRS_PW;
+               gc_max = max - rl->rb_user_max;
+               rl->rb_gc_max = max(gc_max, rl->rb_gc_rsv);
+
+               rl->rb_state = (free_blocks > rl->low) ?
+                                       PBLK_RL_MID : PBLK_RL_LOW;
+       }
+
+       return rl->rb_state;
+}
+
+/* Set both the current GC budget and the GC reservation to rsv. */
+void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv)
+{
+       rl->rb_gc_max = rsv;
+       rl->rb_gc_rsv = rl->rb_gc_max;
+}
+
+/*
+ * Account the blocks of a line that became free and refresh the rate limiter.
+ *
+ * Adds line->blk_in_line to the free block count, recomputes the user/GC
+ * budgets and asks GC to start when the limiter ends up in the MID or LOW
+ * state, or to stop otherwise.
+ */
+void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
+{
+       struct pblk *pblk = container_of(rl, struct pblk, rl);
+       int blk_in_line = atomic_read(&line->blk_in_line);
+       int ret;
+
+       atomic_add(blk_in_line, &rl->free_blocks);
+       /* Rates will not change that often - no need to lock update */
+       ret = pblk_rl_update_rates(rl, rl->rb_budget);
+
+       /*
+        * pblk_rl_update_rates() returns a single state, so comparing it
+        * against the OR of two states does not test for "MID or LOW";
+        * check each state explicitly.
+        */
+       if (ret == PBLK_RL_MID || ret == PBLK_RL_LOW)
+               pblk_gc_should_start(pblk);
+       else
+               pblk_gc_should_stop(pblk);
+}
+
+/*
+ * Account the blocks of a line that stopped being free and refresh the rate
+ * limiter.
+ *
+ * Subtracts line->blk_in_line from the free block count, recomputes the
+ * user/GC budgets and asks GC to start when the limiter ends up in the MID
+ * or LOW state, or to stop otherwise.
+ */
+void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
+{
+       struct pblk *pblk = container_of(rl, struct pblk, rl);
+       int blk_in_line = atomic_read(&line->blk_in_line);
+       int ret;
+
+       atomic_sub(blk_in_line, &rl->free_blocks);
+
+       /* Rates will not change that often - no need to lock update */
+       ret = pblk_rl_update_rates(rl, rl->rb_budget);
+
+       /*
+        * pblk_rl_update_rates() returns a single state, so comparing it
+        * against the OR of two states does not test for "MID or LOW";
+        * check each state explicitly.
+        */
+       if (ret == PBLK_RL_MID || ret == PBLK_RL_LOW)
+               pblk_gc_should_start(pblk);
+       else
+               pblk_gc_should_stop(pblk);
+}
+
+/*
+ * Return rl->high, the free-block count at or above which
+ * pblk_rl_update_rates() selects the PBLK_RL_HIGH state.
+ */
+int pblk_rl_gc_thrs(struct pblk_rl *rl)
+{
+       return rl->high;
+}
+
+/* Return the current user I/O budget, rl->rb_user_max. */
+int pblk_rl_sysfs_rate_show(struct pblk_rl *rl)
+{
+       return rl->rb_user_max;
+}
+
+/*
+ * Timer callback for rl->u_timer; data is the struct pblk_rl pointer that
+ * was registered with setup_timer(). Clears the user-activity flag, which
+ * pblk_rl_gc_may_insert() reads to decide whether GC is capped.
+ */
+static void pblk_rl_u_timer(unsigned long data)
+{
+       struct pblk_rl *rl = (struct pblk_rl *)data;
+
+       /* Release user I/O state. Protect from GC */
+       smp_store_release(&rl->rb_user_active, 0);
+}
+
+/*
+ * Tear down the rate limiter's user-activity timer.
+ *
+ * del_timer_sync() is used instead of del_timer() so that a handler that is
+ * already running on another CPU has finished before the caller goes on to
+ * release the memory holding rl. The handler takes no locks, so waiting for
+ * it cannot deadlock; the caller must be in process context.
+ */
+void pblk_rl_free(struct pblk_rl *rl)
+{
+       del_timer_sync(&rl->u_timer);
+}
+
+/*
+ * Initialise the rate limiter for a write buffer of @budget entries.
+ *
+ * Reads rl->total_blocks, which is not set here, to derive the high and low
+ * free-block thresholds. The whole budget starts out assigned to user I/O,
+ * the state is PBLK_RL_HIGH, both entry counters are zero and the
+ * user-activity timer is set up but not armed.
+ */
+void pblk_rl_init(struct pblk_rl *rl, int budget)
+{
+       /* Expected to be a power of two (per the original author's note) */
+       unsigned int rb_windows = budget / PBLK_MAX_REQ_ADDRS;
+
+       /* Free-block thresholds */
+       rl->high = rl->total_blocks / PBLK_USER_HIGH_THRS;
+       rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
+       rl->high_pw = get_count_order(rl->high);
+       rl->rb_windows_pw = get_count_order(rb_windows) + 1;
+
+       /* To start with, all buffer is available to user I/O writers */
+       rl->rb_budget = budget;
+       rl->rb_user_max = budget;
+       atomic_set(&rl->rb_user_cnt, 0);
+       rl->rb_gc_max = 0;
+       rl->rb_state = PBLK_RL_HIGH;
+       atomic_set(&rl->rb_gc_cnt, 0);
+
+       /* User I/O is considered idle until the first pblk_rl_user_in() */
+       setup_timer(&rl->u_timer, pblk_rl_u_timer, (unsigned long)rl);
+       rl->rb_user_active = 0;
+}
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c

new file mode 100644 (file)

index 0000000..f0af1d1
--- /dev/null
+++ b/drivers/lightnvm/pblk-sysfs.c
@@ -0,0 +1,507 @@
+/*
+ * Copyright (C) 2016 CNEX Labs
+ * Initial release: Javier Gonzalez <javier@cnexlabs.com>
+ *                  Matias Bjorling <matias@cnexlabs.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * Implementation of a physical block-device target for Open-channel SSDs.
+ *
+ * pblk-sysfs.c - pblk's sysfs
+ *
+ */
+
+#include "pblk.h"
+
+/*
+ * Show one line per LUN: position, channel, LUN id and an activity flag.
+ *
+ * A LUN is reported as active (1) when its write semaphore cannot be taken
+ * without blocking; when the semaphore is free it is released again at once
+ * and the LUN is reported as idle (0).
+ *
+ * scnprintf() keeps sz equal to the bytes actually stored in page.
+ * snprintf() returns the untruncated length instead, so with enough LUNs sz
+ * would grow past PAGE_SIZE, "PAGE_SIZE - sz" would wrap and the returned
+ * length would exceed the buffer.
+ *
+ * Returns the number of bytes written to page.
+ */
+static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct pblk_lun *rlun;
+       ssize_t sz = 0;
+       int i;
+
+       for (i = 0; i < geo->nr_luns; i++) {
+               int active = 1;
+
+               rlun = &pblk->luns[i];
+               /* down_trylock() returns 0 when the semaphore was acquired */
+               if (!down_trylock(&rlun->wr_sem)) {
+                       active = 0;
+                       up(&rlun->wr_sem);
+               }
+               sz += scnprintf(page + sz, PAGE_SIZE - sz,
+                               "pblk: pos:%d, ch:%d, lun:%d - %d\n",
+                                       i,
+                                       rlun->bppa.g.ch,
+                                       rlun->bppa.g.lun,
+                                       active);
+       }
+
+       return sz;
+}
+
+/*
+ * Show the rate limiter state on a single line:
+ *
+ *   u:<user cnt>/<user max>,gc:<gc cnt>/<gc max>/<gc rsv>(<state>/<budget>)
+ *   (stop:<<low>,full:><high>,free:<free blocks>/<total blocks>)-<user active>
+ *
+ * total blocks is computed as blks_per_lun * nr_luns from the device geometry.
+ */
+static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page)
+{
+       struct nvm_geo *geo = &pblk->dev->geo;
+       struct pblk_rl *rl = &pblk->rl;
+       int free_blocks = atomic_read(&rl->free_blocks);
+       int rb_user_max = rl->rb_user_max;
+       int rb_user_cnt = atomic_read(&rl->rb_user_cnt);
+       int rb_gc_max = rl->rb_gc_max;
+       int rb_gc_rsv = rl->rb_gc_rsv;
+       int rb_gc_cnt = atomic_read(&rl->rb_gc_cnt);
+       int rb_budget = rl->rb_budget;
+       int rb_state = rl->rb_state;
+       int total_blocks = geo->blks_per_lun * geo->nr_luns;
+
+       return snprintf(page, PAGE_SIZE,
+               "u:%u/%u,gc:%u/%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
+                               rb_user_cnt, rb_user_max,
+                               rb_gc_cnt, rb_gc_max, rb_gc_rsv,
+                               rb_state, rb_budget,
+                               pblk->rl.low, pblk->rl.high,
+                               free_blocks, total_blocks,
+                               READ_ONCE(pblk->rl.rb_user_active));
+}
+
+/*
+ * Show the two GC flags reported by pblk_gc_sysfs_state_show() as
+ * "gc_enabled=<n>, gc_active=<n>".
+ */
+static ssize_t pblk_sysfs_gc_state_show(struct pblk *pblk, char *page)
+{
+       int enabled, active;
+       ssize_t len;
+
+       pblk_gc_sysfs_state_show(pblk, &enabled, &active);
+       len = snprintf(page, PAGE_SIZE, "gc_enabled=%d, gc_active=%d\n",
+                                       enabled, active);
+
+       return len;
+}
+
+/*
+ * Show the error counters kept in struct pblk (failed/high-ECC/empty reads,
+ * failed GC reads, failed writes and failed erases) as name=value pairs on
+ * one line.
+ */
+static ssize_t pblk_sysfs_stats(struct pblk *pblk, char *page)
+{
+       return snprintf(page, PAGE_SIZE,
+                       "read_failed=%lu, read_high_ecc=%lu, read_empty=%lu, read_failed_gc=%lu, write_failed=%lu, erase_failed=%lu\n",
+                       atomic_long_read(&pblk->read_failed),
+                       atomic_long_read(&pblk->read_high_ecc),
+                       atomic_long_read(&pblk->read_empty),
+                       atomic_long_read(&pblk->read_failed_gc),
+                       atomic_long_read(&pblk->write_failed),
+                       atomic_long_read(&pblk->erase_failed));
+}
+
+/* Show the write buffer state; formatting is delegated to pblk_rb_sysfs(). */
+static ssize_t pblk_sysfs_write_buffer(struct pblk *pblk, char *page)
+{
+       return pblk_rb_sysfs(&pblk->rwb, page);
+}
+
+/*
+ * Show the physical page address format as two lines of offset/length
+ * pairs: "g:" combines pblk's own offsets (pblk->ppaf) with the device
+ * field lengths and "d:" shows the device geometry's offsets and lengths
+ * (geo->ppaf).
+ *
+ * scnprintf() is used for the accumulated length because it returns the
+ * bytes actually stored, so sz can never exceed PAGE_SIZE.
+ *
+ * Returns the number of bytes written to page.
+ */
+static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       ssize_t sz;
+
+       sz = scnprintf(page, PAGE_SIZE,
+               "g:(b:%d)blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
+               pblk->ppaf_bitsize,
+               pblk->ppaf.blk_offset, geo->ppaf.blk_len,
+               pblk->ppaf.pg_offset, geo->ppaf.pg_len,
+               pblk->ppaf.lun_offset, geo->ppaf.lun_len,
+               pblk->ppaf.ch_offset, geo->ppaf.ch_len,
+               pblk->ppaf.pln_offset, geo->ppaf.pln_len,
+               pblk->ppaf.sec_offset, geo->ppaf.sect_len);
+
+       sz += scnprintf(page + sz, PAGE_SIZE - sz,
+               "d:blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
+               geo->ppaf.blk_offset, geo->ppaf.blk_len,
+               geo->ppaf.pg_offset, geo->ppaf.pg_len,
+               geo->ppaf.lun_offset, geo->ppaf.lun_len,
+               geo->ppaf.ch_offset, geo->ppaf.ch_len,
+               geo->ppaf.pln_offset, geo->ppaf.pln_len,
+               geo->ppaf.sect_offset, geo->ppaf.sect_len);
+
+       return sz;
+}
+
+/*
+ * Count the lines linked on @head and add the data/log lines among them to
+ * *@d_cnt and *@l_cnt. The caller holds the lock that protects @head.
+ */
+static int pblk_sysfs_count_lines(struct list_head *head, int *d_cnt,
+                                 int *l_cnt)
+{
+       struct pblk_line *line;
+       int cnt = 0;
+
+       list_for_each_entry(line, head, list) {
+               if (line->type == PBLK_LINETYPE_DATA)
+                       (*d_cnt)++;
+               else if (line->type == PBLK_LINETYPE_LOG)
+                       (*l_cnt)++;
+               cnt++;
+       }
+
+       return cnt;
+}
+
+/*
+ * Show a four-line summary of line management state: line geometry, line
+ * counts per list, GC list occupancy and details of the current data line.
+ *
+ * Everything read from the free list and from the current data line is
+ * sampled in one free_lock section; the free list was previously walked a
+ * second time while holding only gc_lock. The GC, bad and corrupt lists are
+ * walked under gc_lock. Logs an error when the number of entries on the
+ * free list differs from l_mg->nr_free_lines.
+ *
+ * scnprintf() is used so that sz never exceeds the bytes stored in page.
+ *
+ * Returns the number of bytes written to page.
+ */
+static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct pblk_line_meta *lm = &pblk->lm;
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       struct pblk_line *line;
+       ssize_t sz;
+       int nr_free_lines;
+       int cur_data, cur_log;
+       int free_line_cnt = 0, closed_line_cnt;
+       int d_line_cnt = 0, l_line_cnt = 0;
+       int gc_full, gc_high, gc_mid, gc_low, gc_empty;
+       int bad = 0, cor = 0;
+       int msecs = 0, ssecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
+       int map_weight = 0, meta_weight = 0;
+
+       spin_lock(&l_mg->free_lock);
+       cur_data = (l_mg->data_line) ? l_mg->data_line->id : -1;
+       cur_log = (l_mg->log_line) ? l_mg->log_line->id : -1;
+       nr_free_lines = l_mg->nr_free_lines;
+
+       list_for_each_entry(line, &l_mg->free_list, list)
+               free_line_cnt++;
+
+       if (l_mg->data_line) {
+               cur_sec = l_mg->data_line->cur_sec;
+               msecs = l_mg->data_line->left_msecs;
+               ssecs = l_mg->data_line->left_ssecs;
+               vsc = l_mg->data_line->vsc;
+               sec_in_line = l_mg->data_line->sec_in_line;
+               meta_weight = bitmap_weight(&l_mg->meta_bitmap,
+                                                       PBLK_DATA_LINES);
+               map_weight = bitmap_weight(l_mg->data_line->map_bitmap,
+                                                       lm->sec_per_line);
+       }
+       spin_unlock(&l_mg->free_lock);
+
+       spin_lock(&l_mg->gc_lock);
+       gc_full = pblk_sysfs_count_lines(&l_mg->gc_full_list,
+                                               &d_line_cnt, &l_line_cnt);
+       gc_high = pblk_sysfs_count_lines(&l_mg->gc_high_list,
+                                               &d_line_cnt, &l_line_cnt);
+       gc_mid = pblk_sysfs_count_lines(&l_mg->gc_mid_list,
+                                               &d_line_cnt, &l_line_cnt);
+       gc_low = pblk_sysfs_count_lines(&l_mg->gc_low_list,
+                                               &d_line_cnt, &l_line_cnt);
+       gc_empty = pblk_sysfs_count_lines(&l_mg->gc_empty_list,
+                                               &d_line_cnt, &l_line_cnt);
+
+       list_for_each_entry(line, &l_mg->bad_list, list)
+               bad++;
+       list_for_each_entry(line, &l_mg->corrupt_list, list)
+               cor++;
+       spin_unlock(&l_mg->gc_lock);
+
+       /* Every line found on one of the GC lists counts as closed */
+       closed_line_cnt = gc_full + gc_high + gc_mid + gc_low + gc_empty;
+
+       if (nr_free_lines != free_line_cnt)
+               pr_err("pblk: corrupted free line list\n");
+
+       sz = scnprintf(page, PAGE_SIZE,
+               "line: nluns:%d, nblks:%d, nsecs:%d\n",
+               geo->nr_luns, lm->blk_per_line, lm->sec_per_line);
+
+       sz += scnprintf(page + sz, PAGE_SIZE - sz,
+               "lines:d:%d,l:%d-f:%d(%d),b:%d,co:%d,c:%d(d:%d,l:%d)t:%d\n",
+                                       cur_data, cur_log,
+                                       free_line_cnt, nr_free_lines, bad, cor,
+                                       closed_line_cnt,
+                                       d_line_cnt, l_line_cnt,
+                                       l_mg->nr_lines);
+
+       sz += scnprintf(page + sz, PAGE_SIZE - sz,
+               "GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, queue:%d\n",
+                       gc_full, gc_high, gc_mid, gc_low, gc_empty,
+                       atomic_read(&pblk->gc.inflight_gc));
+
+       sz += scnprintf(page + sz, PAGE_SIZE - sz,
+               "data (%d) cur:%d, left:%d/%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
+                       cur_data, cur_sec, msecs, ssecs, vsc, sec_in_line,
+                       map_weight, lm->sec_per_line, meta_weight);
+
+       return sz;
+}
+
+/*
+ * Show the static line layout taken from pblk->lm and the device geometry:
+ * smeta/emeta sizes, bitmap lengths and blocks/sectors per line and block.
+ *
+ * scnprintf() is used for the accumulated length because it returns the
+ * bytes actually stored, so sz can never exceed PAGE_SIZE.
+ *
+ * Returns the number of bytes written to page.
+ */
+static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct pblk_line_meta *lm = &pblk->lm;
+       ssize_t sz;
+
+       sz = scnprintf(page, PAGE_SIZE,
+                               "smeta - len:%d, secs:%d\n",
+                                       lm->smeta_len, lm->smeta_sec);
+       sz += scnprintf(page + sz, PAGE_SIZE - sz,
+                               "emeta - len:%d, sec:%d, bb_start:%d\n",
+                                       lm->emeta_len, lm->emeta_sec,
+                                       lm->emeta_bb);
+       sz += scnprintf(page + sz, PAGE_SIZE - sz,
+                               "bitmap lengths: sec:%d, blk:%d, lun:%d\n",
+                                       lm->sec_bitmap_len,
+                                       lm->blk_bitmap_len,
+                                       lm->lun_bitmap_len);
+       sz += scnprintf(page + sz, PAGE_SIZE - sz,
+                               "blk_line:%d, sec_line:%d, sec_blk:%d\n",
+                                       lm->blk_per_line,
+                                       lm->sec_per_line,
+                                       geo->sec_per_blk);
+
+       return sz;
+}
+
+#ifdef CONFIG_NVM_DEBUG
+/*
+ * Show the debug counters as one tab-separated line, in this order:
+ * inflight_writes, inflight_reads, req_writes, nr_flush, padded_writes,
+ * padded_wb, sub_writes, sync_writes, compl_writes, recov_writes,
+ * recov_gc_writes, recov_gc_reads, sync_reads.
+ */
+static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
+{
+       return snprintf(page, PAGE_SIZE,
+               "%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n",
+                       atomic_long_read(&pblk->inflight_writes),
+                       atomic_long_read(&pblk->inflight_reads),
+                       atomic_long_read(&pblk->req_writes),
+                       atomic_long_read(&pblk->nr_flush),
+                       atomic_long_read(&pblk->padded_writes),
+                       atomic_long_read(&pblk->padded_wb),
+                       atomic_long_read(&pblk->sub_writes),
+                       atomic_long_read(&pblk->sync_writes),
+                       atomic_long_read(&pblk->compl_writes),
+                       atomic_long_read(&pblk->recov_writes),
+                       atomic_long_read(&pblk->recov_gc_writes),
+                       atomic_long_read(&pblk->recov_gc_reads),
+                       atomic_long_read(&pblk->sync_reads));
+}
+#endif
+
+/*
+ * Store handler for "gc_rl_max": parse a non-negative integer and install
+ * it as the GC reservation through pblk_rl_set_gc_rsc(), under gc->lock.
+ *
+ * The input must contain a newline within its first len bytes. The value is
+ * parsed with kstrtoint() because both the local variable and the setter
+ * take an int; kstrtouint() was being handed an int pointer, which let
+ * values above INT_MAX through as negative numbers.
+ *
+ * Returns len on success or -EINVAL on malformed or negative input.
+ */
+static ssize_t pblk_sysfs_rate_store(struct pblk *pblk, const char *page,
+                                    size_t len)
+{
+       struct pblk_gc *gc = &pblk->gc;
+       size_t c_len;
+       int value;
+
+       c_len = strcspn(page, "\n");
+       if (c_len >= len)
+               return -EINVAL;
+
+       if (kstrtoint(page, 0, &value) || value < 0)
+               return -EINVAL;
+
+       spin_lock(&gc->lock);
+       pblk_rl_set_gc_rsc(&pblk->rl, value);
+       spin_unlock(&gc->lock);
+
+       return len;
+}
+
+/*
+ * Store handler for "gc_force": accept 0 or 1 and pass it on to
+ * pblk_gc_sysfs_force().
+ *
+ * The input must contain a newline within its first len bytes. The value is
+ * parsed with kstrtoint() to match the int it is stored in; kstrtouint()
+ * was being handed an int pointer.
+ *
+ * Returns len on success or -EINVAL on malformed or out-of-range input.
+ */
+static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
+                                  size_t len)
+{
+       size_t c_len;
+       int force;
+
+       c_len = strcspn(page, "\n");
+       if (c_len >= len)
+               return -EINVAL;
+
+       if (kstrtoint(page, 0, &force))
+               return -EINVAL;
+
+       if (force < 0 || force > 1)
+               return -EINVAL;
+
+       pblk_gc_sysfs_force(pblk, force);
+
+       return len;
+}
+
+/*
+ * sysfs attributes exposed by pblk. The .name of each attribute is the key
+ * that pblk_sysfs_show() and pblk_sysfs_store() dispatch on. Attributes
+ * with mode 0444 are read-only, those with mode 0200 are write-only.
+ */
+static struct attribute sys_write_luns = {
+       .name = "write_luns",
+       .mode = 0444,
+};
+
+static struct attribute sys_rate_limiter_attr = {
+       .name = "rate_limiter",
+       .mode = 0444,
+};
+
+static struct attribute sys_gc_state = {
+       .name = "gc_state",
+       .mode = 0444,
+};
+
+static struct attribute sys_errors_attr = {
+       .name = "errors",
+       .mode = 0444,
+};
+
+static struct attribute sys_rb_attr = {
+       .name = "write_buffer",
+       .mode = 0444,
+};
+
+static struct attribute sys_stats_ppaf_attr = {
+       .name = "ppa_format",
+       .mode = 0444,
+};
+
+static struct attribute sys_lines_attr = {
+       .name = "lines",
+       .mode = 0444,
+};
+
+static struct attribute sys_lines_info_attr = {
+       .name = "lines_info",
+       .mode = 0444,
+};
+
+/* Write-only: handled by pblk_sysfs_gc_force() */
+static struct attribute sys_gc_force = {
+       .name = "gc_force",
+       .mode = 0200,
+};
+
+/* Write-only: handled by pblk_sysfs_rate_store() */
+static struct attribute sys_gc_rl_max = {
+       .name = "gc_rl_max",
+       .mode = 0200,
+};
+
+#ifdef CONFIG_NVM_DEBUG
+static struct attribute sys_stats_debug_attr = {
+       .name = "stats",
+       .mode = 0444,
+};
+#endif
+
+/* NULL-terminated list installed as the ktype's default attributes */
+static struct attribute *pblk_attrs[] = {
+       &sys_write_luns,
+       &sys_rate_limiter_attr,
+       &sys_errors_attr,
+       &sys_gc_state,
+       &sys_gc_force,
+       &sys_gc_rl_max,
+       &sys_rb_attr,
+       &sys_stats_ppaf_attr,
+       &sys_lines_attr,
+       &sys_lines_info_attr,
+#ifdef CONFIG_NVM_DEBUG
+       &sys_stats_debug_attr,
+#endif
+       NULL,
+};
+
+/*
+ * sysfs show entry point: recover the pblk instance from its embedded
+ * kobject and dispatch on the attribute name. Returns whatever the selected
+ * handler returns, or 0 when the name matches no readable attribute.
+ */
+static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr,
+                              char *buf)
+{
+       struct pblk *pblk = container_of(kobj, struct pblk, kobj);
+       const char *name = attr->name;
+
+       if (!strcmp(name, "rate_limiter"))
+               return pblk_sysfs_rate_limiter(pblk, buf);
+       if (!strcmp(name, "write_luns"))
+               return pblk_sysfs_luns_show(pblk, buf);
+       if (!strcmp(name, "gc_state"))
+               return pblk_sysfs_gc_state_show(pblk, buf);
+       if (!strcmp(name, "errors"))
+               return pblk_sysfs_stats(pblk, buf);
+       if (!strcmp(name, "write_buffer"))
+               return pblk_sysfs_write_buffer(pblk, buf);
+       if (!strcmp(name, "ppa_format"))
+               return pblk_sysfs_ppaf(pblk, buf);
+       if (!strcmp(name, "lines"))
+               return pblk_sysfs_lines(pblk, buf);
+       if (!strcmp(name, "lines_info"))
+               return pblk_sysfs_lines_info(pblk, buf);
+#ifdef CONFIG_NVM_DEBUG
+       if (!strcmp(name, "stats"))
+               return pblk_sysfs_stats_debug(pblk, buf);
+#endif
+
+       return 0;
+}
+
+/*
+ * sysfs store entry point: recover the pblk instance from its embedded
+ * kobject and dispatch on the attribute name. Returns whatever the selected
+ * handler returns, or 0 when the name matches no writable attribute.
+ */
+static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
+                               const char *buf, size_t len)
+{
+       struct pblk *pblk = container_of(kobj, struct pblk, kobj);
+       const char *name = attr->name;
+
+       if (!strcmp(name, "gc_rl_max"))
+               return pblk_sysfs_rate_store(pblk, buf, len);
+       if (!strcmp(name, "gc_force"))
+               return pblk_sysfs_gc_force(pblk, buf, len);
+
+       return 0;
+}
+
+/* Route all attribute reads and writes through the two dispatchers above */
+static const struct sysfs_ops pblk_sysfs_ops = {
+       .show = pblk_sysfs_show,
+       .store = pblk_sysfs_store,
+};
+
+/*
+ * kobject type for pblk->kobj.
+ * NOTE(review): no .release callback is set here - confirm this is intended
+ * for a kobject embedded in struct pblk.
+ */
+static struct kobj_type pblk_ktype = {
+       .sysfs_ops      = &pblk_sysfs_ops,
+       .default_attrs  = pblk_attrs,
+};
+
+/*
+ * Register pblk's kobject as "pblk" underneath the kobject of the disk's
+ * device and announce it with a KOBJ_ADD uevent.
+ *
+ * Returns 0 on success or the error from kobject_init_and_add().
+ */
+int pblk_sysfs_init(struct gendisk *tdisk)
+{
+       struct pblk *pblk = tdisk->private_data;
+       struct device *parent_dev = disk_to_dev(pblk->disk);
+       int ret;
+
+       /* NOTE(review): an extra reference is taken on the parent here and
+        * nothing visible in this file drops it - confirm against
+        * kobject_add()'s own parent refcounting.
+        */
+       ret = kobject_init_and_add(&pblk->kobj, &pblk_ktype,
+                                       kobject_get(&parent_dev->kobj),
+                                       "%s", "pblk");
+       if (ret) {
+               /* NOTE(review): the kobject documentation asks for a
+                * kobject_put() after a failed kobject_init_and_add() -
+                * verify whether one is needed here.
+                */
+               pr_err("pblk: could not register %s/pblk\n",
+                                               tdisk->disk_name);
+               return ret;
+       }
+
+       kobject_uevent(&pblk->kobj, KOBJ_ADD);
+       return 0;
+}
+
+/*
+ * Undo pblk_sysfs_init(): send a KOBJ_REMOVE uevent, remove the kobject
+ * from sysfs and drop the reference held on it.
+ */
+void pblk_sysfs_exit(struct gendisk *tdisk)
+{
+       struct pblk *pblk = tdisk->private_data;
+
+       kobject_uevent(&pblk->kobj, KOBJ_REMOVE);
+       kobject_del(&pblk->kobj);
+       kobject_put(&pblk->kobj);
+}
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c

new file mode 100644 (file)

index 0000000..aef6fd7
--- /dev/null
+++ b/drivers/lightnvm/pblk-write.c
@@ -0,0 +1,414 @@
+/*
+ * Copyright (C) 2016 CNEX Labs
+ * Initial release: Javier Gonzalez <javier@cnexlabs.com>
+ *                  Matias Bjorling <matias@cnexlabs.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * pblk-write.c - pblk's write path from write buffer to media
+ */
+
+#include "pblk.h"
+
+/*
+ * Account one synced sector on @line. When the last outstanding sector has
+ * been synced (left_ssecs reaches zero) the line close work is scheduled
+ * through pblk_line_run_ws().
+ */
+static void pblk_sync_line(struct pblk *pblk, struct pblk_line *line)
+{
+#ifdef CONFIG_NVM_DEBUG
+       atomic_long_inc(&pblk->sync_writes);
+#endif
+
+       /* Counter protected by rb sync lock */
+       if (--line->left_ssecs)
+               return;
+
+       pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
+}
+
+/*
+ * Finish a completed write request.
+ *
+ * For each of the c_ctx->nr_valid entries: account the synced sector on the
+ * line its ppa belongs to and end every bio attached to the entry's write
+ * context. Then advance the write buffer's sync pointer by nr_valid and
+ * release the request's metadata buffer, its bio and the request itself.
+ *
+ * NOTE(review): rqd->ppa_list is indexed unconditionally, while
+ * pblk_alloc_w_rq() does not set it for single-sector requests - confirm
+ * such requests cannot reach this point.
+ *
+ * Returns the value returned by pblk_rb_sync_advance().
+ */
+static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
+                                   struct pblk_c_ctx *c_ctx)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       unsigned long new_pos;
+       int i;
+
+       for (i = 0; i < c_ctx->nr_valid; i++) {
+               struct pblk_w_ctx *w_ctx;
+               struct ppa_addr ppa;
+               struct bio *bio;
+
+               w_ctx = pblk_rb_w_ctx(&pblk->rwb, c_ctx->sentry + i);
+
+               ppa = rqd->ppa_list[i];
+               pblk_sync_line(pblk, &pblk->lines[pblk_dev_ppa_to_line(ppa)]);
+
+               for (bio = bio_list_pop(&w_ctx->bios); bio;
+                                       bio = bio_list_pop(&w_ctx->bios))
+                       bio_endio(bio);
+       }
+
+#ifdef CONFIG_NVM_DEBUG
+       atomic_long_add(c_ctx->nr_valid, &pblk->compl_writes);
+#endif
+
+       new_pos = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid);
+
+       if (rqd->meta_list)
+               nvm_dev_dma_free(dev->parent, rqd->meta_list,
+                                                       rqd->dma_meta_list);
+
+       bio_put(rqd->bio);
+       pblk_free_rqd(pblk, rqd, WRITE);
+
+       return new_pos;
+}
+
+/*
+ * Finish a write request that had been parked on the completion list:
+ * unlink its context from the list, then complete it like any other write.
+ * Returns the value returned by pblk_end_w_bio().
+ */
+static unsigned long pblk_end_queued_w_bio(struct pblk *pblk,
+                                          struct nvm_rq *rqd,
+                                          struct pblk_c_ctx *c_ctx)
+{
+       list_del(&c_ctx->list);
+       return pblk_end_w_bio(pblk, rqd, c_ctx);
+}
+
+/*
+ * Complete a write request in write-buffer order.
+ *
+ * If the request starts at the position returned by pblk_rb_sync_init()
+ * (presumably the buffer's current sync position - confirm) it is finished
+ * right away, and pblk->compl_list is then drained of every parked request
+ * that starts at the position each completion returns. Otherwise the
+ * request is parked on pblk->compl_list until its turn comes.
+ */
+static void pblk_complete_write(struct pblk *pblk, struct nvm_rq *rqd,
+                               struct pblk_c_ctx *c_ctx)
+{
+       struct pblk_c_ctx *c, *r;
+       unsigned long flags;
+       unsigned long pos;
+
+#ifdef CONFIG_NVM_DEBUG
+       atomic_long_sub(c_ctx->nr_valid, &pblk->inflight_writes);
+#endif
+
+       pblk_up_rq(pblk, rqd->ppa_list, rqd->nr_ppas, c_ctx->lun_bitmap);
+
+       pos = pblk_rb_sync_init(&pblk->rwb, &flags);
+       if (pos == c_ctx->sentry) {
+               pos = pblk_end_w_bio(pblk, rqd, c_ctx);
+
+               /* Each completion moves pos, so rescan the list from the
+                * start until no parked request begins at the new position.
+                */
+retry:
+               list_for_each_entry_safe(c, r, &pblk->compl_list, list) {
+                       rqd = nvm_rq_from_c_ctx(c);
+                       if (c->sentry == pos) {
+                               pos = pblk_end_queued_w_bio(pblk, rqd, c);
+                               goto retry;
+                       }
+               }
+       } else {
+               /* Out of order: park it until earlier requests complete */
+               WARN_ON(nvm_rq_from_c_ctx(c_ctx) != rqd);
+               list_add_tail(&c_ctx->list, &pblk->compl_list);
+       }
+       pblk_rb_sync_end(&pblk->rwb, &flags);
+}
+
+/* When a write fails, we are not sure whether the block has grown bad or a page
+ * range is more susceptible to write errors. If a high number of pages fail, we
+ * assume that the block is bad and we mark it accordingly. In all cases, we
+ * remap and resubmit the failed entries as fast as possible; if a flush is
+ * waiting on a completion, the whole stack would stall otherwise.
+ *
+ * rqd->ppa_status is scanned as a bitmap of rqd->nr_ppas bits; the write
+ * buffer entry of every set bit is collected on recovery->failed. The
+ * recovery request is then prepared with pblk_recov_setup_rq() and handed
+ * to the pblk->kw_wq workqueue. Except when the recovery context cannot be
+ * allocated, the original request is always completed through
+ * pblk_complete_write().
+ */
+static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
+{
+       void *comp_bits = &rqd->ppa_status;
+       struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+       struct pblk_rec_ctx *recovery;
+       struct ppa_addr *ppa_list = rqd->ppa_list;
+       int nr_ppas = rqd->nr_ppas;
+       unsigned int c_entries;
+       int bit, ret;
+
+       /* A single-sector request carries its address inline */
+       if (unlikely(nr_ppas == 1))
+               ppa_list = &rqd->ppa_addr;
+
+       recovery = mempool_alloc(pblk->rec_pool, GFP_ATOMIC);
+       if (!recovery) {
+               /* NOTE(review): this path returns without calling
+                * pblk_complete_write() - confirm the request is completed
+                * elsewhere.
+                */
+               pr_err("pblk: could not allocate recovery context\n");
+               return;
+       }
+       INIT_LIST_HEAD(&recovery->failed);
+
+       bit = -1;
+       while ((bit = find_next_bit(comp_bits, nr_ppas, bit + 1)) < nr_ppas) {
+               struct pblk_rb_entry *entry;
+               struct ppa_addr ppa;
+
+               /* Logic error */
+               /* NOTE(review): bit == nr_valid passes this check - confirm
+                * whether ">=" was intended.
+                */
+               if (bit > c_ctx->nr_valid) {
+                       WARN_ONCE(1, "pblk: corrupted write request\n");
+                       mempool_free(recovery, pblk->rec_pool);
+                       goto out;
+               }
+
+               ppa = ppa_list[bit];
+               entry = pblk_rb_sync_scan_entry(&pblk->rwb, &ppa);
+               if (!entry) {
+                       pr_err("pblk: could not scan entry on write failure\n");
+                       mempool_free(recovery, pblk->rec_pool);
+                       goto out;
+               }
+
+               /* The list is filled first and emptied afterwards. No need for
+                * protecting it with a lock
+                */
+               list_add_tail(&entry->index, &recovery->failed);
+       }
+
+       /* Index of the first failed sector in the request */
+       c_entries = find_first_bit(comp_bits, nr_ppas);
+       ret = pblk_recov_setup_rq(pblk, c_ctx, recovery, comp_bits, c_entries);
+       if (ret) {
+               pr_err("pblk: could not recover from write failure\n");
+               mempool_free(recovery, pblk->rec_pool);
+               goto out;
+       }
+
+       INIT_WORK(&recovery->ws_rec, pblk_submit_rec);
+       queue_work(pblk->kw_wq, &recovery->ws_rec);
+
+out:
+       pblk_complete_write(pblk, rqd, c_ctx);
+}
+
+/*
+ * Completion callback installed on write requests (rqd->end_io).
+ *
+ * A request that completed with an error is logged and handed to the
+ * failure path, which takes care of completing it. A successful request is
+ * completed directly; debug builds additionally warn if the bio carries an
+ * error although the request does not.
+ */
+static void pblk_end_io_write(struct nvm_rq *rqd)
+{
+       struct pblk *pblk = rqd->private;
+       struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+
+       if (rqd->error) {
+               pblk_log_write_err(pblk, rqd);
+               pblk_end_w_fail(pblk, rqd);
+               return;
+       }
+
+#ifdef CONFIG_NVM_DEBUG
+       WARN_ONCE(rqd->bio->bi_error, "pblk: corrupted write error\n");
+#endif
+
+       pblk_complete_write(pblk, rqd, c_ctx);
+}
+
+/*
+ * Prepare @rqd as a write request for @nr_secs sectors.
+ *
+ * Fills in opcode, sector count, flags, owner and completion callback and
+ * allocates the DMA metadata buffer. Unless the request is for exactly one
+ * sector, the ppa list is placed inside that same DMA buffer,
+ * pblk_dma_meta_size bytes after its start.
+ *
+ * Returns 0 on success or -ENOMEM if the DMA buffer cannot be allocated.
+ */
+static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
+                          unsigned int nr_secs)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+
+       /* Setup write request */
+       rqd->opcode = NVM_OP_PWRITE;
+       rqd->nr_ppas = nr_secs;
+       rqd->flags = pblk_set_progr_mode(pblk, WRITE);
+       rqd->private = pblk;
+       rqd->end_io = pblk_end_io_write;
+
+       rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+                                                       &rqd->dma_meta_list);
+       if (!rqd->meta_list)
+               return -ENOMEM;
+
+       if (likely(nr_secs != 1)) {
+               rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
+               rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
+       }
+
+       return 0;
+}
+
+/*
+ * Set up a write request for the entries described by @c_ctx.
+ *
+ * Allocates the LUN bitmap (stored in c_ctx->lun_bitmap) and the request's
+ * DMA buffers, then maps the valid entries to physical addresses. When the
+ * next data line still has blocks left to erase, mapping goes through
+ * pblk_map_erase_rq(), which may hand back a block to erase in erase_ppa;
+ * that erase is then submitted asynchronously. If the submission fails, the
+ * erase accounting on the next line is rolled back.
+ *
+ * Both allocation failures return directly. They used to jump to the erase
+ * handling, which tested erase_ppa before it had been initialised.
+ *
+ * Returns 0 on success or a negative errno if an allocation fails; on
+ * failure the LUN bitmap has already been freed.
+ */
+static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
+                          struct pblk_c_ctx *c_ctx)
+{
+       struct pblk_line_meta *lm = &pblk->lm;
+       struct pblk_line *e_line = pblk_line_get_data_next(pblk);
+       struct ppa_addr erase_ppa;
+       unsigned int valid = c_ctx->nr_valid;
+       unsigned int padded = c_ctx->nr_padded;
+       unsigned int nr_secs = valid + padded;
+       unsigned long *lun_bitmap;
+       int ret;
+
+       lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
+       if (!lun_bitmap)
+               return -ENOMEM;
+       c_ctx->lun_bitmap = lun_bitmap;
+
+       ret = pblk_alloc_w_rq(pblk, rqd, nr_secs);
+       if (ret) {
+               kfree(lun_bitmap);
+               return ret;
+       }
+
+       ppa_set_empty(&erase_ppa);
+       if (likely(!e_line || !atomic_read(&e_line->left_eblks)))
+               pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, 0);
+       else
+               pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
+                                                       valid, &erase_ppa);
+
+       if (unlikely(e_line && !ppa_empty(erase_ppa))) {
+               if (pblk_blk_erase_async(pblk, erase_ppa)) {
+                       struct nvm_tgt_dev *dev = pblk->dev;
+                       struct nvm_geo *geo = &dev->geo;
+                       int bit;
+
+                       /* Erase was not submitted: undo its accounting */
+                       atomic_inc(&e_line->left_eblks);
+                       bit = erase_ppa.g.lun * geo->nr_chnls + erase_ppa.g.ch;
+                       WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
+                       up(&pblk->erase_sem);
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Set up a write request used by the recovery path.
+ *
+ * @pblk:      target instance
+ * @rqd:       request to populate; rqd->nr_ppas must already be set
+ * @c_ctx:     completion context; supplies sentry and nr_valid and
+ *             receives the allocated LUN bitmap
+ *
+ * Returns 0 on success or a negative errno. On failure the LUN bitmap
+ * allocated here is released again and c_ctx->lun_bitmap is cleared, so
+ * the caller has nothing extra to free.
+ */
+int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
+                       struct pblk_c_ctx *c_ctx)
+{
+       struct pblk_line_meta *lm = &pblk->lm;
+       unsigned long *lun_bitmap;
+       int ret;
+
+       lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
+       if (!lun_bitmap)
+               return -ENOMEM;
+
+       c_ctx->lun_bitmap = lun_bitmap;
+
+       ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas);
+       if (ret) {
+               /* Same unwinding as pblk_setup_w_rq(): do not leak the
+                * bitmap when the request buffers cannot be allocated.
+                */
+               kfree(lun_bitmap);
+               c_ctx->lun_bitmap = NULL;
+               return ret;
+       }
+
+       pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, c_ctx->nr_valid, 0);
+
+       rqd->ppa_status = (u64)0;
+       rqd->flags = pblk_set_progr_mode(pblk, WRITE);
+
+       return 0;
+}
+
+/*
+ * Thin wrapper around pblk_calc_secs() that returns the number of sectors
+ * to sync for the given available/flush counts. With CONFIG_NVM_DEBUG it
+ * additionally reports results that look inconsistent with the inputs.
+ */
+static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
+                                 unsigned int secs_to_flush)
+{
+       int nr_sync = pblk_calc_secs(pblk, secs_avail, secs_to_flush);
+
+#ifdef CONFIG_NVM_DEBUG
+       /* Suspicious: negative result, nothing to sync although a flush is
+        * pending, or more than available without a flush asking for it.
+        */
+       if (nr_sync < 0 ||
+           (!nr_sync && secs_to_flush) ||
+           (!secs_to_flush && nr_sync > secs_avail))
+               pr_err("pblk: bad sector calculation (a:%d,s:%d,f:%d)\n",
+                               secs_avail, nr_sync, secs_to_flush);
+#endif
+
+       return nr_sync;
+}
+
+/*
+ * Build one write request out of the ring write buffer and submit it.
+ *
+ * Returns 0 when a request has been submitted. Returns 1 when there is
+ * nothing to write yet (empty buffer, or fewer than min_write_pgs entries
+ * with no sync point pending) and also when any step fails; on failure the
+ * bio and the request are released through the labels at the end.
+ */
+static int pblk_submit_write(struct pblk *pblk)
+{
+       struct bio *bio;
+       struct nvm_rq *rqd;
+       struct pblk_c_ctx *c_ctx;
+       unsigned int pgs_read;
+       unsigned int secs_avail, secs_to_sync, secs_to_com;
+       unsigned int secs_to_flush;
+       unsigned long pos;
+       int err;
+
+       /* If there are no sectors in the cache, flushes (bios without data)
+        * will be cleared on the cache threads
+        */
+       secs_avail = pblk_rb_read_count(&pblk->rwb);
+       if (!secs_avail)
+               return 1;
+
+       /* Without a pending sync point, wait until at least min_write_pgs
+        * entries are available.
+        */
+       secs_to_flush = pblk_rb_sync_point_count(&pblk->rwb);
+       if (!secs_to_flush && secs_avail < pblk->min_write_pgs)
+               return 1;
+
+       rqd = pblk_alloc_rqd(pblk, WRITE);
+       if (IS_ERR(rqd)) {
+               pr_err("pblk: cannot allocate write req.\n");
+               return 1;
+       }
+       c_ctx = nvm_rq_to_pdu(rqd);
+
+       bio = bio_alloc(GFP_KERNEL, pblk->max_write_pgs);
+       if (!bio) {
+               pr_err("pblk: cannot allocate write bio\n");
+               goto fail_free_rqd;
+       }
+       bio->bi_iter.bi_sector = 0; /* internal bio */
+       bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+       rqd->bio = bio;
+
+       /* The bio was sized for max_write_pgs, so a larger result cannot be
+        * served by this request.
+        */
+       secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, secs_to_flush);
+       if (secs_to_sync > pblk->max_write_pgs) {
+               pr_err("pblk: bad buffer sync calculation\n");
+               goto fail_put_bio;
+       }
+
+       /* Commit no more entries than are actually buffered; secs_to_sync
+        * may be larger than secs_avail (presumably the difference is made
+        * up by padding, see c_ctx->nr_padded below - TODO confirm).
+        */
+       secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync;
+       pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
+
+       pgs_read = pblk_rb_read_to_bio(&pblk->rwb, bio, c_ctx, pos,
+                                               secs_to_sync, secs_avail);
+       if (!pgs_read) {
+               pr_err("pblk: corrupted write bio\n");
+               goto fail_put_bio;
+       }
+
+       /* Add pages to the bio for the padded sectors, if any */
+       if (c_ctx->nr_padded)
+               if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, c_ctx->nr_padded))
+                       goto fail_put_bio;
+
+       /* Assign lbas to ppas and populate request structure */
+       err = pblk_setup_w_rq(pblk, rqd, c_ctx);
+       if (err) {
+               pr_err("pblk: could not setup write request\n");
+               goto fail_free_bio;
+       }
+
+       /* NOTE(review): if submission fails, the lun bitmap and DMA buffer
+        * set up by pblk_setup_w_rq() are not released by the labels below;
+        * confirm whether pblk_free_rqd() or another path takes care of them.
+        */
+       err = pblk_submit_io(pblk, rqd);
+       if (err) {
+               pr_err("pblk: I/O submission failed: %d\n", err);
+               goto fail_free_bio;
+       }
+
+#ifdef CONFIG_NVM_DEBUG
+       atomic_long_add(secs_to_sync, &pblk->sub_writes);
+#endif
+
+       return 0;
+
+       /* Unwind in reverse order of acquisition: padding pages, bio, rqd */
+fail_free_bio:
+       if (c_ctx->nr_padded)
+               pblk_bio_free_pages(pblk, bio, secs_to_sync, c_ctx->nr_padded);
+fail_put_bio:
+       bio_put(bio);
+fail_free_rqd:
+       pblk_free_rqd(pblk, rqd, WRITE);
+
+       return 1;
+}
+
+/*
+ * Write thread main loop.
+ *
+ * @data:      the struct pblk instance this thread serves
+ *
+ * Keeps submitting writes for as long as pblk_submit_write() succeeds
+ * (returns 0). Whenever it reports that nothing was submitted, the thread
+ * marks itself TASK_INTERRUPTIBLE and yields through io_schedule(). The
+ * loop ends once kthread_should_stop() is true. Always returns 0.
+ */
+int pblk_write_ts(void *data)
+{
+       struct pblk *pblk = data;
+
+       while (!kthread_should_stop()) {
+               if (pblk_submit_write(pblk)) {
+                       /* Nothing submitted: sleep until woken up */
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       io_schedule();
+               }
+       }
+
+       return 0;
+}
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h

new file mode 100644 (file)

index 0000000..99f3186
--- /dev/null
+++ b/drivers/lightnvm/pblk.h
@@ -0,0 +1,1121 @@
+/*
+ * Copyright (C) 2015 IT University of Copenhagen (rrpc.h)
+ * Copyright (C) 2016 CNEX Labs
+ * Initial release: Matias Bjorling <matias@cnexlabs.com>
+ * Write buffering: Javier Gonzalez <javier@cnexlabs.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * Implementation of a Physical Block-device target for Open-channel SSDs.
+ *
+ */
+
+#ifndef PBLK_H_
+#define PBLK_H_
+
+#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
+#include <linux/bio.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/vmalloc.h>
+#include <linux/crc32.h>
+#include <linux/uuid.h>
+
+#include <linux/lightnvm.h>
+
+/* Run only GC if less than 1/X blocks are free */
+#define GC_LIMIT_INVERSE 5
+#define GC_TIME_MSECS 1000
+
+#define PBLK_SECTOR (512)
+#define PBLK_EXPOSED_PAGE_SIZE (4096)
+#define PBLK_MAX_REQ_ADDRS (64)
+#define PBLK_MAX_REQ_ADDRS_PW (6)
+
+#define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16)
+
+#define PBLK_COMMAND_TIMEOUT_MS 30000
+
+/* Max 512 LUNs per device */
+#define PBLK_MAX_LUNS_BITMAP (4)
+
+#define NR_PHY_IN_LOG (PBLK_EXPOSED_PAGE_SIZE / PBLK_SECTOR)
+
+/* Iterate over all LUNs of @pblk: @i counts from 0 to nr_luns - 1 and @rlun
+ * points at (pblk)->luns[i] inside the loop body. After the loop @rlun holds
+ * the address one past the last element and must not be dereferenced.
+ */
+#define pblk_for_each_lun(pblk, rlun, i) \
+               for ((i) = 0, rlun = &(pblk)->luns[0]; \
+                       (i) < (pblk)->nr_luns; (i)++, rlun = &(pblk)->luns[(i)])
+
+#define ERASE 2 /* READ = 0, WRITE = 1 */
+
+enum {
+       /* IO Types */
+       PBLK_IOTYPE_USER        = 1 << 0,
+       PBLK_IOTYPE_GC          = 1 << 1,
+
+       /* Write buffer flags */
+       PBLK_FLUSH_ENTRY        = 1 << 2,
+       PBLK_WRITTEN_DATA       = 1 << 3,
+       PBLK_SUBMITTED_ENTRY    = 1 << 4,
+       PBLK_WRITABLE_ENTRY     = 1 << 5,
+};
+
+enum {
+       PBLK_BLK_ST_OPEN =      0x1,
+       PBLK_BLK_ST_CLOSED =    0x2,
+};
+
+/* The number of GC lists and the rate-limiter states go together. This way the
+ * rate-limiter can dictate how much GC is needed based on resource utilization.
+ */
+#define PBLK_NR_GC_LISTS 3
+#define PBLK_MAX_GC_JOBS 32
+
+enum {
+       PBLK_RL_HIGH = 1,
+       PBLK_RL_MID = 2,
+       PBLK_RL_LOW = 3,
+};
+
+struct pblk_sec_meta {
+       u64 reserved;
+       __le64 lba;
+};
+
+#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS)
+
+/* write completion context */
+struct pblk_c_ctx {
+       struct list_head list;          /* Head for out-of-order completion */
+
+       unsigned long *lun_bitmap;      /* Luns used on current request */
+       unsigned int sentry;
+       unsigned int nr_valid;
+       unsigned int nr_padded;
+};
+
+/* Read context */
+struct pblk_r_ctx {
+       struct bio *orig_bio;
+};
+
+/* Recovery context */
+struct pblk_rec_ctx {
+       struct pblk *pblk;
+       struct nvm_rq *rqd;
+       struct list_head failed;
+       struct work_struct ws_rec;
+};
+
+/* Write context */
+struct pblk_w_ctx {
+       struct bio_list bios;           /* Original bios - used for completion
+                                        * in REQ_FUA, REQ_FLUSH case
+                                        */
+       u64 lba;                        /* Logic addr. associated with entry */
+       struct ppa_addr ppa;            /* Physic addr. associated with entry */
+       int flags;                      /* Write context flags */
+};
+
+struct pblk_rb_entry {
+       struct ppa_addr cacheline;      /* Cacheline for this entry */
+       void *data;                     /* Pointer to data on this entry */
+       struct pblk_w_ctx w_ctx;        /* Context for this entry */
+       struct list_head index;         /* List head to enable indexes */
+};
+
+#define EMPTY_ENTRY (~0U)
+
+struct pblk_rb_pages {
+       struct page *pages;
+       int order;
+       struct list_head list;
+};
+
+/* Ring write buffer: the entry array plus the positions (mem, subm, sync,
+ * sync_point, l2p_update) that track how far each stage has progressed.
+ */
+struct pblk_rb {
+       struct pblk_rb_entry *entries;  /* Ring buffer entries */
+       unsigned int mem;               /* Write offset - points to next
+                                        * writable entry in memory
+                                        */
+       unsigned int subm;              /* Read offset - points to last entry
+                                        * that has been submitted to the media
+                                        * to be persisted
+                                        */
+       unsigned int sync;              /* Synced - backpointer that signals
+                                        * the last submitted entry that has
+                                        * been successfully persisted to media
+                                        */
+       unsigned int sync_point;        /* Sync point - last entry that must be
+                                        * flushed to the media. Used with
+                                        * REQ_FLUSH and REQ_FUA
+                                        */
+       unsigned int l2p_update;        /* l2p update point - next entry for
+                                        * which l2p mapping will be updated to
+                                        * contain a device ppa address (instead
+                                        * of a cacheline)
+                                        */
+       unsigned int nr_entries;        /* Number of entries in write buffer -
+                                        * must be a power of two
+                                        */
+       unsigned int seg_size;          /* Size of the data segments being
+                                        * stored on each entry. Typically this
+                                        * will be 4KB
+                                        */
+
+       struct list_head pages;         /* List of data pages */
+
+       spinlock_t w_lock;              /* Write lock */
+       spinlock_t s_lock;              /* Sync lock */
+
+#ifdef CONFIG_NVM_DEBUG
+       atomic_t inflight_sync_point;   /* Not served REQ_FLUSH | REQ_FUA */
+#endif
+};
+
+#define PBLK_RECOVERY_SECTORS 16
+
+struct pblk_lun {
+       struct ppa_addr bppa;
+
+       u8 *bb_list;                    /* Bad block list for LUN. Only used on
+                                        * bring up. Bad blocks are managed
+                                        * within lines on run-time.
+                                        */
+
+       struct semaphore wr_sem;
+};
+
+struct pblk_gc_rq {
+       struct pblk_line *line;
+       void *data;
+       u64 *lba_list;
+       int nr_secs;
+       int secs_to_gc;
+       struct list_head list;
+};
+
+struct pblk_gc {
+       int gc_active;
+       int gc_enabled;
+       int gc_forced;
+       int gc_jobs_active;
+       atomic_t inflight_gc;
+
+       struct task_struct *gc_ts;
+       struct task_struct *gc_writer_ts;
+       struct workqueue_struct *gc_reader_wq;
+       struct timer_list gc_timer;
+
+       int w_entries;
+       struct list_head w_list;
+
+       spinlock_t lock;
+       spinlock_t w_lock;
+};
+
+/* Rate limiter state: thresholds, write buffer budgets for user and GC I/O,
+ * and block accounting.
+ */
+struct pblk_rl {
+       unsigned int high;      /* Upper threshold for rate limiter (free run -
+                                * user I/O rate limiter)
+                                */
+       unsigned int low;       /* Lower threshold for rate limiter (user I/O
+                                * rate limiter - stall)
+                                */
+       unsigned int high_pw;   /* High rounded up as a power of 2 */
+
+#define PBLK_USER_HIGH_THRS 2  /* Begin write limit at 50 percent
+                                * available blks
+                                */
+#define PBLK_USER_LOW_THRS 20  /* Aggressive GC at 5% available blocks */
+
+       int rb_windows_pw;      /* Number of rate windows in the write buffer
+                                * given as a power-of-2. This guarantees that
+                                * when user I/O is being rate limited, there
+                                * will be reserved enough space for the GC to
+                                * place its payload. A window is of
+                                * pblk->max_write_pgs size, which in NVMe is
+                                * 64, i.e., 256kb.
+                                */
+       int rb_budget;          /* Total number of entries available for I/O */
+       int rb_user_max;        /* Max buffer entries available for user I/O */
+       atomic_t rb_user_cnt;   /* User I/O buffer counter */
+       int rb_gc_max;          /* Max buffer entries available for GC I/O */
+       int rb_gc_rsv;          /* Reserved buffer entries for GC I/O */
+       int rb_state;           /* Rate-limiter current state */
+       atomic_t rb_gc_cnt;     /* GC I/O buffer counter */
+
+       int rb_user_active;
+       struct timer_list u_timer;
+
+       unsigned long long nr_secs;
+       unsigned long total_blocks;
+       atomic_t free_blocks;
+};
+
+#define PBLK_LINE_NR_LUN_BITMAP 2
+#define PBLK_LINE_NR_SEC_BITMAP 2
+#define PBLK_LINE_EMPTY (~0U)
+
+enum {
+       /* Line Types */
+       PBLK_LINETYPE_FREE = 0,
+       PBLK_LINETYPE_LOG = 1,
+       PBLK_LINETYPE_DATA = 2,
+
+       /* Line state */
+       PBLK_LINESTATE_FREE = 10,
+       PBLK_LINESTATE_OPEN = 11,
+       PBLK_LINESTATE_CLOSED = 12,
+       PBLK_LINESTATE_GC = 13,
+       PBLK_LINESTATE_BAD = 14,
+       PBLK_LINESTATE_CORRUPT = 15,
+
+       /* GC group */
+       PBLK_LINEGC_NONE = 20,
+       PBLK_LINEGC_EMPTY = 21,
+       PBLK_LINEGC_LOW = 22,
+       PBLK_LINEGC_MID = 23,
+       PBLK_LINEGC_HIGH = 24,
+       PBLK_LINEGC_FULL = 25,
+};
+
+#define PBLK_MAGIC 0x70626c6b /*pblk*/
+
+struct line_header {
+       __le32 crc;
+       __le32 identifier;      /* pblk identifier */
+       __u8 uuid[16];          /* instance uuid */
+       __le16 type;            /* line type */
+       __le16 version;         /* type version */
+       __le32 id;              /* line id for current line */
+};
+
+struct line_smeta {
+       struct line_header header;
+
+       __le32 crc;             /* Full structure including struct crc */
+       /* Previous line metadata */
+       __le32 prev_id;         /* Line id for previous line */
+
+       /* Current line metadata */
+       __le64 seq_nr;          /* Sequence number for current line */
+
+       /* Active writers */
+       __le32 window_wr_lun;   /* Number of parallel LUNs to write */
+
+       __le32 rsvd[2];
+};
+
+/*
+ * Metadata Layout:
+ *     1. struct line_emeta
+ *     2. nr_lbas u64 forming lba list
+ *     3. nr_lines (all) u32 valid sector count (vsc) (~0U: non-alloc line)
+ *     4. nr_luns bits (u64 format) forming line bad block bitmap
+ *
+ *     3. and 4. will be part of FTL log
+ */
+struct line_emeta {
+       struct line_header header;
+
+       __le32 crc;             /* Full structure including struct crc */
+
+       /* Previous line metadata */
+       __le32 prev_id;         /* Line id for prev line */
+
+       /* Current line metadata */
+       __le64 seq_nr;          /* Sequence number for current line */
+
+       /* Active writers */
+       __le32 window_wr_lun;   /* Number of parallel LUNs to write */
+
+       /* Bookkeeping for recovery */
+       __le32 next_id;         /* Line id for next line */
+       __le64 nr_lbas;         /* Number of lbas mapped in line */
+       __le64 nr_valid_lbas;   /* Number of valid lbas mapped in line */
+};
+
+struct pblk_line {
+       struct pblk *pblk;
+       unsigned int id;                /* Line number corresponds to the
+                                        * block line
+                                        */
+       unsigned int seq_nr;            /* Unique line sequence number */
+
+       int state;                      /* PBLK_LINESTATE_X */
+       int type;                       /* PBLK_LINETYPE_X */
+       int gc_group;                   /* PBLK_LINEGC_X */
+       struct list_head list;          /* Free, GC lists */
+
+       unsigned long *lun_bitmap;      /* Bitmap for LUNs mapped in line */
+
+       struct line_smeta *smeta;       /* Start metadata */
+       struct line_emeta *emeta;       /* End metadata */
+       int meta_line;                  /* Metadata line id */
+       u64 smeta_ssec;                 /* Sector where smeta starts */
+       u64 emeta_ssec;                 /* Sector where emeta starts */
+
+       unsigned int sec_in_line;       /* Number of usable secs in line */
+
+       atomic_t blk_in_line;           /* Number of good blocks in line */
+       unsigned long *blk_bitmap;      /* Bitmap for valid/invalid blocks */
+       unsigned long *erase_bitmap;    /* Bitmap for erased blocks */
+
+       unsigned long *map_bitmap;      /* Bitmap for mapped sectors in line */
+       unsigned long *invalid_bitmap;  /* Bitmap for invalid sectors in line */
+
+       atomic_t left_eblks;            /* Blocks left for erasing */
+       atomic_t left_seblks;           /* Blocks left for sync erasing */
+
+       int left_msecs;                 /* Sectors left for mapping */
+       int left_ssecs;                 /* Sectors left to sync */
+       unsigned int cur_sec;           /* Sector map pointer */
+       unsigned int vsc;               /* Valid sector count in line */
+
+       struct kref ref;                /* Write buffer L2P references */
+
+       spinlock_t lock;                /* Necessary for invalid_bitmap only */
+};
+
+#define PBLK_DATA_LINES 4
+
+enum{
+       PBLK_KMALLOC_META = 1,
+       PBLK_VMALLOC_META = 2,
+};
+
+struct pblk_line_metadata {
+       void *meta;
+};
+
+struct pblk_line_mgmt {
+       int nr_lines;                   /* Total number of full lines */
+       int nr_free_lines;              /* Number of full lines in free list */
+
+       /* Free lists - use free_lock */
+       struct list_head free_list;     /* Full lines ready to use */
+       struct list_head corrupt_list;  /* Full lines corrupted */
+       struct list_head bad_list;      /* Full lines bad */
+
+       /* GC lists - use gc_lock */
+       struct list_head *gc_lists[PBLK_NR_GC_LISTS];
+       struct list_head gc_high_list;  /* Full lines ready to GC, high isc */
+       struct list_head gc_mid_list;   /* Full lines ready to GC, mid isc */
+       struct list_head gc_low_list;   /* Full lines ready to GC, low isc */
+
+       struct list_head gc_full_list;  /* Full lines ready to GC, no valid */
+       struct list_head gc_empty_list; /* Full lines close, all valid */
+
+       struct pblk_line *log_line;     /* Current FTL log line */
+       struct pblk_line *data_line;    /* Current data line */
+       struct pblk_line *log_next;     /* Next FTL log line */
+       struct pblk_line *data_next;    /* Next data line */
+
+       /* Metadata allocation type: VMALLOC | KMALLOC */
+       int smeta_alloc_type;
+       int emeta_alloc_type;
+
+       /* Pre-allocated metadata for data lines */
+       struct pblk_line_metadata sline_meta[PBLK_DATA_LINES];
+       struct pblk_line_metadata eline_meta[PBLK_DATA_LINES];
+       unsigned long meta_bitmap;
+
+       /* Helpers for fast bitmap calculations */
+       unsigned long *bb_template;
+       unsigned long *bb_aux;
+
+       unsigned long d_seq_nr;         /* Data line unique sequence number */
+       unsigned long l_seq_nr;         /* Log line unique sequence number */
+
+       spinlock_t free_lock;
+       spinlock_t gc_lock;
+};
+
+struct pblk_line_meta {
+       unsigned int smeta_len;         /* Total length for smeta */
+       unsigned int smeta_sec;         /* Sectors needed for smeta*/
+       unsigned int emeta_len;         /* Total length for emeta */
+       unsigned int emeta_sec;         /* Sectors needed for emeta*/
+       unsigned int emeta_bb;          /* Boundary for bb that affects emeta */
+       unsigned int sec_bitmap_len;    /* Length for sector bitmap in line */
+       unsigned int blk_bitmap_len;    /* Length for block bitmap in line */
+       unsigned int lun_bitmap_len;    /* Length for lun bitmap in line */
+
+       unsigned int blk_per_line;      /* Number of blocks in a full line */
+       unsigned int sec_per_line;      /* Number of sectors in a line */
+       unsigned int min_blk_line;      /* Min. number of good blocks in line */
+
+       unsigned int mid_thrs;          /* Threshold for GC mid list */
+       unsigned int high_thrs;         /* Threshold for GC high list */
+};
+
+struct pblk_addr_format {
+       u64     ch_mask;
+       u64     lun_mask;
+       u64     pln_mask;
+       u64     blk_mask;
+       u64     pg_mask;
+       u64     sec_mask;
+       u8      ch_offset;
+       u8      lun_offset;
+       u8      pln_offset;
+       u8      blk_offset;
+       u8      pg_offset;
+       u8      sec_offset;
+};
+
+struct pblk {
+       struct nvm_tgt_dev *dev;
+       struct gendisk *disk;
+
+       struct kobject kobj;
+
+       struct pblk_lun *luns;
+
+       struct pblk_line *lines;                /* Line array */
+       struct pblk_line_mgmt l_mg;             /* Line management */
+       struct pblk_line_meta lm;               /* Line metadata */
+
+       int ppaf_bitsize;
+       struct pblk_addr_format ppaf;
+
+       struct pblk_rb rwb;
+
+       int min_write_pgs; /* Minimum amount of pages required by controller */
+       int max_write_pgs; /* Maximum amount of pages supported by controller */
+       int pgs_in_buffer; /* Number of pages that need to be held in buffer to
+                           * guarantee successful reads.
+                           */
+
+       sector_t capacity; /* Device capacity when bad blocks are subtracted */
+       int over_pct;      /* Percentage of device used for over-provisioning */
+
+       /* pblk provisioning values. Used by rate limiter */
+       struct pblk_rl rl;
+
+       struct semaphore erase_sem;
+
+       unsigned char instance_uuid[16];
+#ifdef CONFIG_NVM_DEBUG
+       /* All debug counters apply to 4kb sector I/Os */
+       atomic_long_t inflight_writes;  /* Inflight writes (user and gc) */
+       atomic_long_t padded_writes;    /* Sectors padded due to flush/fua */
+       atomic_long_t padded_wb;        /* Sectors padded in write buffer */
+       atomic_long_t nr_flush;         /* Number of flush/fua I/O */
+       atomic_long_t req_writes;       /* Sectors stored on write buffer */
+       atomic_long_t sub_writes;       /* Sectors submitted from buffer */
+       atomic_long_t sync_writes;      /* Sectors synced to media */
+       atomic_long_t compl_writes;     /* Sectors completed in write bio */
+       atomic_long_t inflight_reads;   /* Inflight sector read requests */
+       atomic_long_t sync_reads;       /* Completed sector read requests */
+       atomic_long_t recov_writes;     /* Sectors submitted from recovery */
+       atomic_long_t recov_gc_writes;  /* Sectors submitted from write GC */
+       atomic_long_t recov_gc_reads;   /* Sectors submitted from read GC */
+#endif
+
+       spinlock_t lock;
+
+       atomic_long_t read_failed;
+       atomic_long_t read_empty;
+       atomic_long_t read_high_ecc;
+       atomic_long_t read_failed_gc;
+       atomic_long_t write_failed;
+       atomic_long_t erase_failed;
+
+       struct task_struct *writer_ts;
+
+       /* Simple translation map of logical addresses to physical addresses.
+        * The logical addresses are known by the host system, while the
+        * physical addresses are used when writing to the disk block device.
+        */
+       unsigned char *trans_map;
+       spinlock_t trans_lock;
+
+       struct list_head compl_list;
+
+       mempool_t *page_pool;
+       mempool_t *line_ws_pool;
+       mempool_t *rec_pool;
+       mempool_t *r_rq_pool;
+       mempool_t *w_rq_pool;
+       mempool_t *line_meta_pool;
+
+       struct workqueue_struct *kw_wq;
+       struct timer_list wtimer;
+
+       struct pblk_gc gc;
+};
+
+struct pblk_line_ws {
+       struct pblk *pblk;
+       struct pblk_line *line;
+       void *priv;
+       struct work_struct ws;
+};
+
+#define pblk_r_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_r_ctx))
+#define pblk_w_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_c_ctx))
+
+/*
+ * pblk ring buffer operations
+ */
+int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base,
+                unsigned int power_size, unsigned int power_seg_sz);
+unsigned int pblk_rb_calculate_size(unsigned int nr_entries);
+void *pblk_rb_entries_ref(struct pblk_rb *rb);
+int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
+                          unsigned int nr_entries, unsigned int *pos);
+int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries,
+                        unsigned int *pos);
+void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data,
+                             struct pblk_w_ctx w_ctx, unsigned int pos);
+void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
+                           struct pblk_w_ctx w_ctx, struct pblk_line *gc_line,
+                           unsigned int pos);
+struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos);
+
+void pblk_rb_sync_l2p(struct pblk_rb *rb);
+unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
+                                struct pblk_c_ctx *c_ctx,
+                                unsigned int pos,
+                                unsigned int nr_entries,
+                                unsigned int count);
+unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
+                                     struct list_head *list,
+                                     unsigned int max);
+int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
+                       u64 pos, int bio_iter);
+unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries);
+
+unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags);
+unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries);
+struct pblk_rb_entry *pblk_rb_sync_scan_entry(struct pblk_rb *rb,
+                                             struct ppa_addr *ppa);
+void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags);
+unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb);
+
+unsigned int pblk_rb_read_count(struct pblk_rb *rb);
+unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos);
+
+int pblk_rb_tear_down_check(struct pblk_rb *rb);
+int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos);
+void pblk_rb_data_free(struct pblk_rb *rb);
+ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf);
+
+/*
+ * pblk core
+ */
+struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw);
+int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
+                       struct pblk_c_ctx *c_ctx);
+void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw);
+void pblk_flush_writer(struct pblk *pblk);
+struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba);
+void pblk_discard(struct pblk *pblk, struct bio *bio);
+void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
+void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
+int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
+struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
+                             unsigned int nr_secs, unsigned int len,
+                             gfp_t gfp_mask);
+struct pblk_line *pblk_line_get(struct pblk *pblk);
+struct pblk_line *pblk_line_get_first_data(struct pblk *pblk);
+struct pblk_line *pblk_line_replace_data(struct pblk *pblk);
+int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line);
+void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line);
+struct pblk_line *pblk_line_get_data(struct pblk *pblk);
+struct pblk_line *pblk_line_get_data_next(struct pblk *pblk);
+int pblk_line_erase(struct pblk *pblk, struct pblk_line *line);
+int pblk_line_is_full(struct pblk_line *line);
+void pblk_line_free(struct pblk *pblk, struct pblk_line *line);
+void pblk_line_close_ws(struct work_struct *work);
+void pblk_line_close(struct pblk *pblk, struct pblk_line *line);
+void pblk_line_mark_bb(struct work_struct *work);
+void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
+                     void (*work)(struct work_struct *));
+u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line);
+int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line);
+int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line);
+int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa);
+void pblk_line_put(struct kref *ref);
+struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line);
+u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
+int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
+                  unsigned long secs_to_flush);
+void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
+                 unsigned long *lun_bitmap);
+void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
+               unsigned long *lun_bitmap);
+void pblk_end_bio_sync(struct bio *bio);
+void pblk_end_io_sync(struct nvm_rq *rqd);
+int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
+                      int nr_pages);
+void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
+                            u64 paddr);
+void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
+                        int nr_pages);
+void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa);
+void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa);
+void pblk_update_map_cache(struct pblk *pblk, sector_t lba,
+                          struct ppa_addr ppa);
+void pblk_update_map_dev(struct pblk *pblk, sector_t lba,
+                        struct ppa_addr ppa, struct ppa_addr entry_line);
+int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
+                      struct pblk_line *gc_line);
+void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
+                         u64 *lba_list, int nr_secs);
+void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
+                        sector_t blba, int nr_secs);
+
+/*
+ * pblk user I/O write path
+ */
+int pblk_write_to_cache(struct pblk *pblk, struct bio *bio,
+                       unsigned long flags);
+int pblk_write_gc_to_cache(struct pblk *pblk, void *data, u64 *lba_list,
+                          unsigned int nr_entries, unsigned int nr_rec_entries,
+                          struct pblk_line *gc_line, unsigned long flags);
+
+/*
+ * pblk map
+ */
+void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
+                      unsigned int sentry, unsigned long *lun_bitmap,
+                      unsigned int valid_secs, struct ppa_addr *erase_ppa);
+void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
+                unsigned long *lun_bitmap, unsigned int valid_secs,
+                unsigned int off);
+
+/*
+ * pblk write thread
+ */
+int pblk_write_ts(void *data);
+void pblk_write_timer_fn(unsigned long data);
+void pblk_write_should_kick(struct pblk *pblk);
+
+/*
+ * pblk read path
+ */
+int pblk_submit_read(struct pblk *pblk, struct bio *bio);
+int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
+                       unsigned int nr_secs, unsigned int *secs_to_gc,
+                       struct pblk_line *line);
+/*
+ * pblk recovery
+ */
+void pblk_submit_rec(struct work_struct *work);
+struct pblk_line *pblk_recov_l2p(struct pblk *pblk);
+void pblk_recov_pad(struct pblk *pblk);
+__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta);
+int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
+                       struct pblk_rec_ctx *recovery, u64 *comp_bits,
+                       unsigned int comp);
+
+/*
+ * pblk gc
+ */
+#define PBLK_GC_TRIES 3
+
+int pblk_gc_init(struct pblk *pblk);
+void pblk_gc_exit(struct pblk *pblk);
+void pblk_gc_should_start(struct pblk *pblk);
+void pblk_gc_should_stop(struct pblk *pblk);
+int pblk_gc_status(struct pblk *pblk);
+void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
+                             int *gc_active);
+void pblk_gc_sysfs_force(struct pblk *pblk, int force);
+
+/*
+ * pblk rate limiter
+ */
+void pblk_rl_init(struct pblk_rl *rl, int budget);
+void pblk_rl_free(struct pblk_rl *rl);
+int pblk_rl_gc_thrs(struct pblk_rl *rl);
+unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl);
+int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries);
+void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries);
+int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries);
+void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries);
+void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc);
+void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv);
+int pblk_rl_sysfs_rate_show(struct pblk_rl *rl);
+void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line);
+void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line);
+
+/*
+ * pblk sysfs
+ */
+int pblk_sysfs_init(struct gendisk *tdisk);
+void pblk_sysfs_exit(struct gendisk *tdisk);
+
+/*
+ * Allocate @size bytes.
+ *
+ * A @type of PBLK_KMALLOC_META is served by kmalloc() using @flags; every
+ * other @type is served by vmalloc(), in which case @flags is not used.
+ */
+static inline void *pblk_malloc(size_t size, int type, gfp_t flags)
+{
+       return (type == PBLK_KMALLOC_META) ? kmalloc(size, flags) :
+                                            vmalloc(size);
+}
+
+/*
+ * Free @ptr with kfree() when @type is PBLK_KMALLOC_META and with vfree()
+ * for any other @type.
+ */
+static inline void pblk_mfree(void *ptr, int type)
+{
+       if (type != PBLK_KMALLOC_META) {
+               vfree(ptr);
+               return;
+       }
+
+       kfree(ptr);
+}
+
+/*
+ * Step back sizeof(struct nvm_rq) bytes from @c_ctx and return that address
+ * as the request.
+ * NOTE(review): presumably the context is laid out directly behind its
+ * struct nvm_rq - confirm against the allocation site.
+ */
+static inline struct nvm_rq *nvm_rq_from_c_ctx(void *c_ctx)
+{
+       struct nvm_rq *rqd = c_ctx - sizeof(*rqd);
+
+       return rqd;
+}
+
+/* Return the address immediately following the struct line_emeta at @emeta. */
+static inline void *pblk_line_emeta_to_lbas(struct line_emeta *emeta)
+{
+       return &emeta[1];
+}
+
+#define NVM_MEM_PAGE_WRITE (8)
+
+/*
+ * Return NVM_MEM_PAGE_WRITE multiplied by the geometry's number of LUNs and
+ * its sectors per plane.
+ */
+static inline int pblk_pad_distance(struct pblk *pblk)
+{
+       struct nvm_geo *geo = &pblk->dev->geo;
+
+       return NVM_MEM_PAGE_WRITE * geo->nr_luns * geo->sec_per_pl;
+}
+
+/* The line id of a device address is its block field. */
+static inline int pblk_dev_ppa_to_line(struct ppa_addr p)
+{
+       int line_id = p.g.blk;
+
+       return line_id;
+}
+
+/* The line id of a target address is its block field. */
+static inline int pblk_tgt_ppa_to_line(struct ppa_addr p)
+{
+       int line_id = p.g.blk;
+
+       return line_id;
+}
+
+/* Flatten the (lun, ch) pair of @p into one index: lun * nr_chnls + ch. */
+static inline int pblk_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p)
+{
+       return p.g.ch + geo->nr_chnls * p.g.lun;
+}
+
+/*
+ * A block within a line corresponds to the lun.
+ *
+ * The position is computed as lun * nr_chnls + ch.
+ */
+static inline int pblk_dev_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p)
+{
+       return p.g.ch + geo->nr_chnls * p.g.lun;
+}
+
+/*
+ * Expand a 32-bit encoded address into a struct ppa_addr.
+ *
+ * All ones decodes to ADDR_EMPTY. A value with bit 31 set decodes to a
+ * cached address whose line is the low 31 bits. Anything else is split
+ * into blk/pg/lun/ch/pl/sec using the masks and offsets in pblk->ppaf.
+ */
+static inline struct ppa_addr pblk_ppa32_to_ppa64(struct pblk *pblk, u32 ppa32)
+{
+       struct ppa_addr ppa64;
+
+       ppa64.ppa = 0;
+
+       if (ppa32 == ~0U) {
+               ppa64.ppa = ADDR_EMPTY;
+               return ppa64;
+       }
+
+       if (ppa32 & (1U << 31)) {
+               ppa64.c.line = ppa32 & ((~0U) >> 1);
+               ppa64.c.is_cached = 1;
+               return ppa64;
+       }
+
+       ppa64.g.blk = (ppa32 & pblk->ppaf.blk_mask) >> pblk->ppaf.blk_offset;
+       ppa64.g.pg = (ppa32 & pblk->ppaf.pg_mask) >> pblk->ppaf.pg_offset;
+       ppa64.g.lun = (ppa32 & pblk->ppaf.lun_mask) >> pblk->ppaf.lun_offset;
+       ppa64.g.ch = (ppa32 & pblk->ppaf.ch_mask) >> pblk->ppaf.ch_offset;
+       ppa64.g.pl = (ppa32 & pblk->ppaf.pln_mask) >> pblk->ppaf.pln_offset;
+       ppa64.g.sec = (ppa32 & pblk->ppaf.sec_mask) >> pblk->ppaf.sec_offset;
+
+       return ppa64;
+}
+
+/*
+ * Look up the address stored for @lba in pblk->trans_map.
+ *
+ * When pblk->ppaf_bitsize is below 32 the map is read as an array of u32
+ * entries and the entry is expanded with pblk_ppa32_to_ppa64(); otherwise
+ * it is read as an array of struct ppa_addr.
+ */
+static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk,
+                                                               sector_t lba)
+{
+       if (pblk->ppaf_bitsize < 32) {
+               u32 *map32 = (u32 *)pblk->trans_map;
+
+               return pblk_ppa32_to_ppa64(pblk, map32[lba]);
+       }
+
+       return ((struct ppa_addr *)pblk->trans_map)[lba];
+}
+
+/*
+ * Pack @ppa64 into the 32-bit form stored by pblk_trans_map_set() when the
+ * translation map uses u32 entries.
+ *
+ * ADDR_EMPTY is encoded as all ones. A cached address is encoded as its
+ * cache line with bit 31 set. A device address has each geometry field
+ * shifted to its pblk->ppaf offset. Counterpart of pblk_ppa32_to_ppa64().
+ */
+static inline u32 pblk_ppa64_to_ppa32(struct pblk *pblk, struct ppa_addr ppa64)
+{
+       u32 ppa32 = 0;
+
+       if (ppa64.ppa == ADDR_EMPTY) {
+               ppa32 = ~0U;
+       } else if (ppa64.c.is_cached) {
+               ppa32 |= ppa64.c.line;
+               ppa32 |= 1U << 31;
+       } else {
+               /*
+                * Widen every field to u32 before shifting, in the same way
+                * pblk_dev_ppa_to_line_addr() widens to u64. This keeps the
+                * shift on an unsigned 32-bit operand rather than on whatever
+                * type the bit-field promotes to (presumably int for the
+                * narrow fields - confirm against struct ppa_addr).
+                */
+               ppa32 |= (u32)ppa64.g.blk << pblk->ppaf.blk_offset;
+               ppa32 |= (u32)ppa64.g.pg << pblk->ppaf.pg_offset;
+               ppa32 |= (u32)ppa64.g.lun << pblk->ppaf.lun_offset;
+               ppa32 |= (u32)ppa64.g.ch << pblk->ppaf.ch_offset;
+               ppa32 |= (u32)ppa64.g.pl << pblk->ppaf.pln_offset;
+               ppa32 |= (u32)ppa64.g.sec << pblk->ppaf.sec_offset;
+       }
+
+       return ppa32;
+}
+
+/*
+ * Store @ppa as the mapping for @lba in pblk->trans_map.
+ *
+ * With pblk->ppaf_bitsize of 32 or more the raw 64-bit value is written;
+ * otherwise the address is packed with pblk_ppa64_to_ppa32() and written
+ * into a u32 slot.
+ */
+static inline void pblk_trans_map_set(struct pblk *pblk, sector_t lba,
+                                               struct ppa_addr ppa)
+{
+       if (pblk->ppaf_bitsize >= 32) {
+               u64 *map64 = (u64 *)pblk->trans_map;
+
+               map64[lba] = ppa.ppa;
+               return;
+       }
+
+       ((u32 *)pblk->trans_map)[lba] = pblk_ppa64_to_ppa32(pblk, ppa);
+}
+
+/*
+ * Build an address from the pg, lun, ch, pl and sec fields of @p, each
+ * shifted to its pblk->ppaf offset. The blk field is not included.
+ */
+static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk,
+                                                       struct ppa_addr p)
+{
+       return ((u64)p.g.pg << pblk->ppaf.pg_offset) |
+              ((u64)p.g.lun << pblk->ppaf.lun_offset) |
+              ((u64)p.g.ch << pblk->ppaf.ch_offset) |
+              ((u64)p.g.pl << pblk->ppaf.pln_offset) |
+              ((u64)p.g.sec << pblk->ppaf.sec_offset);
+}
+
+/* Return 1 when @ppa_addr holds ADDR_EMPTY, 0 otherwise. */
+static inline int pblk_ppa_empty(struct ppa_addr ppa_addr)
+{
+       if (ppa_addr.ppa != ADDR_EMPTY)
+               return 0;
+
+       return 1;
+}
+
+/* Overwrite the address at @ppa_addr with ADDR_EMPTY. */
+static inline void pblk_ppa_set_empty(struct ppa_addr *ppa_addr)
+{
+       (*ppa_addr).ppa = ADDR_EMPTY;
+}
+
+/* Return 1 when @ppa is not ADDR_EMPTY and has its cached flag set. */
+static inline int pblk_addr_in_cache(struct ppa_addr ppa)
+{
+       if (ppa.ppa == ADDR_EMPTY)
+               return 0;
+
+       return ppa.c.is_cached ? 1 : 0;
+}
+
+/* Return the cache line field of @ppa, converted to int. */
+static inline int pblk_addr_to_cacheline(struct ppa_addr ppa)
+{
+       int line = ppa.c.line;
+
+       return line;
+}
+
+/*
+ * Build a cached address: the line field is set to @addr and the cached
+ * flag is raised. No other member of the address is written here.
+ */
+static inline struct ppa_addr pblk_cacheline_to_addr(int addr)
+{
+       struct ppa_addr cached;
+
+       cached.c.is_cached = 1;
+       cached.c.line = addr;
+
+       return cached;
+}
+
+/*
+ * Build an address from @paddr and @line_id.
+ *
+ * The address starts zeroed, @line_id becomes the blk field, and the
+ * pg/lun/ch/pl/sec fields are cut out of @paddr with the masks and offsets
+ * in pblk->ppaf.
+ */
+static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr,
+                                             u64 line_id)
+{
+       struct ppa_addr gen;
+
+       gen.ppa = 0;
+
+       gen.g.sec = (paddr & pblk->ppaf.sec_mask) >> pblk->ppaf.sec_offset;
+       gen.g.pl = (paddr & pblk->ppaf.pln_mask) >> pblk->ppaf.pln_offset;
+       gen.g.ch = (paddr & pblk->ppaf.ch_mask) >> pblk->ppaf.ch_offset;
+       gen.g.lun = (paddr & pblk->ppaf.lun_mask) >> pblk->ppaf.lun_offset;
+       gen.g.pg = (paddr & pblk->ppaf.pg_mask) >> pblk->ppaf.pg_offset;
+       gen.g.blk = line_id;
+
+       return gen;
+}
+
+/* Thin wrapper: returns addr_to_gen_ppa() for the same arguments. */
+static inline struct ppa_addr addr_to_pblk_ppa(struct pblk *pblk, u64 paddr,
+                                        u64 line_id)
+{
+       return addr_to_gen_ppa(pblk, paddr, line_id);
+}
+
+/*
+ * CRC32 (little endian, seeded with all ones) over the bytes of @smeta
+ * from offset sizeof(u32) up to sizeof(struct line_header).
+ */
+static inline u32 pblk_calc_meta_header_crc(struct pblk *pblk,
+                                           struct line_smeta *smeta)
+{
+       unsigned char *start = (unsigned char *)smeta + sizeof(u32);
+       size_t len = sizeof(struct line_header) - sizeof(u32);
+
+       return crc32_le(~(u32)0, start, len);
+}
+
+/*
+ * CRC32 (little endian, seeded with all ones) over @smeta, skipping the
+ * first sizeof(struct line_header) + sizeof(u32) bytes and running up to
+ * pblk->lm.smeta_len.
+ */
+static inline u32 pblk_calc_smeta_crc(struct pblk *pblk,
+                                     struct line_smeta *smeta)
+{
+       size_t skip = sizeof(struct line_header) + sizeof(u32);
+       unsigned char *start = (unsigned char *)smeta + skip;
+
+       return crc32_le(~(u32)0, start, pblk->lm.smeta_len - skip);
+}
+
+/*
+ * CRC32 (little endian, seeded with all ones) over @emeta, skipping the
+ * first sizeof(struct line_header) + sizeof(u32) bytes and running up to
+ * pblk->lm.emeta_len.
+ */
+static inline u32 pblk_calc_emeta_crc(struct pblk *pblk,
+                                     struct line_emeta *emeta)
+{
+       size_t skip = sizeof(struct line_header) + sizeof(u32);
+       unsigned char *start = (unsigned char *)emeta + skip;
+
+       return crc32_le(~(u32)0, start, pblk->lm.emeta_len - skip);
+}
+
+/*
+ * Compute request flags: the geometry's plane_mode shifted right by one,
+ * with NVM_IO_SCRAMBLE_ENABLE added when @type is WRITE.
+ */
+static inline int pblk_set_progr_mode(struct pblk *pblk, int type)
+{
+       int flags = pblk->dev->geo.plane_mode >> 1;
+
+       return (type == WRITE) ? (flags | NVM_IO_SCRAMBLE_ENABLE) : flags;
+}
+
+/* Fixed flag set for reads; @pblk is not consulted. */
+static inline int pblk_set_read_mode(struct pblk *pblk)
+{
+       int flags = NVM_IO_SNGL_ACCESS;
+
+       flags |= NVM_IO_SUSPEND;
+       flags |= NVM_IO_SCRAMBLE_ENABLE;
+
+       return flags;
+}
+
+#ifdef CONFIG_NVM_DEBUG
+/*
+ * Log @p through pr_err(), tagged with @msg and @error (printed in hex).
+ * A cached address prints its cache line; any other address prints its
+ * ch/lun/blk/pg/pl/sec fields.
+ */
+static inline void print_ppa(struct ppa_addr *p, char *msg, int error)
+{
+       if (!p->c.is_cached) {
+               pr_err("ppa: (%s: %x):ch:%d,lun:%d,blk:%d,pg:%d,pl:%d,sec:%d\n",
+                       msg, error,
+                       p->g.ch, p->g.lun, p->g.blk,
+                       p->g.pg, p->g.pl, p->g.sec);
+               return;
+       }
+
+       pr_err("ppa: (%s: %x) cache line: %llu\n",
+                       msg, error, (u64)p->c.line);
+}
+
+/*
+ * Log the addresses of a failed request.
+ *
+ * A single-address request prints rqd->ppa_addr and returns. Otherwise
+ * every entry of rqd->ppa_list whose bit is set in rqd->ppa_status is
+ * printed, followed by one summary line with @error and the status word.
+ */
+static inline void pblk_print_failed_rqd(struct pblk *pblk, struct nvm_rq *rqd,
+                                        int error)
+{
+       int bit;
+
+       if (rqd->nr_ppas ==  1) {
+               print_ppa(&rqd->ppa_addr, "rqd", error);
+               return;
+       }
+
+       for (bit = find_next_bit((void *)&rqd->ppa_status, rqd->nr_ppas, 0);
+            bit < rqd->nr_ppas;
+            bit = find_next_bit((void *)&rqd->ppa_status, rqd->nr_ppas,
+                                               bit + 1))
+               print_ppa(&rqd->ppa_list[bit], "rqd", error);
+
+       pr_err("error:%d, ppa_status:%llx\n", error, rqd->ppa_status);
+}
+#endif
+
+/*
+ * Validate @nr_ppas addresses in @ppas against the device geometry.
+ *
+ * An address passes only if it is not flagged as cached and each of its
+ * ch/lun/pl/blk/pg/sec fields is below the matching geometry limit.
+ * Returns 1 at the first offending address (printed together with its
+ * index when CONFIG_NVM_DEBUG is set) and 0 when every address passes.
+ */
+static inline int pblk_boundary_ppa_checks(struct nvm_tgt_dev *tgt_dev,
+                                      struct ppa_addr *ppas, int nr_ppas)
+{
+       struct nvm_geo *geo = &tgt_dev->geo;
+       int idx;
+
+       for (idx = 0; idx < nr_ppas; idx++) {
+               struct ppa_addr *cur = &ppas[idx];
+
+               if (cur->c.is_cached ||
+                   cur->g.ch >= geo->nr_chnls ||
+                   cur->g.lun >= geo->luns_per_chnl ||
+                   cur->g.pl >= geo->nr_planes ||
+                   cur->g.blk >= geo->blks_per_lun ||
+                   cur->g.pg >= geo->pgs_per_blk ||
+                   cur->g.sec >= geo->sec_per_pg) {
+#ifdef CONFIG_NVM_DEBUG
+                       print_ppa(cur, "boundary", idx);
+#endif
+                       return 1;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Return 1 when @paddr is greater than pblk->lm.sec_per_line, 0 otherwise.
+ * A @paddr equal to sec_per_line is accepted.
+ */
+static inline int pblk_boundary_paddr_checks(struct pblk *pblk, u64 paddr)
+{
+       return (paddr > pblk->lm.sec_per_line) ? 1 : 0;
+}
+
+/* Return the bi_idx member of @bio's iterator. */
+static inline unsigned int pblk_get_bi_idx(struct bio *bio)
+{
+       unsigned int idx = bio->bi_iter.bi_idx;
+
+       return idx;
+}
+
+/* Return @bio's starting sector divided by NR_PHY_IN_LOG. */
+static inline sector_t pblk_get_lba(struct bio *bio)
+{
+       sector_t sector = bio->bi_iter.bi_sector;
+
+       return sector / NR_PHY_IN_LOG;
+}
+
+/* Return @bio's remaining byte count divided by PBLK_EXPOSED_PAGE_SIZE. */
+static inline unsigned int pblk_get_secs(struct bio *bio)
+{
+       unsigned int bytes = bio->bi_iter.bi_size;
+
+       return bytes / PBLK_EXPOSED_PAGE_SIZE;
+}
+
+/* Return @lba multiplied by NR_PHY_IN_LOG. */
+static inline sector_t pblk_get_sector(sector_t lba)
+{
+       return NR_PHY_IN_LOG * lba;
+}
+
+/*
+ * Generate a fresh uuid with uuid_le_gen() and copy its 16 bytes into
+ * pblk->instance_uuid.
+ */
+static inline void pblk_setup_uuid(struct pblk *pblk)
+{
+       uuid_le fresh;
+
+       uuid_le_gen(&fresh);
+       memcpy(pblk->instance_uuid, fresh.b, 16);
+}
+#endif /* PBLK_H_ */
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c

index e00b1d7b976f0e892c7ff27ce697e3eaeadd6481..cf0e28a0ff61d34844b629f7c22e3625f1efe2bf 100644 (file)
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -318,10 +318,6 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
         }
  
         page = mempool_alloc(rrpc->page_pool, GFP_NOIO);
-       if (!page) {
-               bio_put(bio);
-               return -ENOMEM;
-       }
  
         while ((slot = find_first_zero_bit(rblk->invalid_pages,
                                             nr_sec_per_blk)) < nr_sec_per_blk) {
@@ -414,7 +410,6 @@ static void rrpc_block_gc(struct work_struct *work)
         struct rrpc *rrpc = gcb->rrpc;
         struct rrpc_block *rblk = gcb->rblk;
         struct rrpc_lun *rlun = rblk->rlun;
-       struct nvm_tgt_dev *dev = rrpc->dev;
         struct ppa_addr ppa;
  
         mempool_free(gcb, rrpc->gcb_pool);
@@ -430,7 +425,7 @@ static void rrpc_block_gc(struct work_struct *work)
         ppa.g.lun = rlun->bppa.g.lun;
         ppa.g.blk = rblk->id;
  
-       if (nvm_erase_blk(dev, &ppa, 0))
+       if (nvm_erase_sync(rrpc->dev, &ppa, 1))
                 goto put_back;
  
         rrpc_put_blk(rrpc, rblk);
@@ -822,7 +817,7 @@ static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
  
         for (i = 0; i < npages; i++) {
                 /* We assume that mapping occurs at 4KB granularity */
-               BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_sects));
+               BUG_ON(!(laddr + i < rrpc->nr_sects));
                 gp = &rrpc->trans_map[laddr + i];
  
                 if (gp->rblk) {
@@ -851,7 +846,7 @@ static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd,
         if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd))
                 return NVM_IO_REQUEUE;
  
-       BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_sects));
+       BUG_ON(!(laddr < rrpc->nr_sects));
         gp = &rrpc->trans_map[laddr];
  
         if (gp->rblk) {
@@ -1007,11 +1002,6 @@ static blk_qc_t rrpc_make_rq(struct request_queue *q, struct bio *bio)
         }
  
         rqd = mempool_alloc(rrpc->rq_pool, GFP_KERNEL);
-       if (!rqd) {
-               pr_err_ratelimited("rrpc: not able to queue bio.");
-               bio_io_error(bio);
-               return BLK_QC_T_NONE;
-       }
         memset(rqd, 0, sizeof(struct nvm_rq));
  
         err = rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_NONE);
@@ -1275,8 +1265,10 @@ static int rrpc_bb_discovery(struct nvm_tgt_dev *dev, struct rrpc_lun *rlun)
         }
  
         nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks);
-       if (nr_blks < 0)
-               return nr_blks;
+       if (nr_blks < 0) {
+               ret = nr_blks;
+               goto out;
+       }
  
         for (i = 0; i < nr_blks; i++) {
                 if (blks[i] == NVM_BLK_T_FREE)
@@ -1514,7 +1506,8 @@ err:
  
  static struct nvm_tgt_type tt_rrpc;
  
-static void *rrpc_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk)
+static void *rrpc_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
+                      int flags)
  {
         struct request_queue *bqueue = dev->q;
         struct request_queue *tqueue = tdisk->queue;
diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c

index 3f041b1870335ab48daeb847b986a274f619677b..f757cef293f86881667333f3ecc05bbd5f41a197 100644 (file)
--- a/drivers/macintosh/macio_asic.c
+++ b/drivers/macintosh/macio_asic.c
@@ -392,6 +392,7 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip,
          * To get all the fields, copy all archdata
          */
         dev->ofdev.dev.archdata = chip->lbus.pdev->dev.archdata;
+       dev->ofdev.dev.dma_ops = chip->lbus.pdev->dev.dma_ops;
  #endif /* CONFIG_PCI */
  
  #ifdef DEBUG
diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig

index ceff415f201c66b5a61a5ce5673b5f1e46c151a6..ee1a3d9147ef064446bf6f63fec31683c859d8f2 100644 (file)
--- a/drivers/mailbox/Kconfig
+++ b/drivers/mailbox/Kconfig
@@ -144,12 +144,22 @@ config XGENE_SLIMPRO_MBOX
           want to use the APM X-Gene SLIMpro IPCM support.
  
  config BCM_PDC_MBOX
-       tristate "Broadcom PDC Mailbox"
-       depends on ARM64 || COMPILE_TEST
+       tristate "Broadcom FlexSparx DMA Mailbox"
+       depends on ARCH_BCM_IPROC || COMPILE_TEST
         depends on HAS_DMA
+       help
+         Mailbox implementation for the Broadcom FlexSparx DMA ring manager,
+         which provides access to various offload engines on Broadcom
+         SoCs, including FA2/FA+ on Northstar Plus and PDC on Northstar 2.
+
+config BCM_FLEXRM_MBOX
+       tristate "Broadcom FlexRM Mailbox"
+       depends on ARM64
+       depends on HAS_DMA
+       select GENERIC_MSI_IRQ_DOMAIN
         default ARCH_BCM_IPROC
         help
-         Mailbox implementation for the Broadcom PDC ring manager,
+         Mailbox implementation of the Broadcom FlexRM ring manager,
           which provides access to various offload engines on Broadcom
-         SoCs. Say Y here if you want to use the Broadcom PDC.
+         SoCs. Say Y here if you want to use the Broadcom FlexRM.
  endif
diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile

index 7dde4f609ae84df8b9f213fd84d922598f223bb4..e2bcb03cd35b2e87c3d1c510f992a3a1f041334e 100644 (file)
--- a/drivers/mailbox/Makefile
+++ b/drivers/mailbox/Makefile
@@ -30,4 +30,6 @@ obj-$(CONFIG_HI6220_MBOX)     += hi6220-mailbox.o
  
  obj-$(CONFIG_BCM_PDC_MBOX)     += bcm-pdc-mailbox.o
  
+obj-$(CONFIG_BCM_FLEXRM_MBOX)  += bcm-flexrm-mailbox.o
+
  obj-$(CONFIG_TEGRA_HSP_MBOX)   += tegra-hsp.o
diff --git a/drivers/mailbox/bcm-flexrm-mailbox.c b/drivers/mailbox/bcm-flexrm-mailbox.c

new file mode 100644 (file)

index 0000000..da67882
--- /dev/null
+++ b/drivers/mailbox/bcm-flexrm-mailbox.c
@@ -0,0 +1,1595 @@
+/* Broadcom FlexRM Mailbox Driver
+ *
+ * Copyright (C) 2017 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Each Broadcom FlexSparx4 offload engine is implemented as an
+ * extension to Broadcom FlexRM ring manager. The FlexRM ring
+ * manager provides a set of rings which can be used to submit
+ * work to a FlexSparx4 offload engine.
+ *
+ * This driver creates a mailbox controller using a set of FlexRM
+ * rings where each mailbox channel represents a separate FlexRM ring.
+ */
+
+#include <asm/barrier.h>
+#include <asm/byteorder.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/idr.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/mailbox_controller.h>
+#include <linux/mailbox_client.h>
+#include <linux/mailbox/brcm-message.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+
+/* ====== FlexRM register defines ===== */
+
+/*
+ * FlexRM configuration
+ *
+ * Every descriptor is RING_DESC_SIZE (8) bytes. With the values below:
+ *   RING_BD_DESC_COUNT = 1024 * 32 = 32768 descriptors
+ *   RING_BD_SIZE       = 32768 * 8 = 262144 bytes
+ *   RING_CMPL_SIZE     = 1024 * 8  = 8192 bytes (1 << RING_CMPL_ALIGN_ORDER)
+ */
+#define RING_REGS_SIZE                                 0x10000
+#define RING_DESC_SIZE                                 8
+/* Convert between a byte offset and a descriptor index */
+#define RING_DESC_INDEX(offset)                                \
+                       ((offset) / RING_DESC_SIZE)
+#define RING_DESC_OFFSET(index)                                \
+                       ((index) * RING_DESC_SIZE)
+#define RING_MAX_REQ_COUNT                             1024
+#define RING_BD_ALIGN_ORDER                            12
+/* True when the low RING_BD_ALIGN_ORDER bits of addr are all zero */
+#define RING_BD_ALIGN_CHECK(addr)                      \
+                       (!((addr) & ((0x1 << RING_BD_ALIGN_ORDER) - 1)))
+/* Bit RING_BD_ALIGN_ORDER of offset; the VALID form is its negation */
+#define RING_BD_TOGGLE_INVALID(offset)                 \
+                       (((offset) >> RING_BD_ALIGN_ORDER) & 0x1)
+#define RING_BD_TOGGLE_VALID(offset)                   \
+                       (!RING_BD_TOGGLE_INVALID(offset))
+#define RING_BD_DESC_PER_REQ                           32
+#define RING_BD_DESC_COUNT                             \
+                       (RING_MAX_REQ_COUNT * RING_BD_DESC_PER_REQ)
+#define RING_BD_SIZE                                   \
+                       (RING_BD_DESC_COUNT * RING_DESC_SIZE)
+#define RING_CMPL_ALIGN_ORDER                          13
+#define RING_CMPL_DESC_COUNT                           RING_MAX_REQ_COUNT
+#define RING_CMPL_SIZE                                 \
+                       (RING_CMPL_DESC_COUNT * RING_DESC_SIZE)
+/* The bytes 0x76 0x30 0x30 0x31 are ASCII "v001" */
+#define RING_VER_MAGIC                                 0x76303031
+
+/* Per-Ring register offsets */
+#define RING_VER                                       0x000
+#define RING_BD_START_ADDR                             0x004
+#define RING_BD_READ_PTR                               0x008
+#define RING_BD_WRITE_PTR                              0x00c
+#define RING_BD_READ_PTR_DDR_LS                                0x010
+#define RING_BD_READ_PTR_DDR_MS                                0x014
+#define RING_CMPL_START_ADDR                           0x018
+#define RING_CMPL_WRITE_PTR                            0x01c
+#define RING_NUM_REQ_RECV_LS                           0x020
+#define RING_NUM_REQ_RECV_MS                           0x024
+#define RING_NUM_REQ_TRANS_LS                          0x028
+#define RING_NUM_REQ_TRANS_MS                          0x02c
+#define RING_NUM_REQ_OUTSTAND                          0x030
+#define RING_CONTROL                                   0x034
+#define RING_FLUSH_DONE                                        0x038
+#define RING_MSI_ADDR_LS                               0x03c
+#define RING_MSI_ADDR_MS                               0x040
+#define RING_MSI_CONTROL                               0x048
+#define RING_BD_READ_PTR_DDR_CONTROL                   0x04c
+#define RING_MSI_DATA_VALUE                            0x064
+
+/* Register RING_BD_START_ADDR fields */
+#define BD_LAST_UPDATE_HW_SHIFT                                28
+#define BD_LAST_UPDATE_HW_MASK                         0x1
+#define BD_START_ADDR_VALUE(pa)                                \
+       ((u32)((((dma_addr_t)(pa)) >> RING_BD_ALIGN_ORDER) & 0x0fffffff))
+#define BD_START_ADDR_DECODE(val)                      \
+       ((dma_addr_t)((val) & 0x0fffffff) << RING_BD_ALIGN_ORDER)
+
+/* Register RING_CMPL_START_ADDR fields */
+#define CMPL_START_ADDR_VALUE(pa)                      \
+       ((u32)((((u64)(pa)) >> RING_CMPL_ALIGN_ORDER) & 0x03ffffff))
+
+/* Register RING_CONTROL fields */
+#define CONTROL_MASK_DISABLE_CONTROL                   12
+#define CONTROL_FLUSH_SHIFT                            5
+#define CONTROL_ACTIVE_SHIFT                           4
+#define CONTROL_RATE_ADAPT_MASK                                0xf
+#define CONTROL_RATE_DYNAMIC                           0x0
+#define CONTROL_RATE_FAST                              0x8
+#define CONTROL_RATE_MEDIUM                            0x9
+#define CONTROL_RATE_SLOW                              0xa
+#define CONTROL_RATE_IDLE                              0xb
+
+/* Register RING_FLUSH_DONE fields */
+#define FLUSH_DONE_MASK                                        0x1
+
+/* Register RING_MSI_CONTROL fields */
+#define MSI_TIMER_VAL_SHIFT                            16
+#define MSI_TIMER_VAL_MASK                             0xffff
+#define MSI_ENABLE_SHIFT                               15
+#define MSI_ENABLE_MASK                                        0x1
+#define MSI_COUNT_SHIFT                                        0
+#define MSI_COUNT_MASK                                 0x3ff
+
+/* Register RING_BD_READ_PTR_DDR_CONTROL fields */
+#define BD_READ_PTR_DDR_TIMER_VAL_SHIFT                        16
+#define BD_READ_PTR_DDR_TIMER_VAL_MASK                 0xffff
+#define BD_READ_PTR_DDR_ENABLE_SHIFT                   15
+#define BD_READ_PTR_DDR_ENABLE_MASK                    0x1
+
+/* ====== FlexRM ring descriptor defines ===== */
+
+/* Completion descriptor format */
+#define CMPL_OPAQUE_SHIFT                      0
+#define CMPL_OPAQUE_MASK                       0xffff
+#define CMPL_ENGINE_STATUS_SHIFT               16
+#define CMPL_ENGINE_STATUS_MASK                        0xffff
+#define CMPL_DME_STATUS_SHIFT                  32
+#define CMPL_DME_STATUS_MASK                   0xffff
+#define CMPL_RM_STATUS_SHIFT                   48
+#define CMPL_RM_STATUS_MASK                    0xffff
+
+/* Completion DME status code */
+#define DME_STATUS_MEM_COR_ERR                 BIT(0)
+#define DME_STATUS_MEM_UCOR_ERR                        BIT(1)
+#define DME_STATUS_FIFO_UNDERFLOW              BIT(2)
+#define DME_STATUS_FIFO_OVERFLOW               BIT(3)
+#define DME_STATUS_RRESP_ERR                   BIT(4)
+#define DME_STATUS_BRESP_ERR                   BIT(5)
+#define DME_STATUS_ERROR_MASK                  (DME_STATUS_MEM_COR_ERR | \
+                                                DME_STATUS_MEM_UCOR_ERR | \
+                                                DME_STATUS_FIFO_UNDERFLOW | \
+                                                DME_STATUS_FIFO_OVERFLOW | \
+                                                DME_STATUS_RRESP_ERR | \
+                                                DME_STATUS_BRESP_ERR)
+
+/* Completion RM status code */
+#define RM_STATUS_CODE_SHIFT                   0
+#define RM_STATUS_CODE_MASK                    0x3ff
+#define RM_STATUS_CODE_GOOD                    0x0
+#define RM_STATUS_CODE_AE_TIMEOUT              0x3ff
+
+/* General descriptor format */
+#define DESC_TYPE_SHIFT                                60
+#define DESC_TYPE_MASK                         0xf
+#define DESC_PAYLOAD_SHIFT                     0
+#define DESC_PAYLOAD_MASK                      0x0fffffffffffffff
+
+/* Null descriptor format */
+#define NULL_TYPE                              0
+#define NULL_TOGGLE_SHIFT                      58
+#define NULL_TOGGLE_MASK                       0x1
+
+/* Header descriptor format */
+#define HEADER_TYPE                            1
+#define HEADER_TOGGLE_SHIFT                    58
+#define HEADER_TOGGLE_MASK                     0x1
+#define HEADER_ENDPKT_SHIFT                    57
+#define HEADER_ENDPKT_MASK                     0x1
+#define HEADER_STARTPKT_SHIFT                  56
+#define HEADER_STARTPKT_MASK                   0x1
+#define HEADER_BDCOUNT_SHIFT                   36
+#define HEADER_BDCOUNT_MASK                    0x1f
+#define HEADER_BDCOUNT_MAX                     HEADER_BDCOUNT_MASK
+#define HEADER_FLAGS_SHIFT                     16
+#define HEADER_FLAGS_MASK                      0xffff
+#define HEADER_OPAQUE_SHIFT                    0
+#define HEADER_OPAQUE_MASK                     0xffff
+
+/* Source (SRC) descriptor format */
+#define SRC_TYPE                               2
+#define SRC_LENGTH_SHIFT                       44
+#define SRC_LENGTH_MASK                                0xffff
+#define SRC_ADDR_SHIFT                         0
+#define SRC_ADDR_MASK                          0x00000fffffffffff
+
+/* Destination (DST) descriptor format */
+#define DST_TYPE                               3
+#define DST_LENGTH_SHIFT                       44
+#define DST_LENGTH_MASK                                0xffff
+#define DST_ADDR_SHIFT                         0
+#define DST_ADDR_MASK                          0x00000fffffffffff
+
+/* Immediate (IMM) descriptor format */
+#define IMM_TYPE                               4
+#define IMM_DATA_SHIFT                         0
+#define IMM_DATA_MASK                          0x0fffffffffffffff
+
+/* Next pointer (NPTR) descriptor format */
+#define NPTR_TYPE                              5
+#define NPTR_TOGGLE_SHIFT                      58
+#define NPTR_TOGGLE_MASK                       0x1
+#define NPTR_ADDR_SHIFT                                0
+#define NPTR_ADDR_MASK                         0x00000fffffffffff
+
+/* Mega source (MSRC) descriptor format */
+#define MSRC_TYPE                              6
+#define MSRC_LENGTH_SHIFT                      44
+#define MSRC_LENGTH_MASK                       0xffff
+#define MSRC_ADDR_SHIFT                                0
+#define MSRC_ADDR_MASK                         0x00000fffffffffff
+
+/* Mega destination (MDST) descriptor format */
+#define MDST_TYPE                              7
+#define MDST_LENGTH_SHIFT                      44
+#define MDST_LENGTH_MASK                       0xffff
+#define MDST_ADDR_SHIFT                                0
+#define MDST_ADDR_MASK                         0x00000fffffffffff
+
+/* Source with tlast (SRCT) descriptor format */
+#define SRCT_TYPE                              8
+#define SRCT_LENGTH_SHIFT                      44
+#define SRCT_LENGTH_MASK                       0xffff
+#define SRCT_ADDR_SHIFT                                0
+#define SRCT_ADDR_MASK                         0x00000fffffffffff
+
+/* Destination with tlast (DSTT) descriptor format */
+#define DSTT_TYPE                              9
+#define DSTT_LENGTH_SHIFT                      44
+#define DSTT_LENGTH_MASK                       0xffff
+#define DSTT_ADDR_SHIFT                                0
+#define DSTT_ADDR_MASK                         0x00000fffffffffff
+
+/* Immediate with tlast (IMMT) descriptor format */
+#define IMMT_TYPE                              10
+#define IMMT_DATA_SHIFT                                0
+#define IMMT_DATA_MASK                         0x0fffffffffffffff
+
+/*
+ * Descriptor helper macros
+ *
+ * DESC_DEC(_d, _s, _m): extract the field of mask _m at bit position _s
+ * from descriptor value _d.
+ * DESC_ENC(_d, _v, _s, _m): clear that field in _d, then store _v (masked
+ * with _m) into it. _d must be a modifiable lvalue and is evaluated twice.
+ */
+#define DESC_DEC(_d, _s, _m)                   (((_d) >> (_s)) & (_m))
+#define DESC_ENC(_d, _v, _s, _m)               \
+                       do { \
+                               (_d) &= ~((u64)(_m) << (_s)); \
+                               (_d) |= (((u64)(_v) & (_m)) << (_s)); \
+                       } while (0)
+
+/* ====== FlexRM data structures ===== */
+
+/*
+ * Per-ring state. One instance backs one mailbox channel (it is stored
+ * in chan->con_priv and ring->num indexes controller.chans[]).
+ */
+struct flexrm_ring {
+       /* Unprotected members */
+       /* Ring number; also the index of the matching mailbox channel */
+       int num;
+       /* Owning FlexRM mailbox instance */
+       struct flexrm_mbox *mbox;
+       /* MMIO base used for the RING_* register accesses of this ring */
+       void __iomem *regs;
+       /* NOTE(review): presumably set once the IRQ was requested - confirm */
+       bool irq_requested;
+       /* IRQ number; UINT_MAX means no IRQ is available for this ring */
+       unsigned int irq;
+       /* NOTE(review): MSI coalescing parameters, not used in this chunk */
+       unsigned int msi_timer_val;
+       unsigned int msi_count_threshold;
+       /* Allocator for request ids in [0, RING_MAX_REQ_COUNT) */
+       struct ida requests_ida;
+       /* In-flight message for each request id, NULL when the id is free */
+       struct brcm_message *requests[RING_MAX_REQ_COUNT];
+       /* Buffer descriptor (BD) memory: CPU address and DMA address */
+       void *bd_base;
+       dma_addr_t bd_dma_base;
+       /* Byte offset in BD memory where the next descriptor is written */
+       u32 bd_write_offset;
+       /* Completion memory: CPU address and DMA address */
+       void *cmpl_base;
+       dma_addr_t cmpl_dma_base;
+       /* Protected members */
+       /* Guards last_pending_msg and cmpl_read_offset */
+       spinlock_t lock;
+       /* Message that could not be queued yet; re-sent on completion */
+       struct brcm_message *last_pending_msg;
+       /* Byte offset in completion memory of the next entry to consume */
+       u32 cmpl_read_offset;
+};
+
+/* Driver-wide state shared by all rings of one FlexRM instance */
+struct flexrm_mbox {
+       /* Device used for logging and DMA mapping */
+       struct device *dev;
+       /* NOTE(review): presumably the base of the FlexRM register block */
+       void __iomem *regs;
+       /* Number of entries in rings[] - TODO confirm against probe code */
+       u32 num_rings;
+       struct flexrm_ring *rings;
+       /* DMA pool each ring's BD memory is allocated from at startup */
+       struct dma_pool *bd_pool;
+       /* DMA pool each ring's completion memory is allocated from */
+       struct dma_pool *cmpl_pool;
+       /* Mailbox controller; controller.chans[] is indexed by ring number */
+       struct mbox_controller controller;
+};
+
+/* ====== FlexRM ring descriptor helper routines ===== */
+
+/* Load the little-endian 64-bit descriptor at desc_ptr in CPU byte order */
+static u64 flexrm_read_desc(void *desc_ptr)
+{
+       u64 *slot = desc_ptr;
+
+       return le64_to_cpu(*slot);
+}
+
+/* Store desc at desc_ptr as a little-endian 64-bit descriptor */
+static void flexrm_write_desc(void *desc_ptr, u64 desc)
+{
+       u64 *slot = desc_ptr;
+
+       *slot = cpu_to_le64(desc);
+}
+
+/* Extract the request id (opaque field) from a completion descriptor */
+static u32 flexrm_cmpl_desc_to_reqid(u64 cmpl_desc)
+{
+       u64 opaque = cmpl_desc & CMPL_OPAQUE_MASK;
+
+       return (u32)opaque;
+}
+
+/*
+ * Translate the status fields of a completion descriptor into an errno:
+ * -EIO if any DME error bit is set, -ETIMEDOUT if the RM status code is
+ * AE_TIMEOUT, 0 otherwise.
+ */
+static int flexrm_cmpl_desc_to_error(u64 cmpl_desc)
+{
+       u32 dme_status, rm_status;
+
+       /* DME errors take precedence over the RM status code */
+       dme_status = DESC_DEC(cmpl_desc, CMPL_DME_STATUS_SHIFT,
+                             CMPL_DME_STATUS_MASK);
+       if (dme_status & DME_STATUS_ERROR_MASK)
+               return -EIO;
+
+       rm_status = DESC_DEC(cmpl_desc, CMPL_RM_STATUS_SHIFT,
+                            CMPL_RM_STATUS_MASK);
+       rm_status &= RM_STATUS_CODE_MASK;
+
+       return (rm_status == RM_STATUS_CODE_AE_TIMEOUT) ? -ETIMEDOUT : 0;
+}
+
+/* Tell whether the descriptor stored at desc_ptr is a next-table pointer */
+static bool flexrm_is_next_table_desc(void *desc_ptr)
+{
+       u64 raw = flexrm_read_desc(desc_ptr);
+       u32 desc_type = DESC_DEC(raw, DESC_TYPE_SHIFT, DESC_TYPE_MASK);
+
+       return desc_type == NPTR_TYPE;
+}
+
+/*
+ * Build a next-table pointer (NPTR) descriptor that carries the given
+ * toggle bit and the DMA address of the descriptor to continue at.
+ */
+static u64 flexrm_next_table_desc(u32 toggle, dma_addr_t next_addr)
+{
+       u64 nptr = 0;
+
+       DESC_ENC(nptr, NPTR_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK);
+       DESC_ENC(nptr, toggle, NPTR_TOGGLE_SHIFT, NPTR_TOGGLE_MASK);
+       DESC_ENC(nptr, next_addr, NPTR_ADDR_SHIFT, NPTR_ADDR_MASK);
+
+       return nptr;
+}
+
+/* Build a NULL descriptor carrying the given toggle bit */
+static u64 flexrm_null_desc(u32 toggle)
+{
+       u64 null_bd = 0;
+
+       DESC_ENC(null_bd, NULL_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK);
+       DESC_ENC(null_bd, toggle, NULL_TOGGLE_SHIFT, NULL_TOGGLE_MASK);
+
+       return null_bd;
+}
+
+/*
+ * Return the number of HEADER descriptors needed for nhcnt non-HEADER
+ * descriptors, i.e. nhcnt / HEADER_BDCOUNT_MAX rounded up.
+ *
+ * flexrm_enqueue_desc() emits one HEADER in front of every group of up
+ * to HEADER_BDCOUNT_MAX non-HEADER descriptors, so a partially filled
+ * trailing group still needs a HEADER of its own.
+ */
+static u32 flexrm_estimate_header_desc_count(u32 nhcnt)
+{
+       u32 hcnt = nhcnt / HEADER_BDCOUNT_MAX;
+
+       /* A non-zero remainder means one more (partial) group */
+       if (nhcnt % HEADER_BDCOUNT_MAX)
+               hcnt += 1;
+
+       return hcnt;
+}
+
+/* Invert the TOGGLE bit of the HEADER descriptor stored at desc_ptr */
+static void flexrm_flip_header_toogle(void *desc_ptr)
+{
+       u64 hdr = flexrm_read_desc(desc_ptr);
+
+       /* XOR clears the bit when it is set and sets it when it is clear */
+       hdr ^= ((u64)0x1 << HEADER_TOGGLE_SHIFT);
+
+       flexrm_write_desc(desc_ptr, hdr);
+}
+
+/*
+ * Build a HEADER descriptor from its individual fields. Each argument is
+ * truncated to the width of the field it is stored in.
+ */
+static u64 flexrm_header_desc(u32 toggle, u32 startpkt, u32 endpkt,
+                              u32 bdcount, u32 flags, u32 opaque)
+{
+       u64 hdr = 0;
+
+       DESC_ENC(hdr, HEADER_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK);
+       DESC_ENC(hdr, toggle, HEADER_TOGGLE_SHIFT, HEADER_TOGGLE_MASK);
+       DESC_ENC(hdr, startpkt, HEADER_STARTPKT_SHIFT, HEADER_STARTPKT_MASK);
+       DESC_ENC(hdr, endpkt, HEADER_ENDPKT_SHIFT, HEADER_ENDPKT_MASK);
+       DESC_ENC(hdr, bdcount, HEADER_BDCOUNT_SHIFT, HEADER_BDCOUNT_MASK);
+       DESC_ENC(hdr, flags, HEADER_FLAGS_SHIFT, HEADER_FLAGS_MASK);
+       DESC_ENC(hdr, opaque, HEADER_OPAQUE_SHIFT, HEADER_OPAQUE_MASK);
+
+       return hdr;
+}
+
+/*
+ * Move *desc_ptr to the next descriptor slot that may be written: step
+ * one descriptor forward (wrapping from end_desc to start_desc) and then
+ * skip over any next-table pointer descriptors, inverting *toggle for
+ * each one that is skipped.
+ */
+static void flexrm_advance_desc_ptr(void **desc_ptr, u32 *toggle,
+                                    void *start_desc, void *end_desc)
+{
+       /* Point to next descriptor */
+       *desc_ptr += sizeof(u64);
+       if (*desc_ptr == end_desc)
+               *desc_ptr = start_desc;
+
+       /* Skip next pointer descriptors */
+       while (flexrm_is_next_table_desc(*desc_ptr)) {
+               *toggle = (*toggle) ? 0 : 1;
+               *desc_ptr += sizeof(u64);
+               if (*desc_ptr == end_desc)
+                       *desc_ptr = start_desc;
+       }
+}
+
+/*
+ * Write one non-HEADER descriptor of a request into the ring, preceded
+ * by a HEADER descriptor whenever a new group starts.
+ *
+ * nhpos:      zero-based index of desc among the request's non-HEADER
+ *             descriptors
+ * nhcnt:      total number of non-HEADER descriptors of the request
+ * reqid:      request id stored in the OPAQUE field of each HEADER
+ * desc:       the non-HEADER descriptor to write
+ * desc_ptr:   in/out write position inside the ring
+ * toggle:     in/out toggle value that is valid at *desc_ptr
+ * start_desc: first descriptor slot of the ring
+ * end_desc:   one past the last descriptor slot of the ring
+ *
+ * Nothing is written when nhpos is not below nhcnt.
+ */
+static void flexrm_enqueue_desc(u32 nhpos, u32 nhcnt, u32 reqid,
+                                u64 desc, void **desc_ptr, u32 *toggle,
+                                void *start_desc, void *end_desc)
+{
+       u64 d;
+       u32 nhavail, _toggle, _startpkt, _endpkt, _bdcount;
+
+       /* Sanity check */
+       if (nhcnt <= nhpos)
+               return;
+
+       /*
+        * Each request or packet start with a HEADER descriptor followed
+        * by one or more non-HEADER descriptors (SRC, SRCT, MSRC, DST,
+        * DSTT, MDST, IMM, and IMMT). The number of non-HEADER descriptors
+        * following a HEADER descriptor is represented by BDCOUNT field
+        * of HEADER descriptor. The max value of BDCOUNT field is 31 which
+        * means we can only have 31 non-HEADER descriptors following one
+        * HEADER descriptor.
+        *
+        * In general use, number of non-HEADER descriptors can easily go
+        * beyond 31. To tackle this situation, we have packet (or request)
+        * extension bits (STARTPKT and ENDPKT) in the HEADER descriptor.
+        *
+        * To use packet extension, the first HEADER descriptor of request
+        * (or packet) will have STARTPKT=1 and ENDPKT=0. The intermediate
+        * HEADER descriptors will have STARTPKT=0 and ENDPKT=0. The last
+        * HEADER descriptor will have STARTPKT=0 and ENDPKT=1. Also, the
+        * TOGGLE bit of the first HEADER will be set to invalid state to
+        * ensure that FlexRM does not start fetching descriptors till all
+        * descriptors are enqueued. The user of this function will flip
+        * the TOGGLE bit of first HEADER after all descriptors are
+        * enqueued.
+        */
+
+       /* nhcnt > nhpos is guaranteed here, so at least one is left */
+       if (nhpos % HEADER_BDCOUNT_MAX == 0) {
+               /* Prepare the header descriptor */
+               nhavail = (nhcnt - nhpos);
+               _toggle = (nhpos == 0) ? !(*toggle) : (*toggle);
+               _startpkt = (nhpos == 0) ? 0x1 : 0x0;
+               _endpkt = (nhavail <= HEADER_BDCOUNT_MAX) ? 0x1 : 0x0;
+               _bdcount = (nhavail <= HEADER_BDCOUNT_MAX) ?
+                               nhavail : HEADER_BDCOUNT_MAX;
+               d = flexrm_header_desc(_toggle, _startpkt, _endpkt,
+                                       _bdcount, 0x0, reqid);
+
+               /* Write header descriptor */
+               flexrm_write_desc(*desc_ptr, d);
+
+               flexrm_advance_desc_ptr(desc_ptr, toggle,
+                                       start_desc, end_desc);
+       }
+
+       /* Write desired descriptor */
+       flexrm_write_desc(*desc_ptr, desc);
+
+       flexrm_advance_desc_ptr(desc_ptr, toggle, start_desc, end_desc);
+}
+
+/* Build a source (SRC) descriptor for a buffer of length bytes at addr */
+static u64 flexrm_src_desc(dma_addr_t addr, unsigned int length)
+{
+       u64 bd = 0;
+
+       DESC_ENC(bd, SRC_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK);
+       DESC_ENC(bd, length, SRC_LENGTH_SHIFT, SRC_LENGTH_MASK);
+       DESC_ENC(bd, addr, SRC_ADDR_SHIFT, SRC_ADDR_MASK);
+
+       return bd;
+}
+
+/*
+ * Build a mega source (MSRC) descriptor. The length field holds the
+ * buffer length already divided by 16.
+ */
+static u64 flexrm_msrc_desc(dma_addr_t addr, unsigned int length_div_16)
+{
+       u64 bd = 0;
+
+       DESC_ENC(bd, MSRC_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK);
+       DESC_ENC(bd, length_div_16, MSRC_LENGTH_SHIFT, MSRC_LENGTH_MASK);
+       DESC_ENC(bd, addr, MSRC_ADDR_SHIFT, MSRC_ADDR_MASK);
+
+       return bd;
+}
+
+/* Build a destination (DST) descriptor for length bytes at addr */
+static u64 flexrm_dst_desc(dma_addr_t addr, unsigned int length)
+{
+       u64 bd = 0;
+
+       DESC_ENC(bd, DST_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK);
+       DESC_ENC(bd, length, DST_LENGTH_SHIFT, DST_LENGTH_MASK);
+       DESC_ENC(bd, addr, DST_ADDR_SHIFT, DST_ADDR_MASK);
+
+       return bd;
+}
+
+/*
+ * Build a mega destination (MDST) descriptor. The length field holds the
+ * buffer length already divided by 16.
+ */
+static u64 flexrm_mdst_desc(dma_addr_t addr, unsigned int length_div_16)
+{
+       u64 bd = 0;
+
+       DESC_ENC(bd, MDST_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK);
+       DESC_ENC(bd, length_div_16, MDST_LENGTH_SHIFT, MDST_LENGTH_MASK);
+       DESC_ENC(bd, addr, MDST_ADDR_SHIFT, MDST_ADDR_MASK);
+
+       return bd;
+}
+
+/* Build an immediate (IMM) descriptor carrying data inline */
+static u64 flexrm_imm_desc(u64 data)
+{
+       u64 bd = 0;
+
+       DESC_ENC(bd, IMM_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK);
+       DESC_ENC(bd, data, IMM_DATA_SHIFT, IMM_DATA_MASK);
+
+       return bd;
+}
+
+/* Build a source with tlast (SRCT) descriptor for length bytes at addr */
+static u64 flexrm_srct_desc(dma_addr_t addr, unsigned int length)
+{
+       u64 bd = 0;
+
+       DESC_ENC(bd, SRCT_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK);
+       DESC_ENC(bd, length, SRCT_LENGTH_SHIFT, SRCT_LENGTH_MASK);
+       DESC_ENC(bd, addr, SRCT_ADDR_SHIFT, SRCT_ADDR_MASK);
+
+       return bd;
+}
+
+/* Build a destination with tlast (DSTT) descriptor for length bytes at addr */
+static u64 flexrm_dstt_desc(dma_addr_t addr, unsigned int length)
+{
+       u64 bd = 0;
+
+       DESC_ENC(bd, DSTT_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK);
+       DESC_ENC(bd, length, DSTT_LENGTH_SHIFT, DSTT_LENGTH_MASK);
+       DESC_ENC(bd, addr, DSTT_ADDR_SHIFT, DSTT_ADDR_MASK);
+
+       return bd;
+}
+
+/* Build an immediate with tlast (IMMT) descriptor carrying data inline */
+static u64 flexrm_immt_desc(u64 data)
+{
+       u64 bd = 0;
+
+       DESC_ENC(bd, IMMT_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK);
+       DESC_ENC(bd, data, IMMT_DATA_SHIFT, IMMT_DATA_MASK);
+
+       return bd;
+}
+
+/*
+ * Check that an SPU message can be expressed with ring descriptors: both
+ * scatterlists must be present and every entry must fit the length field
+ * of the descriptor it will use. Entries whose length is a multiple of
+ * 16 are limited by the mega descriptor (length counted in units of 16),
+ * all others by the plain SRC/DST descriptor.
+ */
+static bool flexrm_spu_sanity_check(struct brcm_message *msg)
+{
+       struct scatterlist *sg;
+
+       if (!msg->spu.src || !msg->spu.dst)
+               return false;
+
+       for (sg = msg->spu.src; sg; sg = sg_next(sg)) {
+               if ((sg->length & 0xf) && sg->length > SRC_LENGTH_MASK)
+                       return false;
+               if (!(sg->length & 0xf) &&
+                   sg->length > (MSRC_LENGTH_MASK * 16))
+                       return false;
+       }
+
+       for (sg = msg->spu.dst; sg; sg = sg_next(sg)) {
+               if ((sg->length & 0xf) && sg->length > DST_LENGTH_MASK)
+                       return false;
+               if (!(sg->length & 0xf) &&
+                   sg->length > (MDST_LENGTH_MASK * 16))
+                       return false;
+       }
+
+       return true;
+}
+
+/*
+ * Count the non-HEADER descriptors an SPU message needs: one per source
+ * entry and one per destination entry. The lists are walked interleaved,
+ * consuming destination entries until they cover the length of the
+ * source entry just counted; once the sources are exhausted all the
+ * remaining destination entries are counted.
+ */
+static u32 flexrm_spu_estimate_nonheader_desc_count(struct brcm_message *msg)
+{
+       u32 ndesc = 0;
+       unsigned int remaining = 0;
+       struct scatterlist *src = msg->spu.src;
+       struct scatterlist *dst = msg->spu.dst;
+
+       while (src || dst) {
+               if (!src) {
+                       /* No sources left: drain the destination list */
+                       remaining = UINT_MAX;
+               } else {
+                       ndesc++;
+                       remaining = src->length;
+                       src = sg_next(src);
+               }
+
+               for (; remaining && dst; dst = sg_next(dst)) {
+                       ndesc++;
+                       remaining = (dst->length < remaining) ?
+                                       remaining - dst->length : 0;
+               }
+       }
+
+       return ndesc;
+}
+
+/*
+ * DMA-map the source (to device) and destination (from device)
+ * scatterlists of an SPU message.
+ *
+ * dma_map_sg() signals failure by returning 0 mapped entries, so a zero
+ * result is turned into -EIO here; a negative result is passed through
+ * unchanged. If mapping the destination fails, the source mapping is
+ * undone before returning. Returns 0 on success.
+ */
+static int flexrm_spu_dma_map(struct device *dev, struct brcm_message *msg)
+{
+       int rc;
+
+       rc = dma_map_sg(dev, msg->spu.src, sg_nents(msg->spu.src),
+                       DMA_TO_DEVICE);
+       if (rc <= 0)
+               return rc ? rc : -EIO;
+
+       rc = dma_map_sg(dev, msg->spu.dst, sg_nents(msg->spu.dst),
+                       DMA_FROM_DEVICE);
+       if (rc <= 0) {
+               dma_unmap_sg(dev, msg->spu.src, sg_nents(msg->spu.src),
+                            DMA_TO_DEVICE);
+               return rc ? rc : -EIO;
+       }
+
+       return 0;
+}
+
+/* Undo the DMA mappings of an SPU message: destination first, then source */
+static void flexrm_spu_dma_unmap(struct device *dev, struct brcm_message *msg)
+{
+       struct scatterlist *src = msg->spu.src;
+       struct scatterlist *dst = msg->spu.dst;
+
+       dma_unmap_sg(dev, dst, sg_nents(dst), DMA_FROM_DEVICE);
+       dma_unmap_sg(dev, src, sg_nents(src), DMA_TO_DEVICE);
+}
+
+/*
+ * Write the descriptors of an SPU message into the ring.
+ *
+ * The source and destination scatterlists are walked interleaved: each
+ * source entry is emitted, followed by destination entries until their
+ * DMA lengths cover the length of that source entry; once the sources
+ * are exhausted all remaining destination entries are emitted. An entry
+ * whose DMA length is a multiple of 16 uses the mega (MSRC/MDST) form
+ * with the length divided by 16, any other entry uses SRC/DST.
+ *
+ * msg:        SPU message with DMA-mapped scatterlists
+ * nhcnt:      number of non-HEADER descriptors that will be enqueued
+ * reqid:      request id stored in the HEADER descriptors
+ * desc_ptr:   ring position to start writing at
+ * toggle:     toggle value that is valid at desc_ptr
+ * start_desc: first descriptor slot of the ring
+ * end_desc:   one past the last descriptor slot of the ring
+ *
+ * Returns the position of the terminating NULL descriptor.
+ */
+static void *flexrm_spu_write_descs(struct brcm_message *msg, u32 nhcnt,
+                                    u32 reqid, void *desc_ptr, u32 toggle,
+                                    void *start_desc, void *end_desc)
+{
+       u64 d;
+       u32 nhpos = 0;
+       /* Position of the first HEADER, whose toggle is flipped at the end */
+       void *orig_desc_ptr = desc_ptr;
+       unsigned int dst_target = 0;
+       struct scatterlist *src_sg = msg->spu.src, *dst_sg = msg->spu.dst;
+
+       while (src_sg || dst_sg) {
+               if (src_sg) {
+                       if (sg_dma_len(src_sg) & 0xf)
+                               d = flexrm_src_desc(sg_dma_address(src_sg),
+                                                    sg_dma_len(src_sg));
+                       else
+                               d = flexrm_msrc_desc(sg_dma_address(src_sg),
+                                                     sg_dma_len(src_sg)/16);
+                       flexrm_enqueue_desc(nhpos, nhcnt, reqid,
+                                            d, &desc_ptr, &toggle,
+                                            start_desc, end_desc);
+                       nhpos++;
+                       dst_target = sg_dma_len(src_sg);
+                       src_sg = sg_next(src_sg);
+               } else
+                       /* No sources left: emit every remaining destination */
+                       dst_target = UINT_MAX;
+
+               while (dst_target && dst_sg) {
+                       if (sg_dma_len(dst_sg) & 0xf)
+                               d = flexrm_dst_desc(sg_dma_address(dst_sg),
+                                                    sg_dma_len(dst_sg));
+                       else
+                               d = flexrm_mdst_desc(sg_dma_address(dst_sg),
+                                                     sg_dma_len(dst_sg)/16);
+                       flexrm_enqueue_desc(nhpos, nhcnt, reqid,
+                                            d, &desc_ptr, &toggle,
+                                            start_desc, end_desc);
+                       nhpos++;
+                       if (sg_dma_len(dst_sg) < dst_target)
+                               dst_target -= sg_dma_len(dst_sg);
+                       else
+                               dst_target = 0;
+                       dst_sg = sg_next(dst_sg);
+               }
+       }
+
+       /* Null descriptor with invalid toggle bit */
+       flexrm_write_desc(desc_ptr, flexrm_null_desc(!toggle));
+
+       /* Ensure that descriptors have been written to memory */
+       wmb();
+
+       /* Flip toggle bit in header */
+       flexrm_flip_header_toogle(orig_desc_ptr);
+
+       return desc_ptr;
+}
+
+/*
+ * Check that every command of an SBA message can be expressed with ring
+ * descriptors: there must be at least one command, type B/C commands
+ * must not have an output, and data/response lengths must fit the SRCT
+ * and DSTT length fields.
+ */
+static bool flexrm_sba_sanity_check(struct brcm_message *msg)
+{
+       u32 i;
+
+       if (!msg->sba.cmds || !msg->sba.cmds_count)
+               return false;
+
+       for (i = 0; i < msg->sba.cmds_count; i++) {
+               struct brcm_sba_command *c = &msg->sba.cmds[i];
+               bool has_src = (c->flags & BRCM_SBA_CMD_TYPE_B) ||
+                              (c->flags & BRCM_SBA_CMD_TYPE_C);
+               bool has_out = c->flags & BRCM_SBA_CMD_HAS_OUTPUT;
+               bool has_resp = c->flags & BRCM_SBA_CMD_HAS_RESP;
+
+               /* Type B/C commands carry input data, never an output */
+               if (has_src && has_out)
+                       return false;
+               if (has_src && c->data_len > SRCT_LENGTH_MASK)
+                       return false;
+               if (has_resp && c->resp_len > DSTT_LENGTH_MASK)
+                       return false;
+               if (has_out && c->data_len > DSTT_LENGTH_MASK)
+                       return false;
+       }
+
+       return true;
+}
+
+/*
+ * Count the non-HEADER descriptors an SBA message needs: one per command
+ * plus one each for type B/C input data, a response and an output.
+ */
+static u32 flexrm_sba_estimate_nonheader_desc_count(struct brcm_message *msg)
+{
+       u32 idx, ndesc = 0;
+
+       for (idx = 0; idx < msg->sba.cmds_count; idx++) {
+               struct brcm_sba_command *c = &msg->sba.cmds[idx];
+
+               /* The command itself */
+               ndesc++;
+
+               if ((c->flags & BRCM_SBA_CMD_TYPE_B) ||
+                   (c->flags & BRCM_SBA_CMD_TYPE_C))
+                       ndesc++;
+               if (c->flags & BRCM_SBA_CMD_HAS_RESP)
+                       ndesc++;
+               if (c->flags & BRCM_SBA_CMD_HAS_OUTPUT)
+                       ndesc++;
+       }
+
+       return ndesc;
+}
+
+/*
+ * Write the descriptors of an SBA message into the ring.
+ *
+ * For each command, in this order: an optional response destination
+ * (DST when the command also has an output, DSTT otherwise), an optional
+ * output destination (DSTT), the command word as an immediate (IMM for
+ * type B commands, IMMT otherwise) and, for type B/C commands, the input
+ * data (SRCT).
+ *
+ * msg:        SBA message
+ * nhcnt:      number of non-HEADER descriptors that will be enqueued
+ * reqid:      request id stored in the HEADER descriptors
+ * desc_ptr:   ring position to start writing at
+ * toggle:     toggle value that is valid at desc_ptr
+ * start_desc: first descriptor slot of the ring
+ * end_desc:   one past the last descriptor slot of the ring
+ *
+ * Returns the position of the terminating NULL descriptor.
+ */
+static void *flexrm_sba_write_descs(struct brcm_message *msg, u32 nhcnt,
+                                    u32 reqid, void *desc_ptr, u32 toggle,
+                                    void *start_desc, void *end_desc)
+{
+       u64 d;
+       u32 i, nhpos = 0;
+       struct brcm_sba_command *c;
+       /* Position of the first HEADER, whose toggle is flipped at the end */
+       void *orig_desc_ptr = desc_ptr;
+
+       /* Convert SBA commands into descriptors */
+       for (i = 0; i < msg->sba.cmds_count; i++) {
+               c = &msg->sba.cmds[i];
+
+               if ((c->flags & BRCM_SBA_CMD_HAS_RESP) &&
+                   (c->flags & BRCM_SBA_CMD_HAS_OUTPUT)) {
+                       /* Destination response descriptor */
+                       d = flexrm_dst_desc(c->resp, c->resp_len);
+                       flexrm_enqueue_desc(nhpos, nhcnt, reqid,
+                                            d, &desc_ptr, &toggle,
+                                            start_desc, end_desc);
+                       nhpos++;
+               } else if (c->flags & BRCM_SBA_CMD_HAS_RESP) {
+                       /* Destination response with tlast descriptor */
+                       d = flexrm_dstt_desc(c->resp, c->resp_len);
+                       flexrm_enqueue_desc(nhpos, nhcnt, reqid,
+                                            d, &desc_ptr, &toggle,
+                                            start_desc, end_desc);
+                       nhpos++;
+               }
+
+               if (c->flags & BRCM_SBA_CMD_HAS_OUTPUT) {
+                       /* Destination with tlast descriptor */
+                       d = flexrm_dstt_desc(c->data, c->data_len);
+                       flexrm_enqueue_desc(nhpos, nhcnt, reqid,
+                                            d, &desc_ptr, &toggle,
+                                            start_desc, end_desc);
+                       nhpos++;
+               }
+
+               if (c->flags & BRCM_SBA_CMD_TYPE_B) {
+                       /* Command as immediate descriptor */
+                       d = flexrm_imm_desc(c->cmd);
+                       flexrm_enqueue_desc(nhpos, nhcnt, reqid,
+                                            d, &desc_ptr, &toggle,
+                                            start_desc, end_desc);
+                       nhpos++;
+               } else {
+                       /* Command as immediate descriptor with tlast */
+                       d = flexrm_immt_desc(c->cmd);
+                       flexrm_enqueue_desc(nhpos, nhcnt, reqid,
+                                            d, &desc_ptr, &toggle,
+                                            start_desc, end_desc);
+                       nhpos++;
+               }
+
+               if ((c->flags & BRCM_SBA_CMD_TYPE_B) ||
+                   (c->flags & BRCM_SBA_CMD_TYPE_C)) {
+                       /* Source with tlast descriptor */
+                       d = flexrm_srct_desc(c->data, c->data_len);
+                       flexrm_enqueue_desc(nhpos, nhcnt, reqid,
+                                            d, &desc_ptr, &toggle,
+                                            start_desc, end_desc);
+                       nhpos++;
+               }
+       }
+
+       /* Null descriptor with invalid toggle bit */
+       flexrm_write_desc(desc_ptr, flexrm_null_desc(!toggle));
+
+       /* Ensure that descriptors have been written to memory */
+       wmb();
+
+       /* Flip toggle bit in header */
+       flexrm_flip_header_toogle(orig_desc_ptr);
+
+       return desc_ptr;
+}
+
+/*
+ * Validate a message by dispatching on its type. NULL messages and
+ * message types other than SPU and SBA are rejected.
+ */
+static bool flexrm_sanity_check(struct brcm_message *msg)
+{
+       if (!msg)
+               return false;
+
+       if (msg->type == BRCM_MESSAGE_SPU)
+               return flexrm_spu_sanity_check(msg);
+       if (msg->type == BRCM_MESSAGE_SBA)
+               return flexrm_sba_sanity_check(msg);
+
+       return false;
+}
+
+/*
+ * Return the number of non-HEADER descriptors a message needs, or 0 for
+ * a NULL message or a message type other than SPU and SBA.
+ */
+static u32 flexrm_estimate_nonheader_desc_count(struct brcm_message *msg)
+{
+       if (!msg)
+               return 0;
+
+       if (msg->type == BRCM_MESSAGE_SPU)
+               return flexrm_spu_estimate_nonheader_desc_count(msg);
+       if (msg->type == BRCM_MESSAGE_SBA)
+               return flexrm_sba_estimate_nonheader_desc_count(msg);
+
+       return 0;
+}
+
+/*
+ * DMA-map the buffers of a message. Only SPU messages are mapped here;
+ * every other type succeeds without doing anything. Returns -EINVAL when
+ * dev or msg is NULL.
+ */
+static int flexrm_dma_map(struct device *dev, struct brcm_message *msg)
+{
+       if (!dev || !msg)
+               return -EINVAL;
+
+       if (msg->type == BRCM_MESSAGE_SPU)
+               return flexrm_spu_dma_map(dev, msg);
+
+       return 0;
+}
+
+/*
+ * Undo flexrm_dma_map() for a message. Does nothing for NULL arguments
+ * and for message types other than SPU.
+ */
+static void flexrm_dma_unmap(struct device *dev, struct brcm_message *msg)
+{
+       if (dev && msg && msg->type == BRCM_MESSAGE_SPU)
+               flexrm_spu_dma_unmap(dev, msg);
+}
+
+/*
+ * Write the descriptors of a message into the ring by dispatching on the
+ * message type.
+ *
+ * Returns the new write position on success, ERR_PTR(-ENOTSUPP) for a
+ * NULL argument or an unsupported message type, and ERR_PTR(-ERANGE)
+ * when desc_ptr lies outside [start_desc, end_desc).
+ */
+static void *flexrm_write_descs(struct brcm_message *msg, u32 nhcnt,
+                               u32 reqid, void *desc_ptr, u32 toggle,
+                               void *start_desc, void *end_desc)
+{
+       if (!msg || !desc_ptr || !start_desc || !end_desc)
+               return ERR_PTR(-ENOTSUPP);
+
+       if (desc_ptr < start_desc || desc_ptr >= end_desc)
+               return ERR_PTR(-ERANGE);
+
+       if (msg->type == BRCM_MESSAGE_SPU)
+               return flexrm_spu_write_descs(msg, nhcnt, reqid,
+                                              desc_ptr, toggle,
+                                              start_desc, end_desc);
+       if (msg->type == BRCM_MESSAGE_SBA)
+               return flexrm_sba_write_descs(msg, nhcnt, reqid,
+                                              desc_ptr, toggle,
+                                              start_desc, end_desc);
+
+       return ERR_PTR(-ENOTSUPP);
+}
+
+/* ====== FlexRM driver helper routines ===== */
+
+/*
+ * flexrm_new_request() - try to queue one message on a ring
+ * @ring:      ring to queue on
+ * @batch_msg: enclosing batch message, or NULL when @msg is standalone
+ * @msg:       SPU or SBA message to convert into ring descriptors
+ *
+ * Returns 0 when the message was written to the ring and also when it
+ * had to be deferred because no request id or not enough descriptor
+ * space was available; in the deferred case ring->last_pending_msg
+ * records @batch_msg (or @msg) so that flexrm_process_completions() can
+ * send it again. Returns -EIO when the sanity check fails, -ENOSPC when
+ * a deferred message is already pending, or the error reported by DMA
+ * mapping or descriptor writing.
+ */
+static int flexrm_new_request(struct flexrm_ring *ring,
+                               struct brcm_message *batch_msg,
+                               struct brcm_message *msg)
+{
+       void *next;
+       unsigned long flags;
+       u32 val, count, nhcnt;
+       u32 read_offset, write_offset;
+       bool exit_cleanup = false;
+       int ret = 0, reqid;
+
+       /* Do sanity check on message */
+       if (!flexrm_sanity_check(msg))
+               return -EIO;
+       msg->error = 0;
+
+       /*
+        * If no requests possible then save data pointer and goto done.
+        *
+        * This runs on the mailbox send_data path, which the mailbox
+        * framework invokes in atomic context, so the id allocation
+        * must not sleep.
+        */
+       reqid = ida_simple_get(&ring->requests_ida, 0,
+                               RING_MAX_REQ_COUNT, GFP_ATOMIC);
+       if (reqid < 0) {
+               spin_lock_irqsave(&ring->lock, flags);
+               if (batch_msg)
+                       ring->last_pending_msg = batch_msg;
+               else
+                       ring->last_pending_msg = msg;
+               spin_unlock_irqrestore(&ring->lock, flags);
+               return 0;
+       }
+       ring->requests[reqid] = msg;
+
+       /* Do DMA mappings for the message */
+       ret = flexrm_dma_map(ring->mbox->dev, msg);
+       if (ret < 0) {
+               ring->requests[reqid] = NULL;
+               ida_simple_remove(&ring->requests_ida, reqid);
+               return ret;
+       }
+
+       /* If last_pending_msg is already set then goto done with error */
+       spin_lock_irqsave(&ring->lock, flags);
+       if (ring->last_pending_msg)
+               ret = -ENOSPC;
+       spin_unlock_irqrestore(&ring->lock, flags);
+       if (ret < 0) {
+               dev_warn(ring->mbox->dev, "no space in ring %d\n", ring->num);
+               exit_cleanup = true;
+               goto exit;
+       }
+
+       /* Determine current HW BD read offset */
+       read_offset = readl_relaxed(ring->regs + RING_BD_READ_PTR);
+       val = readl_relaxed(ring->regs + RING_BD_START_ADDR);
+       read_offset *= RING_DESC_SIZE;
+       read_offset += (u32)(BD_START_ADDR_DECODE(val) - ring->bd_dma_base);
+
+       /*
+        * Number required descriptors = number of non-header descriptors +
+        *                               number of header descriptors +
+        *                               1x null descriptor
+        */
+       nhcnt = flexrm_estimate_nonheader_desc_count(msg);
+       count = flexrm_estimate_header_desc_count(nhcnt) + nhcnt + 1;
+
+       /*
+        * Check for available descriptor space. Next-table pointer slots
+        * cannot hold request descriptors, so they are walked over
+        * without being counted.
+        */
+       write_offset = ring->bd_write_offset;
+       while (count) {
+               if (!flexrm_is_next_table_desc(ring->bd_base + write_offset))
+                       count--;
+               write_offset += RING_DESC_SIZE;
+               if (write_offset == RING_BD_SIZE)
+                       write_offset = 0x0;
+               if (write_offset == read_offset)
+                       break;
+       }
+       if (count) {
+               /* Not enough room: defer the message and release the id */
+               spin_lock_irqsave(&ring->lock, flags);
+               if (batch_msg)
+                       ring->last_pending_msg = batch_msg;
+               else
+                       ring->last_pending_msg = msg;
+               spin_unlock_irqrestore(&ring->lock, flags);
+               ret = 0;
+               exit_cleanup = true;
+               goto exit;
+       }
+
+       /* Write descriptors to ring */
+       next = flexrm_write_descs(msg, nhcnt, reqid,
+                       ring->bd_base + ring->bd_write_offset,
+                       RING_BD_TOGGLE_VALID(ring->bd_write_offset),
+                       ring->bd_base, ring->bd_base + RING_BD_SIZE);
+       if (IS_ERR(next)) {
+               ret = PTR_ERR(next);
+               exit_cleanup = true;
+               goto exit;
+       }
+
+       /* Save ring BD write offset */
+       ring->bd_write_offset = (unsigned long)(next - ring->bd_base);
+
+exit:
+       /* Update error status in message */
+       msg->error = ret;
+
+       /* Cleanup if we failed */
+       if (exit_cleanup) {
+               flexrm_dma_unmap(ring->mbox->dev, msg);
+               ring->requests[reqid] = NULL;
+               ida_simple_remove(&ring->requests_ida, reqid);
+       }
+
+       return ret;
+}
+
+/*
+ * flexrm_process_completions() - consume new completion descriptors
+ * @ring: ring whose completion memory is processed
+ *
+ * Re-sends a deferred message (ring->last_pending_msg) if there is one,
+ * then walks the completion entries between the saved read offset and
+ * the hardware write pointer. For each entry the matching request is
+ * looked up by request id, its id and DMA mappings are released and the
+ * message is handed back to the mailbox client with msg->error set from
+ * the completion status.
+ *
+ * Returns the number of messages handed back to the client.
+ */
+static int flexrm_process_completions(struct flexrm_ring *ring)
+{
+       u64 desc;
+       int err, count = 0;
+       unsigned long flags;
+       struct brcm_message *msg = NULL;
+       u32 reqid, cmpl_read_offset, cmpl_write_offset;
+       struct mbox_chan *chan = &ring->mbox->controller.chans[ring->num];
+
+       spin_lock_irqsave(&ring->lock, flags);
+
+       /* Check last_pending_msg */
+       if (ring->last_pending_msg) {
+               msg = ring->last_pending_msg;
+               ring->last_pending_msg = NULL;
+       }
+
+       /*
+        * Get current completion read and write offset
+        *
+        * Note: We should read completion write pointer at least once
+        * after we get a MSI interrupt because HW maintains internal
+        * MSI status which will allow next MSI interrupt only after
+        * completion write pointer is read.
+        */
+       cmpl_write_offset = readl_relaxed(ring->regs + RING_CMPL_WRITE_PTR);
+       cmpl_write_offset *= RING_DESC_SIZE;
+       cmpl_read_offset = ring->cmpl_read_offset;
+       ring->cmpl_read_offset = cmpl_write_offset;
+
+       spin_unlock_irqrestore(&ring->lock, flags);
+
+       /* If last_pending_msg was set then queue it back */
+       if (msg)
+               mbox_send_message(chan, msg);
+
+       /* For each completed request notify mailbox clients */
+       while (cmpl_read_offset != cmpl_write_offset) {
+               /*
+                * Dequeue next completion descriptor. Descriptors are
+                * little-endian in memory, so go through the same
+                * accessor that is used for the BD ring.
+                */
+               desc = flexrm_read_desc(ring->cmpl_base + cmpl_read_offset);
+
+               /* Next read offset */
+               cmpl_read_offset += RING_DESC_SIZE;
+               if (cmpl_read_offset == RING_CMPL_SIZE)
+                       cmpl_read_offset = 0;
+
+               /* Decode error from completion descriptor */
+               err = flexrm_cmpl_desc_to_error(desc);
+               if (err < 0) {
+                       dev_warn(ring->mbox->dev,
+                                "got completion desc=0x%llx with error %d\n",
+                                (unsigned long long)desc, err);
+               }
+
+               /* Determine request id from completion descriptor */
+               reqid = flexrm_cmpl_desc_to_reqid(desc);
+
+               /* Never index requests[] with an id it cannot hold */
+               if (reqid >= RING_MAX_REQ_COUNT) {
+                       dev_warn(ring->mbox->dev,
+                                "invalid reqid %u in completion desc=0x%llx\n",
+                                reqid, (unsigned long long)desc);
+                       continue;
+               }
+
+               /* Determine message pointer based on reqid */
+               msg = ring->requests[reqid];
+               if (!msg) {
+                       dev_warn(ring->mbox->dev,
+                                "null msg pointer for completion desc=0x%llx\n",
+                                (unsigned long long)desc);
+                       continue;
+               }
+
+               /* Release reqid for recycling */
+               ring->requests[reqid] = NULL;
+               ida_simple_remove(&ring->requests_ida, reqid);
+
+               /* Unmap DMA mappings */
+               flexrm_dma_unmap(ring->mbox->dev, msg);
+
+               /* Give-back message to mailbox client */
+               msg->error = err;
+               mbox_chan_received_data(chan, msg);
+
+               /* Increment number of completions processed */
+               count++;
+       }
+
+       return count;
+}
+
+/* ====== FlexRM interrupt handler ===== */
+
+/*
+ * Hard IRQ handler. We only have MSI for completions and ring related
+ * errors are informed via completion descriptors, so all that is left
+ * to do here is to wake up the IRQ thread.
+ */
+static irqreturn_t flexrm_irq_event(int irq, void *dev_id)
+{
+       return IRQ_WAKE_THREAD;
+}
+
+/* Threaded IRQ handler: process the completions of the ring in dev_id */
+static irqreturn_t flexrm_irq_thread(int irq, void *dev_id)
+{
+       struct flexrm_ring *ring = dev_id;
+
+       flexrm_process_completions(ring);
+
+       return IRQ_HANDLED;
+}
+
+/* ====== FlexRM mailbox callbacks ===== */
+
+/*
+ * Mailbox send_data callback. A batch message is queued one sub-message
+ * at a time, resuming at batch.msgs_queued; the first failure is stored
+ * in the batch message's error field and returned. Any other message is
+ * queued directly.
+ */
+static int flexrm_send_data(struct mbox_chan *chan, void *data)
+{
+       struct brcm_message *msg = data;
+       struct flexrm_ring *ring = chan->con_priv;
+       int idx, rc;
+
+       if (msg->type != BRCM_MESSAGE_BATCH)
+               return flexrm_new_request(ring, NULL, data);
+
+       idx = msg->batch.msgs_queued;
+       while (idx < msg->batch.msgs_count) {
+               rc = flexrm_new_request(ring, msg, &msg->batch.msgs[idx]);
+               if (rc) {
+                       msg->error = rc;
+                       return rc;
+               }
+               msg->batch.msgs_queued++;
+               idx++;
+       }
+
+       return 0;
+}
+
+/*
+ * Mailbox peek_data callback: process pending completions and report
+ * whether at least one message was handed back to the client.
+ */
+static bool flexrm_peek_data(struct mbox_chan *chan)
+{
+       return flexrm_process_completions(chan->con_priv) > 0;
+}
+
+/*
+ * Mailbox .startup callback for the ring behind this channel.
+ *
+ * Allocates the BD and completion memory from the per-mailbox DMA pools,
+ * pre-fills the BD memory with next-table/null descriptors, requests the
+ * ring's threaded IRQ, programs the ring registers and finally activates
+ * the ring.
+ *
+ * Returns 0 on success, -ENOMEM if a DMA pool allocation fails, -ENODEV if
+ * the ring has no IRQ assigned, or the error from request_threaded_irq().
+ * On failure the DMA memory allocated here is freed again.
+ */
+static int flexrm_startup(struct mbox_chan *chan)
+{
+       u64 d;
+       u32 val, off;
+       int ret = 0;
+       dma_addr_t next_addr;
+       struct flexrm_ring *ring = chan->con_priv;
+
+       /* Allocate BD memory */
+       ring->bd_base = dma_pool_alloc(ring->mbox->bd_pool,
+                                      GFP_KERNEL, &ring->bd_dma_base);
+       if (!ring->bd_base) {
+               dev_err(ring->mbox->dev, "can't allocate BD memory\n");
+               ret = -ENOMEM;
+               goto fail;
+       }
+
+       /* Configure next table pointer entries in BD memory */
+       for (off = 0; off < RING_BD_SIZE; off += RING_DESC_SIZE) {
+               /* Address of the following descriptor, wrapping to the start */
+               next_addr = off + RING_DESC_SIZE;
+               if (next_addr == RING_BD_SIZE)
+                       next_addr = 0;
+               next_addr += ring->bd_dma_base;
+               /* Next-table descriptor where the alignment check hits, null descriptor elsewhere */
+               if (RING_BD_ALIGN_CHECK(next_addr))
+                       d = flexrm_next_table_desc(RING_BD_TOGGLE_VALID(off),
+                                                   next_addr);
+               else
+                       d = flexrm_null_desc(RING_BD_TOGGLE_INVALID(off));
+               flexrm_write_desc(ring->bd_base + off, d);
+       }
+
+       /* Allocate completion memory */
+       ring->cmpl_base = dma_pool_alloc(ring->mbox->cmpl_pool,
+                                        GFP_KERNEL, &ring->cmpl_dma_base);
+       if (!ring->cmpl_base) {
+               dev_err(ring->mbox->dev, "can't allocate completion memory\n");
+               ret = -ENOMEM;
+               goto fail_free_bd_memory;
+       }
+       memset(ring->cmpl_base, 0, RING_CMPL_SIZE);
+
+       /* Request IRQ */
+       /* UINT_MAX is the "no IRQ assigned" marker set by probe */
+       if (ring->irq == UINT_MAX) {
+               dev_err(ring->mbox->dev, "ring IRQ not available\n");
+               ret = -ENODEV;
+               goto fail_free_cmpl_memory;
+       }
+       ret = request_threaded_irq(ring->irq,
+                                  flexrm_irq_event,
+                                  flexrm_irq_thread,
+                                  0, dev_name(ring->mbox->dev), ring);
+       if (ret) {
+               dev_err(ring->mbox->dev, "failed to request ring IRQ\n");
+               goto fail_free_cmpl_memory;
+       }
+       /* Remembered so that shutdown knows it has to free the IRQ */
+       ring->irq_requested = true;
+
+       /* Disable/inactivate ring */
+       writel_relaxed(0x0, ring->regs + RING_CONTROL);
+
+       /* Program BD start address */
+       val = BD_START_ADDR_VALUE(ring->bd_dma_base);
+       writel_relaxed(val, ring->regs + RING_BD_START_ADDR);
+
+       /* BD write pointer will be same as HW write pointer */
+       ring->bd_write_offset =
+                       readl_relaxed(ring->regs + RING_BD_WRITE_PTR);
+       ring->bd_write_offset *= RING_DESC_SIZE;
+
+       /* Program completion start address */
+       val = CMPL_START_ADDR_VALUE(ring->cmpl_dma_base);
+       writel_relaxed(val, ring->regs + RING_CMPL_START_ADDR);
+
+       /* Ensure last pending message is cleared */
+       ring->last_pending_msg = NULL;
+
+       /* Completion read pointer will be same as HW write pointer */
+       ring->cmpl_read_offset =
+                       readl_relaxed(ring->regs + RING_CMPL_WRITE_PTR);
+       ring->cmpl_read_offset *= RING_DESC_SIZE;
+
+       /* Read ring Tx, Rx, and Outstanding counts to clear */
+       readl_relaxed(ring->regs + RING_NUM_REQ_RECV_LS);
+       readl_relaxed(ring->regs + RING_NUM_REQ_RECV_MS);
+       readl_relaxed(ring->regs + RING_NUM_REQ_TRANS_LS);
+       readl_relaxed(ring->regs + RING_NUM_REQ_TRANS_MS);
+       readl_relaxed(ring->regs + RING_NUM_REQ_OUTSTAND);
+
+       /* Configure RING_MSI_CONTROL */
+       val = 0;
+       val |= (ring->msi_timer_val << MSI_TIMER_VAL_SHIFT);
+       val |= BIT(MSI_ENABLE_SHIFT);
+       val |= (ring->msi_count_threshold & MSI_COUNT_MASK) << MSI_COUNT_SHIFT;
+       writel_relaxed(val, ring->regs + RING_MSI_CONTROL);
+
+       /* Enable/activate ring */
+       val = BIT(CONTROL_ACTIVE_SHIFT);
+       writel_relaxed(val, ring->regs + RING_CONTROL);
+
+       return 0;
+
+       /* Error unwinding: release in reverse order of allocation */
+fail_free_cmpl_memory:
+       dma_pool_free(ring->mbox->cmpl_pool,
+                     ring->cmpl_base, ring->cmpl_dma_base);
+       ring->cmpl_base = NULL;
+fail_free_bd_memory:
+       dma_pool_free(ring->mbox->bd_pool,
+                     ring->bd_base, ring->bd_dma_base);
+       ring->bd_base = NULL;
+fail:
+       return ret;
+}
+
+/*
+ * Mailbox .shutdown callback for the ring behind this channel.
+ *
+ * Deactivates and flushes the ring, hands every still in-flight request
+ * back to the mailbox client with -EIO, then releases the IRQ and the
+ * BD/completion memory that flexrm_startup() set up.
+ */
+static void flexrm_shutdown(struct mbox_chan *chan)
+{
+       u32 reqid;
+       unsigned int timeout;
+       struct brcm_message *msg;
+       struct flexrm_ring *ring = chan->con_priv;
+
+       /* Disable/inactivate ring */
+       writel_relaxed(0x0, ring->regs + RING_CONTROL);
+
+       /* Flush ring, polling once per millisecond for at most 1s */
+       timeout = 1000;
+       writel_relaxed(BIT(CONTROL_FLUSH_SHIFT),
+                       ring->regs + RING_CONTROL);
+       do {
+               if (readl_relaxed(ring->regs + RING_FLUSH_DONE) &
+                   FLUSH_DONE_MASK)
+                       break;
+               mdelay(1);
+       } while (--timeout);
+       /*
+        * Pre-decrement keeps timeout at zero only when the budget ran out,
+        * so an unfinished flush can be told apart from a late success.
+        */
+       if (!timeout)
+               dev_err(ring->mbox->dev, "ring flush timed out\n");
+
+       /* Abort all in-flight requests */
+       for (reqid = 0; reqid < RING_MAX_REQ_COUNT; reqid++) {
+               msg = ring->requests[reqid];
+               if (!msg)
+                       continue;
+
+               /* Release reqid for recycling */
+               ring->requests[reqid] = NULL;
+               ida_simple_remove(&ring->requests_ida, reqid);
+
+               /* Unmap DMA mappings */
+               flexrm_dma_unmap(ring->mbox->dev, msg);
+
+               /* Give-back message to mailbox client */
+               msg->error = -EIO;
+               mbox_chan_received_data(chan, msg);
+       }
+
+       /* Release IRQ */
+       if (ring->irq_requested) {
+               free_irq(ring->irq, ring);
+               ring->irq_requested = false;
+       }
+
+       /* Free-up completion descriptor ring */
+       if (ring->cmpl_base) {
+               dma_pool_free(ring->mbox->cmpl_pool,
+                             ring->cmpl_base, ring->cmpl_dma_base);
+               ring->cmpl_base = NULL;
+       }
+
+       /* Free-up BD descriptor ring */
+       if (ring->bd_base) {
+               dma_pool_free(ring->mbox->bd_pool,
+                             ring->bd_base, ring->bd_dma_base);
+               ring->bd_base = NULL;
+       }
+}
+
+/*
+ * Mailbox .last_tx_done callback: transmission counts as done when the
+ * ring has no last pending message recorded.
+ */
+static bool flexrm_last_tx_done(struct mbox_chan *chan)
+{
+       struct flexrm_ring *ring = chan->con_priv;
+       unsigned long irq_flags;
+       bool done;
+
+       /* Sample the pending-message pointer under the ring lock */
+       spin_lock_irqsave(&ring->lock, irq_flags);
+       done = !ring->last_pending_msg;
+       spin_unlock_irqrestore(&ring->lock, irq_flags);
+
+       return done;
+}
+
+/* Channel callbacks installed as the mailbox controller's ops in probe */
+static const struct mbox_chan_ops flexrm_mbox_chan_ops = {
+       .send_data      = flexrm_send_data,
+       .startup        = flexrm_startup,
+       .shutdown       = flexrm_shutdown,
+       .last_tx_done   = flexrm_last_tx_done,
+       .peek_data      = flexrm_peek_data,
+};
+
+/*
+ * Translate a mailbox phandle specifier into a channel.
+ *
+ * The specifier cells are used as: args[0] channel (ring) index, args[1]
+ * MSI count threshold, args[2] MSI timer value. The two MSI values are
+ * stored in the selected ring.
+ *
+ * Returns the channel, ERR_PTR(-ENOENT) for an out-of-range channel index,
+ * or ERR_PTR(-EINVAL) for too few cells or an out-of-range MSI value.
+ */
+static struct mbox_chan *flexrm_mbox_of_xlate(struct mbox_controller *cntlr,
+                                       const struct of_phandle_args *pa)
+{
+       struct flexrm_ring *ring;
+       struct mbox_chan *chan;
+
+       /* All three cells are mandatory */
+       if (pa->args_count < 3)
+               return ERR_PTR(-EINVAL);
+
+       /* Channel index must name one of the controller's channels */
+       if (pa->args[0] >= cntlr->num_chans)
+               return ERR_PTR(-ENOENT);
+
+       /* MSI values must not exceed their masks */
+       if (pa->args[1] > MSI_COUNT_MASK || pa->args[2] > MSI_TIMER_VAL_MASK)
+               return ERR_PTR(-EINVAL);
+
+       chan = &cntlr->chans[pa->args[0]];
+
+       /* Record the per-ring MSI settings requested by the client */
+       ring = chan->con_priv;
+       ring->msi_count_threshold = pa->args[1];
+       ring->msi_timer_val = pa->args[2];
+
+       return chan;
+}
+
+/* ====== FlexRM platform driver ===== */
+
+/*
+ * Platform-MSI write callback: program the MSI address and data of the
+ * ring selected by the descriptor's platform MSI index.
+ */
+static void flexrm_mbox_msi_write(struct msi_desc *desc, struct msi_msg *msg)
+{
+       struct flexrm_mbox *mbox = dev_get_drvdata(msi_desc_to_dev(desc));
+       void __iomem *ring_regs = mbox->rings[desc->platform.msi_index].regs;
+
+       /* Configure per-Ring MSI registers */
+       writel_relaxed(msg->address_lo, ring_regs + RING_MSI_ADDR_LS);
+       writel_relaxed(msg->address_hi, ring_regs + RING_MSI_ADDR_MS);
+       writel_relaxed(msg->data, ring_regs + RING_MSI_DATA_VALUE);
+}
+
+/*
+ * Probe one FlexRM mailbox platform device.
+ *
+ * Maps the register resource, counts the rings whose RING_VER register
+ * reads RING_VER_MAGIC, initializes one struct flexrm_ring per ring found,
+ * sets the DMA mask, creates the BD and completion DMA pools, allocates one
+ * platform MSI per ring and registers a mailbox controller with one channel
+ * per ring.
+ *
+ * Returns 0 on success or a negative errno. The MSIs and the DMA pools are
+ * released explicitly on the error paths; the other allocations and the
+ * register mapping use devm_* helpers.
+ */
+static int flexrm_mbox_probe(struct platform_device *pdev)
+{
+       int index, ret = 0;
+       void __iomem *regs;
+       void __iomem *regs_end;
+       struct msi_desc *desc;
+       struct resource *iomem;
+       struct flexrm_ring *ring;
+       struct flexrm_mbox *mbox;
+       struct device *dev = &pdev->dev;
+
+       /* Allocate driver mailbox struct */
+       mbox = devm_kzalloc(dev, sizeof(*mbox), GFP_KERNEL);
+       if (!mbox) {
+               ret = -ENOMEM;
+               goto fail;
+       }
+       mbox->dev = dev;
+       platform_set_drvdata(pdev, mbox);
+
+       /* Get resource for registers */
+       iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!iomem || (resource_size(iomem) < RING_REGS_SIZE)) {
+               ret = -ENODEV;
+               goto fail;
+       }
+
+       /* Map registers of all rings */
+       mbox->regs = devm_ioremap_resource(&pdev->dev, iomem);
+       if (IS_ERR(mbox->regs)) {
+               ret = PTR_ERR(mbox->regs);
+               dev_err(&pdev->dev, "Failed to remap mailbox regs: %d\n", ret);
+               goto fail;
+       }
+       regs_end = mbox->regs + resource_size(iomem);
+
+       /* Scan and count available rings */
+       mbox->num_rings = 0;
+       for (regs = mbox->regs; regs < regs_end; regs += RING_REGS_SIZE) {
+               if (readl_relaxed(regs + RING_VER) == RING_VER_MAGIC)
+                       mbox->num_rings++;
+       }
+       if (!mbox->num_rings) {
+               ret = -ENODEV;
+               goto fail;
+       }
+
+       /* Allocate driver ring structs */
+       ring = devm_kcalloc(dev, mbox->num_rings, sizeof(*ring), GFP_KERNEL);
+       if (!ring) {
+               ret = -ENOMEM;
+               goto fail;
+       }
+       mbox->rings = ring;
+
+       /* Initialize members of driver ring structs */
+       regs = mbox->regs;
+       for (index = 0; index < mbox->num_rings; index++) {
+               ring = &mbox->rings[index];
+               ring->num = index;
+               ring->mbox = mbox;
+               /* Skip register blocks that do not carry the ring magic */
+               while ((regs < regs_end) &&
+                      (readl_relaxed(regs + RING_VER) != RING_VER_MAGIC))
+                       regs += RING_REGS_SIZE;
+               if (regs_end <= regs) {
+                       ret = -ENODEV;
+                       goto fail;
+               }
+               ring->regs = regs;
+               regs += RING_REGS_SIZE;
+               /* No IRQ yet; filled in from the MSI descriptors below */
+               ring->irq = UINT_MAX;
+               ring->irq_requested = false;
+               /* Default MSI settings; flexrm_mbox_of_xlate() may override them */
+               ring->msi_timer_val = MSI_TIMER_VAL_MASK;
+               ring->msi_count_threshold = 0x1;
+               ida_init(&ring->requests_ida);
+               memset(ring->requests, 0, sizeof(ring->requests));
+               ring->bd_base = NULL;
+               ring->bd_dma_base = 0;
+               ring->cmpl_base = NULL;
+               ring->cmpl_dma_base = 0;
+               spin_lock_init(&ring->lock);
+               ring->last_pending_msg = NULL;
+               ring->cmpl_read_offset = 0;
+       }
+
+       /* FlexRM is capable of 40-bit physical addresses only */
+       ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40));
+       if (ret) {
+               /* Fall back to a 32-bit mask if the 40-bit one is rejected */
+               ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+               if (ret)
+                       goto fail;
+       }
+
+       /* Create DMA pool for ring BD memory */
+       mbox->bd_pool = dma_pool_create("bd", dev, RING_BD_SIZE,
+                                       1 << RING_BD_ALIGN_ORDER, 0);
+       if (!mbox->bd_pool) {
+               ret = -ENOMEM;
+               goto fail;
+       }
+
+       /* Create DMA pool for ring completion memory */
+       mbox->cmpl_pool = dma_pool_create("cmpl", dev, RING_CMPL_SIZE,
+                                         1 << RING_CMPL_ALIGN_ORDER, 0);
+       if (!mbox->cmpl_pool) {
+               ret = -ENOMEM;
+               goto fail_destroy_bd_pool;
+       }
+
+       /* Allocate platform MSIs for each ring */
+       ret = platform_msi_domain_alloc_irqs(dev, mbox->num_rings,
+                                               flexrm_mbox_msi_write);
+       if (ret)
+               goto fail_destroy_cmpl_pool;
+
+       /* Save alloced IRQ numbers for each ring */
+       for_each_msi_entry(desc, dev) {
+               ring = &mbox->rings[desc->platform.msi_index];
+               ring->irq = desc->irq;
+       }
+
+       /* Initialize mailbox controller */
+       /* TX completion is detected by polling last_tx_done, not by IRQ */
+       mbox->controller.txdone_irq = false;
+       mbox->controller.txdone_poll = true;
+       mbox->controller.txpoll_period = 1;
+       mbox->controller.ops = &flexrm_mbox_chan_ops;
+       mbox->controller.dev = dev;
+       mbox->controller.num_chans = mbox->num_rings;
+       mbox->controller.of_xlate = flexrm_mbox_of_xlate;
+       mbox->controller.chans = devm_kcalloc(dev, mbox->num_rings,
+                               sizeof(*mbox->controller.chans), GFP_KERNEL);
+       if (!mbox->controller.chans) {
+               ret = -ENOMEM;
+               goto fail_free_msis;
+       }
+       /* Channel N is backed by ring N */
+       for (index = 0; index < mbox->num_rings; index++)
+               mbox->controller.chans[index].con_priv = &mbox->rings[index];
+
+       /* Register mailbox controller */
+       ret = mbox_controller_register(&mbox->controller);
+       if (ret)
+               goto fail_free_msis;
+
+       dev_info(dev, "registered flexrm mailbox with %d channels\n",
+                       mbox->controller.num_chans);
+
+       return 0;
+
+       /* Error unwinding: release in reverse order of acquisition */
+fail_free_msis:
+       platform_msi_domain_free_irqs(dev);
+fail_destroy_cmpl_pool:
+       dma_pool_destroy(mbox->cmpl_pool);
+fail_destroy_bd_pool:
+       dma_pool_destroy(mbox->bd_pool);
+fail:
+       return ret;
+}
+
+/*
+ * Remove a FlexRM mailbox device: unregister the mailbox controller, free
+ * the platform MSIs, destroy both DMA pools and destroy each ring's
+ * request-ID allocator. Always returns 0.
+ */
+static int flexrm_mbox_remove(struct platform_device *pdev)
+{
+       struct flexrm_mbox *mbox = platform_get_drvdata(pdev);
+       int i;
+
+       mbox_controller_unregister(&mbox->controller);
+
+       platform_msi_domain_free_irqs(&pdev->dev);
+
+       dma_pool_destroy(mbox->cmpl_pool);
+       dma_pool_destroy(mbox->bd_pool);
+
+       for (i = 0; i < mbox->num_rings; i++)
+               ida_destroy(&mbox->rings[i].requests_ida);
+
+       return 0;
+}
+
+/* Device tree compatible strings this driver binds to */
+static const struct of_device_id flexrm_mbox_of_match[] = {
+       { .compatible = "brcm,iproc-flexrm-mbox", },
+       {},     /* sentinel */
+};
+MODULE_DEVICE_TABLE(of, flexrm_mbox_of_match);
+
+/* Platform driver glue: matched through flexrm_mbox_of_match */
+static struct platform_driver flexrm_mbox_driver = {
+       .driver = {
+               .name = "brcm-flexrm-mbox",
+               .of_match_table = flexrm_mbox_of_match,
+       },
+       .probe          = flexrm_mbox_probe,
+       .remove         = flexrm_mbox_remove,
+};
+module_platform_driver(flexrm_mbox_driver);
+
+/* Module metadata */
+MODULE_AUTHOR("Anup Patel <anup.patel@broadcom.com>");
+MODULE_DESCRIPTION("Broadcom FlexRM mailbox driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mailbox/bcm-pdc-mailbox.c b/drivers/mailbox/bcm-pdc-mailbox.c

index 2aeb034d5fb9cd4d0ce14e5c26a10fb6abf62538..4fe7be0bdd11f2e836897e9783b3c7fec8a7534d 100644 (file)
--- a/drivers/mailbox/bcm-pdc-mailbox.c
+++ b/drivers/mailbox/bcm-pdc-mailbox.c
@@ -18,7 +18,8 @@
   * Broadcom PDC Mailbox Driver
   * The PDC provides a ring based programming interface to one or more hardware
   * offload engines. For example, the PDC driver works with both SPU-M and SPU2
- * cryptographic offload hardware. In some chips the PDC is referred to as MDE.
+ * cryptographic offload hardware. In some chips the PDC is referred to as MDE,
+ * and in others the FA2/FA+ hardware is used with this PDC driver.
   *
   * The PDC driver registers with the Linux mailbox framework as a mailbox
   * controller, once for each PDC instance. Ring 0 for each PDC is registered as
@@ -108,6 +109,7 @@
  #define PDC_INTMASK_OFFSET   0x24
  #define PDC_INTSTATUS_OFFSET 0x20
  #define PDC_RCVLAZY0_OFFSET  (0x30 + 4 * PDC_RINGSET)
+#define FA_RCVLAZY0_OFFSET   0x100
  
  /*
   * For SPU2, configure MDE_CKSUM_CONTROL to write 17 bytes of metadata
@@ -162,6 +164,11 @@
  /* Maximum size buffer the DMA engine can handle */
  #define PDC_DMA_BUF_MAX 16384
  
+enum pdc_hw {
+       FA_HW,          /* FA2/FA+ hardware (i.e. Northstar Plus) */
+       PDC_HW          /* PDC/MDE hardware (i.e. Northstar 2, Pegasus) */
+};
+
  struct pdc_dma_map {
         void *ctx;          /* opaque context associated with frame */
  };
@@ -211,13 +218,13 @@ struct pdc_regs {
         u32  gptimer;                /* 0x028 */
  
         u32  PAD;
-       u32  intrcvlazy_0;           /* 0x030 */
-       u32  intrcvlazy_1;           /* 0x034 */
-       u32  intrcvlazy_2;           /* 0x038 */
-       u32  intrcvlazy_3;           /* 0x03c */
+       u32  intrcvlazy_0;           /* 0x030 (Only in PDC, not FA2) */
+       u32  intrcvlazy_1;           /* 0x034 (Only in PDC, not FA2) */
+       u32  intrcvlazy_2;           /* 0x038 (Only in PDC, not FA2) */
+       u32  intrcvlazy_3;           /* 0x03c (Only in PDC, not FA2) */
  
         u32  PAD[48];
-       u32  removed_intrecvlazy;    /* 0x100 */
+       u32  fa_intrecvlazy;         /* 0x100 (Only in FA2, not PDC) */
         u32  flowctlthresh;          /* 0x104 */
         u32  wrrthresh;              /* 0x108 */
         u32  gmac_idle_cnt_thresh;   /* 0x10c */
@@ -243,7 +250,7 @@ struct pdc_regs {
         u32  serdes_status1;         /* 0x1b0 */
         u32  PAD[11];                /* 0x1b4-1dc */
         u32  clk_ctl_st;             /* 0x1e0 */
-       u32  hw_war;                 /* 0x1e4 */
+       u32  hw_war;                 /* 0x1e4 (Only in PDC, not FA2) */
         u32  pwrctl;                 /* 0x1e8 */
         u32  PAD[5];
  
@@ -410,6 +417,9 @@ struct pdc_state {
         u32  txnobuf;          /* unable to create tx descriptor */
         u32  rxnobuf;          /* unable to create rx descriptor */
         u32  rx_oflow;         /* count of rx overflows */
+
+       /* hardware type - FA2 or PDC/MDE */
+       enum pdc_hw hw_type;
  };
  
  /* Global variables */
@@ -1396,7 +1406,13 @@ static int pdc_interrupts_init(struct pdc_state *pdcs)
  
         /* interrupt configuration */
         iowrite32(PDC_INTMASK, pdcs->pdc_reg_vbase + PDC_INTMASK_OFFSET);
-       iowrite32(PDC_LAZY_INT, pdcs->pdc_reg_vbase + PDC_RCVLAZY0_OFFSET);
+
+       if (pdcs->hw_type == FA_HW)
+               iowrite32(PDC_LAZY_INT, pdcs->pdc_reg_vbase +
+                         FA_RCVLAZY0_OFFSET);
+       else
+               iowrite32(PDC_LAZY_INT, pdcs->pdc_reg_vbase +
+                         PDC_RCVLAZY0_OFFSET);
  
         /* read irq from device tree */
         pdcs->pdc_irq = irq_of_parse_and_map(dn, 0);
@@ -1465,6 +1481,17 @@ static int pdc_mb_init(struct pdc_state *pdcs)
         return 0;
  }
  
+/* Device tree API */
+static const int pdc_hw = PDC_HW;
+static const int fa_hw = FA_HW;
+
+static const struct of_device_id pdc_mbox_of_match[] = {
+       {.compatible = "brcm,iproc-pdc-mbox", .data = &pdc_hw},
+       {.compatible = "brcm,iproc-fa2-mbox", .data = &fa_hw},
+       { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, pdc_mbox_of_match);
+
  /**
   * pdc_dt_read() - Read application-specific data from device tree.
   * @pdev:  Platform device
@@ -1481,6 +1508,8 @@ static int pdc_dt_read(struct platform_device *pdev, struct pdc_state *pdcs)
  {
         struct device *dev = &pdev->dev;
         struct device_node *dn = pdev->dev.of_node;
+       const struct of_device_id *match;
+       const int *hw_type;
         int err;
  
         err = of_property_read_u32(dn, "brcm,rx-status-len",
@@ -1492,6 +1521,14 @@ static int pdc_dt_read(struct platform_device *pdev, struct pdc_state *pdcs)
  
         pdcs->use_bcm_hdr = of_property_read_bool(dn, "brcm,use-bcm-hdr");
  
+       pdcs->hw_type = PDC_HW;
+
+       match = of_match_device(of_match_ptr(pdc_mbox_of_match), dev);
+       if (match != NULL) {
+               hw_type = match->data;
+               pdcs->hw_type = *hw_type;
+       }
+
         return 0;
  }
  
@@ -1525,7 +1562,7 @@ static int pdc_probe(struct platform_device *pdev)
         pdcs->pdc_idx = pdcg.num_spu;
         pdcg.num_spu++;
  
-       err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+       err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(39));
         if (err) {
                 dev_warn(dev, "PDC device cannot perform DMA. Error %d.", err);
                 goto cleanup;
@@ -1611,12 +1648,6 @@ static int pdc_remove(struct platform_device *pdev)
         return 0;
  }
  
-static const struct of_device_id pdc_mbox_of_match[] = {
-       {.compatible = "brcm,iproc-pdc-mbox"},
-       { /* sentinel */ }
-};
-MODULE_DEVICE_TABLE(of, pdc_mbox_of_match);
-
  static struct platform_driver pdc_mbox_driver = {
         .probe = pdc_probe,
         .remove = pdc_remove,
diff --git a/drivers/mailbox/hi6220-mailbox.c b/drivers/mailbox/hi6220-mailbox.c

index 613722db5daf314fd376e96ec18209b7d2132fd5..519376d3534cfa05d623819b4ceb3f5a398e0cf3 100644 (file)
--- a/drivers/mailbox/hi6220-mailbox.c
+++ b/drivers/mailbox/hi6220-mailbox.c
@@ -221,7 +221,7 @@ static void hi6220_mbox_shutdown(struct mbox_chan *chan)
         mbox->irq_map_chan[mchan->ack_irq] = NULL;
  }
  
-static struct mbox_chan_ops hi6220_mbox_ops = {
+static const struct mbox_chan_ops hi6220_mbox_ops = {
         .send_data    = hi6220_mbox_send_data,
         .startup      = hi6220_mbox_startup,
         .shutdown     = hi6220_mbox_shutdown,
diff --git a/drivers/mailbox/mailbox-xgene-slimpro.c b/drivers/mailbox/mailbox-xgene-slimpro.c

index dd2afbca51c9b8d2c155003548d31da28f81c3ce..a7040163dd43983d26071faec7ac9f815db9b799 100644 (file)
--- a/drivers/mailbox/mailbox-xgene-slimpro.c
+++ b/drivers/mailbox/mailbox-xgene-slimpro.c
@@ -174,7 +174,7 @@ static void slimpro_mbox_shutdown(struct mbox_chan *chan)
         devm_free_irq(mb_chan->dev, mb_chan->irq, mb_chan);
  }
  
-static struct mbox_chan_ops slimpro_mbox_ops = {
+static const struct mbox_chan_ops slimpro_mbox_ops = {
         .send_data = slimpro_mbox_send_data,
         .startup = slimpro_mbox_startup,
         .shutdown = slimpro_mbox_shutdown,
diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c

index 4671f8a128723010dafe2d2f6f48df75a492c9fb..9dfbf7ea10a230579fbe4d102d491b045a8ea5b2 100644 (file)
--- a/drivers/mailbox/mailbox.c
+++ b/drivers/mailbox/mailbox.c
@@ -103,11 +103,14 @@ static void tx_tick(struct mbox_chan *chan, int r)
         /* Submit next message */
         msg_submit(chan);
  
+       if (!mssg)
+               return;
+
         /* Notify the client */
-       if (mssg && chan->cl->tx_done)
+       if (chan->cl->tx_done)
                 chan->cl->tx_done(chan->cl, mssg, r);
  
-       if (chan->cl->tx_block)
+       if (r != -ETIME && chan->cl->tx_block)
                 complete(&chan->tx_complete);
  }
  
@@ -260,7 +263,7 @@ int mbox_send_message(struct mbox_chan *chan, void *mssg)
  
         msg_submit(chan);
  
-       if (chan->cl->tx_block && chan->active_req) {
+       if (chan->cl->tx_block) {
                 unsigned long wait;
                 int ret;
  
@@ -271,8 +274,8 @@ int mbox_send_message(struct mbox_chan *chan, void *mssg)
  
                 ret = wait_for_completion_timeout(&chan->tx_complete, wait);
                 if (ret == 0) {
-                       t = -EIO;
-                       tx_tick(chan, -EIO);
+                       t = -ETIME;
+                       tx_tick(chan, t);
                 }
         }
  
@@ -453,6 +456,12 @@ int mbox_controller_register(struct mbox_controller *mbox)
                 txdone = TXDONE_BY_ACK;
  
         if (txdone == TXDONE_BY_POLL) {
+
+               if (!mbox->ops->last_tx_done) {
+                       dev_err(mbox->dev, "last_tx_done method is absent\n");
+                       return -EINVAL;
+               }
+
                 hrtimer_init(&mbox->poll_hrt, CLOCK_MONOTONIC,
                              HRTIMER_MODE_REL);
                 mbox->poll_hrt.function = txdone_hrtimer;
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h

index a126919ed102763e9d86da2a9ce615ff0b8a2001..5d13930f0f22fc42e40228cee8eee1ecb7cfa8e5 100644 (file)
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -4,7 +4,6 @@
  
  #include <linux/blkdev.h>
  #include <linux/errno.h>
-#include <linux/blkdev.h>
  #include <linux/kernel.h>
  #include <linux/sched/clock.h>
  #include <linux/llist.h>
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c

index e4c2c1a1e9933282fa2b971999270199c8d9c224..6735c8d6a44551a6dd9cc25fa9b505316bfde060 100644 (file)
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -932,7 +932,7 @@ static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd,
         *result = true;
  
         r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root,
-                                  from_cblock(begin), &cmd->dirty_cursor);
+                                  from_cblock(cmd->cache_blocks), &cmd->dirty_cursor);
         if (r) {
                 DMERR("%s: dm_bitset_cursor_begin for dirty failed", __func__);
                 return r;
@@ -959,14 +959,16 @@ static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd,
                         return 0;
                 }
  
+               begin = to_cblock(from_cblock(begin) + 1);
+               if (begin == end)
+                       break;
+
                 r = dm_bitset_cursor_next(&cmd->dirty_cursor);
                 if (r) {
                         DMERR("%s: dm_bitset_cursor_next for dirty failed", __func__);
                         dm_bitset_cursor_end(&cmd->dirty_cursor);
                         return r;
                 }
-
-               begin = to_cblock(from_cblock(begin) + 1);
         }
  
         dm_bitset_cursor_end(&cmd->dirty_cursor);
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c

index 9c689b34e6e792105d64f2bb4835e11e37c91578..975922c8f23143c4fcf586714015343f9ed0b41d 100644 (file)
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -2773,7 +2773,6 @@ static int cache_create(struct cache_args *ca, struct cache **result)
  
         ti->num_discard_bios = 1;
         ti->discards_supported = true;
-       ti->discard_zeroes_data_unsupported = true;
         ti->split_discard_bios = false;
  
         cache->features = ca->features;
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h

index 136fda3ff9e55d46a2ef686e8655ba50754d5c20..fea5bd52ada8fa6c01cf57fc681746854c635569 100644 (file)
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -132,6 +132,7 @@ void dm_init_md_queue(struct mapped_device *md);
  void dm_init_normal_md_queue(struct mapped_device *md);
  int md_in_flight(struct mapped_device *md);
  void disable_write_same(struct mapped_device *md);
+void disable_write_zeroes(struct mapped_device *md);
  
  static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj)
  {
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c

index 389a3637ffcc637c9a4540cec4db3b65a5cf773f..ef1d836bd81b61ec8f79a1e305685f272c776580 100644 (file)
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -2030,7 +2030,6 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
         wake_up_process(cc->write_thread);
  
         ti->num_flush_bios = 1;
-       ti->discard_zeroes_data_unsupported = true;
  
         return 0;
  
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c

index 03940bf36f6ccf79b52ec5552cb636d69274b42d..3702e502466d37a902c64a74a1f5ad7b516770bb 100644 (file)
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -312,9 +312,12 @@ static void do_region(int op, int op_flags, unsigned region,
          */
         if (op == REQ_OP_DISCARD)
                 special_cmd_max_sectors = q->limits.max_discard_sectors;
+       else if (op == REQ_OP_WRITE_ZEROES)
+               special_cmd_max_sectors = q->limits.max_write_zeroes_sectors;
         else if (op == REQ_OP_WRITE_SAME)
                 special_cmd_max_sectors = q->limits.max_write_same_sectors;
-       if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_SAME) &&
+       if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES ||
+            op == REQ_OP_WRITE_SAME)  &&
             special_cmd_max_sectors == 0) {
                 dec_count(io, region, -EOPNOTSUPP);
                 return;
@@ -328,11 +331,18 @@ static void do_region(int op, int op_flags, unsigned region,
                 /*
                  * Allocate a suitably sized-bio.
                  */
-               if ((op == REQ_OP_DISCARD) || (op == REQ_OP_WRITE_SAME))
+               switch (op) {
+               case REQ_OP_DISCARD:
+               case REQ_OP_WRITE_ZEROES:
+                       num_bvecs = 0;
+                       break;
+               case REQ_OP_WRITE_SAME:
                         num_bvecs = 1;
-               else
+                       break;
+               default:
                         num_bvecs = min_t(int, BIO_MAX_PAGES,
                                           dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT)));
+               }
  
                 bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
                 bio->bi_iter.bi_sector = where->sector + (where->count - remaining);
@@ -341,7 +351,7 @@ static void do_region(int op, int op_flags, unsigned region,
                 bio_set_op_attrs(bio, op, op_flags);
                 store_io_and_region_in_bio(bio, io, region);
  
-               if (op == REQ_OP_DISCARD) {
+               if (op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) {
                         num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining);
                         bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT;
                         remaining -= num_sectors;
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c

index 9e9d04cb7d51f53e8292674abd9198911f8a3dc8..f85846741d508ca171260e0aab42f4591383db64 100644 (file)
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -733,11 +733,11 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
                 job->pages = &zero_page_list;
  
                 /*
-                * Use WRITE SAME to optimize zeroing if all dests support it.
+                * Use WRITE ZEROES to optimize zeroing if all dests support it.
                  */
-               job->rw = REQ_OP_WRITE_SAME;
+               job->rw = REQ_OP_WRITE_ZEROES;
                 for (i = 0; i < job->num_dests; i++)
-                       if (!bdev_write_same(job->dests[i].bdev)) {
+                       if (!bdev_write_zeroes_sectors(job->dests[i].bdev)) {
                                 job->rw = WRITE;
                                 break;
                         }
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c

index 4788b0b989a9bac661f07a8deb2c7a86a96c8677..e17fd44ceef534352dfd2b9b15d787399bbaf065 100644 (file)
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -59,6 +59,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
         ti->num_flush_bios = 1;
         ti->num_discard_bios = 1;
         ti->num_write_same_bios = 1;
+       ti->num_write_zeroes_bios = 1;
         ti->private = lc;
         return 0;
  
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c

index 7f223dbed49f61d5797d4edba728989c9e435d3d..2950b145443d7e1c26f9831490406869f6ed2323 100644 (file)
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1103,6 +1103,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
         ti->num_flush_bios = 1;
         ti->num_discard_bios = 1;
         ti->num_write_same_bios = 1;
+       ti->num_write_zeroes_bios = 1;
         if (m->queue_mode == DM_TYPE_BIO_BASED)
                 ti->per_io_data_size = multipath_per_bio_data_size();
         else
@@ -1491,7 +1492,7 @@ static int do_end_io(struct multipath *m, struct request *clone,
          */
         int r = DM_ENDIO_REQUEUE;
  
-       if (!error && !clone->errors)
+       if (!error)
                 return 0;       /* I/O complete */
  
         if (noretry_error(error))
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c

index f8564d63982f43f9e3fb453fc9adedf3f14be51a..2dae3e5b851cef95f13c9119a8774362b253f313 100644 (file)
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -2813,7 +2813,9 @@ static void configure_discard_support(struct raid_set *rs)
         /* Assume discards not supported until after checks below. */
         ti->discards_supported = false;
  
-       /* RAID level 4,5,6 require discard_zeroes_data for data integrity! */
+       /*
+        * XXX: RAID level 4,5,6 require zeroing for safety.
+        */
         raid456 = (rs->md.level == 4 || rs->md.level == 5 || rs->md.level == 6);
  
         for (i = 0; i < rs->raid_disks; i++) {
@@ -2827,8 +2829,6 @@ static void configure_discard_support(struct raid_set *rs)
                         return;
  
                 if (raid456) {
-                       if (!q->limits.discard_zeroes_data)
-                               return;
                         if (!devices_handle_discard_safely) {
                                 DMERR("raid456 discard support disabled due to discard_zeroes_data uncertainty.");
                                 DMERR("Set dm-raid.devices_handle_discard_safely=Y to override.");
@@ -3726,7 +3726,7 @@ static int raid_preresume(struct dm_target *ti)
                 return r;
  
         /* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) */
-       if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) &&
+       if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap &&
             mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)) {
                 r = bitmap_resize(mddev->bitmap, mddev->dev_sectors,
                                   to_bytes(rs->requested_bitmap_chunk_sectors), 0);
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c

index 2ddc2d20e62d17c603900959d40f541bf5eec076..a95cbb80fb34444144bad346b3e769c625e8c788 100644 (file)
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1124,7 +1124,6 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
         ti->num_flush_bios = 1;
         ti->num_discard_bios = 1;
         ti->per_io_data_size = sizeof(struct dm_raid1_bio_record);
-       ti->discard_zeroes_data_unsupported = true;
  
         ms->kmirrord_wq = alloc_workqueue("kmirrord", WQ_MEM_RECLAIM, 0);
         if (!ms->kmirrord_wq) {
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c

index 28955b94d2b26f47d7c54217d84c2a8a11af692a..bff7e3bdb4ed1d6bf8ff367e11fa13749dfd6776 100644 (file)
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -298,9 +298,14 @@ static void dm_done(struct request *clone, int error, bool mapped)
                         r = rq_end_io(tio->ti, clone, error, &tio->info);
         }
  
-       if (unlikely(r == -EREMOTEIO && (req_op(clone) == REQ_OP_WRITE_SAME) &&
-                    !clone->q->limits.max_write_same_sectors))
-               disable_write_same(tio->md);
+       if (unlikely(r == -EREMOTEIO)) {
+               if (req_op(clone) == REQ_OP_WRITE_SAME &&
+                   !clone->q->limits.max_write_same_sectors)
+                       disable_write_same(tio->md);
+               if (req_op(clone) == REQ_OP_WRITE_ZEROES &&
+                   !clone->q->limits.max_write_zeroes_sectors)
+                       disable_write_zeroes(tio->md);
+       }
  
         if (r <= 0)
                 /* The target wants to complete the I/O */
@@ -358,7 +363,7 @@ static void dm_complete_request(struct request *rq, int error)
         if (!rq->q->mq_ops)
                 blk_complete_request(rq);
         else
-               blk_mq_complete_request(rq, error);
+               blk_mq_complete_request(rq);
  }
  
  /*
@@ -755,13 +760,14 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
                 /* Undo dm_start_request() before requeuing */
                 rq_end_stats(md, rq);
                 rq_completed(md, rq_data_dir(rq), false);
+               blk_mq_delay_run_hw_queue(hctx, 100/*ms*/);
                 return BLK_MQ_RQ_QUEUE_BUSY;
         }
  
         return BLK_MQ_RQ_QUEUE_OK;
  }
  
-static struct blk_mq_ops dm_mq_ops = {
+static const struct blk_mq_ops dm_mq_ops = {
         .queue_rq = dm_mq_queue_rq,
         .complete = dm_softirq_done,
         .init_request = dm_mq_init_request,
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c

index 28193a57bf471e30da0bedd944fe148283584b9c..5ef49c121d9955dfafaf7b4ba30c50f3449c1df0 100644 (file)
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -169,6 +169,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
         ti->num_flush_bios = stripes;
         ti->num_discard_bios = stripes;
         ti->num_write_same_bios = stripes;
+       ti->num_write_zeroes_bios = stripes;
  
         sc->chunk_size = chunk_size;
         if (chunk_size & (chunk_size - 1))
@@ -293,6 +294,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
                 return DM_MAPIO_REMAPPED;
         }
         if (unlikely(bio_op(bio) == REQ_OP_DISCARD) ||
+           unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES) ||
             unlikely(bio_op(bio) == REQ_OP_WRITE_SAME)) {
                 target_bio_nr = dm_bio_get_target_bio_nr(bio);
                 BUG_ON(target_bio_nr >= sc->stripes);
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c

index 3ad16d9c9d5aae956afc7b3c7ab820b4f52de4ed..958275aca008454a460b0fbe22f543bad2f4172a 100644 (file)
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1449,22 +1449,6 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned long flush)
         return false;
  }
  
-static bool dm_table_discard_zeroes_data(struct dm_table *t)
-{
-       struct dm_target *ti;
-       unsigned i = 0;
-
-       /* Ensure that all targets supports discard_zeroes_data. */
-       while (i < dm_table_get_num_targets(t)) {
-               ti = dm_table_get_target(t, i++);
-
-               if (ti->discard_zeroes_data_unsupported)
-                       return false;
-       }
-
-       return true;
-}
-
  static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev,
                             sector_t start, sector_t len, void *data)
  {
@@ -1533,6 +1517,34 @@ static bool dm_table_supports_write_same(struct dm_table *t)
         return true;
  }
  
+static int device_not_write_zeroes_capable(struct dm_target *ti, struct dm_dev *dev,
+                                          sector_t start, sector_t len, void *data)
+{
+       struct request_queue *q = bdev_get_queue(dev->bdev);
+
+       return q && !q->limits.max_write_zeroes_sectors;
+}
+
+static bool dm_table_supports_write_zeroes(struct dm_table *t)
+{
+       struct dm_target *ti;
+       unsigned i = 0;
+
+       while (i < dm_table_get_num_targets(t)) {
+               ti = dm_table_get_target(t, i++);
+
+               if (!ti->num_write_zeroes_bios)
+                       return false;
+
+               if (!ti->type->iterate_devices ||
+                   ti->type->iterate_devices(ti, device_not_write_zeroes_capable, NULL))
+                       return false;
+       }
+
+       return true;
+}
+
+
  static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev,
                                   sector_t start, sector_t len, void *data)
  {
@@ -1592,9 +1604,6 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
         }
         blk_queue_write_cache(q, wc, fua);
  
-       if (!dm_table_discard_zeroes_data(t))
-               q->limits.discard_zeroes_data = 0;
-
         /* Ensure that all underlying devices are non-rotational. */
         if (dm_table_all_devices_attribute(t, device_is_nonrot))
                 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
@@ -1603,6 +1612,8 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
  
         if (!dm_table_supports_write_same(t))
                 q->limits.max_write_same_sectors = 0;
+       if (!dm_table_supports_write_zeroes(t))
+               q->limits.max_write_zeroes_sectors = 0;
  
         if (dm_table_all_devices_attribute(t, queue_supports_sg_merge))
                 queue_flag_clear_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c

index 2b266a2b5035b9fa699eb1afdf4c5dc53da54c66..a5f1916f621a9972449dbbfa2408397484a5d7f3 100644 (file)
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -3263,7 +3263,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
          * them down to the data device.  The thin device's discard
          * processing will cause mappings to be removed from the btree.
          */
-       ti->discard_zeroes_data_unsupported = true;
         if (pf.discard_enabled && pf.discard_passdown) {
                 ti->num_discard_bios = 1;
  
@@ -4119,7 +4118,6 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
         ti->per_io_data_size = sizeof(struct dm_thin_endio_hook);
  
         /* In case the pool supports discards, pass them on. */
-       ti->discard_zeroes_data_unsupported = true;
         if (tc->pool->pf.discard_enabled) {
                 ti->discards_supported = true;
                 ti->num_discard_bios = 1;
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c

index 0f0eb8a3d922a212583ba491f3899a69a2b91bd6..78f36012eacacb3ca16888b880c0416f8234a14d 100644 (file)
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -146,8 +146,6 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio,
                 block = fec_buffer_rs_block(v, fio, n, i);
                 res = fec_decode_rs8(v, fio, block, &par[offset], neras);
                 if (res < 0) {
-                       dm_bufio_release(buf);
-
                         r = res;
                         goto error;
                 }
@@ -172,6 +170,8 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio,
  done:
         r = corrected;
  error:
+       dm_bufio_release(buf);
+
         if (r < 0 && neras)
                 DMERR_LIMIT("%s: FEC %llu: failed to correct: %d",
                             v->data_dev->name, (unsigned long long)rsb, r);
@@ -269,7 +269,7 @@ static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io,
                                           &is_zero) == 0) {
                         /* skip known zero blocks entirely */
                         if (is_zero)
-                               continue;
+                               goto done;
  
                         /*
                          * skip if we have already found the theoretical
@@ -439,6 +439,13 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
         if (!verity_fec_is_enabled(v))
                 return -EOPNOTSUPP;
  
+       if (fio->level >= DM_VERITY_FEC_MAX_RECURSION) {
+               DMWARN_LIMIT("%s: FEC: recursion too deep", v->data_dev->name);
+               return -EIO;
+       }
+
+       fio->level++;
+
         if (type == DM_VERITY_BLOCK_TYPE_METADATA)
                 block += v->data_blocks;
  
@@ -470,7 +477,7 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
         if (r < 0) {
                 r = fec_decode_rsb(v, io, fio, rsb, offset, true);
                 if (r < 0)
-                       return r;
+                       goto done;
         }
  
         if (dest)
@@ -480,6 +487,8 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
                 r = verity_for_bv_block(v, io, iter, fec_bv_copy);
         }
  
+done:
+       fio->level--;
         return r;
  }
  
@@ -520,6 +529,7 @@ void verity_fec_init_io(struct dm_verity_io *io)
         memset(fio->bufs, 0, sizeof(fio->bufs));
         fio->nbufs = 0;
         fio->output = NULL;
+       fio->level = 0;
  }
  
  /*
diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h

index 7fa0298b995e9e3f1ac25c2356c758fad8b65d34..bb31ce87a933b80d11140f31607f804ff81d209a 100644 (file)
--- a/drivers/md/dm-verity-fec.h
+++ b/drivers/md/dm-verity-fec.h
@@ -27,6 +27,9 @@
  #define DM_VERITY_FEC_BUF_MAX \
         (1 << (PAGE_SHIFT - DM_VERITY_FEC_BUF_RS_BITS))
  
+/* maximum recursion level for verity_fec_decode */
+#define DM_VERITY_FEC_MAX_RECURSION    4
+
  #define DM_VERITY_OPT_FEC_DEV          "use_fec_from_device"
  #define DM_VERITY_OPT_FEC_BLOCKS       "fec_blocks"
  #define DM_VERITY_OPT_FEC_START                "fec_start"
@@ -58,6 +61,7 @@ struct dm_verity_fec_io {
         unsigned nbufs;         /* number of buffers allocated */
         u8 *output;             /* buffer for corrected output */
         size_t output_pos;
+       unsigned level;         /* recursion level */
  };
  
  #ifdef CONFIG_DM_VERITY_FEC
diff --git a/drivers/md/dm.c b/drivers/md/dm.c

index f4ffd1eb8f44c3d5c44c50277fb703545157dbcf..8bf397729bbd28a964f58fe3bba3ca885bfc08ab 100644 (file)
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -810,7 +810,6 @@ static void dec_pending(struct dm_io *io, int error)
                         queue_io(md, bio);
                 } else {
                         /* done with normal IO or empty flush */
-                       trace_block_bio_complete(md->queue, bio, io_error);
                         bio->bi_error = io_error;
                         bio_endio(bio);
                 }
@@ -825,6 +824,14 @@ void disable_write_same(struct mapped_device *md)
         limits->max_write_same_sectors = 0;
  }
  
+void disable_write_zeroes(struct mapped_device *md)
+{
+       struct queue_limits *limits = dm_get_queue_limits(md);
+
+       /* device doesn't really support WRITE ZEROES, disable it */
+       limits->max_write_zeroes_sectors = 0;
+}
+
  static void clone_endio(struct bio *bio)
  {
         int error = bio->bi_error;
@@ -851,9 +858,14 @@ static void clone_endio(struct bio *bio)
                 }
         }
  
-       if (unlikely(r == -EREMOTEIO && (bio_op(bio) == REQ_OP_WRITE_SAME) &&
-                    !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors))
-               disable_write_same(md);
+       if (unlikely(r == -EREMOTEIO)) {
+               if (bio_op(bio) == REQ_OP_WRITE_SAME &&
+                   !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)
+                       disable_write_same(md);
+               if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
+                   !bdev_get_queue(bio->bi_bdev)->limits.max_write_zeroes_sectors)
+                       disable_write_zeroes(md);
+       }
  
         free_tio(tio);
         dec_pending(io, error);
@@ -989,26 +1001,29 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule)
         struct dm_offload *o = container_of(cb, struct dm_offload, cb);
         struct bio_list list;
         struct bio *bio;
+       int i;
  
         INIT_LIST_HEAD(&o->cb.list);
  
         if (unlikely(!current->bio_list))
                 return;
  
-       list = *current->bio_list;
-       bio_list_init(current->bio_list);
-
-       while ((bio = bio_list_pop(&list))) {
-               struct bio_set *bs = bio->bi_pool;
-               if (unlikely(!bs) || bs == fs_bio_set) {
-                       bio_list_add(current->bio_list, bio);
-                       continue;
+       for (i = 0; i < 2; i++) {
+               list = current->bio_list[i];
+               bio_list_init(&current->bio_list[i]);
+
+               while ((bio = bio_list_pop(&list))) {
+                       struct bio_set *bs = bio->bi_pool;
+                       if (unlikely(!bs) || bs == fs_bio_set) {
+                               bio_list_add(&current->bio_list[i], bio);
+                               continue;
+                       }
+
+                       spin_lock(&bs->rescue_lock);
+                       bio_list_add(&bs->rescue_list, bio);
+                       queue_work(bs->rescue_workqueue, &bs->rescue_work);
+                       spin_unlock(&bs->rescue_lock);
                 }
-
-               spin_lock(&bs->rescue_lock);
-               bio_list_add(&bs->rescue_list, bio);
-               queue_work(bs->rescue_workqueue, &bs->rescue_work);
-               spin_unlock(&bs->rescue_lock);
         }
  }
  
@@ -1199,6 +1214,11 @@ static unsigned get_num_write_same_bios(struct dm_target *ti)
         return ti->num_write_same_bios;
  }
  
+static unsigned get_num_write_zeroes_bios(struct dm_target *ti)
+{
+       return ti->num_write_zeroes_bios;
+}
+
  typedef bool (*is_split_required_fn)(struct dm_target *ti);
  
  static bool is_split_required_for_discard(struct dm_target *ti)
@@ -1253,6 +1273,11 @@ static int __send_write_same(struct clone_info *ci)
         return __send_changing_extent_only(ci, get_num_write_same_bios, NULL);
  }
  
+static int __send_write_zeroes(struct clone_info *ci)
+{
+       return __send_changing_extent_only(ci, get_num_write_zeroes_bios, NULL);
+}
+
  /*
   * Select the correct strategy for processing a non-flush bio.
   */
@@ -1267,6 +1292,8 @@ static int __split_and_process_non_flush(struct clone_info *ci)
                 return __send_discard(ci);
         else if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
                 return __send_write_same(ci);
+       else if (unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES))
+               return __send_write_zeroes(ci);
  
         ti = dm_table_find_target(ci->map, ci->sector);
         if (!dm_target_is_valid(ti))
diff --git a/drivers/md/linear.c b/drivers/md/linear.c

index 3e38e0207a3eb44339ad6431dc3557ae27d05612..377a8a3672e3da107119cadaf6f8add62eca6f5b 100644 (file)
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -293,6 +293,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
                                                       split, disk_devt(mddev->gendisk),
                                                       bio_sector);
                         mddev_check_writesame(mddev, split);
+                       mddev_check_write_zeroes(mddev, split);
                         generic_make_request(split);
                 }
         } while (split != bio);
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c

index 2b13117fb918cbe27775ba61cc68c6f78e5408ff..321ecac23027804d18ded577a5c05604ec46220a 100644 (file)
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -777,7 +777,6 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
                 bm_lockres->flags |= DLM_LKF_NOQUEUE;
                 ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
                 if (ret == -EAGAIN) {
-                       memset(bm_lockres->lksb.sb_lvbptr, '\0', LVB_SIZE);
                         s = read_resync_info(mddev, bm_lockres);
                         if (s) {
                                 pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n",
@@ -974,6 +973,7 @@ static int leave(struct mddev *mddev)
         lockres_free(cinfo->bitmap_lockres);
         unlock_all_bitmaps(mddev);
         dlm_release_lockspace(cinfo->lockspace, 2);
+       kfree(cinfo);
         return 0;
  }
  
diff --git a/drivers/md/md.c b/drivers/md/md.c

index 548d1b8014f89e9f4b1170daff8fa677d758f39a..f6ae1d67bcd02c6b743258ef3ff6a05896828cb5 100644 (file)
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -440,14 +440,6 @@ void md_flush_request(struct mddev *mddev, struct bio *bio)
  }
  EXPORT_SYMBOL(md_flush_request);
  
-void md_unplug(struct blk_plug_cb *cb, bool from_schedule)
-{
-       struct mddev *mddev = cb->data;
-       md_wakeup_thread(mddev->thread);
-       kfree(cb);
-}
-EXPORT_SYMBOL(md_unplug);
-
  static inline struct mddev *mddev_get(struct mddev *mddev)
  {
         atomic_inc(&mddev->active);
@@ -1887,7 +1879,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
         }
         sb = page_address(rdev->sb_page);
         sb->data_size = cpu_to_le64(num_sectors);
-       sb->super_offset = rdev->sb_start;
+       sb->super_offset = cpu_to_le64(rdev->sb_start);
         sb->sb_csum = calc_sb_1_csum(sb);
         do {
                 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
@@ -2295,7 +2287,7 @@ static bool does_sb_need_changing(struct mddev *mddev)
         /* Check if any mddev parameters have changed */
         if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
             (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
-           (mddev->layout != le64_to_cpu(sb->layout)) ||
+           (mddev->layout != le32_to_cpu(sb->layout)) ||
             (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
             (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
                 return true;
@@ -6458,11 +6450,10 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
         mddev->layout        = info->layout;
         mddev->chunk_sectors = info->chunk_size >> 9;
  
-       mddev->max_disks     = MD_SB_DISKS;
-
         if (mddev->persistent) {
-               mddev->flags         = 0;
-               mddev->sb_flags         = 0;
+               mddev->max_disks = MD_SB_DISKS;
+               mddev->flags = 0;
+               mddev->sb_flags = 0;
         }
         set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
  
@@ -6533,8 +6524,12 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
                         return -ENOSPC;
         }
         rv = mddev->pers->resize(mddev, num_sectors);
-       if (!rv)
-               revalidate_disk(mddev->gendisk);
+       if (!rv) {
+               if (mddev->queue) {
+                       set_capacity(mddev->gendisk, mddev->array_sectors);
+                       revalidate_disk(mddev->gendisk);
+               }
+       }
         return rv;
  }
  
diff --git a/drivers/md/md.h b/drivers/md/md.h

index b8859cbf84b618b39ed3d92a2887e8764c403919..1e76d64ce1803be229477fd83887514980390dee 100644 (file)
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -676,16 +676,10 @@ extern void mddev_resume(struct mddev *mddev);
  extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
                                    struct mddev *mddev);
  
-extern void md_unplug(struct blk_plug_cb *cb, bool from_schedule);
  extern void md_reload_sb(struct mddev *mddev, int raid_disk);
  extern void md_update_sb(struct mddev *mddev, int force);
  extern void md_kick_rdev_from_array(struct md_rdev * rdev);
  struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
-static inline int mddev_check_plugged(struct mddev *mddev)
-{
-       return !!blk_check_plugged(md_unplug, mddev,
-                                  sizeof(struct blk_plug_cb));
-}
  
  static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
  {
@@ -715,4 +709,11 @@ static inline void mddev_check_writesame(struct mddev *mddev, struct bio *bio)
             !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)
                 mddev->queue->limits.max_write_same_sectors = 0;
  }
+
+static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio)
+{
+       if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
+           !bdev_get_queue(bio->bi_bdev)->limits.max_write_zeroes_sectors)
+               mddev->queue->limits.max_write_zeroes_sectors = 0;
+}
  #endif /* _MD_MD_H */
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c

index 79a12b59250bbca870be857eb7cf350c0c9b53ad..e95d521d93e9b912caa561121593b2b7e54dd3a6 100644 (file)
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -139,6 +139,7 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio)
         mp_bh->bio.bi_end_io = multipath_end_request;
         mp_bh->bio.bi_private = mp_bh;
         mddev_check_writesame(mddev, &mp_bh->bio);
+       mddev_check_write_zeroes(mddev, &mp_bh->bio);
         generic_make_request(&mp_bh->bio);
         return;
  }
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c

index 93347ca7c7a617e097ccafcbedbecdfa396d4968..ce7a6a56cf7385284bb702ab888e4f3ce2cf4821 100644 (file)
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -383,6 +383,7 @@ static int raid0_run(struct mddev *mddev)
  
                 blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
                 blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
+               blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors);
                 blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors);
  
                 blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
@@ -504,6 +505,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
                                                       split, disk_devt(mddev->gendisk),
                                                       bio_sector);
                         mddev_check_writesame(mddev, split);
+                       mddev_check_write_zeroes(mddev, split);
                         generic_make_request(split);
                 }
         } while (split != bio);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c

index fbc2d7851b497fec0cacd45832bbd9c9d258eaae..b59cc100320af206e5ef9fddec7ebb6ca7d054d7 100644 (file)
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1027,7 +1027,7 @@ static int get_unqueued_pending(struct r1conf *conf)
  static void freeze_array(struct r1conf *conf, int extra)
  {
         /* Stop sync I/O and normal I/O and wait for everything to
-        * go quite.
+        * go quiet.
          * This is called in two situations:
          * 1) management command handlers (reshape, remove disk, quiesce).
          * 2) one normal I/O request failed.
@@ -1587,9 +1587,30 @@ static void raid1_make_request(struct mddev *mddev, struct bio *bio)
                         split = bio;
                 }
  
-               if (bio_data_dir(split) == READ)
+               if (bio_data_dir(split) == READ) {
                         raid1_read_request(mddev, split);
-               else
+
+                       /*
+                        * If a bio is split, the first part of bio will
+                        * pass barrier but the bio is queued in
+                        * current->bio_list (see generic_make_request). If
+                        * there is a raise_barrier() called here, the second
+                        * part of bio can't pass barrier. But since the first
+                        * part bio isn't dispatched to underlying disks yet,
+                        * the barrier is never released, hence raise_barrier
+                        * will always wait. We have a deadlock.
+                        * Note, this only happens in read path. For write
+                        * path, the first part of bio is dispatched in a
+                        * schedule() call (because of blk plug) or offloaded
+                        * to raid1d.
+                        * Quitting from the function immediately can change
+                        * the bio order queued in bio_list and avoid the deadlock.
+                        */
+                       if (split != bio) {
+                               generic_make_request(bio);
+                               break;
+                       }
+               } else
                         raid1_write_request(mddev, split);
         } while (split != bio);
  }
@@ -3156,8 +3177,10 @@ static int raid1_run(struct mddev *mddev)
         if (IS_ERR(conf))
                 return PTR_ERR(conf);
  
-       if (mddev->queue)
+       if (mddev->queue) {
                 blk_queue_max_write_same_sectors(mddev->queue, 0);
+               blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
+       }
  
         rdev_for_each(rdev, mddev) {
                 if (!mddev->gendisk)
@@ -3246,8 +3269,6 @@ static int raid1_resize(struct mddev *mddev, sector_t sectors)
                         return ret;
         }
         md_set_array_sectors(mddev, newsize);
-       set_capacity(mddev->gendisk, mddev->array_sectors);
-       revalidate_disk(mddev->gendisk);
         if (sectors > mddev->dev_sectors &&
             mddev->recovery_cp > mddev->dev_sectors) {
                 mddev->recovery_cp = mddev->dev_sectors;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c

index 063c43d83b72c2f0f753edb7b08f8dd608fa15ad..28ec3a93aceeac962245fe2f9934296a284b5bac 100644 (file)
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -974,7 +974,8 @@ static void wait_barrier(struct r10conf *conf)
                                     !conf->barrier ||
                                     (atomic_read(&conf->nr_pending) &&
                                      current->bio_list &&
-                                    !bio_list_empty(current->bio_list)),
+                                    (!bio_list_empty(&current->bio_list[0]) ||
+                                     !bio_list_empty(&current->bio_list[1]))),
                                     conf->resync_lock);
                 conf->nr_waiting--;
                 if (!conf->nr_waiting)
@@ -1477,11 +1478,24 @@ retry_write:
                         mbio->bi_bdev = (void*)rdev;
  
                         atomic_inc(&r10_bio->remaining);
+
+                       cb = blk_check_plugged(raid10_unplug, mddev,
+                                              sizeof(*plug));
+                       if (cb)
+                               plug = container_of(cb, struct raid10_plug_cb,
+                                                   cb);
+                       else
+                               plug = NULL;
                         spin_lock_irqsave(&conf->device_lock, flags);
-                       bio_list_add(&conf->pending_bio_list, mbio);
-                       conf->pending_count++;
+                       if (plug) {
+                               bio_list_add(&plug->pending, mbio);
+                               plug->pending_cnt++;
+                       } else {
+                               bio_list_add(&conf->pending_bio_list, mbio);
+                               conf->pending_count++;
+                       }
                         spin_unlock_irqrestore(&conf->device_lock, flags);
-                       if (!mddev_check_plugged(mddev))
+                       if (!plug)
                                 md_wakeup_thread(mddev->thread);
                 }
         }
@@ -1571,7 +1585,25 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio)
                         split = bio;
                 }
  
+               /*
+                * If a bio is split, the first part of bio will pass
+                * barrier but the bio is queued in current->bio_list (see
+                * generic_make_request). If there is a raise_barrier() called
+                * here, the second part of bio can't pass barrier. But since
+                * the first part bio isn't dispatched to underlying disks
+                * yet, the barrier is never released, hence raise_barrier will
+                * always wait. We have a deadlock.
+                * Note, this only happens in read path. For write path, the
+                * first part of bio is dispatched in a schedule() call
+                * (because of blk plug) or offloaded to raid10d.
+                * Quitting from the function immediately can change the bio
+                * order queued in bio_list and avoid the deadlock.
+                */
                 __make_request(mddev, split);
+               if (split != bio && bio_data_dir(bio) == READ) {
+                       generic_make_request(bio);
+                       break;
+               }
         } while (split != bio);
  
         /* In case raid10d snuck in to freeze_array */
@@ -3717,6 +3749,7 @@ static int raid10_run(struct mddev *mddev)
                 blk_queue_max_discard_sectors(mddev->queue,
                                               mddev->chunk_sectors);
                 blk_queue_max_write_same_sectors(mddev->queue, 0);
+               blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
                 blk_queue_io_min(mddev->queue, chunk_size);
                 if (conf->geo.raid_disks % conf->geo.near_copies)
                         blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks);
@@ -3943,10 +3976,6 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors)
                         return ret;
         }
         md_set_array_sectors(mddev, size);
-       if (mddev->queue) {
-               set_capacity(mddev->gendisk, mddev->array_sectors);
-               revalidate_disk(mddev->gendisk);
-       }
         if (sectors > mddev->dev_sectors &&
             mddev->recovery_cp > oldsize) {
                 mddev->recovery_cp = oldsize;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c

index 4fb09b3fcb410468a9b1939b93d9529e70dd592d..2efdb0d6746074a0416f18b0c0dfbab7cc5be6a8 100644 (file)
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1401,7 +1401,8 @@ static int set_syndrome_sources(struct page **srcs,
                      (test_bit(R5_Wantdrain, &dev->flags) ||
                       test_bit(R5_InJournal, &dev->flags))) ||
                     (srctype == SYNDROME_SRC_WRITTEN &&
-                    dev->written)) {
+                    (dev->written ||
+                     test_bit(R5_InJournal, &dev->flags)))) {
                         if (test_bit(R5_InJournal, &dev->flags))
                                 srcs[slot] = sh->dev[i].orig_page;
                         else
@@ -5030,8 +5031,6 @@ static void raid5_align_endio(struct bio *bi)
         rdev_dec_pending(rdev, conf->mddev);
  
         if (!error) {
-               trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev),
-                                        raid_bi, 0);
                 bio_endio(raid_bi);
                 if (atomic_dec_and_test(&conf->active_aligned_reads))
                         wake_up(&conf->wait_for_quiescent);
@@ -7228,7 +7227,6 @@ static int raid5_run(struct mddev *mddev)
  
         if (mddev->queue) {
                 int chunk_size;
-               bool discard_supported = true;
                 /* read-ahead size must cover two whole stripes, which
                  * is 2 * (datadisks) * chunksize where 'n' is the
                  * number of raid devices
@@ -7264,48 +7262,32 @@ static int raid5_run(struct mddev *mddev)
                 blk_queue_max_discard_sectors(mddev->queue,
                                               0xfffe * STRIPE_SECTORS);
  
-               /*
-                * unaligned part of discard request will be ignored, so can't
-                * guarantee discard_zeroes_data
-                */
-               mddev->queue->limits.discard_zeroes_data = 0;
-
                 blk_queue_max_write_same_sectors(mddev->queue, 0);
+               blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
  
                 rdev_for_each(rdev, mddev) {
                         disk_stack_limits(mddev->gendisk, rdev->bdev,
                                           rdev->data_offset << 9);
                         disk_stack_limits(mddev->gendisk, rdev->bdev,
                                           rdev->new_data_offset << 9);
-                       /*
-                        * discard_zeroes_data is required, otherwise data
-                        * could be lost. Consider a scenario: discard a stripe
-                        * (the stripe could be inconsistent if
-                        * discard_zeroes_data is 0); write one disk of the
-                        * stripe (the stripe could be inconsistent again
-                        * depending on which disks are used to calculate
-                        * parity); the disk is broken; The stripe data of this
-                        * disk is lost.
-                        */
-                       if (!blk_queue_discard(bdev_get_queue(rdev->bdev)) ||
-                           !bdev_get_queue(rdev->bdev)->
-                                               limits.discard_zeroes_data)
-                               discard_supported = false;
-                       /* Unfortunately, discard_zeroes_data is not currently
-                        * a guarantee - just a hint.  So we only allow DISCARD
-                        * if the sysadmin has confirmed that only safe devices
-                        * are in use by setting a module parameter.
-                        */
-                       if (!devices_handle_discard_safely) {
-                               if (discard_supported) {
-                                       pr_info("md/raid456: discard support disabled due to uncertainty.\n");
-                                       pr_info("Set raid456.devices_handle_discard_safely=Y to override.\n");
-                               }
-                               discard_supported = false;
-                       }
                 }
  
-               if (discard_supported &&
+               /*
+                * Zeroing is required, otherwise data
+                * could be lost. Consider a scenario: discard a stripe
+                * (the stripe could be inconsistent if the devices do
+                * not zero discarded blocks); write one disk of the
+                * stripe (the stripe could be inconsistent again
+                * depending on which disks are used to calculate
+                * parity); the disk is broken; the stripe data of this
+                * disk is lost.
+                *
+                * We only allow DISCARD if the sysadmin has confirmed that
+                * only safe devices are in use by setting a module parameter.
+                * A better idea might be to turn DISCARD into WRITE_ZEROES
+                * requests, as that is required to be safe.
+                */
+               if (devices_handle_discard_safely &&
                     mddev->queue->limits.max_discard_sectors >= (stripe >> 9) &&
                     mddev->queue->limits.discard_granularity >= stripe)
                         queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
@@ -7605,8 +7587,6 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors)
                         return ret;
         }
         md_set_array_sectors(mddev, newsize);
-       set_capacity(mddev->gendisk, mddev->array_sectors);
-       revalidate_disk(mddev->gendisk);
         if (sectors > mddev->dev_sectors &&
             mddev->recovery_cp > mddev->dev_sectors) {
                 mddev->recovery_cp = mddev->dev_sectors;
diff --git a/drivers/media/dvb-frontends/drx39xyj/drx_driver.h b/drivers/media/dvb-frontends/drx39xyj/drx_driver.h

index 7a681d8202c7ee9e9eed6dd2dbb4bf118d8f9508..4442e478db72a2420207efc2deca49d56c92c30c 100644 (file)
--- a/drivers/media/dvb-frontends/drx39xyj/drx_driver.h
+++ b/drivers/media/dvb-frontends/drx39xyj/drx_driver.h
@@ -256,8 +256,7 @@ int drxbsp_tuner_default_i2c_write_read(struct tuner_instance *tuner,
  *
  * The actual DAP implementation may be restricted to only one of the modes.
  * A compiler warning or error will be generated if the DAP implementation
-* overides or cannot handle the mode defined below.
-*
+* overrides or cannot handle the mode defined below.
  */
  #ifndef DRXDAP_SINGLE_MASTER
  #define DRXDAP_SINGLE_MASTER 1
@@ -272,7 +271,7 @@ int drxbsp_tuner_default_i2c_write_read(struct tuner_instance *tuner,
  *
  * This maximum size may be restricted by the actual DAP implementation.
  * A compiler warning or error will be generated if the DAP implementation
-* overides or cannot handle the chunksize defined below.
+* overrides or cannot handle the chunksize defined below.
  *
  * Beware that the DAP uses  DRXDAP_MAX_WCHUNKSIZE to create a temporary data
  * buffer. Do not undefine or choose too large, unless your system is able to
@@ -292,8 +291,7 @@ int drxbsp_tuner_default_i2c_write_read(struct tuner_instance *tuner,
  *
  * This maximum size may be restricted by the actual DAP implementation.
  * A compiler warning or error will be generated if the DAP implementation
-* overides or cannot handle the chunksize defined below.
-*
+* overrides or cannot handle the chunksize defined below.
  */
  #ifndef DRXDAP_MAX_RCHUNKSIZE
  #define  DRXDAP_MAX_RCHUNKSIZE 60
diff --git a/drivers/media/platform/coda/imx-vdoa.c b/drivers/media/platform/coda/imx-vdoa.c

index 67fd8ffa60a418f6538d6b358c3e3d91f4f385b6..669a4c82f1ffa4c79b5e47173536faedecf52c34 100644 (file)
--- a/drivers/media/platform/coda/imx-vdoa.c
+++ b/drivers/media/platform/coda/imx-vdoa.c
@@ -321,7 +321,7 @@ static const struct of_device_id vdoa_dt_ids[] = {
  };
  MODULE_DEVICE_TABLE(of, vdoa_dt_ids);
  
-static const struct platform_driver vdoa_driver = {
+static struct platform_driver vdoa_driver = {
         .probe          = vdoa_probe,
         .remove         = vdoa_remove,
         .driver         = {
diff --git a/drivers/media/platform/exynos-gsc/gsc-core.c b/drivers/media/platform/exynos-gsc/gsc-core.c

index cbb03768f5d73574b4d95939a08d7605e94dd6d7..0f0c389f889713ad024eda400ea1f70799710dae 100644 (file)
--- a/drivers/media/platform/exynos-gsc/gsc-core.c
+++ b/drivers/media/platform/exynos-gsc/gsc-core.c
@@ -861,9 +861,7 @@ int gsc_prepare_addr(struct gsc_ctx *ctx, struct vb2_buffer *vb,
  
         if ((frame->fmt->pixelformat == V4L2_PIX_FMT_VYUY) ||
                 (frame->fmt->pixelformat == V4L2_PIX_FMT_YVYU) ||
-               (frame->fmt->pixelformat == V4L2_PIX_FMT_NV61) ||
                 (frame->fmt->pixelformat == V4L2_PIX_FMT_YVU420) ||
-               (frame->fmt->pixelformat == V4L2_PIX_FMT_NV21) ||
                 (frame->fmt->pixelformat == V4L2_PIX_FMT_YVU420M))
                 swap(addr->cb, addr->cr);
  
diff --git a/drivers/media/platform/sti/bdisp/bdisp-v4l2.c b/drivers/media/platform/sti/bdisp/bdisp-v4l2.c

index 823608112d89c14272c9ca2f5f8ebf46ffd30383..7918b928f0589b59b52c17c8470c3e3159d33c1e 100644 (file)
--- a/drivers/media/platform/sti/bdisp/bdisp-v4l2.c
+++ b/drivers/media/platform/sti/bdisp/bdisp-v4l2.c
@@ -632,8 +632,8 @@ static int bdisp_open(struct file *file)
  
  error_ctrls:
         bdisp_ctrls_delete(ctx);
-error_fh:
         v4l2_fh_del(&ctx->fh);
+error_fh:
         v4l2_fh_exit(&ctx->fh);
         bdisp_hw_free_nodes(ctx);
  mem_ctx:
diff --git a/drivers/media/platform/vsp1/vsp1_drm.c b/drivers/media/platform/vsp1/vsp1_drm.c

index b4b583f7137a54eb86f8592724603b296afb9347..b4c0f10fc3b0f12eb9f114ac063ac5b0a85ecb1a 100644 (file)
--- a/drivers/media/platform/vsp1/vsp1_drm.c
+++ b/drivers/media/platform/vsp1/vsp1_drm.c
@@ -54,12 +54,11 @@ EXPORT_SYMBOL_GPL(vsp1_du_init);
  /**
   * vsp1_du_setup_lif - Setup the output part of the VSP pipeline
   * @dev: the VSP device
- * @width: output frame width in pixels
- * @height: output frame height in pixels
+ * @cfg: the LIF configuration
   *
- * Configure the output part of VSP DRM pipeline for the given frame @width and
- * @height. This sets up formats on the BRU source pad, the WPF0 sink and source
- * pads, and the LIF sink pad.
+ * Configure the output part of VSP DRM pipeline for the given frame @cfg.width
+ * and @cfg.height. This sets up formats on the BRU source pad, the WPF0 sink
+ * and source pads, and the LIF sink pad.
   *
   * As the media bus code on the BRU source pad is conditioned by the
   * configuration of the BRU sink 0 pad, we also set up the formats on all BRU
@@ -69,8 +68,7 @@ EXPORT_SYMBOL_GPL(vsp1_du_init);
   *
   * Return 0 on success or a negative error code on failure.
   */
-int vsp1_du_setup_lif(struct device *dev, unsigned int width,
-                     unsigned int height)
+int vsp1_du_setup_lif(struct device *dev, const struct vsp1_du_lif_config *cfg)
  {
         struct vsp1_device *vsp1 = dev_get_drvdata(dev);
         struct vsp1_pipeline *pipe = &vsp1->drm->pipe;
@@ -79,11 +77,8 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width,
         unsigned int i;
         int ret;
  
-       dev_dbg(vsp1->dev, "%s: configuring LIF with format %ux%u\n",
-               __func__, width, height);
-
-       if (width == 0 || height == 0) {
-               /* Zero width or height means the CRTC is being disabled, stop
+       if (!cfg) {
+               /* NULL configuration means the CRTC is being disabled, stop
                  * the pipeline and turn the light off.
                  */
                 ret = vsp1_pipeline_stop(pipe);
@@ -108,6 +103,9 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width,
                 return 0;
         }
  
+       dev_dbg(vsp1->dev, "%s: configuring LIF with format %ux%u\n",
+               __func__, cfg->width, cfg->height);
+
         /* Configure the format at the BRU sinks and propagate it through the
          * pipeline.
          */
@@ -117,8 +115,8 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width,
         for (i = 0; i < bru->entity.source_pad; ++i) {
                 format.pad = i;
  
-               format.format.width = width;
-               format.format.height = height;
+               format.format.width = cfg->width;
+               format.format.height = cfg->height;
                 format.format.code = MEDIA_BUS_FMT_ARGB8888_1X32;
                 format.format.field = V4L2_FIELD_NONE;
  
@@ -133,8 +131,8 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width,
         }
  
         format.pad = bru->entity.source_pad;
-       format.format.width = width;
-       format.format.height = height;
+       format.format.width = cfg->width;
+       format.format.height = cfg->height;
         format.format.code = MEDIA_BUS_FMT_ARGB8888_1X32;
         format.format.field = V4L2_FIELD_NONE;
  
@@ -180,7 +178,8 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width,
         /* Verify that the format at the output of the pipeline matches the
          * requested frame size and media bus code.
          */
-       if (format.format.width != width || format.format.height != height ||
+       if (format.format.width != cfg->width ||
+           format.format.height != cfg->height ||
             format.format.code != MEDIA_BUS_FMT_ARGB8888_1X32) {
                 dev_dbg(vsp1->dev, "%s: format mismatch\n", __func__);
                 return -EPIPE;
diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c

index 393dccaabdd02ac83744faf049ef1a08675fe7ae..1688893a65bb57d2d2ff0d667f82d27fbd88dc37 100644 (file)
--- a/drivers/media/rc/lirc_dev.c
+++ b/drivers/media/rc/lirc_dev.c
@@ -436,6 +436,8 @@ int lirc_dev_fop_open(struct inode *inode, struct file *file)
                 return -ERESTARTSYS;
  
         ir = irctls[iminor(inode)];
+       mutex_unlock(&lirc_dev_lock);
+
         if (!ir) {
                 retval = -ENODEV;
                 goto error;
@@ -476,8 +478,6 @@ int lirc_dev_fop_open(struct inode *inode, struct file *file)
         }
  
  error:
-       mutex_unlock(&lirc_dev_lock);
-
         nonseekable_open(inode, file);
  
         return retval;
diff --git a/drivers/media/rc/nuvoton-cir.c b/drivers/media/rc/nuvoton-cir.c

index b109f8246b968d99cacde9b6ee73719f554a4bfd..ec4b25bd2ec29912f062ae1b654a5ac05434b6f7 100644 (file)
--- a/drivers/media/rc/nuvoton-cir.c
+++ b/drivers/media/rc/nuvoton-cir.c
@@ -176,12 +176,13 @@ static void nvt_write_wakeup_codes(struct rc_dev *dev,
  {
         u8 tolerance, config;
         struct nvt_dev *nvt = dev->priv;
+       unsigned long flags;
         int i;
  
         /* hardcode the tolerance to 10% */
         tolerance = DIV_ROUND_UP(count, 10);
  
-       spin_lock(&nvt->lock);
+       spin_lock_irqsave(&nvt->lock, flags);
  
         nvt_clear_cir_wake_fifo(nvt);
         nvt_cir_wake_reg_write(nvt, count, CIR_WAKE_FIFO_CMP_DEEP);
@@ -203,7 +204,7 @@ static void nvt_write_wakeup_codes(struct rc_dev *dev,
  
         nvt_cir_wake_reg_write(nvt, config, CIR_WAKE_IRCON);
  
-       spin_unlock(&nvt->lock);
+       spin_unlock_irqrestore(&nvt->lock, flags);
  }
  
  static ssize_t wakeup_data_show(struct device *dev,
diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c

index 2424946740e64fb602f55a30d5f158a212cc88ce..d84533699668d20e1797bc7feef1693f74e87be5 100644 (file)
--- a/drivers/media/rc/rc-main.c
+++ b/drivers/media/rc/rc-main.c
@@ -1663,6 +1663,7 @@ static int rc_setup_rx_device(struct rc_dev *dev)
  {
         int rc;
         struct rc_map *rc_map;
+       u64 rc_type;
  
         if (!dev->map_name)
                 return -EINVAL;
@@ -1677,15 +1678,18 @@ static int rc_setup_rx_device(struct rc_dev *dev)
         if (rc)
                 return rc;
  
-       if (dev->change_protocol) {
-               u64 rc_type = (1ll << rc_map->rc_type);
+       rc_type = BIT_ULL(rc_map->rc_type);
  
+       if (dev->change_protocol) {
                 rc = dev->change_protocol(dev, &rc_type);
                 if (rc < 0)
                         goto out_table;
                 dev->enabled_protocols = rc_type;
         }
  
+       if (dev->driver_type == RC_DRIVER_IR_RAW)
+               ir_raw_load_modules(&rc_type);
+
         set_bit(EV_KEY, dev->input_dev->evbit);
         set_bit(EV_REP, dev->input_dev->evbit);
         set_bit(EV_MSC, dev->input_dev->evbit);
@@ -1777,12 +1781,6 @@ int rc_register_device(struct rc_dev *dev)
                 dev->input_name ?: "Unspecified device", path ?: "N/A");
         kfree(path);
  
-       if (dev->driver_type != RC_DRIVER_IR_RAW_TX) {
-               rc = rc_setup_rx_device(dev);
-               if (rc)
-                       goto out_dev;
-       }
-
         if (dev->driver_type == RC_DRIVER_IR_RAW ||
             dev->driver_type == RC_DRIVER_IR_RAW_TX) {
                 if (!raw_init) {
@@ -1791,7 +1789,13 @@ int rc_register_device(struct rc_dev *dev)
                 }
                 rc = ir_raw_event_register(dev);
                 if (rc < 0)
-                       goto out_rx;
+                       goto out_dev;
+       }
+
+       if (dev->driver_type != RC_DRIVER_IR_RAW_TX) {
+               rc = rc_setup_rx_device(dev);
+               if (rc)
+                       goto out_raw;
         }
  
         /* Allow the RC sysfs nodes to be accessible */
@@ -1803,8 +1807,8 @@ int rc_register_device(struct rc_dev *dev)
  
         return 0;
  
-out_rx:
-       rc_free_rx_device(dev);
+out_raw:
+       ir_raw_event_unregister(dev);
  out_dev:
         device_del(&dev->dev);
  out_unlock:
diff --git a/drivers/media/rc/serial_ir.c b/drivers/media/rc/serial_ir.c

index 923fb2299553cb96c0db87368a322ea875da4652..41b54e40176c2393b846a1fb59f6e2cacf187c74 100644 (file)
--- a/drivers/media/rc/serial_ir.c
+++ b/drivers/media/rc/serial_ir.c
@@ -487,10 +487,69 @@ static void serial_ir_timeout(unsigned long arg)
         ir_raw_event_handle(serial_ir.rcdev);
  }
  
+/* Needed by serial_ir_probe() */
+static int serial_ir_tx(struct rc_dev *dev, unsigned int *txbuf,
+                       unsigned int count);
+static int serial_ir_tx_duty_cycle(struct rc_dev *dev, u32 cycle);
+static int serial_ir_tx_carrier(struct rc_dev *dev, u32 carrier);
+static int serial_ir_open(struct rc_dev *rcdev);
+static void serial_ir_close(struct rc_dev *rcdev);
+
  static int serial_ir_probe(struct platform_device *dev)
  {
+       struct rc_dev *rcdev;
         int i, nlow, nhigh, result;
  
+       rcdev = devm_rc_allocate_device(&dev->dev, RC_DRIVER_IR_RAW);
+       if (!rcdev)
+               return -ENOMEM;
+
+       if (hardware[type].send_pulse && hardware[type].send_space)
+               rcdev->tx_ir = serial_ir_tx;
+       if (hardware[type].set_send_carrier)
+               rcdev->s_tx_carrier = serial_ir_tx_carrier;
+       if (hardware[type].set_duty_cycle)
+               rcdev->s_tx_duty_cycle = serial_ir_tx_duty_cycle;
+
+       switch (type) {
+       case IR_HOMEBREW:
+               rcdev->input_name = "Serial IR type home-brew";
+               break;
+       case IR_IRDEO:
+               rcdev->input_name = "Serial IR type IRdeo";
+               break;
+       case IR_IRDEO_REMOTE:
+               rcdev->input_name = "Serial IR type IRdeo remote";
+               break;
+       case IR_ANIMAX:
+               rcdev->input_name = "Serial IR type AnimaX";
+               break;
+       case IR_IGOR:
+               rcdev->input_name = "Serial IR type IgorPlug";
+               break;
+       }
+
+       rcdev->input_phys = KBUILD_MODNAME "/input0";
+       rcdev->input_id.bustype = BUS_HOST;
+       rcdev->input_id.vendor = 0x0001;
+       rcdev->input_id.product = 0x0001;
+       rcdev->input_id.version = 0x0100;
+       rcdev->open = serial_ir_open;
+       rcdev->close = serial_ir_close;
+       rcdev->dev.parent = &serial_ir.pdev->dev;
+       rcdev->allowed_protocols = RC_BIT_ALL_IR_DECODER;
+       rcdev->driver_name = KBUILD_MODNAME;
+       rcdev->map_name = RC_MAP_RC6_MCE;
+       rcdev->min_timeout = 1;
+       rcdev->timeout = IR_DEFAULT_TIMEOUT;
+       rcdev->max_timeout = 10 * IR_DEFAULT_TIMEOUT;
+       rcdev->rx_resolution = 250000;
+
+       serial_ir.rcdev = rcdev;
+
+       setup_timer(&serial_ir.timeout_timer, serial_ir_timeout,
+                   (unsigned long)&serial_ir);
+
         result = devm_request_irq(&dev->dev, irq, serial_ir_irq_handler,
                                   share_irq ? IRQF_SHARED : 0,
                                   KBUILD_MODNAME, &hardware);
@@ -516,9 +575,6 @@ static int serial_ir_probe(struct platform_device *dev)
                 return -EBUSY;
         }
  
-       setup_timer(&serial_ir.timeout_timer, serial_ir_timeout,
-                   (unsigned long)&serial_ir);
-
         result = hardware_init_port();
         if (result < 0)
                 return result;
@@ -552,7 +608,8 @@ static int serial_ir_probe(struct platform_device *dev)
                          sense ? "low" : "high");
  
         dev_dbg(&dev->dev, "Interrupt %d, port %04x obtained\n", irq, io);
-       return 0;
+
+       return devm_rc_register_device(&dev->dev, rcdev);
  }
  
  static int serial_ir_open(struct rc_dev *rcdev)
@@ -723,7 +780,6 @@ static void serial_ir_exit(void)
  
  static int __init serial_ir_init_module(void)
  {
-       struct rc_dev *rcdev;
         int result;
  
         switch (type) {
@@ -754,63 +810,9 @@ static int __init serial_ir_init_module(void)
                 sense = !!sense;
  
         result = serial_ir_init();
-       if (result)
-               return result;
-
-       rcdev = devm_rc_allocate_device(&serial_ir.pdev->dev, RC_DRIVER_IR_RAW);
-       if (!rcdev) {
-               result = -ENOMEM;
-               goto serial_cleanup;
-       }
-
-       if (hardware[type].send_pulse && hardware[type].send_space)
-               rcdev->tx_ir = serial_ir_tx;
-       if (hardware[type].set_send_carrier)
-               rcdev->s_tx_carrier = serial_ir_tx_carrier;
-       if (hardware[type].set_duty_cycle)
-               rcdev->s_tx_duty_cycle = serial_ir_tx_duty_cycle;
-
-       switch (type) {
-       case IR_HOMEBREW:
-               rcdev->input_name = "Serial IR type home-brew";
-               break;
-       case IR_IRDEO:
-               rcdev->input_name = "Serial IR type IRdeo";
-               break;
-       case IR_IRDEO_REMOTE:
-               rcdev->input_name = "Serial IR type IRdeo remote";
-               break;
-       case IR_ANIMAX:
-               rcdev->input_name = "Serial IR type AnimaX";
-               break;
-       case IR_IGOR:
-               rcdev->input_name = "Serial IR type IgorPlug";
-               break;
-       }
-
-       rcdev->input_phys = KBUILD_MODNAME "/input0";
-       rcdev->input_id.bustype = BUS_HOST;
-       rcdev->input_id.vendor = 0x0001;
-       rcdev->input_id.product = 0x0001;
-       rcdev->input_id.version = 0x0100;
-       rcdev->open = serial_ir_open;
-       rcdev->close = serial_ir_close;
-       rcdev->dev.parent = &serial_ir.pdev->dev;
-       rcdev->allowed_protocols = RC_BIT_ALL_IR_DECODER;
-       rcdev->driver_name = KBUILD_MODNAME;
-       rcdev->map_name = RC_MAP_RC6_MCE;
-       rcdev->min_timeout = 1;
-       rcdev->timeout = IR_DEFAULT_TIMEOUT;
-       rcdev->max_timeout = 10 * IR_DEFAULT_TIMEOUT;
-       rcdev->rx_resolution = 250000;
-
-       serial_ir.rcdev = rcdev;
-
-       result = rc_register_device(rcdev);
-
         if (!result)
                 return 0;
-serial_cleanup:
+
         serial_ir_exit();
         return result;
  }
@@ -818,7 +820,6 @@ serial_cleanup:
  static void __exit serial_ir_exit_module(void)
  {
         del_timer_sync(&serial_ir.timeout_timer);
-       rc_unregister_device(serial_ir.rcdev);
         serial_ir_exit();
  }
  
diff --git a/drivers/media/usb/dvb-usb/dvb-usb-firmware.c b/drivers/media/usb/dvb-usb/dvb-usb-firmware.c

index ab9866024ec7983d597efd157476820222ad8134..04033efe7ad5394d4fd9493ce6b790de1404666b 100644 (file)
--- a/drivers/media/usb/dvb-usb/dvb-usb-firmware.c
+++ b/drivers/media/usb/dvb-usb/dvb-usb-firmware.c
@@ -36,16 +36,18 @@ static int usb_cypress_writemem(struct usb_device *udev,u16 addr,u8 *data, u8 le
  int usb_cypress_load_firmware(struct usb_device *udev, const struct firmware *fw, int type)
  {
         struct hexline *hx;
-       u8 reset;
-       int ret,pos=0;
+       u8 *buf;
+       int ret, pos = 0;
+       u16 cpu_cs_register = cypress[type].cpu_cs_register;
  
-       hx = kmalloc(sizeof(*hx), GFP_KERNEL);
-       if (!hx)
+       buf = kmalloc(sizeof(*hx), GFP_KERNEL);
+       if (!buf)
                 return -ENOMEM;
+       hx = (struct hexline *)buf;
  
         /* stop the CPU */
-       reset = 1;
-       if ((ret = usb_cypress_writemem(udev,cypress[type].cpu_cs_register,&reset,1)) != 1)
+       buf[0] = 1;
+       if (usb_cypress_writemem(udev, cpu_cs_register, buf, 1) != 1)
                 err("could not stop the USB controller CPU.");
  
         while ((ret = dvb_usb_get_hexline(fw, hx, &pos)) > 0) {
@@ -61,21 +63,21 @@ int usb_cypress_load_firmware(struct usb_device *udev, const struct firmware *fw
         }
         if (ret < 0) {
                 err("firmware download failed at %d with %d",pos,ret);
-               kfree(hx);
+               kfree(buf);
                 return ret;
         }
  
         if (ret == 0) {
                 /* restart the CPU */
-               reset = 0;
-               if (ret || usb_cypress_writemem(udev,cypress[type].cpu_cs_register,&reset,1) != 1) {
+               buf[0] = 0;
+               if (usb_cypress_writemem(udev, cpu_cs_register, buf, 1) != 1) {
                         err("could not restart the USB controller CPU.");
                         ret = -EINVAL;
                 }
         } else
                 ret = -EIO;
  
-       kfree(hx);
+       kfree(buf);
  
         return ret;
  }
diff --git a/drivers/media/usb/dvb-usb/dw2102.c b/drivers/media/usb/dvb-usb/dw2102.c

index 6ca502d834b4f2cfcc0e6c6a3699bdaaea04d293..4f42d57f81d9541d25f02af65086f6465af90728 100644 (file)
--- a/drivers/media/usb/dvb-usb/dw2102.c
+++ b/drivers/media/usb/dvb-usb/dw2102.c
@@ -68,6 +68,7 @@
  struct dw2102_state {
         u8 initialized;
         u8 last_lock;
+       u8 data[MAX_XFER_SIZE + 4];
         struct i2c_client *i2c_client_demod;
         struct i2c_client *i2c_client_tuner;
  
@@ -661,62 +662,72 @@ static int su3000_i2c_transfer(struct i2c_adapter *adap, struct i2c_msg msg[],
                                                                 int num)
  {
         struct dvb_usb_device *d = i2c_get_adapdata(adap);
-       u8 obuf[0x40], ibuf[0x40];
+       struct dw2102_state *state;
  
         if (!d)
                 return -ENODEV;
+
+       state = d->priv;
+
         if (mutex_lock_interruptible(&d->i2c_mutex) < 0)
                 return -EAGAIN;
+       if (mutex_lock_interruptible(&d->data_mutex) < 0) {
+               mutex_unlock(&d->i2c_mutex);
+               return -EAGAIN;
+       }
  
         switch (num) {
         case 1:
                 switch (msg[0].addr) {
                 case SU3000_STREAM_CTRL:
-                       obuf[0] = msg[0].buf[0] + 0x36;
-                       obuf[1] = 3;
-                       obuf[2] = 0;
-                       if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 0, 0) < 0)
+                       state->data[0] = msg[0].buf[0] + 0x36;
+                       state->data[1] = 3;
+                       state->data[2] = 0;
+                       if (dvb_usb_generic_rw(d, state->data, 3,
+                                       state->data, 0, 0) < 0)
                                 err("i2c transfer failed.");
                         break;
                 case DW2102_RC_QUERY:
-                       obuf[0] = 0x10;
-                       if (dvb_usb_generic_rw(d, obuf, 1, ibuf, 2, 0) < 0)
+                       state->data[0] = 0x10;
+                       if (dvb_usb_generic_rw(d, state->data, 1,
+                                       state->data, 2, 0) < 0)
                                 err("i2c transfer failed.");
-                       msg[0].buf[1] = ibuf[0];
-                       msg[0].buf[0] = ibuf[1];
+                       msg[0].buf[1] = state->data[0];
+                       msg[0].buf[0] = state->data[1];
                         break;
                 default:
                         /* always i2c write*/
-                       obuf[0] = 0x08;
-                       obuf[1] = msg[0].addr;
-                       obuf[2] = msg[0].len;
+                       state->data[0] = 0x08;
+                       state->data[1] = msg[0].addr;
+                       state->data[2] = msg[0].len;
  
-                       memcpy(&obuf[3], msg[0].buf, msg[0].len);
+                       memcpy(&state->data[3], msg[0].buf, msg[0].len);
  
-                       if (dvb_usb_generic_rw(d, obuf, msg[0].len + 3,
-                                               ibuf, 1, 0) < 0)
+                       if (dvb_usb_generic_rw(d, state->data, msg[0].len + 3,
+                                               state->data, 1, 0) < 0)
                                 err("i2c transfer failed.");
  
                 }
                 break;
         case 2:
                 /* always i2c read */
-               obuf[0] = 0x09;
-               obuf[1] = msg[0].len;
-               obuf[2] = msg[1].len;
-               obuf[3] = msg[0].addr;
-               memcpy(&obuf[4], msg[0].buf, msg[0].len);
-
-               if (dvb_usb_generic_rw(d, obuf, msg[0].len + 4,
-                                       ibuf, msg[1].len + 1, 0) < 0)
+               state->data[0] = 0x09;
+               state->data[1] = msg[0].len;
+               state->data[2] = msg[1].len;
+               state->data[3] = msg[0].addr;
+               memcpy(&state->data[4], msg[0].buf, msg[0].len);
+
+               if (dvb_usb_generic_rw(d, state->data, msg[0].len + 4,
+                                       state->data, msg[1].len + 1, 0) < 0)
                         err("i2c transfer failed.");
  
-               memcpy(msg[1].buf, &ibuf[1], msg[1].len);
+               memcpy(msg[1].buf, &state->data[1], msg[1].len);
                 break;
         default:
                 warn("more than 2 i2c messages at a time is not handled yet.");
                 break;
         }
+       mutex_unlock(&d->data_mutex);
         mutex_unlock(&d->i2c_mutex);
         return num;
  }
@@ -844,17 +855,23 @@ static int su3000_streaming_ctrl(struct dvb_usb_adapter *adap, int onoff)
  static int su3000_power_ctrl(struct dvb_usb_device *d, int i)
  {
         struct dw2102_state *state = (struct dw2102_state *)d->priv;
-       u8 obuf[] = {0xde, 0};
+       int ret = 0;
  
         info("%s: %d, initialized %d", __func__, i, state->initialized);
  
         if (i && !state->initialized) {
+               mutex_lock(&d->data_mutex);
+
+               state->data[0] = 0xde;
+               state->data[1] = 0;
+
                 state->initialized = 1;
                 /* reset board */
-               return dvb_usb_generic_rw(d, obuf, 2, NULL, 0, 0);
+               ret = dvb_usb_generic_rw(d, state->data, 2, NULL, 0, 0);
+               mutex_unlock(&d->data_mutex);
         }
  
-       return 0;
+       return ret;
  }
  
  static int su3000_read_mac_address(struct dvb_usb_device *d, u8 mac[6])
@@ -1309,49 +1326,57 @@ static int prof_7500_frontend_attach(struct dvb_usb_adapter *d)
         return 0;
  }
  
-static int su3000_frontend_attach(struct dvb_usb_adapter *d)
+static int su3000_frontend_attach(struct dvb_usb_adapter *adap)
  {
-       u8 obuf[3] = { 0xe, 0x80, 0 };
-       u8 ibuf[] = { 0 };
+       struct dvb_usb_device *d = adap->dev;
+       struct dw2102_state *state = d->priv;
+
+       mutex_lock(&d->data_mutex);
+
+       state->data[0] = 0xe;
+       state->data[1] = 0x80;
+       state->data[2] = 0;
  
-       if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0)
+       if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0)
                 err("command 0x0e transfer failed.");
  
-       obuf[0] = 0xe;
-       obuf[1] = 0x02;
-       obuf[2] = 1;
+       state->data[0] = 0xe;
+       state->data[1] = 0x02;
+       state->data[2] = 1;
  
-       if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0)
+       if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0)
                 err("command 0x0e transfer failed.");
         msleep(300);
  
-       obuf[0] = 0xe;
-       obuf[1] = 0x83;
-       obuf[2] = 0;
+       state->data[0] = 0xe;
+       state->data[1] = 0x83;
+       state->data[2] = 0;
  
-       if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0)
+       if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0)
                 err("command 0x0e transfer failed.");
  
-       obuf[0] = 0xe;
-       obuf[1] = 0x83;
-       obuf[2] = 1;
+       state->data[0] = 0xe;
+       state->data[1] = 0x83;
+       state->data[2] = 1;
  
-       if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0)
+       if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0)
                 err("command 0x0e transfer failed.");
  
-       obuf[0] = 0x51;
+       state->data[0] = 0x51;
  
-       if (dvb_usb_generic_rw(d->dev, obuf, 1, ibuf, 1, 0) < 0)
+       if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0)
                 err("command 0x51 transfer failed.");
  
-       d->fe_adap[0].fe = dvb_attach(ds3000_attach, &su3000_ds3000_config,
-                                       &d->dev->i2c_adap);
-       if (d->fe_adap[0].fe == NULL)
+       mutex_unlock(&d->data_mutex);
+
+       adap->fe_adap[0].fe = dvb_attach(ds3000_attach, &su3000_ds3000_config,
+                                       &d->i2c_adap);
+       if (adap->fe_adap[0].fe == NULL)
                 return -EIO;
  
-       if (dvb_attach(ts2020_attach, d->fe_adap[0].fe,
+       if (dvb_attach(ts2020_attach, adap->fe_adap[0].fe,
                                 &dw2104_ts2020_config,
-                               &d->dev->i2c_adap)) {
+                               &d->i2c_adap)) {
                 info("Attached DS3000/TS2020!");
                 return 0;
         }
@@ -1360,47 +1385,55 @@ static int su3000_frontend_attach(struct dvb_usb_adapter *d)
         return -EIO;
  }
  
-static int t220_frontend_attach(struct dvb_usb_adapter *d)
+static int t220_frontend_attach(struct dvb_usb_adapter *adap)
  {
-       u8 obuf[3] = { 0xe, 0x87, 0 };
-       u8 ibuf[] = { 0 };
+       struct dvb_usb_device *d = adap->dev;
+       struct dw2102_state *state = d->priv;
+
+       mutex_lock(&d->data_mutex);
  
-       if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0)
+       state->data[0] = 0xe;
+       state->data[1] = 0x87;
+       state->data[2] = 0x0;
+
+       if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0)
                 err("command 0x0e transfer failed.");
  
-       obuf[0] = 0xe;
-       obuf[1] = 0x86;
-       obuf[2] = 1;
+       state->data[0] = 0xe;
+       state->data[1] = 0x86;
+       state->data[2] = 1;
  
-       if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0)
+       if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0)
                 err("command 0x0e transfer failed.");
  
-       obuf[0] = 0xe;
-       obuf[1] = 0x80;
-       obuf[2] = 0;
+       state->data[0] = 0xe;
+       state->data[1] = 0x80;
+       state->data[2] = 0;
  
-       if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0)
+       if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0)
                 err("command 0x0e transfer failed.");
  
         msleep(50);
  
-       obuf[0] = 0xe;
-       obuf[1] = 0x80;
-       obuf[2] = 1;
+       state->data[0] = 0xe;
+       state->data[1] = 0x80;
+       state->data[2] = 1;
  
-       if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0)
+       if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0)
                 err("command 0x0e transfer failed.");
  
-       obuf[0] = 0x51;
+       state->data[0] = 0x51;
  
-       if (dvb_usb_generic_rw(d->dev, obuf, 1, ibuf, 1, 0) < 0)
+       if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0)
                 err("command 0x51 transfer failed.");
  
-       d->fe_adap[0].fe = dvb_attach(cxd2820r_attach, &cxd2820r_config,
-                                       &d->dev->i2c_adap, NULL);
-       if (d->fe_adap[0].fe != NULL) {
-               if (dvb_attach(tda18271_attach, d->fe_adap[0].fe, 0x60,
-                                       &d->dev->i2c_adap, &tda18271_config)) {
+       mutex_unlock(&d->data_mutex);
+
+       adap->fe_adap[0].fe = dvb_attach(cxd2820r_attach, &cxd2820r_config,
+                                       &d->i2c_adap, NULL);
+       if (adap->fe_adap[0].fe != NULL) {
+               if (dvb_attach(tda18271_attach, adap->fe_adap[0].fe, 0x60,
+                                       &d->i2c_adap, &tda18271_config)) {
                         info("Attached TDA18271HD/CXD2820R!");
                         return 0;
                 }
@@ -1410,23 +1443,30 @@ static int t220_frontend_attach(struct dvb_usb_adapter *d)
         return -EIO;
  }
  
-static int m88rs2000_frontend_attach(struct dvb_usb_adapter *d)
+static int m88rs2000_frontend_attach(struct dvb_usb_adapter *adap)
  {
-       u8 obuf[] = { 0x51 };
-       u8 ibuf[] = { 0 };
+       struct dvb_usb_device *d = adap->dev;
+       struct dw2102_state *state = d->priv;
+
+       mutex_lock(&d->data_mutex);
  
-       if (dvb_usb_generic_rw(d->dev, obuf, 1, ibuf, 1, 0) < 0)
+       state->data[0] = 0x51;
+
+       if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0)
                 err("command 0x51 transfer failed.");
  
-       d->fe_adap[0].fe = dvb_attach(m88rs2000_attach, &s421_m88rs2000_config,
-                                       &d->dev->i2c_adap);
+       mutex_unlock(&d->data_mutex);
  
-       if (d->fe_adap[0].fe == NULL)
+       adap->fe_adap[0].fe = dvb_attach(m88rs2000_attach,
+                                       &s421_m88rs2000_config,
+                                       &d->i2c_adap);
+
+       if (adap->fe_adap[0].fe == NULL)
                 return -EIO;
  
-       if (dvb_attach(ts2020_attach, d->fe_adap[0].fe,
+       if (dvb_attach(ts2020_attach, adap->fe_adap[0].fe,
                                 &dw2104_ts2020_config,
-                               &d->dev->i2c_adap)) {
+                               &d->i2c_adap)) {
                 info("Attached RS2000/TS2020!");
                 return 0;
         }
@@ -1439,44 +1479,50 @@ static int tt_s2_4600_frontend_attach(struct dvb_usb_adapter *adap)
  {
         struct dvb_usb_device *d = adap->dev;
         struct dw2102_state *state = d->priv;
-       u8 obuf[3] = { 0xe, 0x80, 0 };
-       u8 ibuf[] = { 0 };
         struct i2c_adapter *i2c_adapter;
         struct i2c_client *client;
         struct i2c_board_info board_info;
         struct m88ds3103_platform_data m88ds3103_pdata = {};
         struct ts2020_config ts2020_config = {};
  
-       if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0)
+       mutex_lock(&d->data_mutex);
+
+       state->data[0] = 0xe;
+       state->data[1] = 0x80;
+       state->data[2] = 0x0;
+
+       if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0)
                 err("command 0x0e transfer failed.");
  
-       obuf[0] = 0xe;
-       obuf[1] = 0x02;
-       obuf[2] = 1;
+       state->data[0] = 0xe;
+       state->data[1] = 0x02;
+       state->data[2] = 1;
  
-       if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0)
+       if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0)
                 err("command 0x0e transfer failed.");
         msleep(300);
  
-       obuf[0] = 0xe;
-       obuf[1] = 0x83;
-       obuf[2] = 0;
+       state->data[0] = 0xe;
+       state->data[1] = 0x83;
+       state->data[2] = 0;
  
-       if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0)
+       if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0)
                 err("command 0x0e transfer failed.");
  
-       obuf[0] = 0xe;
-       obuf[1] = 0x83;
-       obuf[2] = 1;
+       state->data[0] = 0xe;
+       state->data[1] = 0x83;
+       state->data[2] = 1;
  
-       if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0)
+       if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0)
                 err("command 0x0e transfer failed.");
  
-       obuf[0] = 0x51;
+       state->data[0] = 0x51;
  
-       if (dvb_usb_generic_rw(d, obuf, 1, ibuf, 1, 0) < 0)
+       if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0)
                 err("command 0x51 transfer failed.");
  
+       mutex_unlock(&d->data_mutex);
+
         /* attach demod */
         m88ds3103_pdata.clk = 27000000;
         m88ds3103_pdata.i2c_wr_max = 33;
diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c

index 5457c361ad586424050c98958bdefd22a4c17db7..bf0fe0137dfed2c893001abb8cfe8b83861dae08 100644 (file)
--- a/drivers/memory/omap-gpmc.c
+++ b/drivers/memory/omap-gpmc.c
@@ -1947,9 +1947,7 @@ static int gpmc_probe_onenand_child(struct platform_device *pdev,
         if (!of_property_read_u32(child, "dma-channel", &val))
                 gpmc_onenand_data->dma_channel = val;
  
-       gpmc_onenand_init(gpmc_onenand_data);
-
-       return 0;
+       return gpmc_onenand_init(gpmc_onenand_data);
  }
  #else
  static int gpmc_probe_onenand_child(struct platform_device *pdev,
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c

index 91f645992c9416ab07e765a05c006143fb15a17f..b27ea98b781f77747c010f90f20d9da6883a3070 100644 (file)
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1792,15 +1792,14 @@ static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev,
  
         /* If we're permanently dead, give up. */
         if (state == pci_channel_io_perm_failure) {
-               /* Tell the AFU drivers; but we don't care what they
-                * say, we're going away.
-                */
                 for (i = 0; i < adapter->slices; i++) {
                         afu = adapter->afu[i];
-                       /* Only participate in EEH if we are on a virtual PHB */
-                       if (afu->phb == NULL)
-                               return PCI_ERS_RESULT_NONE;
-                       cxl_vphb_error_detected(afu, state);
+                       /*
+                        * Tell the AFU drivers; but we don't care what they
+                        * say, we're going away.
+                        */
+                       if (afu->phb != NULL)
+                               cxl_vphb_error_detected(afu, state);
                 }
                 return PCI_ERS_RESULT_DISCONNECT;
         }
diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c

index 3600c9993a9830504d0cc5bcd61c4b8f19376762..29f2daed37e07b1e1359a2d36ff54bd04208233a 100644 (file)
--- a/drivers/misc/mei/bus-fixup.c
+++ b/drivers/misc/mei/bus-fixup.c
@@ -112,11 +112,9 @@ struct mkhi_msg {
  
  static int mei_osver(struct mei_cl_device *cldev)
  {
-       int ret;
         const size_t size = sizeof(struct mkhi_msg_hdr) +
                             sizeof(struct mkhi_fwcaps) +
                             sizeof(struct mei_os_ver);
-       size_t length = 8;
         char buf[size];
         struct mkhi_msg *req;
         struct mkhi_fwcaps *fwcaps;
@@ -137,15 +135,7 @@ static int mei_osver(struct mei_cl_device *cldev)
         os_ver = (struct mei_os_ver *)fwcaps->data;
         os_ver->os_type = OSTYPE_LINUX;
  
-       ret = __mei_cl_send(cldev->cl, buf, size, mode);
-       if (ret < 0)
-               return ret;
-
-       ret = __mei_cl_recv(cldev->cl, buf, length, 0);
-       if (ret < 0)
-               return ret;
-
-       return 0;
+       return __mei_cl_send(cldev->cl, buf, size, mode);
  }
  
  static void mei_mkhi_fix(struct mei_cl_device *cldev)
@@ -160,7 +150,7 @@ static void mei_mkhi_fix(struct mei_cl_device *cldev)
                 return;
  
         ret = mei_osver(cldev);
-       if (ret)
+       if (ret < 0)
                 dev_err(&cldev->dev, "OS version command failed %d\n", ret);
  
         mei_cldev_disable(cldev);
diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c

index cfb1cdf176fa9001e83894c63c9eb6480516d230..13c55b8f9261861c20710c4ce039c0d605937909 100644 (file)
--- a/drivers/misc/mei/init.c
+++ b/drivers/misc/mei/init.c
@@ -124,8 +124,6 @@ int mei_reset(struct mei_device *dev)
  
         mei_clear_interrupts(dev);
  
-       mei_synchronize_irq(dev);
-
         /* we're already in reset, cancel the init timer
          * if the reset was called due the hbm protocol error
          * we need to call it before hw start
@@ -304,6 +302,9 @@ static void mei_reset_work(struct work_struct *work)
                 container_of(work, struct mei_device,  reset_work);
         int ret;
  
+       mei_clear_interrupts(dev);
+       mei_synchronize_irq(dev);
+
         mutex_lock(&dev->device_lock);
  
         ret = mei_reset(dev);
@@ -328,6 +329,9 @@ void mei_stop(struct mei_device *dev)
  
         mei_cancel_work(dev);
  
+       mei_clear_interrupts(dev);
+       mei_synchronize_irq(dev);
+
         mutex_lock(&dev->device_lock);
  
         dev->dev_state = MEI_DEV_POWER_DOWN;
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c

index 6fb773dbcd0c3233d62136dcf673afb7b80efcea..93be82fc338ad8b2c3e454dad1e1491f20d47c1c 100644 (file)
--- a/drivers/misc/sgi-gru/grufault.c
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -219,15 +219,20 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
         int write, unsigned long *paddr, int *pageshift)
  {
         pgd_t *pgdp;
-       pmd_t *pmdp;
+       p4d_t *p4dp;
         pud_t *pudp;
+       pmd_t *pmdp;
         pte_t pte;
  
         pgdp = pgd_offset(vma->vm_mm, vaddr);
         if (unlikely(pgd_none(*pgdp)))
                 goto err;
  
-       pudp = pud_offset(pgdp, vaddr);
+       p4dp = p4d_offset(pgdp, vaddr);
+       if (unlikely(p4d_none(*p4dp)))
+               goto err;
+
+       pudp = pud_offset(p4dp, vaddr);
         if (unlikely(pud_none(*pudp)))
                 goto err;
  
diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c

index 9d659542a335b444914f2ead245f0975ac970a2b..dad5abee656ef550b0ac041e354e2c7ef89164e3 100644 (file)
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -566,10 +566,10 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
          */
         error = pci_alloc_irq_vectors(pdev, VMCI_MAX_INTRS, VMCI_MAX_INTRS,
                         PCI_IRQ_MSIX);
-       if (error) {
+       if (error < 0) {
                 error = pci_alloc_irq_vectors(pdev, 1, 1,
                                 PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
-               if (error)
+               if (error < 0)
                         goto err_remove_bitmap;
         } else {
                 vmci_dev->exclusive_vectors = true;
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c

index 1621fa08e2069298f6a8438c8babdf11ae4a817c..ff3da960c4736147b2c1348681412953508ac49b 100644 (file)
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -1560,11 +1560,8 @@ static bool mmc_blk_rw_cmd_err(struct mmc_blk_data *md, struct mmc_card *card,
                                struct mmc_blk_request *brq, struct request *req,
                                bool old_req_pending)
  {
-       struct mmc_queue_req *mq_rq;
         bool req_pending;
  
-       mq_rq = container_of(brq, struct mmc_queue_req, brq);
-
         /*
          * If this is an SD card and we're writing, we can first
          * mark the known good sectors as ok.
@@ -1701,7 +1698,8 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
                 case MMC_BLK_CMD_ERR:
                         req_pending = mmc_blk_rw_cmd_err(md, card, brq, old_req, req_pending);
                         if (mmc_blk_reset(md, card->host, type)) {
-                               mmc_blk_rw_cmd_abort(card, old_req);
+                               if (req_pending)
+                                       mmc_blk_rw_cmd_abort(card, old_req);
                                 mmc_blk_rw_try_restart(mq, new_req);
                                 return;
                         }
@@ -1817,6 +1815,7 @@ void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
                 mmc_blk_issue_flush(mq, req);
         } else {
                 mmc_blk_issue_rw_rq(mq, req);
+               card->host->context_info.is_waiting_last_req = false;
         }
  
  out:
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c

index 7fd722868875f396e3e4e8147774913ab860b0e2..b502601df228156c60e03e3ddabd64006bd515f7 100644 (file)
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1730,7 +1730,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
                 err = mmc_select_hs400(card);
                 if (err)
                         goto free_card;
-       } else {
+       } else if (!mmc_card_hs400es(card)) {
                 /* Select the desired bus width optionally */
                 err = mmc_select_bus_width(card);
                 if (err > 0 && mmc_card_hs(card)) {
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c

index 493eb10ce58045851fc1064426acc08b842f4811..4c54ad34e17a186db798b23317e0c5e485495a4f 100644 (file)
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -167,8 +167,6 @@ static void mmc_queue_setup_discard(struct request_queue *q,
  
         queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
         blk_queue_max_discard_sectors(q, max_discard);
-       if (card->erased_byte == 0 && !mmc_can_discard(card))
-               q->limits.discard_zeroes_data = 1;
         q->limits.discard_granularity = card->pref_erase << 9;
         /* granularity must not be greater than max. discard */
         if (card->pref_erase > max_discard)
diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c

index e992a7f8a16fc3019016aa1f2844cfbfb437ad97..2b32b88949ba40dfb3901cf9588f670b7aad8441 100644 (file)
--- a/drivers/mmc/core/sdio_bus.c
+++ b/drivers/mmc/core/sdio_bus.c
@@ -267,7 +267,7 @@ static void sdio_release_func(struct device *dev)
         sdio_free_func_cis(func);
  
         kfree(func->info);
-
+       kfree(func->tmpbuf);
         kfree(func);
  }
  
@@ -282,6 +282,16 @@ struct sdio_func *sdio_alloc_func(struct mmc_card *card)
         if (!func)
                 return ERR_PTR(-ENOMEM);
  
+       /*
+        * allocate buffer separately to make sure it's properly aligned for
+        * DMA usage (incl. 64 bit DMA)
+        */
+       func->tmpbuf = kmalloc(4, GFP_KERNEL);
+       if (!func->tmpbuf) {
+               kfree(func);
+               return ERR_PTR(-ENOMEM);
+       }
+
         func->card = card;
  
         device_initialize(&func->dev);
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c

index a9ac0b4573131f48cad46044e018b5de479cf695..8718432751c50c6d5a955b104c250b151430d19c 100644 (file)
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -22,6 +22,7 @@
  #include <linux/ioport.h>
  #include <linux/module.h>
  #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
  #include <linux/seq_file.h>
  #include <linux/slab.h>
  #include <linux/stat.h>
@@ -1621,10 +1622,16 @@ static void dw_mci_init_card(struct mmc_host *mmc, struct mmc_card *card)
  
                 if (card->type == MMC_TYPE_SDIO ||
                     card->type == MMC_TYPE_SD_COMBO) {
-                       set_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags);
+                       if (!test_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags)) {
+                               pm_runtime_get_noresume(mmc->parent);
+                               set_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags);
+                       }
                         clk_en_a = clk_en_a_old & ~clken_low_pwr;
                 } else {
-                       clear_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags);
+                       if (test_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags)) {
+                               pm_runtime_put_noidle(mmc->parent);
+                               clear_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags);
+                       }
                         clk_en_a = clk_en_a_old | clken_low_pwr;
                 }
  
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c

index 8e32580c12b520017eb73af884f1b04607c3a78b..b235d8da0602a84e78ff8a4478016f642c67bf43 100644 (file)
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -580,7 +580,7 @@ static void msdc_set_mclk(struct msdc_host *host, unsigned char timing, u32 hz)
                 }
         }
         sdr_set_field(host->base + MSDC_CFG, MSDC_CFG_CKMOD | MSDC_CFG_CKDIV,
-                       (mode << 8) | (div % 0xff));
+                     (mode << 8) | div);
         sdr_set_bits(host->base + MSDC_CFG, MSDC_CFG_CKPDN);
         while (!(readl(host->base + MSDC_CFG) & MSDC_CFG_CKSTB))
                 cpu_relax();
@@ -1559,7 +1559,7 @@ static int msdc_drv_probe(struct platform_device *pdev)
         host->src_clk_freq = clk_get_rate(host->src_clk);
         /* Set host parameters to mmc */
         mmc->ops = &mt_msdc_ops;
-       mmc->f_min = host->src_clk_freq / (4 * 255);
+       mmc->f_min = DIV_ROUND_UP(host->src_clk_freq, 4 * 255);
  
         mmc->caps |= MMC_CAP_ERASE | MMC_CAP_CMD23;
         /* MMC core transfer sizes tunable parameters */
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c

index 7123ef96ed18523c88553146035103f3517bd372..445fc47dc3e77e39b17e7531eb7dbed58ddd3279 100644 (file)
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -830,6 +830,7 @@ static int esdhc_change_pinstate(struct sdhci_host *host,
  
         switch (uhs) {
         case MMC_TIMING_UHS_SDR50:
+       case MMC_TIMING_UHS_DDR50:
                 pinctrl = imx_data->pins_100mhz;
                 break;
         case MMC_TIMING_UHS_SDR104:
diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c

index 410a55b1c25fe5f2ef32ff8f2d26c4c3286f4b71..1cfd7f90033944c6a6441aaf7256b1ce44350354 100644 (file)
--- a/drivers/mmc/host/sdhci-of-arasan.c
+++ b/drivers/mmc/host/sdhci-of-arasan.c
@@ -28,13 +28,9 @@
  #include "sdhci-pltfm.h"
  #include <linux/of.h>
  
-#define SDHCI_ARASAN_CLK_CTRL_OFFSET   0x2c
  #define SDHCI_ARASAN_VENDOR_REGISTER   0x78
  
  #define VENDOR_ENHANCED_STROBE         BIT(0)
-#define CLK_CTRL_TIMEOUT_SHIFT         16
-#define CLK_CTRL_TIMEOUT_MASK          (0xf << CLK_CTRL_TIMEOUT_SHIFT)
-#define CLK_CTRL_TIMEOUT_MIN_EXP       13
  
  #define PHY_CLK_TOO_SLOW_HZ            400000
  
@@ -163,15 +159,15 @@ static int sdhci_arasan_syscon_write(struct sdhci_host *host,
  
  static unsigned int sdhci_arasan_get_timeout_clock(struct sdhci_host *host)
  {
-       u32 div;
         unsigned long freq;
         struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
  
-       div = readl(host->ioaddr + SDHCI_ARASAN_CLK_CTRL_OFFSET);
-       div = (div & CLK_CTRL_TIMEOUT_MASK) >> CLK_CTRL_TIMEOUT_SHIFT;
+       /* SDHCI timeout clock is in kHz */
+       freq = DIV_ROUND_UP(clk_get_rate(pltfm_host->clk), 1000);
  
-       freq = clk_get_rate(pltfm_host->clk);
-       freq /= 1 << (CLK_CTRL_TIMEOUT_MIN_EXP + div);
+       /* or in MHz */
+       if (host->caps & SDHCI_TIMEOUT_CLK_UNIT)
+               freq = DIV_ROUND_UP(freq, 1000);
  
         return freq;
  }
diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c

index 2f9ad213377a2ccb9091693ba749266fa4b420c4..d5430ed02a67896616bd1371a22f51bb59be8ee2 100644 (file)
--- a/drivers/mmc/host/sdhci-of-at91.c
+++ b/drivers/mmc/host/sdhci-of-at91.c
@@ -29,6 +29,8 @@
  
  #include "sdhci-pltfm.h"
  
+#define SDMMC_MC1R     0x204
+#define                SDMMC_MC1R_DDR          BIT(3)
  #define SDMMC_CACR     0x230
  #define                SDMMC_CACR_CAPWREN      BIT(0)
  #define                SDMMC_CACR_KEY          (0x46 << 8)
@@ -85,11 +87,37 @@ static void sdhci_at91_set_clock(struct sdhci_host *host, unsigned int clock)
         sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
  }
  
+/*
+ * In this specific implementation of the SDHCI controller, the power register
+ * needs to have a valid voltage set even when the power supply is managed by
+ * an external regulator.
+ */
+static void sdhci_at91_set_power(struct sdhci_host *host, unsigned char mode,
+                    unsigned short vdd)
+{
+       if (!IS_ERR(host->mmc->supply.vmmc)) {
+               struct mmc_host *mmc = host->mmc;
+
+               spin_unlock_irq(&host->lock);
+               mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd);
+               spin_lock_irq(&host->lock);
+       }
+       sdhci_set_power_noreg(host, mode, vdd);
+}
+
+void sdhci_at91_set_uhs_signaling(struct sdhci_host *host, unsigned int timing)
+{
+       if (timing == MMC_TIMING_MMC_DDR52)
+               sdhci_writeb(host, SDMMC_MC1R_DDR, SDMMC_MC1R);
+       sdhci_set_uhs_signaling(host, timing);
+}
+
  static const struct sdhci_ops sdhci_at91_sama5d2_ops = {
         .set_clock              = sdhci_at91_set_clock,
         .set_bus_width          = sdhci_set_bus_width,
         .reset                  = sdhci_reset,
-       .set_uhs_signaling      = sdhci_set_uhs_signaling,
+       .set_uhs_signaling      = sdhci_at91_set_uhs_signaling,
+       .set_power              = sdhci_at91_set_power,
  };
  
  static const struct sdhci_pltfm_data soc_data_sama5d2 = {
diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c

index 982b3e349426141710abef87f75819c518bf4a45..86560d590786f3f62a65c8668f2e601fd27b75be 100644 (file)
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -451,6 +451,8 @@ static void sdhci_intel_set_power(struct sdhci_host *host, unsigned char mode,
         if (mode == MMC_POWER_OFF)
                 return;
  
+       spin_unlock_irq(&host->lock);
+
         /*
          * Bus power might not enable after D3 -> D0 transition due to the
          * present state not yet having propagated. Retry for up to 2ms.
@@ -463,6 +465,8 @@ static void sdhci_intel_set_power(struct sdhci_host *host, unsigned char mode,
                 reg |= SDHCI_POWER_ON;
                 sdhci_writeb(host, reg, SDHCI_POWER_CONTROL);
         }
+
+       spin_lock_irq(&host->lock);
  }
  
  static const struct sdhci_ops sdhci_intel_byt_ops = {
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c

index 6fdd7a70f229b8bfd08f6d8b9df509dd0fec2bbd..63bc33a54d0dd8e63b50197611be31aae6f288fc 100644 (file)
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1362,7 +1362,9 @@ void sdhci_enable_clk(struct sdhci_host *host, u16 clk)
                         return;
                 }
                 timeout--;
-               mdelay(1);
+               spin_unlock_irq(&host->lock);
+               usleep_range(900, 1100);
+               spin_lock_irq(&host->lock);
         }
  
         clk |= SDHCI_CLOCK_CARD_EN;
@@ -1828,6 +1830,9 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
         struct sdhci_host *host = mmc_priv(mmc);
         unsigned long flags;
  
+       if (enable)
+               pm_runtime_get_noresume(host->mmc->parent);
+
         spin_lock_irqsave(&host->lock, flags);
         if (enable)
                 host->flags |= SDHCI_SDIO_IRQ_ENABLED;
@@ -1836,6 +1841,9 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
  
         sdhci_enable_sdio_irq_nolock(host, enable);
         spin_unlock_irqrestore(&host->lock, flags);
+
+       if (!enable)
+               pm_runtime_put_noidle(host->mmc->parent);
  }
  
  static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc,
diff --git a/drivers/mmc/host/ushc.c b/drivers/mmc/host/ushc.c

index d2c386f09d69f4edd20b0ac778624b40f826f1e3..1d843357422e8a398590aa1bcd883ab644b5d56e 100644 (file)
--- a/drivers/mmc/host/ushc.c
+++ b/drivers/mmc/host/ushc.c
@@ -426,6 +426,9 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id
         struct ushc_data *ushc;
         int ret;
  
+       if (intf->cur_altsetting->desc.bNumEndpoints < 1)
+               return -ENODEV;
+
         mmc = mmc_alloc_host(sizeof(struct ushc_data), &intf->dev);
         if (mmc == NULL)
                 return -ENOMEM;
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c

index 66a9dedd10620b34aad39182ecdc2b444df360d3..1517da3ddd7d0b9752f8b7d4ebea8aaa3a6ae298 100644 (file)
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -46,7 +46,7 @@
  
  #include "mtdcore.h"
  
-static struct backing_dev_info *mtd_bdi;
+struct backing_dev_info *mtd_bdi;
  
  #ifdef CONFIG_PM_SLEEP
  
@@ -496,11 +496,9 @@ int add_mtd_device(struct mtd_info *mtd)
          * mtd_device_parse_register() multiple times on the same master MTD,
          * especially with CONFIG_MTD_PARTITIONED_MASTER=y.
          */
-       if (WARN_ONCE(mtd->backing_dev_info, "MTD already registered\n"))
+       if (WARN_ONCE(mtd->dev.type, "MTD already registered\n"))
                 return -EEXIST;
  
-       mtd->backing_dev_info = mtd_bdi;
-
         BUG_ON(mtd->writesize == 0);
         mutex_lock(&mtd_table_mutex);
  
@@ -1775,13 +1773,18 @@ static struct backing_dev_info * __init mtd_bdi_init(char *name)
         struct backing_dev_info *bdi;
         int ret;
  
-       bdi = kzalloc(sizeof(*bdi), GFP_KERNEL);
+       bdi = bdi_alloc(GFP_KERNEL);
         if (!bdi)
                 return ERR_PTR(-ENOMEM);
  
-       ret = bdi_setup_and_register(bdi, name);
+       bdi->name = name;
+       /*
+        * We put '-0' suffix to the name to get the same name format as we
+        * used to get. Since this is called only once, we get a unique name. 
+        */
+       ret = bdi_register(bdi, "%.28s-0", name);
         if (ret)
-               kfree(bdi);
+               bdi_put(bdi);
  
         return ret ? ERR_PTR(ret) : bdi;
  }
@@ -1813,8 +1816,7 @@ static int __init init_mtd(void)
  out_procfs:
         if (proc_mtd)
                 remove_proc_entry("mtd", NULL);
-       bdi_destroy(mtd_bdi);
-       kfree(mtd_bdi);
+       bdi_put(mtd_bdi);
  err_bdi:
         class_unregister(&mtd_class);
  err_reg:
@@ -1828,8 +1830,7 @@ static void __exit cleanup_mtd(void)
         if (proc_mtd)
                 remove_proc_entry("mtd", NULL);
         class_unregister(&mtd_class);
-       bdi_destroy(mtd_bdi);
-       kfree(mtd_bdi);
+       bdi_put(mtd_bdi);
         idr_destroy(&mtd_idr);
  }
  
diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c

index 20c02a3b7417cd3025a6c17142f86a3479e2e3a8..e43fea896d1ed8437a426a5fe0db010ceff7508a 100644 (file)
--- a/drivers/mtd/mtdsuper.c
+++ b/drivers/mtd/mtdsuper.c
@@ -18,6 +18,7 @@
  #include <linux/ctype.h>
  #include <linux/slab.h>
  #include <linux/major.h>
+#include <linux/backing-dev.h>
  
  /*
   * compare superblocks to see if they're equivalent
@@ -38,6 +39,8 @@ static int get_sb_mtd_compare(struct super_block *sb, void *_mtd)
         return 0;
  }
  
+extern struct backing_dev_info *mtd_bdi;
+
  /*
   * mark the superblock by the MTD device it is using
   * - set the device number to be the correct MTD block device for pesuperstence
@@ -49,7 +52,8 @@ static int get_sb_mtd_set(struct super_block *sb, void *_mtd)
  
         sb->s_mtd = mtd;
         sb->s_dev = MKDEV(MTD_BLOCK_MAJOR, mtd->index);
-       sb->s_bdi = mtd->backing_dev_info;
+       sb->s_bdi = bdi_get(mtd_bdi);
+
         return 0;
  }
  
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c

index 1ae872bfc3ba5be342993f4d176b5a2d4390da57..747645c74134de4cd620a284e0f26ca8a61d4991 100644 (file)
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -186,7 +186,7 @@ static inline int write_enable(struct spi_nor *nor)
  }
  
  /*
- * Send write disble instruction to the chip.
+ * Send write disable instruction to the chip.
   */
  static inline int write_disable(struct spi_nor *nor)
  {
diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c

index c80869e60909c91f66ea4a1bb532bd18368e724d..51f2be8889b575cdc1a1f207696d41d620753108 100644 (file)
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -347,7 +347,7 @@ static int ubiblock_init_request(void *data, struct request *req,
         return 0;
  }
  
-static struct blk_mq_ops ubiblock_mq_ops = {
+static const struct blk_mq_ops ubiblock_mq_ops = {
         .queue_rq       = ubiblock_queue_rq,
         .init_request   = ubiblock_init_request,
  };
diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c

index 0134ba32a05784b65d1a0e6d470eee7a857df74c..39712560b4c1b55aa847f0ac69f6815edb11e678 100644 (file)
--- a/drivers/mtd/ubi/upd.c
+++ b/drivers/mtd/ubi/upd.c
@@ -148,11 +148,11 @@ int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol,
                         return err;
         }
  
-       if (bytes == 0) {
-               err = ubi_wl_flush(ubi, UBI_ALL, UBI_ALL);
-               if (err)
-                       return err;
+       err = ubi_wl_flush(ubi, UBI_ALL, UBI_ALL);
+       if (err)
+               return err;
  
+       if (bytes == 0) {
                 err = clear_update_marker(ubi, vol, 0);
                 if (err)
                         return err;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c

index 8a4ba8b88e52f9d5b1ba318e5dbfb53344f6ebca..34481c9be1d192137e2dc7e3c8475184dfa3bec0 100644 (file)
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1104,11 +1104,11 @@ static void bond_compute_features(struct bonding *bond)
                 gso_max_size = min(gso_max_size, slave->dev->gso_max_size);
                 gso_max_segs = min(gso_max_segs, slave->dev->gso_max_segs);
         }
+       bond_dev->hard_header_len = max_hard_header_len;
  
  done:
         bond_dev->vlan_features = vlan_features;
         bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL;
-       bond_dev->hard_header_len = max_hard_header_len;
         bond_dev->gso_max_segs = gso_max_segs;
         netif_set_gso_max_size(bond_dev, gso_max_size);
  
diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c

index 138f5ae75c0bc6fcf912d30975197caedb0e0b3e..4d1fe8d9504234f436a0b9dd739b41089f563225 100644 (file)
--- a/drivers/net/can/ifi_canfd/ifi_canfd.c
+++ b/drivers/net/can/ifi_canfd/ifi_canfd.c
@@ -557,7 +557,7 @@ static int ifi_canfd_poll(struct napi_struct *napi, int quota)
         int work_done = 0;
  
         u32 stcmd = readl(priv->base + IFI_CANFD_STCMD);
-       u32 rxstcmd = readl(priv->base + IFI_CANFD_STCMD);
+       u32 rxstcmd = readl(priv->base + IFI_CANFD_RXSTCMD);
         u32 errctr = readl(priv->base + IFI_CANFD_ERROR_CTR);
  
         /* Handle bus state changes */
diff --git a/drivers/net/can/rcar/rcar_can.c b/drivers/net/can/rcar/rcar_can.c

index caed4e6960f8c77fdca254f5ed4e7ccbe69792fc..11662f479e760ba77f613c90bfc8026b005da3ea 100644 (file)
--- a/drivers/net/can/rcar/rcar_can.c
+++ b/drivers/net/can/rcar/rcar_can.c
@@ -826,8 +826,7 @@ static int rcar_can_probe(struct platform_device *pdev)
  
         devm_can_led_init(ndev);
  
-       dev_info(&pdev->dev, "device registered (regs @ %p, IRQ%d)\n",
-                priv->regs, ndev->irq);
+       dev_info(&pdev->dev, "device registered (IRQ%d)\n", ndev->irq);
  
         return 0;
  fail_candev:
diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig

index 8483a40e7e9ef52327e15e03fa4100c73f68a53f..5f9e0e6301d06ecbe466e2f4eb4e8b7fee8ece27 100644 (file)
--- a/drivers/net/can/usb/Kconfig
+++ b/drivers/net/can/usb/Kconfig
@@ -72,6 +72,8 @@ config CAN_PEAK_USB
           PCAN-USB Pro         dual CAN 2.0b channels USB adapter
           PCAN-USB FD          single CAN-FD channel USB adapter
           PCAN-USB Pro FD      dual CAN-FD channels USB adapter
+         PCAN-Chip USB        CAN-FD to USB stamp module
+         PCAN-USB X6          6 CAN-FD channels USB adapter
  
           (see also http://www.peak-system.com).
  
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c

index 300349fe8dc04945d956ec0dd17a470aa7ddb426..eecee7f8dfb70763aa0e7c3f90f85ae66ff85385 100644 (file)
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -739,13 +739,18 @@ static const struct net_device_ops gs_usb_netdev_ops = {
  static int gs_usb_set_identify(struct net_device *netdev, bool do_identify)
  {
         struct gs_can *dev = netdev_priv(netdev);
-       struct gs_identify_mode imode;
+       struct gs_identify_mode *imode;
         int rc;
  
+       imode = kmalloc(sizeof(*imode), GFP_KERNEL);
+
+       if (!imode)
+               return -ENOMEM;
+
         if (do_identify)
-               imode.mode = GS_CAN_IDENTIFY_ON;
+               imode->mode = GS_CAN_IDENTIFY_ON;
         else
-               imode.mode = GS_CAN_IDENTIFY_OFF;
+               imode->mode = GS_CAN_IDENTIFY_OFF;
  
         rc = usb_control_msg(interface_to_usbdev(dev->iface),
                              usb_sndctrlpipe(interface_to_usbdev(dev->iface),
@@ -755,10 +760,12 @@ static int gs_usb_set_identify(struct net_device *netdev, bool do_identify)
                              USB_RECIP_INTERFACE,
                              dev->channel,
                              0,
-                            &imode,
-                            sizeof(imode),
+                            imode,
+                            sizeof(*imode),
                              100);
  
+       kfree(imode);
+
         return (rc > 0) ? 0 : rc;
  }
  
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c

index 0b0302af3bd2dc3da893ed120afa028d21b219da..57913dbbae0a970f5f28051d10f064792d69333c 100644 (file)
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
@@ -39,6 +39,7 @@ static struct usb_device_id peak_usb_table[] = {
         {USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBPRO_PRODUCT_ID)},
         {USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBFD_PRODUCT_ID)},
         {USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBPROFD_PRODUCT_ID)},
+       {USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBCHIP_PRODUCT_ID)},
         {USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBX6_PRODUCT_ID)},
         {} /* Terminating entry */
  };
@@ -51,6 +52,7 @@ static const struct peak_usb_adapter *const peak_usb_adapters_list[] = {
         &pcan_usb_pro,
         &pcan_usb_fd,
         &pcan_usb_pro_fd,
+       &pcan_usb_chip,
         &pcan_usb_x6,
  };
  
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.h b/drivers/net/can/usb/peak_usb/pcan_usb_core.h

index 3cbfb069893d5ce1d2da16969426433240ad4ce6..c01316cac354b364a422b3312efa7756771223b8 100644 (file)
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.h
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.h
@@ -27,6 +27,7 @@
  #define PCAN_USBPRO_PRODUCT_ID         0x000d
  #define PCAN_USBPROFD_PRODUCT_ID       0x0011
  #define PCAN_USBFD_PRODUCT_ID          0x0012
+#define PCAN_USBCHIP_PRODUCT_ID                0x0013
  #define PCAN_USBX6_PRODUCT_ID          0x0014
  
  #define PCAN_USB_DRIVER_NAME           "peak_usb"
@@ -90,6 +91,7 @@ struct peak_usb_adapter {
  extern const struct peak_usb_adapter pcan_usb;
  extern const struct peak_usb_adapter pcan_usb_pro;
  extern const struct peak_usb_adapter pcan_usb_fd;
+extern const struct peak_usb_adapter pcan_usb_chip;
  extern const struct peak_usb_adapter pcan_usb_pro_fd;
  extern const struct peak_usb_adapter pcan_usb_x6;
  
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c

index 304732550f0a628a7fa9ae9c94e67113d9a869a0..528d3bb4917f1ecb5caf2ef67a71ae8adad51c44 100644 (file)
--- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
@@ -1061,6 +1061,78 @@ const struct peak_usb_adapter pcan_usb_fd = {
         .do_get_berr_counter = pcan_usb_fd_get_berr_counter,
  };
  
+/* describes the PCAN-Chip USB stamp module */
+static const struct can_bittiming_const pcan_usb_chip_const = {
+       .name = "pcan_chip_usb",
+       .tseg1_min = 1,
+       .tseg1_max = (1 << PUCAN_TSLOW_TSGEG1_BITS),
+       .tseg2_min = 1,
+       .tseg2_max = (1 << PUCAN_TSLOW_TSGEG2_BITS),
+       .sjw_max = (1 << PUCAN_TSLOW_SJW_BITS),
+       .brp_min = 1,
+       .brp_max = (1 << PUCAN_TSLOW_BRP_BITS),
+       .brp_inc = 1,
+};
+
+static const struct can_bittiming_const pcan_usb_chip_data_const = {
+       .name = "pcan_chip_usb",
+       .tseg1_min = 1,
+       .tseg1_max = (1 << PUCAN_TFAST_TSGEG1_BITS),
+       .tseg2_min = 1,
+       .tseg2_max = (1 << PUCAN_TFAST_TSGEG2_BITS),
+       .sjw_max = (1 << PUCAN_TFAST_SJW_BITS),
+       .brp_min = 1,
+       .brp_max = (1 << PUCAN_TFAST_BRP_BITS),
+       .brp_inc = 1,
+};
+
+const struct peak_usb_adapter pcan_usb_chip = {
+       .name = "PCAN-Chip USB",
+       .device_id = PCAN_USBCHIP_PRODUCT_ID,
+       .ctrl_count = PCAN_USBFD_CHANNEL_COUNT,
+       .ctrlmode_supported = CAN_CTRLMODE_FD |
+               CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY,
+       .clock = {
+               .freq = PCAN_UFD_CRYSTAL_HZ,
+       },
+       .bittiming_const = &pcan_usb_chip_const,
+       .data_bittiming_const = &pcan_usb_chip_data_const,
+
+       /* size of device private data */
+       .sizeof_dev_private = sizeof(struct pcan_usb_fd_device),
+
+       /* timestamps usage */
+       .ts_used_bits = 32,
+       .ts_period = 1000000, /* calibration period in ts. */
+       .us_per_ts_scale = 1, /* us = (ts * scale) >> shift */
+       .us_per_ts_shift = 0,
+
+       /* give here messages in/out endpoints */
+       .ep_msg_in = PCAN_USBPRO_EP_MSGIN,
+       .ep_msg_out = {PCAN_USBPRO_EP_MSGOUT_0},
+
+       /* size of rx/tx usb buffers */
+       .rx_buffer_size = PCAN_UFD_RX_BUFFER_SIZE,
+       .tx_buffer_size = PCAN_UFD_TX_BUFFER_SIZE,
+
+       /* device callbacks */
+       .intf_probe = pcan_usb_pro_probe,       /* same as PCAN-USB Pro */
+       .dev_init = pcan_usb_fd_init,
+
+       .dev_exit = pcan_usb_fd_exit,
+       .dev_free = pcan_usb_fd_free,
+       .dev_set_bus = pcan_usb_fd_set_bus,
+       .dev_set_bittiming = pcan_usb_fd_set_bittiming_slow,
+       .dev_set_data_bittiming = pcan_usb_fd_set_bittiming_fast,
+       .dev_decode_buf = pcan_usb_fd_decode_buf,
+       .dev_start = pcan_usb_fd_start,
+       .dev_stop = pcan_usb_fd_stop,
+       .dev_restart_async = pcan_usb_fd_restart_async,
+       .dev_encode_msg = pcan_usb_fd_encode_msg,
+
+       .do_get_berr_counter = pcan_usb_fd_get_berr_counter,
+};
+
  /* describes the PCAN-USB Pro FD adapter */
  static const struct can_bittiming_const pcan_usb_pro_fd_const = {
         .name = "pcan_usb_pro_fd",
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c

index 8cf4801994e883be64010934a0413cfbdb86ed16..fa0eece21eef9825e716dd7744c556211a0b41c3 100644 (file)
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -326,6 +326,7 @@ static void b53_get_vlan_entry(struct b53_device *dev, u16 vid,
  
  static void b53_set_forwarding(struct b53_device *dev, int enable)
  {
+       struct dsa_switch *ds = dev->ds;
         u8 mgmt;
  
         b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, &mgmt);
@@ -336,6 +337,15 @@ static void b53_set_forwarding(struct b53_device *dev, int enable)
                 mgmt &= ~SM_SW_FWD_EN;
  
         b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, mgmt);
+
+       /* Include IMP port in dumb forwarding mode when no tagging protocol is
+        * set
+        */
+       if (ds->ops->get_tag_protocol(ds) == DSA_TAG_PROTO_NONE) {
+               b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_CTRL, &mgmt);
+               mgmt |= B53_MII_DUMB_FWDG_EN;
+               b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_CTRL, mgmt);
+       }
  }
  
  static void b53_enable_vlan(struct b53_device *dev, bool enable)
@@ -598,7 +608,8 @@ static void b53_switch_reset_gpio(struct b53_device *dev)
  
  static int b53_switch_reset(struct b53_device *dev)
  {
-       u8 mgmt;
+       unsigned int timeout = 1000;
+       u8 mgmt, reg;
  
         b53_switch_reset_gpio(dev);
  
@@ -607,6 +618,28 @@ static int b53_switch_reset(struct b53_device *dev)
                 b53_write8(dev, B53_CTRL_PAGE, B53_SOFTRESET, 0x00);
         }
  
+       /* This is specific to 58xx devices here; do not use is58xx(), which
+        * covers the larger Starfighter 2 family, including 7445/7278 which
+        * still use this driver as a library and need to perform the reset
+        * earlier.
+        */
+       if (dev->chip_id == BCM58XX_DEVICE_ID) {
+               b53_read8(dev, B53_CTRL_PAGE, B53_SOFTRESET, &reg);
+               reg |= SW_RST | EN_SW_RST | EN_CH_RST;
+               b53_write8(dev, B53_CTRL_PAGE, B53_SOFTRESET, reg);
+
+               do {
+                       b53_read8(dev, B53_CTRL_PAGE, B53_SOFTRESET, &reg);
+                       if (!(reg & SW_RST))
+                               break;
+
+                       usleep_range(1000, 2000);
+               } while (timeout-- > 0);
+
+               if (timeout == 0)
+                       return -ETIMEDOUT;
+       }
+
         b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, &mgmt);
  
         if (!(mgmt & SM_SW_FWD_EN)) {
@@ -1731,7 +1764,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
                 .vlans  = 4096,
                 .enabled_ports = 0x1ff,
                 .arl_entries = 4,
-               .cpu_port = B53_CPU_PORT_25,
+               .cpu_port = B53_CPU_PORT,
                 .vta_regs = B53_VTA_REGS,
                 .duplex_reg = B53_DUPLEX_STAT_GE,
                 .jumbo_pm_reg = B53_JUMBO_PORT_MASK,
diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h

index 9fd24c418fa4256e8517d5dec1dfa97681ba72db..e5c86d44667af1fd9f68a550f169a322989f39bd 100644 (file)
--- a/drivers/net/dsa/b53/b53_regs.h
+++ b/drivers/net/dsa/b53/b53_regs.h
@@ -104,6 +104,10 @@
  #define  B53_UC_FWD_EN                 BIT(6)
  #define  B53_MC_FWD_EN                 BIT(7)
  
+/* Switch control (8 bit) */
+#define B53_SWITCH_CTRL                        0x22
+#define  B53_MII_DUMB_FWDG_EN          BIT(6)
+
  /* (16 bit) */
  #define B53_UC_FLOOD_MASK              0x32
  #define B53_MC_FLOOD_MASK              0x34
@@ -139,6 +143,7 @@
  /* Software reset register (8 bit) */
  #define B53_SOFTRESET                  0x79
  #define   SW_RST                       BIT(7)
+#define   EN_CH_RST                    BIT(6)
  #define   EN_SW_RST                    BIT(4)
  
  /* Fast Aging Control register (8 bit) */
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h

index 8a280e7d66bddc998763288a5756b2ae6a7f70bc..127adbeefb105cc031f3782b534d175f29fb7143 100644 (file)
--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
@@ -984,29 +984,29 @@
  #define XP_ECC_CNT1_DESC_DED_WIDTH             8
  #define XP_ECC_CNT1_DESC_SEC_INDEX             0
  #define XP_ECC_CNT1_DESC_SEC_WIDTH             8
-#define XP_ECC_IER_DESC_DED_INDEX              0
+#define XP_ECC_IER_DESC_DED_INDEX              5
  #define XP_ECC_IER_DESC_DED_WIDTH              1
-#define XP_ECC_IER_DESC_SEC_INDEX              1
+#define XP_ECC_IER_DESC_SEC_INDEX              4
  #define XP_ECC_IER_DESC_SEC_WIDTH              1
-#define XP_ECC_IER_RX_DED_INDEX                        2
+#define XP_ECC_IER_RX_DED_INDEX                        3
  #define XP_ECC_IER_RX_DED_WIDTH                        1
-#define XP_ECC_IER_RX_SEC_INDEX                        3
+#define XP_ECC_IER_RX_SEC_INDEX                        2
  #define XP_ECC_IER_RX_SEC_WIDTH                        1
-#define XP_ECC_IER_TX_DED_INDEX                        4
+#define XP_ECC_IER_TX_DED_INDEX                        1
  #define XP_ECC_IER_TX_DED_WIDTH                        1
-#define XP_ECC_IER_TX_SEC_INDEX                        5
+#define XP_ECC_IER_TX_SEC_INDEX                        0
  #define XP_ECC_IER_TX_SEC_WIDTH                        1
-#define XP_ECC_ISR_DESC_DED_INDEX              0
+#define XP_ECC_ISR_DESC_DED_INDEX              5
  #define XP_ECC_ISR_DESC_DED_WIDTH              1
-#define XP_ECC_ISR_DESC_SEC_INDEX              1
+#define XP_ECC_ISR_DESC_SEC_INDEX              4
  #define XP_ECC_ISR_DESC_SEC_WIDTH              1
-#define XP_ECC_ISR_RX_DED_INDEX                        2
+#define XP_ECC_ISR_RX_DED_INDEX                        3
  #define XP_ECC_ISR_RX_DED_WIDTH                        1
-#define XP_ECC_ISR_RX_SEC_INDEX                        3
+#define XP_ECC_ISR_RX_SEC_INDEX                        2
  #define XP_ECC_ISR_RX_SEC_WIDTH                        1
-#define XP_ECC_ISR_TX_DED_INDEX                        4
+#define XP_ECC_ISR_TX_DED_INDEX                        1
  #define XP_ECC_ISR_TX_DED_WIDTH                        1
-#define XP_ECC_ISR_TX_SEC_INDEX                        5
+#define XP_ECC_ISR_TX_SEC_INDEX                        0
  #define XP_ECC_ISR_TX_SEC_WIDTH                        1
  #define XP_I2C_MUTEX_BUSY_INDEX                        31
  #define XP_I2C_MUTEX_BUSY_WIDTH                        1
@@ -1148,8 +1148,8 @@
  #define RX_PACKET_ATTRIBUTES_CSUM_DONE_WIDTH   1
  #define RX_PACKET_ATTRIBUTES_VLAN_CTAG_INDEX   1
  #define RX_PACKET_ATTRIBUTES_VLAN_CTAG_WIDTH   1
-#define RX_PACKET_ATTRIBUTES_INCOMPLETE_INDEX  2
-#define RX_PACKET_ATTRIBUTES_INCOMPLETE_WIDTH  1
+#define RX_PACKET_ATTRIBUTES_LAST_INDEX                2
+#define RX_PACKET_ATTRIBUTES_LAST_WIDTH                1
  #define RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_INDEX        3
  #define RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_WIDTH        1
  #define RX_PACKET_ATTRIBUTES_CONTEXT_INDEX     4
@@ -1158,6 +1158,8 @@
  #define RX_PACKET_ATTRIBUTES_RX_TSTAMP_WIDTH   1
  #define RX_PACKET_ATTRIBUTES_RSS_HASH_INDEX    6
  #define RX_PACKET_ATTRIBUTES_RSS_HASH_WIDTH    1
+#define RX_PACKET_ATTRIBUTES_FIRST_INDEX       7
+#define RX_PACKET_ATTRIBUTES_FIRST_WIDTH       1
  
  #define RX_NORMAL_DESC0_OVT_INDEX              0
  #define RX_NORMAL_DESC0_OVT_WIDTH              16
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c

index 937f37a5dcb2cded9963b3732bddb08557ad50ba..24a687ce4388182716438770c49e2dca7ff81114 100644 (file)
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
@@ -1896,10 +1896,15 @@ static int xgbe_dev_read(struct xgbe_channel *channel)
  
         /* Get the header length */
         if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, FD)) {
+               XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
+                              FIRST, 1);
                 rdata->rx.hdr_len = XGMAC_GET_BITS_LE(rdesc->desc2,
                                                       RX_NORMAL_DESC2, HL);
                 if (rdata->rx.hdr_len)
                         pdata->ext_stats.rx_split_header_packets++;
+       } else {
+               XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
+                              FIRST, 0);
         }
  
         /* Get the RSS hash */
@@ -1922,19 +1927,16 @@ static int xgbe_dev_read(struct xgbe_channel *channel)
                 }
         }
  
-       /* Get the packet length */
-       rdata->rx.len = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, PL);
-
-       if (!XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, LD)) {
-               /* Not all the data has been transferred for this packet */
-               XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
-                              INCOMPLETE, 1);
+       /* Not all the data has been transferred for this packet */
+       if (!XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, LD))
                 return 0;
-       }
  
         /* This is the last of the data for this packet */
         XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
-                      INCOMPLETE, 0);
+                      LAST, 1);
+
+       /* Get the packet length */
+       rdata->rx.len = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, PL);
  
         /* Set checksum done indicator as appropriate */
         if (netdev->features & NETIF_F_RXCSUM)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c

index 248f60d171a5a0ce76744a95e2d59039939e6538..a713abd9d03e63aea8aab96c4f5569d1d6b3e44d 100644 (file)
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1971,13 +1971,12 @@ static struct sk_buff *xgbe_create_skb(struct xgbe_prv_data *pdata,
  {
         struct sk_buff *skb;
         u8 *packet;
-       unsigned int copy_len;
  
         skb = napi_alloc_skb(napi, rdata->rx.hdr.dma_len);
         if (!skb)
                 return NULL;
  
-       /* Start with the header buffer which may contain just the header
+       /* Pull in the header buffer which may contain just the header
          * or the header plus data
          */
         dma_sync_single_range_for_cpu(pdata->dev, rdata->rx.hdr.dma_base,
@@ -1986,30 +1985,49 @@ static struct sk_buff *xgbe_create_skb(struct xgbe_prv_data *pdata,
  
         packet = page_address(rdata->rx.hdr.pa.pages) +
                  rdata->rx.hdr.pa.pages_offset;
-       copy_len = (rdata->rx.hdr_len) ? rdata->rx.hdr_len : len;
-       copy_len = min(rdata->rx.hdr.dma_len, copy_len);
-       skb_copy_to_linear_data(skb, packet, copy_len);
-       skb_put(skb, copy_len);
-
-       len -= copy_len;
-       if (len) {
-               /* Add the remaining data as a frag */
-               dma_sync_single_range_for_cpu(pdata->dev,
-                                             rdata->rx.buf.dma_base,
-                                             rdata->rx.buf.dma_off,
-                                             rdata->rx.buf.dma_len,
-                                             DMA_FROM_DEVICE);
-
-               skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
-                               rdata->rx.buf.pa.pages,
-                               rdata->rx.buf.pa.pages_offset,
-                               len, rdata->rx.buf.dma_len);
-               rdata->rx.buf.pa.pages = NULL;
-       }
+       skb_copy_to_linear_data(skb, packet, len);
+       skb_put(skb, len);
  
         return skb;
  }
  
+static unsigned int xgbe_rx_buf1_len(struct xgbe_ring_data *rdata,
+                                    struct xgbe_packet_data *packet)
+{
+       /* Always zero if not the first descriptor */
+       if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, FIRST))
+               return 0;
+
+       /* First descriptor with split header, return header length */
+       if (rdata->rx.hdr_len)
+               return rdata->rx.hdr_len;
+
+       /* First descriptor but not the last descriptor and no split header,
+        * so the full buffer was used
+        */
+       if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, LAST))
+               return rdata->rx.hdr.dma_len;
+
+       /* First descriptor and last descriptor and no split header, so
+        * calculate how much of the buffer was used
+        */
+       return min_t(unsigned int, rdata->rx.hdr.dma_len, rdata->rx.len);
+}
+
+static unsigned int xgbe_rx_buf2_len(struct xgbe_ring_data *rdata,
+                                    struct xgbe_packet_data *packet,
+                                    unsigned int len)
+{
+       /* Always the full buffer if not the last descriptor */
+       if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, LAST))
+               return rdata->rx.buf.dma_len;
+
+       /* Last descriptor so calculate how much of the buffer was used
+        * for the last bit of data
+        */
+       return rdata->rx.len - len;
+}
+
  static int xgbe_tx_poll(struct xgbe_channel *channel)
  {
         struct xgbe_prv_data *pdata = channel->pdata;
@@ -2092,8 +2110,8 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget)
         struct napi_struct *napi;
         struct sk_buff *skb;
         struct skb_shared_hwtstamps *hwtstamps;
-       unsigned int incomplete, error, context_next, context;
-       unsigned int len, rdesc_len, max_len;
+       unsigned int last, error, context_next, context;
+       unsigned int len, buf1_len, buf2_len, max_len;
         unsigned int received = 0;
         int packet_count = 0;
  
@@ -2103,7 +2121,7 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget)
         if (!ring)
                 return 0;
  
-       incomplete = 0;
+       last = 0;
         context_next = 0;
  
         napi = (pdata->per_channel_irq) ? &channel->napi : &pdata->napi;
@@ -2137,9 +2155,8 @@ read_again:
                 received++;
                 ring->cur++;
  
-               incomplete = XGMAC_GET_BITS(packet->attributes,
-                                           RX_PACKET_ATTRIBUTES,
-                                           INCOMPLETE);
+               last = XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
+                                     LAST);
                 context_next = XGMAC_GET_BITS(packet->attributes,
                                               RX_PACKET_ATTRIBUTES,
                                               CONTEXT_NEXT);
@@ -2148,7 +2165,7 @@ read_again:
                                          CONTEXT);
  
                 /* Earlier error, just drain the remaining data */
-               if ((incomplete || context_next) && error)
+               if ((!last || context_next) && error)
                         goto read_again;
  
                 if (error || packet->errors) {
@@ -2160,16 +2177,22 @@ read_again:
                 }
  
                 if (!context) {
-                       /* Length is cumulative, get this descriptor's length */
-                       rdesc_len = rdata->rx.len - len;
-                       len += rdesc_len;
+                       /* Get the data length in the descriptor buffers */
+                       buf1_len = xgbe_rx_buf1_len(rdata, packet);
+                       len += buf1_len;
+                       buf2_len = xgbe_rx_buf2_len(rdata, packet, len);
+                       len += buf2_len;
  
-                       if (rdesc_len && !skb) {
+                       if (!skb) {
                                 skb = xgbe_create_skb(pdata, napi, rdata,
-                                                     rdesc_len);
-                               if (!skb)
+                                                     buf1_len);
+                               if (!skb) {
                                         error = 1;
-                       } else if (rdesc_len) {
+                                       goto skip_data;
+                               }
+                       }
+
+                       if (buf2_len) {
                                 dma_sync_single_range_for_cpu(pdata->dev,
                                                         rdata->rx.buf.dma_base,
                                                         rdata->rx.buf.dma_off,
@@ -2179,13 +2202,14 @@ read_again:
                                 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
                                                 rdata->rx.buf.pa.pages,
                                                 rdata->rx.buf.pa.pages_offset,
-                                               rdesc_len,
+                                               buf2_len,
                                                 rdata->rx.buf.dma_len);
                                 rdata->rx.buf.pa.pages = NULL;
                         }
                 }
  
-               if (incomplete || context_next)
+skip_data:
+               if (!last || context_next)
                         goto read_again;
  
                 if (!skb)
@@ -2243,7 +2267,7 @@ next_packet:
         }
  
         /* Check if we need to save state before leaving */
-       if (received && (incomplete || context_next)) {
+       if (received && (!last || context_next)) {
                 rdata = XGBE_GET_DESC_DATA(ring, ring->cur);
                 rdata->state_saved = 1;
                 rdata->state.skb = skb;
@@ -2272,10 +2296,7 @@ static int xgbe_one_poll(struct napi_struct *napi, int budget)
         processed = xgbe_rx_poll(channel, budget);
  
         /* If we processed everything, we are done */
-       if (processed < budget) {
-               /* Turn off polling */
-               napi_complete_done(napi, processed);
-
+       if ((processed < budget) && napi_complete_done(napi, processed)) {
                 /* Enable Tx and Rx interrupts */
                 if (pdata->channel_irq_mode)
                         xgbe_enable_rx_tx_int(pdata, channel);
@@ -2317,10 +2338,7 @@ static int xgbe_all_poll(struct napi_struct *napi, int budget)
         } while ((processed < budget) && (processed != last_processed));
  
         /* If we processed everything, we are done */
-       if (processed < budget) {
-               /* Turn off polling */
-               napi_complete_done(napi, processed);
-
+       if ((processed < budget) && napi_complete_done(napi, processed)) {
                 /* Enable Tx and Rx interrupts */
                 xgbe_enable_rx_tx_ints(pdata);
         }
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c

index dad63623be6a93672974fb43ee50b518fff59ab5..5d6c40d86775dd0189de49173210f4d38ee6934c 100644 (file)
--- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
@@ -98,11 +98,7 @@ static int aq_ndev_change_mtu(struct net_device *ndev, int new_mtu)
  
         if (err < 0)
                 goto err_exit;
-
-       if (netif_running(ndev)) {
-               aq_ndev_close(ndev);
-               aq_ndev_open(ndev);
-       }
+       ndev->mtu = new_mtu;
  
  err_exit:
         return err;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c

index ee78444bfb8851214709920795e26d658c4ca9b5..cdb02991f249c6354b7095d9d777316617c2be42 100644 (file)
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -487,6 +487,9 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self,
                 dx_buff->mss = skb_shinfo(skb)->gso_size;
                 dx_buff->is_txc = 1U;
  
+               dx_buff->is_ipv6 =
+                       (ip_hdr(skb)->version == 6) ? 1U : 0U;
+
                 dx = aq_ring_next_dx(ring, dx);
                 dx_buff = &ring->buff_ring[dx];
                 ++ret;
@@ -510,10 +513,22 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self,
         if (skb->ip_summed == CHECKSUM_PARTIAL) {
                 dx_buff->is_ip_cso = (htons(ETH_P_IP) == skb->protocol) ?
                         1U : 0U;
-               dx_buff->is_tcp_cso =
-                       (ip_hdr(skb)->protocol == IPPROTO_TCP) ? 1U : 0U;
-               dx_buff->is_udp_cso =
-                       (ip_hdr(skb)->protocol == IPPROTO_UDP) ? 1U : 0U;
+
+               if (ip_hdr(skb)->version == 4) {
+                       dx_buff->is_tcp_cso =
+                               (ip_hdr(skb)->protocol == IPPROTO_TCP) ?
+                                       1U : 0U;
+                       dx_buff->is_udp_cso =
+                               (ip_hdr(skb)->protocol == IPPROTO_UDP) ?
+                                       1U : 0U;
+               } else if (ip_hdr(skb)->version == 6) {
+                       dx_buff->is_tcp_cso =
+                               (ipv6_hdr(skb)->nexthdr == NEXTHDR_TCP) ?
+                                       1U : 0U;
+                       dx_buff->is_udp_cso =
+                               (ipv6_hdr(skb)->nexthdr == NEXTHDR_UDP) ?
+                                       1U : 0U;
+               }
         }
  
         for (; nr_frags--; ++frag_count) {
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c

index 581de71a958a3519682272a49dec55432e9eebed..4c6c882c6a1c424238473ea40ecf9f0ebf7cee28 100644 (file)
--- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
@@ -213,9 +213,9 @@ void aq_pci_func_free_irqs(struct aq_pci_func_s *self)
                 if (!((1U << i) & self->msix_entry_mask))
                         continue;
  
-               free_irq(pci_irq_vector(pdev, i), self->aq_vec[i]);
                 if (pdev->msix_enabled)
                         irq_set_affinity_hint(pci_irq_vector(pdev, i), NULL);
+               free_irq(pci_irq_vector(pdev, i), self->aq_vec[i]);
                 self->msix_entry_mask &= ~(1U << i);
         }
  }
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c

index 0358e6072d45ab94181409de0dd17e80106fbf2d..3a8a4aa13687ff42510e7a260b1de6715a55d8d5 100644 (file)
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -101,6 +101,7 @@ int aq_ring_init(struct aq_ring_s *self)
         self->hw_head = 0;
         self->sw_head = 0;
         self->sw_tail = 0;
+       spin_lock_init(&self->header.lock);
         return 0;
  }
  
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h

index 2572546450685d25c8ca4beb71150c356e7c82f6..eecd6d1c4d731a4e648e6811a5615498ee3a8965 100644 (file)
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
@@ -58,7 +58,8 @@ struct __packed aq_ring_buff_s {
                         u8 len_l2;
                         u8 len_l3;
                         u8 len_l4;
-                       u8 rsvd2;
+                       u8 is_ipv6:1;
+                       u8 rsvd2:7;
                         u32 len_pkt;
                 };
         };
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c

index a2b746a2dd50b8825250f5ab1de8a01b5afb32c9..4ee15ff06a448b72dbd6763427d9d02dfadad268 100644 (file)
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
@@ -433,6 +433,9 @@ static int hw_atl_a0_hw_ring_tx_xmit(struct aq_hw_s *self,
                                     buff->len_l3 +
                                     buff->len_l2);
                         is_gso = true;
+
+                       if (buff->is_ipv6)
+                               txd->ctl |= HW_ATL_A0_TXD_CTL_CMD_IPV6;
                 } else {
                         buff_pa_len = buff->len;
  
@@ -458,6 +461,7 @@ static int hw_atl_a0_hw_ring_tx_xmit(struct aq_hw_s *self,
                         if (unlikely(buff->is_eop)) {
                                 txd->ctl |= HW_ATL_A0_TXD_CTL_EOP;
                                 txd->ctl |= HW_ATL_A0_TXD_CTL_CMD_WB;
+                               is_gso = false;
                         }
                 }
  
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h

index 1093ea18823a32fc6cb441ab45b0b3a9a82fecc2..0592a0330cf0d601f4b9a27f0d349aeccc66f833 100644 (file)
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h
@@ -137,6 +137,7 @@ static struct aq_hw_caps_s hw_atl_a0_hw_caps_ = {
         .tx_rings = HW_ATL_A0_TX_RINGS,
         .rx_rings = HW_ATL_A0_RX_RINGS,
         .hw_features = NETIF_F_HW_CSUM |
+                       NETIF_F_RXCSUM |
                         NETIF_F_RXHASH |
                         NETIF_F_SG |
                         NETIF_F_TSO,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c

index cab2931dab9ac354821e4a30bb8517ccfe0041eb..42150708191dbf67d91b33218a2a275e9b5fd45d 100644 (file)
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -471,6 +471,9 @@ static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self,
                                     buff->len_l3 +
                                     buff->len_l2);
                         is_gso = true;
+
+                       if (buff->is_ipv6)
+                               txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_IPV6;
                 } else {
                         buff_pa_len = buff->len;
  
@@ -496,6 +499,7 @@ static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self,
                         if (unlikely(buff->is_eop)) {
                                 txd->ctl |= HW_ATL_B0_TXD_CTL_EOP;
                                 txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_WB;
+                               is_gso = false;
                         }
                 }
  
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h

index 8bdee3ddd5a0bd9044063caf5686fefcc6b5465f..f3957e9303405c3f26c9f7f7d6507009d5804534 100644 (file)
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
@@ -188,6 +188,7 @@ static struct aq_hw_caps_s hw_atl_b0_hw_caps_ = {
         .tx_rings = HW_ATL_B0_TX_RINGS,
         .rx_rings = HW_ATL_B0_RX_RINGS,
         .hw_features = NETIF_F_HW_CSUM |
+                       NETIF_F_RXCSUM |
                         NETIF_F_RXHASH |
                         NETIF_F_SG |
                         NETIF_F_TSO |
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h

index 0a23034bbe3ff8d392483e7a5a201caf30c76526..352beff796ae5b090d8e3fa831078cc7182d3a2c 100644 (file)
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -2277,7 +2277,7 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id,
                                  GENERAL_ATTEN_OFFSET(LATCHED_ATTN_RBCP) | \
                                  GENERAL_ATTEN_OFFSET(LATCHED_ATTN_RSVD_GRC))
  
-#define HW_INTERRUT_ASSERT_SET_0 \
+#define HW_INTERRUPT_ASSERT_SET_0 \
                                 (AEU_INPUTS_ATTN_BITS_TSDM_HW_INTERRUPT | \
                                  AEU_INPUTS_ATTN_BITS_TCM_HW_INTERRUPT | \
                                  AEU_INPUTS_ATTN_BITS_TSEMI_HW_INTERRUPT | \
@@ -2290,7 +2290,7 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id,
                                  AEU_INPUTS_ATTN_BITS_TSEMI_PARITY_ERROR |\
                                  AEU_INPUTS_ATTN_BITS_TCM_PARITY_ERROR |\
                                  AEU_INPUTS_ATTN_BITS_PBCLIENT_PARITY_ERROR)
-#define HW_INTERRUT_ASSERT_SET_1 \
+#define HW_INTERRUPT_ASSERT_SET_1 \
                                 (AEU_INPUTS_ATTN_BITS_QM_HW_INTERRUPT | \
                                  AEU_INPUTS_ATTN_BITS_TIMERS_HW_INTERRUPT | \
                                  AEU_INPUTS_ATTN_BITS_XSDM_HW_INTERRUPT | \
@@ -2318,7 +2318,7 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id,
                                  AEU_INPUTS_ATTN_BITS_UPB_PARITY_ERROR | \
                                  AEU_INPUTS_ATTN_BITS_CSDM_PARITY_ERROR |\
                                  AEU_INPUTS_ATTN_BITS_CCM_PARITY_ERROR)
-#define HW_INTERRUT_ASSERT_SET_2 \
+#define HW_INTERRUPT_ASSERT_SET_2 \
                                 (AEU_INPUTS_ATTN_BITS_CSEMI_HW_INTERRUPT | \
                                  AEU_INPUTS_ATTN_BITS_CDU_HW_INTERRUPT | \
                                  AEU_INPUTS_ATTN_BITS_DMAE_HW_INTERRUPT | \
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c

index d8d06fdfc42b9d685244513c1542b69bd78d7ca9..a851f95c307a3331a889972bc3c60273219f8a7b 100644 (file)
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -4166,14 +4166,14 @@ static void bnx2x_attn_int_deasserted0(struct bnx2x *bp, u32 attn)
                 bnx2x_release_phy_lock(bp);
         }
  
-       if (attn & HW_INTERRUT_ASSERT_SET_0) {
+       if (attn & HW_INTERRUPT_ASSERT_SET_0) {
  
                 val = REG_RD(bp, reg_offset);
-               val &= ~(attn & HW_INTERRUT_ASSERT_SET_0);
+               val &= ~(attn & HW_INTERRUPT_ASSERT_SET_0);
                 REG_WR(bp, reg_offset, val);
  
                 BNX2X_ERR("FATAL HW block attention set0 0x%x\n",
-                         (u32)(attn & HW_INTERRUT_ASSERT_SET_0));
+                         (u32)(attn & HW_INTERRUPT_ASSERT_SET_0));
                 bnx2x_panic();
         }
  }
@@ -4191,7 +4191,7 @@ static void bnx2x_attn_int_deasserted1(struct bnx2x *bp, u32 attn)
                         BNX2X_ERR("FATAL error from DORQ\n");
         }
  
-       if (attn & HW_INTERRUT_ASSERT_SET_1) {
+       if (attn & HW_INTERRUPT_ASSERT_SET_1) {
  
                 int port = BP_PORT(bp);
                 int reg_offset;
@@ -4200,11 +4200,11 @@ static void bnx2x_attn_int_deasserted1(struct bnx2x *bp, u32 attn)
                                      MISC_REG_AEU_ENABLE1_FUNC_0_OUT_1);
  
                 val = REG_RD(bp, reg_offset);
-               val &= ~(attn & HW_INTERRUT_ASSERT_SET_1);
+               val &= ~(attn & HW_INTERRUPT_ASSERT_SET_1);
                 REG_WR(bp, reg_offset, val);
  
                 BNX2X_ERR("FATAL HW block attention set1 0x%x\n",
-                         (u32)(attn & HW_INTERRUT_ASSERT_SET_1));
+                         (u32)(attn & HW_INTERRUPT_ASSERT_SET_1));
                 bnx2x_panic();
         }
  }
@@ -4235,7 +4235,7 @@ static void bnx2x_attn_int_deasserted2(struct bnx2x *bp, u32 attn)
                 }
         }
  
-       if (attn & HW_INTERRUT_ASSERT_SET_2) {
+       if (attn & HW_INTERRUPT_ASSERT_SET_2) {
  
                 int port = BP_PORT(bp);
                 int reg_offset;
@@ -4244,11 +4244,11 @@ static void bnx2x_attn_int_deasserted2(struct bnx2x *bp, u32 attn)
                                      MISC_REG_AEU_ENABLE1_FUNC_0_OUT_2);
  
                 val = REG_RD(bp, reg_offset);
-               val &= ~(attn & HW_INTERRUT_ASSERT_SET_2);
+               val &= ~(attn & HW_INTERRUPT_ASSERT_SET_2);
                 REG_WR(bp, reg_offset, val);
  
                 BNX2X_ERR("FATAL HW block attention set2 0x%x\n",
-                         (u32)(attn & HW_INTERRUT_ASSERT_SET_2));
+                         (u32)(attn & HW_INTERRUPT_ASSERT_SET_2));
                 bnx2x_panic();
         }
  }
@@ -13292,17 +13292,15 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev,
         dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
                 NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_HIGHDMA;
  
-       /* VF with OLD Hypervisor or old PF do not support filtering */
         if (IS_PF(bp)) {
                 if (chip_is_e1x)
                         bp->accept_any_vlan = true;
                 else
                         dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
-#ifdef CONFIG_BNX2X_SRIOV
-       } else if (bp->acquire_resp.pfdev_info.pf_cap & PFVF_CAP_VLAN_FILTER) {
-               dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
-#endif
         }
+       /* For VF we'll know whether to enable VLAN filtering after
+        * getting a response to CHANNEL_TLV_ACQUIRE from PF.
+        */
  
         dev->features |= dev->hw_features | NETIF_F_HW_VLAN_CTAG_RX;
         dev->features |= NETIF_F_HIGHDMA;
@@ -13738,7 +13736,7 @@ static int bnx2x_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
         if (!netif_running(bp->dev)) {
                 DP(BNX2X_MSG_PTP,
                    "PTP adjfreq called while the interface is down\n");
-               return -EFAULT;
+               return -ENETDOWN;
         }
  
         if (ppb < 0) {
@@ -13797,6 +13795,12 @@ static int bnx2x_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
  {
         struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info);
  
+       if (!netif_running(bp->dev)) {
+               DP(BNX2X_MSG_PTP,
+                  "PTP adjtime called while the interface is down\n");
+               return -ENETDOWN;
+       }
+
         DP(BNX2X_MSG_PTP, "PTP adjtime called, delta = %llx\n", delta);
  
         timecounter_adjtime(&bp->timecounter, delta);
@@ -13809,6 +13813,12 @@ static int bnx2x_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
         struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info);
         u64 ns;
  
+       if (!netif_running(bp->dev)) {
+               DP(BNX2X_MSG_PTP,
+                  "PTP gettime called while the interface is down\n");
+               return -ENETDOWN;
+       }
+
         ns = timecounter_read(&bp->timecounter);
  
         DP(BNX2X_MSG_PTP, "PTP gettime called, ns = %llu\n", ns);
@@ -13824,6 +13834,12 @@ static int bnx2x_ptp_settime(struct ptp_clock_info *ptp,
         struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info);
         u64 ns;
  
+       if (!netif_running(bp->dev)) {
+               DP(BNX2X_MSG_PTP,
+                  "PTP settime called while the interface is down\n");
+               return -ENETDOWN;
+       }
+
         ns = timespec64_to_ns(ts);
  
         DP(BNX2X_MSG_PTP, "PTP settime called, ns = %llu\n", ns);
@@ -13991,6 +14007,14 @@ static int bnx2x_init_one(struct pci_dev *pdev,
                 rc = bnx2x_vfpf_acquire(bp, tx_count, rx_count);
                 if (rc)
                         goto init_one_freemem;
+
+#ifdef CONFIG_BNX2X_SRIOV
+               /* VF with OLD Hypervisor or old PF do not support filtering */
+               if (bp->acquire_resp.pfdev_info.pf_cap & PFVF_CAP_VLAN_FILTER) {
+                       dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+                       dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+               }
+#endif
         }
  
         /* Enable SRIOV if capability found in configuration space */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c

index 6fad22adbbb9e72fda1208d2b689ef371ac77d09..bdfd53b46bc568286ac9debc70bb14563329040b 100644 (file)
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -434,7 +434,9 @@ static int bnx2x_vf_mac_vlan_config(struct bnx2x *bp,
  
         /* Add/Remove the filter */
         rc = bnx2x_config_vlan_mac(bp, &ramrod);
-       if (rc && rc != -EEXIST) {
+       if (rc == -EEXIST)
+               return 0;
+       if (rc) {
                 BNX2X_ERR("Failed to %s %s\n",
                           filter->add ? "add" : "delete",
                           (filter->type == BNX2X_VF_FILTER_VLAN_MAC) ?
@@ -444,6 +446,8 @@ static int bnx2x_vf_mac_vlan_config(struct bnx2x *bp,
                 return rc;
         }
  
+       filter->applied = true;
+
         return 0;
  }
  
@@ -469,8 +473,10 @@ int bnx2x_vf_mac_vlan_config_list(struct bnx2x *bp, struct bnx2x_virtf *vf,
         /* Rollback if needed */
         if (i != filters->count) {
                 BNX2X_ERR("Managed only %d/%d filters - rolling back\n",
-                         i, filters->count + 1);
+                         i, filters->count);
                 while (--i >= 0) {
+                       if (!filters->filters[i].applied)
+                               continue;
                         filters->filters[i].add = !filters->filters[i].add;
                         bnx2x_vf_mac_vlan_config(bp, vf, qid,
                                                  &filters->filters[i],
@@ -1899,7 +1905,8 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp)
                         continue;
                 }
  
-               DP(BNX2X_MSG_IOV, "add addresses for vf %d\n", vf->abs_vfid);
+               DP_AND((BNX2X_MSG_IOV | BNX2X_MSG_STATS),
+                      "add addresses for vf %d\n", vf->abs_vfid);
                 for_each_vfq(vf, j) {
                         struct bnx2x_vf_queue *rxq = vfq_get(vf, j);
  
@@ -1920,11 +1927,12 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp)
                                 cpu_to_le32(U64_HI(q_stats_addr));
                         cur_query_entry->address.lo =
                                 cpu_to_le32(U64_LO(q_stats_addr));
-                       DP(BNX2X_MSG_IOV,
-                          "added address %x %x for vf %d queue %d client %d\n",
-                          cur_query_entry->address.hi,
-                          cur_query_entry->address.lo, cur_query_entry->funcID,
-                          j, cur_query_entry->index);
+                       DP_AND((BNX2X_MSG_IOV | BNX2X_MSG_STATS),
+                              "added address %x %x for vf %d queue %d client %d\n",
+                              cur_query_entry->address.hi,
+                              cur_query_entry->address.lo,
+                              cur_query_entry->funcID,
+                              j, cur_query_entry->index);
                         cur_query_entry++;
                         cur_data_offset += sizeof(struct per_queue_stats);
                         stats_count++;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h

index 7a6d406f4c111774ea606c98253608273ae8787b..888d0b6632e86f2f7ab7e2f9e605be87fa4c7061 100644 (file)
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -114,6 +114,7 @@ struct bnx2x_vf_mac_vlan_filter {
         (BNX2X_VF_FILTER_MAC | BNX2X_VF_FILTER_VLAN) /*shortcut*/
  
         bool add;
+       bool applied;
         u8 *mac;
         u16 vid;
  };
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c

index bfae300cf25ff881292dc36ad56e51e37132cd76..76a4668c50fe98edb3d6e955351a357a3e3f0608 100644 (file)
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -868,7 +868,7 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev)
         struct bnx2x *bp = netdev_priv(dev);
         struct vfpf_set_q_filters_tlv *req = &bp->vf2pf_mbox->req.set_q_filters;
         struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp;
-       int rc, i = 0;
+       int rc = 0, i = 0;
         struct netdev_hw_addr *ha;
  
         if (bp->state != BNX2X_STATE_OPEN) {
@@ -883,6 +883,15 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev)
         /* Get Rx mode requested */
         DP(NETIF_MSG_IFUP, "dev->flags = %x\n", dev->flags);
  
+       /* We support PFVF_MAX_MULTICAST_PER_VF mcast addresses tops */
+       if (netdev_mc_count(dev) > PFVF_MAX_MULTICAST_PER_VF) {
+               DP(NETIF_MSG_IFUP,
+                  "VF supports not more than %d multicast MAC addresses\n",
+                  PFVF_MAX_MULTICAST_PER_VF);
+               rc = -EINVAL;
+               goto out;
+       }
+
         netdev_for_each_mc_addr(ha, dev) {
                 DP(NETIF_MSG_IFUP, "Adding mcast MAC: %pM\n",
                    bnx2x_mc_addr(ha));
@@ -890,16 +899,6 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev)
                 i++;
         }
  
-       /* We support four PFVF_MAX_MULTICAST_PER_VF mcast
-         * addresses tops
-         */
-       if (i >= PFVF_MAX_MULTICAST_PER_VF) {
-               DP(NETIF_MSG_IFUP,
-                  "VF supports not more than %d multicast MAC addresses\n",
-                  PFVF_MAX_MULTICAST_PER_VF);
-               return -EINVAL;
-       }
-
         req->n_multicast = i;
         req->flags |= VFPF_SET_Q_FILTERS_MULTICAST_CHANGED;
         req->vf_qid = 0;
@@ -924,7 +923,7 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev)
  out:
         bnx2x_vfpf_finalize(bp, &req->first_tlv);
  
-       return 0;
+       return rc;
  }
  
  /* request pf to add a vlan for the vf */
@@ -1778,6 +1777,23 @@ static int bnx2x_vf_mbx_qfilters(struct bnx2x *bp, struct bnx2x_virtf *vf)
                                 goto op_err;
                 }
  
+               /* build vlan list */
+               fl = NULL;
+
+               rc = bnx2x_vf_mbx_macvlan_list(bp, vf, msg, &fl,
+                                              VFPF_VLAN_FILTER);
+               if (rc)
+                       goto op_err;
+
+               if (fl) {
+                       /* set vlan list */
+                       rc = bnx2x_vf_mac_vlan_config_list(bp, vf, fl,
+                                                          msg->vf_qid,
+                                                          false);
+                       if (rc)
+                               goto op_err;
+               }
+
         }
  
         if (msg->flags & VFPF_SET_Q_FILTERS_RX_MASK_CHANGED) {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c

index 235733e91c791b8ff04951cbdd42ec10ab435bae..1f1e54ba0ecb31ffd053161e458b4d6817cb510b 100644 (file)
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1983,20 +1983,25 @@ static void bnxt_free_rx_skbs(struct bnxt *bp)
  
                 for (j = 0; j < max_idx; j++) {
                         struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[j];
+                       dma_addr_t mapping = rx_buf->mapping;
                         void *data = rx_buf->data;
  
                         if (!data)
                                 continue;
  
-                       dma_unmap_single(&pdev->dev, rx_buf->mapping,
-                                        bp->rx_buf_use_size, bp->rx_dir);
-
                         rx_buf->data = NULL;
  
-                       if (BNXT_RX_PAGE_MODE(bp))
+                       if (BNXT_RX_PAGE_MODE(bp)) {
+                               mapping -= bp->rx_dma_offset;
+                               dma_unmap_page(&pdev->dev, mapping,
+                                              PAGE_SIZE, bp->rx_dir);
                                 __free_page(data);
-                       else
+                       } else {
+                               dma_unmap_single(&pdev->dev, mapping,
+                                                bp->rx_buf_use_size,
+                                                bp->rx_dir);
                                 kfree(data);
+                       }
                 }
  
                 for (j = 0; j < max_agg_idx; j++) {
@@ -2455,6 +2460,18 @@ static int bnxt_init_one_rx_ring(struct bnxt *bp, int ring_nr)
         return 0;
  }
  
+static void bnxt_init_cp_rings(struct bnxt *bp)
+{
+       int i;
+
+       for (i = 0; i < bp->cp_nr_rings; i++) {
+               struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring;
+               struct bnxt_ring_struct *ring = &cpr->cp_ring_struct;
+
+               ring->fw_ring_id = INVALID_HW_RING_ID;
+       }
+}
+
  static int bnxt_init_rx_rings(struct bnxt *bp)
  {
         int i, rc = 0;
@@ -4465,6 +4482,10 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp)
                 vf->vlan = le16_to_cpu(resp->vlan) & VLAN_VID_MASK;
         }
  #endif
+       if (BNXT_PF(bp) && (le16_to_cpu(resp->flags) &
+                           FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED))
+               bp->flags |= BNXT_FLAG_FW_LLDP_AGENT;
+
         switch (resp->port_partition_type) {
         case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0:
         case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5:
@@ -4728,7 +4749,7 @@ static int bnxt_set_tpa(struct bnxt *bp, bool set_tpa)
                 rc = bnxt_hwrm_vnic_set_tpa(bp, i, tpa_flags);
                 if (rc) {
                         netdev_err(bp->dev, "hwrm vnic set tpa failure rc for vnic %d: %x\n",
-                                  rc, i);
+                                  i, rc);
                         return rc;
                 }
         }
@@ -5002,6 +5023,7 @@ static int bnxt_shutdown_nic(struct bnxt *bp, bool irq_re_init)
  
  static int bnxt_init_nic(struct bnxt *bp, bool irq_re_init)
  {
+       bnxt_init_cp_rings(bp);
         bnxt_init_rx_rings(bp);
         bnxt_init_tx_rings(bp);
         bnxt_init_ring_grps(bp, irq_re_init);
@@ -5507,8 +5529,9 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp)
                 bp->lpi_tmr_hi = le32_to_cpu(resp->valid_tx_lpi_timer_high) &
                                  PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_HIGH_MASK;
         }
-       link_info->support_auto_speeds =
-               le16_to_cpu(resp->supported_speeds_auto_mode);
+       if (resp->supported_speeds_auto_mode)
+               link_info->support_auto_speeds =
+                       le16_to_cpu(resp->supported_speeds_auto_mode);
  
  hwrm_phy_qcaps_exit:
         mutex_unlock(&bp->hwrm_cmd_lock);
@@ -6495,8 +6518,14 @@ static void bnxt_reset_task(struct bnxt *bp, bool silent)
         if (!silent)
                 bnxt_dbg_dump_states(bp);
         if (netif_running(bp->dev)) {
+               int rc;
+
+               if (!silent)
+                       bnxt_ulp_stop(bp);
                 bnxt_close_nic(bp, false, false);
-               bnxt_open_nic(bp, false, false);
+               rc = bnxt_open_nic(bp, false, false);
+               if (!silent && !rc)
+                       bnxt_ulp_start(bp);
         }
  }
  
@@ -7444,6 +7473,10 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
         if (rc)
                 goto init_err_pci_clean;
  
+       rc = bnxt_hwrm_func_reset(bp);
+       if (rc)
+               goto init_err_pci_clean;
+
         bnxt_hwrm_fw_set_time(bp);
  
         dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG |
@@ -7554,10 +7587,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
         if (rc)
                 goto init_err_pci_clean;
  
-       rc = bnxt_hwrm_func_reset(bp);
-       if (rc)
-               goto init_err_pci_clean;
-
         rc = bnxt_init_int_mode(bp);
         if (rc)
                 goto init_err_pci_clean;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h

index faf26a2f726b808792fd837437bf7abb9279a8c7..c7a5b84a5cb20ecb1112f831d868238f9cead76b 100644 (file)
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -993,6 +993,7 @@ struct bnxt {
                                          BNXT_FLAG_ROCEV2_CAP)
         #define BNXT_FLAG_NO_AGG_RINGS  0x20000
         #define BNXT_FLAG_RX_PAGE_MODE  0x40000
+       #define BNXT_FLAG_FW_LLDP_AGENT 0x80000
         #define BNXT_FLAG_CHIP_NITRO_A0 0x1000000
  
         #define BNXT_FLAG_ALL_CONFIG_FEATS (BNXT_FLAG_TPA |             \
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c

index fdf2d8caf7bfaae56b4b39c415feaa343f29d579..03532061d211b168d1bd7774d2aeea9b4ed3a776 100644 (file)
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
@@ -474,7 +474,7 @@ void bnxt_dcb_init(struct bnxt *bp)
                 return;
  
         bp->dcbx_cap = DCB_CAP_DCBX_VER_IEEE;
-       if (BNXT_PF(bp))
+       if (BNXT_PF(bp) && !(bp->flags & BNXT_FLAG_FW_LLDP_AGENT))
                 bp->dcbx_cap |= DCB_CAP_DCBX_HOST;
         else
                 bp->dcbx_cap |= DCB_CAP_DCBX_LLD_MANAGED;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c

index f92896835d2a4ceb18e7d69fdc53ce7730402667..365895ed3c3e240584da21fad2caebc6384482d5 100644 (file)
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -1,7 +1,7 @@
  /*
   * Broadcom GENET (Gigabit Ethernet) controller driver
   *
- * Copyright (c) 2014 Broadcom Corporation
+ * Copyright (c) 2014-2017 Broadcom
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License version 2 as
@@ -450,6 +450,22 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv,
                         genet_dma_ring_regs[r]);
  }
  
+static int bcmgenet_begin(struct net_device *dev)
+{
+       struct bcmgenet_priv *priv = netdev_priv(dev);
+
+       /* Turn on the clock */
+       return clk_prepare_enable(priv->clk);
+}
+
+static void bcmgenet_complete(struct net_device *dev)
+{
+       struct bcmgenet_priv *priv = netdev_priv(dev);
+
+       /* Turn off the clock */
+       clk_disable_unprepare(priv->clk);
+}
+
  static int bcmgenet_get_link_ksettings(struct net_device *dev,
                                        struct ethtool_link_ksettings *cmd)
  {
@@ -778,8 +794,9 @@ static const struct bcmgenet_stats bcmgenet_gstrings_stats[] = {
         STAT_GENET_RUNT("rx_runt_bytes", mib.rx_runt_bytes),
         /* Misc UniMAC counters */
         STAT_GENET_MISC("rbuf_ovflow_cnt", mib.rbuf_ovflow_cnt,
-                       UMAC_RBUF_OVFL_CNT),
-       STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt, UMAC_RBUF_ERR_CNT),
+                       UMAC_RBUF_OVFL_CNT_V1),
+       STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt,
+                       UMAC_RBUF_ERR_CNT_V1),
         STAT_GENET_MISC("mdf_err_cnt", mib.mdf_err_cnt, UMAC_MDF_ERR_CNT),
         STAT_GENET_SOFT_MIB("alloc_rx_buff_failed", mib.alloc_rx_buff_failed),
         STAT_GENET_SOFT_MIB("rx_dma_failed", mib.rx_dma_failed),
@@ -821,6 +838,45 @@ static void bcmgenet_get_strings(struct net_device *dev, u32 stringset,
         }
  }
  
+static u32 bcmgenet_update_stat_misc(struct bcmgenet_priv *priv, u16 offset)
+{
+       u16 new_offset;
+       u32 val;
+
+       switch (offset) {
+       case UMAC_RBUF_OVFL_CNT_V1:
+               if (GENET_IS_V2(priv))
+                       new_offset = RBUF_OVFL_CNT_V2;
+               else
+                       new_offset = RBUF_OVFL_CNT_V3PLUS;
+
+               val = bcmgenet_rbuf_readl(priv, new_offset);
+               /* clear if overflowed */
+               if (val == ~0)
+                       bcmgenet_rbuf_writel(priv, 0, new_offset);
+               break;
+       case UMAC_RBUF_ERR_CNT_V1:
+               if (GENET_IS_V2(priv))
+                       new_offset = RBUF_ERR_CNT_V2;
+               else
+                       new_offset = RBUF_ERR_CNT_V3PLUS;
+
+               val = bcmgenet_rbuf_readl(priv, new_offset);
+               /* clear if overflowed */
+               if (val == ~0)
+                       bcmgenet_rbuf_writel(priv, 0, new_offset);
+               break;
+       default:
+               val = bcmgenet_umac_readl(priv, offset);
+               /* clear if overflowed */
+               if (val == ~0)
+                       bcmgenet_umac_writel(priv, 0, offset);
+               break;
+       }
+
+       return val;
+}
+
  static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv)
  {
         int i, j = 0;
@@ -836,19 +892,28 @@ static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv)
                 case BCMGENET_STAT_NETDEV:
                 case BCMGENET_STAT_SOFT:
                         continue;
-               case BCMGENET_STAT_MIB_RX:
-               case BCMGENET_STAT_MIB_TX:
                 case BCMGENET_STAT_RUNT:
-                       if (s->type != BCMGENET_STAT_MIB_RX)
-                               offset = BCMGENET_STAT_OFFSET;
+                       offset += BCMGENET_STAT_OFFSET;
+                       /* fall through */
+               case BCMGENET_STAT_MIB_TX:
+                       offset += BCMGENET_STAT_OFFSET;
+                       /* fall through */
+               case BCMGENET_STAT_MIB_RX:
                         val = bcmgenet_umac_readl(priv,
                                                   UMAC_MIB_START + j + offset);
+                       offset = 0;     /* Reset Offset */
                         break;
                 case BCMGENET_STAT_MISC:
-                       val = bcmgenet_umac_readl(priv, s->reg_offset);
-                       /* clear if overflowed */
-                       if (val == ~0)
-                               bcmgenet_umac_writel(priv, 0, s->reg_offset);
+                       if (GENET_IS_V1(priv)) {
+                               val = bcmgenet_umac_readl(priv, s->reg_offset);
+                               /* clear if overflowed */
+                               if (val == ~0)
+                                       bcmgenet_umac_writel(priv, 0,
+                                                            s->reg_offset);
+                       } else {
+                               val = bcmgenet_update_stat_misc(priv,
+                                                               s->reg_offset);
+                       }
                         break;
                 }
  
@@ -973,6 +1038,8 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
  
  /* standard ethtool support functions. */
  static const struct ethtool_ops bcmgenet_ethtool_ops = {
+       .begin                  = bcmgenet_begin,
+       .complete               = bcmgenet_complete,
         .get_strings            = bcmgenet_get_strings,
         .get_sset_count         = bcmgenet_get_sset_count,
         .get_ethtool_stats      = bcmgenet_get_ethtool_stats,
@@ -1167,7 +1234,6 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev,
         struct bcmgenet_priv *priv = netdev_priv(dev);
         struct device *kdev = &priv->pdev->dev;
         struct enet_cb *tx_cb_ptr;
-       struct netdev_queue *txq;
         unsigned int pkts_compl = 0;
         unsigned int bytes_compl = 0;
         unsigned int c_index;
@@ -1219,13 +1285,8 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev,
         dev->stats.tx_packets += pkts_compl;
         dev->stats.tx_bytes += bytes_compl;
  
-       txq = netdev_get_tx_queue(dev, ring->queue);
-       netdev_tx_completed_queue(txq, pkts_compl, bytes_compl);
-
-       if (ring->free_bds > (MAX_SKB_FRAGS + 1)) {
-               if (netif_tx_queue_stopped(txq))
-                       netif_tx_wake_queue(txq);
-       }
+       netdev_tx_completed_queue(netdev_get_tx_queue(dev, ring->queue),
+                                 pkts_compl, bytes_compl);
  
         return pkts_compl;
  }
@@ -1248,8 +1309,16 @@ static int bcmgenet_tx_poll(struct napi_struct *napi, int budget)
         struct bcmgenet_tx_ring *ring =
                 container_of(napi, struct bcmgenet_tx_ring, napi);
         unsigned int work_done = 0;
+       struct netdev_queue *txq;
+       unsigned long flags;
  
-       work_done = bcmgenet_tx_reclaim(ring->priv->dev, ring);
+       spin_lock_irqsave(&ring->lock, flags);
+       work_done = __bcmgenet_tx_reclaim(ring->priv->dev, ring);
+       if (ring->free_bds > (MAX_SKB_FRAGS + 1)) {
+               txq = netdev_get_tx_queue(ring->priv->dev, ring->queue);
+               netif_tx_wake_queue(txq);
+       }
+       spin_unlock_irqrestore(&ring->lock, flags);
  
         if (work_done == 0) {
                 napi_complete(napi);
@@ -2457,24 +2526,28 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv)
  /* Interrupt bottom half */
  static void bcmgenet_irq_task(struct work_struct *work)
  {
+       unsigned long flags;
+       unsigned int status;
         struct bcmgenet_priv *priv = container_of(
                         work, struct bcmgenet_priv, bcmgenet_irq_work);
  
         netif_dbg(priv, intr, priv->dev, "%s\n", __func__);
  
-       if (priv->irq0_stat & UMAC_IRQ_MPD_R) {
-               priv->irq0_stat &= ~UMAC_IRQ_MPD_R;
+       spin_lock_irqsave(&priv->lock, flags);
+       status = priv->irq0_stat;
+       priv->irq0_stat = 0;
+       spin_unlock_irqrestore(&priv->lock, flags);
+
+       if (status & UMAC_IRQ_MPD_R) {
                 netif_dbg(priv, wol, priv->dev,
                           "magic packet detected, waking up\n");
                 bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC);
         }
  
         /* Link UP/DOWN event */
-       if (priv->irq0_stat & UMAC_IRQ_LINK_EVENT) {
+       if (status & UMAC_IRQ_LINK_EVENT)
                 phy_mac_interrupt(priv->phydev,
-                                 !!(priv->irq0_stat & UMAC_IRQ_LINK_UP));
-               priv->irq0_stat &= ~UMAC_IRQ_LINK_EVENT;
-       }
+                                 !!(status & UMAC_IRQ_LINK_UP));
  }
  
  /* bcmgenet_isr1: handle Rx and Tx priority queues */
@@ -2483,22 +2556,21 @@ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id)
         struct bcmgenet_priv *priv = dev_id;
         struct bcmgenet_rx_ring *rx_ring;
         struct bcmgenet_tx_ring *tx_ring;
-       unsigned int index;
+       unsigned int index, status;
  
-       /* Save irq status for bottom-half processing. */
-       priv->irq1_stat =
-               bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) &
+       /* Read irq status */
+       status = bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) &
                 ~bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_MASK_STATUS);
  
         /* clear interrupts */
-       bcmgenet_intrl2_1_writel(priv, priv->irq1_stat, INTRL2_CPU_CLEAR);
+       bcmgenet_intrl2_1_writel(priv, status, INTRL2_CPU_CLEAR);
  
         netif_dbg(priv, intr, priv->dev,
-                 "%s: IRQ=0x%x\n", __func__, priv->irq1_stat);
+                 "%s: IRQ=0x%x\n", __func__, status);
  
         /* Check Rx priority queue interrupts */
         for (index = 0; index < priv->hw_params->rx_queues; index++) {
-               if (!(priv->irq1_stat & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index)))
+               if (!(status & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index)))
                         continue;
  
                 rx_ring = &priv->rx_rings[index];
@@ -2511,7 +2583,7 @@ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id)
  
         /* Check Tx priority queue interrupts */
         for (index = 0; index < priv->hw_params->tx_queues; index++) {
-               if (!(priv->irq1_stat & BIT(index)))
+               if (!(status & BIT(index)))
                         continue;
  
                 tx_ring = &priv->tx_rings[index];
@@ -2531,19 +2603,20 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
         struct bcmgenet_priv *priv = dev_id;
         struct bcmgenet_rx_ring *rx_ring;
         struct bcmgenet_tx_ring *tx_ring;
+       unsigned int status;
+       unsigned long flags;
  
-       /* Save irq status for bottom-half processing. */
-       priv->irq0_stat =
-               bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) &
+       /* Read irq status */
+       status = bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) &
                 ~bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS);
  
         /* clear interrupts */
-       bcmgenet_intrl2_0_writel(priv, priv->irq0_stat, INTRL2_CPU_CLEAR);
+       bcmgenet_intrl2_0_writel(priv, status, INTRL2_CPU_CLEAR);
  
         netif_dbg(priv, intr, priv->dev,
-                 "IRQ=0x%x\n", priv->irq0_stat);
+                 "IRQ=0x%x\n", status);
  
-       if (priv->irq0_stat & UMAC_IRQ_RXDMA_DONE) {
+       if (status & UMAC_IRQ_RXDMA_DONE) {
                 rx_ring = &priv->rx_rings[DESC_INDEX];
  
                 if (likely(napi_schedule_prep(&rx_ring->napi))) {
@@ -2552,7 +2625,7 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
                 }
         }
  
-       if (priv->irq0_stat & UMAC_IRQ_TXDMA_DONE) {
+       if (status & UMAC_IRQ_TXDMA_DONE) {
                 tx_ring = &priv->tx_rings[DESC_INDEX];
  
                 if (likely(napi_schedule_prep(&tx_ring->napi))) {
@@ -2561,22 +2634,23 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
                 }
         }
  
-       if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R |
-                               UMAC_IRQ_PHY_DET_F |
-                               UMAC_IRQ_LINK_EVENT |
-                               UMAC_IRQ_HFB_SM |
-                               UMAC_IRQ_HFB_MM |
-                               UMAC_IRQ_MPD_R)) {
-               /* all other interested interrupts handled in bottom half */
-               schedule_work(&priv->bcmgenet_irq_work);
-       }
-
         if ((priv->hw_params->flags & GENET_HAS_MDIO_INTR) &&
-           priv->irq0_stat & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) {
-               priv->irq0_stat &= ~(UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR);
+               status & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) {
                 wake_up(&priv->wq);
         }
  
+       /* all other interested interrupts handled in bottom half */
+       status &= (UMAC_IRQ_LINK_EVENT |
+                  UMAC_IRQ_MPD_R);
+       if (status) {
+               /* Save irq status for bottom-half processing. */
+               spin_lock_irqsave(&priv->lock, flags);
+               priv->irq0_stat |= status;
+               spin_unlock_irqrestore(&priv->lock, flags);
+
+               schedule_work(&priv->bcmgenet_irq_work);
+       }
+
         return IRQ_HANDLED;
  }
  
@@ -2801,6 +2875,8 @@ err_irq0:
  err_fini_dma:
         bcmgenet_fini_dma(priv);
  err_clk_disable:
+       if (priv->internal_phy)
+               bcmgenet_power_down(priv, GENET_POWER_PASSIVE);
         clk_disable_unprepare(priv->clk);
         return ret;
  }
@@ -3177,6 +3253,12 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv)
          */
         gphy_rev = reg & 0xffff;
  
+       /* This is reserved so should require special treatment */
+       if (gphy_rev == 0 || gphy_rev == 0x01ff) {
+               pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev);
+               return;
+       }
+
         /* This is the good old scheme, just GPHY major, no minor nor patch */
         if ((gphy_rev & 0xf0) != 0)
                 priv->gphy_rev = gphy_rev << 8;
@@ -3185,12 +3267,6 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv)
         else if ((gphy_rev & 0xff00) != 0)
                 priv->gphy_rev = gphy_rev;
  
-       /* This is reserved so should require special treatment */
-       else if (gphy_rev == 0 || gphy_rev == 0x01ff) {
-               pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev);
-               return;
-       }
-
  #ifdef CONFIG_PHYS_ADDR_T_64BIT
         if (!(params->flags & GENET_HAS_40BITS))
                 pr_warn("GENET does not support 40-bits PA\n");
@@ -3233,6 +3309,7 @@ static int bcmgenet_probe(struct platform_device *pdev)
         const void *macaddr;
         struct resource *r;
         int err = -EIO;
+       const char *phy_mode_str;
  
         /* Up to GENET_MAX_MQ_CNT + 1 TX queues and RX queues */
         dev = alloc_etherdev_mqs(sizeof(*priv), GENET_MAX_MQ_CNT + 1,
@@ -3276,6 +3353,8 @@ static int bcmgenet_probe(struct platform_device *pdev)
                 goto err;
         }
  
+       spin_lock_init(&priv->lock);
+
         SET_NETDEV_DEV(dev, &pdev->dev);
         dev_set_drvdata(&pdev->dev, dev);
         ether_addr_copy(dev->dev_addr, macaddr);
@@ -3338,6 +3417,13 @@ static int bcmgenet_probe(struct platform_device *pdev)
                 priv->clk_eee = NULL;
         }
  
+       /* If this is an internal GPHY, power it on now, before UniMAC is
+        * brought out of reset as absolutely no UniMAC activity is allowed
+        */
+       if (dn && !of_property_read_string(dn, "phy-mode", &phy_mode_str) &&
+           !strcasecmp(phy_mode_str, "internal"))
+               bcmgenet_power_up(priv, GENET_POWER_PASSIVE);
+
         err = reset_umac(priv);
         if (err)
                 goto err_clk_disable;
@@ -3395,7 +3481,8 @@ static int bcmgenet_suspend(struct device *d)
  
         bcmgenet_netif_stop(dev);
  
-       phy_suspend(priv->phydev);
+       if (!device_may_wakeup(d))
+               phy_suspend(priv->phydev);
  
         netif_device_detach(dev);
  
@@ -3492,7 +3579,8 @@ static int bcmgenet_resume(struct device *d)
  
         netif_device_attach(dev);
  
-       phy_resume(priv->phydev);
+       if (!device_may_wakeup(d))
+               phy_resume(priv->phydev);
  
         if (priv->eee.eee_enabled)
                 bcmgenet_eee_enable_set(dev, true);
@@ -3502,6 +3590,8 @@ static int bcmgenet_resume(struct device *d)
         return 0;
  
  out_clk_disable:
+       if (priv->internal_phy)
+               bcmgenet_power_down(priv, GENET_POWER_PASSIVE);
         clk_disable_unprepare(priv->clk);
         return ret;
  }
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h

index 1e2dc34d331a49e05a8fc9a66156dfeeb00ee10f..db7f289d65ae2abd1589446ee0cadc00ffbf0254 100644 (file)
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2014 Broadcom Corporation
+ * Copyright (c) 2014-2017 Broadcom
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License version 2 as
@@ -214,7 +214,9 @@ struct bcmgenet_mib_counters {
  #define  MDIO_REG_SHIFT                        16
  #define  MDIO_REG_MASK                 0x1F
  
-#define UMAC_RBUF_OVFL_CNT             0x61C
+#define UMAC_RBUF_OVFL_CNT_V1          0x61C
+#define RBUF_OVFL_CNT_V2               0x80
+#define RBUF_OVFL_CNT_V3PLUS           0x94
  
  #define UMAC_MPD_CTRL                  0x620
  #define  MPD_EN                                (1 << 0)
@@ -224,7 +226,9 @@ struct bcmgenet_mib_counters {
  
  #define UMAC_MPD_PW_MS                 0x624
  #define UMAC_MPD_PW_LS                 0x628
-#define UMAC_RBUF_ERR_CNT              0x634
+#define UMAC_RBUF_ERR_CNT_V1           0x634
+#define RBUF_ERR_CNT_V2                        0x84
+#define RBUF_ERR_CNT_V3PLUS            0x98
  #define UMAC_MDF_ERR_CNT               0x638
  #define UMAC_MDF_CTRL                  0x650
  #define UMAC_MDF_ADDR                  0x654
@@ -619,11 +623,13 @@ struct bcmgenet_priv {
         struct work_struct bcmgenet_irq_work;
         int irq0;
         int irq1;
-       unsigned int irq0_stat;
-       unsigned int irq1_stat;
         int wol_irq;
         bool wol_irq_disabled;
  
+       /* shared status */
+       spinlock_t lock;
+       unsigned int irq0_stat;
+
         /* HW descriptors/checksum variables */
         bool desc_64b_en;
         bool desc_rxchk_en;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c

index e87607621e62a076104d67046a10603305d66ecf..2f9281936f0e434a328e1c716a29a76be2d9090c 100644 (file)
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -220,20 +220,6 @@ void bcmgenet_phy_power_set(struct net_device *dev, bool enable)
         udelay(60);
  }
  
-static void bcmgenet_internal_phy_setup(struct net_device *dev)
-{
-       struct bcmgenet_priv *priv = netdev_priv(dev);
-       u32 reg;
-
-       /* Power up PHY */
-       bcmgenet_phy_power_set(dev, true);
-       /* enable APD */
-       reg = bcmgenet_ext_readl(priv, EXT_EXT_PWR_MGMT);
-       reg |= EXT_PWR_DN_EN_LD;
-       bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT);
-       bcmgenet_mii_reset(dev);
-}
-
  static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv)
  {
         u32 reg;
@@ -281,7 +267,6 @@ int bcmgenet_mii_config(struct net_device *dev)
  
                 if (priv->internal_phy) {
                         phy_name = "internal PHY";
-                       bcmgenet_internal_phy_setup(dev);
                 } else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) {
                         phy_name = "MoCA";
                         bcmgenet_moca_phy_setup(priv);
diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c

index 9e59663a6eadb012de6f4a4474484800401fce3b..0f6811860ad51de9b871e806f3f254a1abfcf2eb 100644 (file)
--- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c
+++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c
@@ -1930,13 +1930,13 @@ static void
  bfa_ioc_send_enable(struct bfa_ioc *ioc)
  {
         struct bfi_ioc_ctrl_req enable_req;
-       struct timeval tv;
  
         bfi_h2i_set(enable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_ENABLE_REQ,
                     bfa_ioc_portid(ioc));
         enable_req.clscode = htons(ioc->clscode);
-       do_gettimeofday(&tv);
-       enable_req.tv_sec = ntohl(tv.tv_sec);
+       enable_req.rsvd = htons(0);
+       /* overflow in 2106 */
+       enable_req.tv_sec = ntohl(ktime_get_real_seconds());
         bfa_ioc_mbox_send(ioc, &enable_req, sizeof(struct bfi_ioc_ctrl_req));
  }
  
@@ -1947,6 +1947,10 @@ bfa_ioc_send_disable(struct bfa_ioc *ioc)
  
         bfi_h2i_set(disable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_DISABLE_REQ,
                     bfa_ioc_portid(ioc));
+       disable_req.clscode = htons(ioc->clscode);
+       disable_req.rsvd = htons(0);
+       /* overflow in 2106 */
+       disable_req.tv_sec = ntohl(ktime_get_real_seconds());
         bfa_ioc_mbox_send(ioc, &disable_req, sizeof(struct bfi_ioc_ctrl_req));
  }
  
diff --git a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c

index 05c1c1dd7751bd720fac026876c7fcf7392eca03..cebfe3bd086e36f60f717579f03037058b1d1d9e 100644 (file)
--- a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
+++ b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
@@ -325,7 +325,7 @@ bnad_debugfs_write_regrd(struct file *file, const char __user *buf,
                 return PTR_ERR(kern_buf);
  
         rc = sscanf(kern_buf, "%x:%x", &addr, &len);
-       if (rc < 2) {
+       if (rc < 2 || len > UINT_MAX >> 2) {
                 netdev_warn(bnad->netdev, "failed to read user buffer\n");
                 kfree(kern_buf);
                 return -EINVAL;
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c

index be9c0e3f5ade7d4e61694da214702f0223ab5d59..92f46b1375c32527b29e24a4476d6b455835bd46 100644 (file)
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -152,7 +152,7 @@ struct octnic_gather {
          */
         struct octeon_sg_entry *sg;
  
-       u64 sg_dma_ptr;
+       dma_addr_t sg_dma_ptr;
  };
  
  struct handshake {
@@ -734,6 +734,9 @@ static void delete_glists(struct lio *lio)
         struct octnic_gather *g;
         int i;
  
+       kfree(lio->glist_lock);
+       lio->glist_lock = NULL;
+
         if (!lio->glist)
                 return;
  
@@ -741,23 +744,26 @@ static void delete_glists(struct lio *lio)
                 do {
                         g = (struct octnic_gather *)
                                 list_delete_head(&lio->glist[i]);
-                       if (g) {
-                               if (g->sg) {
-                                       dma_unmap_single(&lio->oct_dev->
-                                                        pci_dev->dev,
-                                                        g->sg_dma_ptr,
-                                                        g->sg_size,
-                                                        DMA_TO_DEVICE);
-                                       kfree((void *)((unsigned long)g->sg -
-                                                      g->adjust));
-                               }
+                       if (g)
                                 kfree(g);
-                       }
                 } while (g);
+
+               if (lio->glists_virt_base && lio->glists_virt_base[i]) {
+                       lio_dma_free(lio->oct_dev,
+                                    lio->glist_entry_size * lio->tx_qsize,
+                                    lio->glists_virt_base[i],
+                                    lio->glists_dma_base[i]);
+               }
         }
  
-       kfree((void *)lio->glist);
-       kfree((void *)lio->glist_lock);
+       kfree(lio->glists_virt_base);
+       lio->glists_virt_base = NULL;
+
+       kfree(lio->glists_dma_base);
+       lio->glists_dma_base = NULL;
+
+       kfree(lio->glist);
+       lio->glist = NULL;
  }
  
  /**
@@ -772,13 +778,30 @@ static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
         lio->glist_lock = kcalloc(num_iqs, sizeof(*lio->glist_lock),
                                   GFP_KERNEL);
         if (!lio->glist_lock)
-               return 1;
+               return -ENOMEM;
  
         lio->glist = kcalloc(num_iqs, sizeof(*lio->glist),
                              GFP_KERNEL);
         if (!lio->glist) {
-               kfree((void *)lio->glist_lock);
-               return 1;
+               kfree(lio->glist_lock);
+               lio->glist_lock = NULL;
+               return -ENOMEM;
+       }
+
+       lio->glist_entry_size =
+               ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE);
+
+       /* allocate memory to store virtual and dma base address of
+        * per glist consistent memory
+        */
+       lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base),
+                                       GFP_KERNEL);
+       lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base),
+                                      GFP_KERNEL);
+
+       if (!lio->glists_virt_base || !lio->glists_dma_base) {
+               delete_glists(lio);
+               return -ENOMEM;
         }
  
         for (i = 0; i < num_iqs; i++) {
@@ -788,6 +811,16 @@ static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
  
                 INIT_LIST_HEAD(&lio->glist[i]);
  
+               lio->glists_virt_base[i] =
+                       lio_dma_alloc(oct,
+                                     lio->glist_entry_size * lio->tx_qsize,
+                                     &lio->glists_dma_base[i]);
+
+               if (!lio->glists_virt_base[i]) {
+                       delete_glists(lio);
+                       return -ENOMEM;
+               }
+
                 for (j = 0; j < lio->tx_qsize; j++) {
                         g = kzalloc_node(sizeof(*g), GFP_KERNEL,
                                          numa_node);
@@ -796,43 +829,18 @@ static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
                         if (!g)
                                 break;
  
-                       g->sg_size = ((ROUNDUP4(OCTNIC_MAX_SG) >> 2) *
-                                     OCT_SG_ENTRY_SIZE);
+                       g->sg = lio->glists_virt_base[i] +
+                               (j * lio->glist_entry_size);
  
-                       g->sg = kmalloc_node(g->sg_size + 8,
-                                            GFP_KERNEL, numa_node);
-                       if (!g->sg)
-                               g->sg = kmalloc(g->sg_size + 8, GFP_KERNEL);
-                       if (!g->sg) {
-                               kfree(g);
-                               break;
-                       }
-
-                       /* The gather component should be aligned on 64-bit
-                        * boundary
-                        */
-                       if (((unsigned long)g->sg) & 7) {
-                               g->adjust = 8 - (((unsigned long)g->sg) & 7);
-                               g->sg = (struct octeon_sg_entry *)
-                                       ((unsigned long)g->sg + g->adjust);
-                       }
-                       g->sg_dma_ptr = dma_map_single(&oct->pci_dev->dev,
-                                                      g->sg, g->sg_size,
-                                                      DMA_TO_DEVICE);
-                       if (dma_mapping_error(&oct->pci_dev->dev,
-                                             g->sg_dma_ptr)) {
-                               kfree((void *)((unsigned long)g->sg -
-                                              g->adjust));
-                               kfree(g);
-                               break;
-                       }
+                       g->sg_dma_ptr = lio->glists_dma_base[i] +
+                                       (j * lio->glist_entry_size);
  
                         list_add_tail(&g->list, &lio->glist[i]);
                 }
  
                 if (j != lio->tx_qsize) {
                         delete_glists(lio);
-                       return 1;
+                       return -ENOMEM;
                 }
         }
  
@@ -1885,9 +1893,6 @@ static void free_netsgbuf(void *buf)
                 i++;
         }
  
-       dma_sync_single_for_cpu(&lio->oct_dev->pci_dev->dev,
-                               g->sg_dma_ptr, g->sg_size, DMA_TO_DEVICE);
-
         iq = skb_iq(lio, skb);
         spin_lock(&lio->glist_lock[iq]);
         list_add_tail(&g->list, &lio->glist[iq]);
@@ -1933,9 +1938,6 @@ static void free_netsgbuf_with_resp(void *buf)
                 i++;
         }
  
-       dma_sync_single_for_cpu(&lio->oct_dev->pci_dev->dev,
-                               g->sg_dma_ptr, g->sg_size, DMA_TO_DEVICE);
-
         iq = skb_iq(lio, skb);
  
         spin_lock(&lio->glist_lock[iq]);
@@ -3273,8 +3275,6 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
                         i++;
                 }
  
-               dma_sync_single_for_device(&oct->pci_dev->dev, g->sg_dma_ptr,
-                                          g->sg_size, DMA_TO_DEVICE);
                 dptr = g->sg_dma_ptr;
  
                 if (OCTEON_CN23XX_PF(oct))
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c

index 9d5e03502c76cbfe3c8372a5d3e73c67e07e3a03..7b83be4ce1fe0ce5cab0c7ff889edbf334a5a065 100644 (file)
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -108,6 +108,8 @@ struct octnic_gather {
          * received from the IP layer.
          */
         struct octeon_sg_entry *sg;
+
+       dma_addr_t sg_dma_ptr;
  };
  
  struct octeon_device_priv {
@@ -490,6 +492,9 @@ static void delete_glists(struct lio *lio)
         struct octnic_gather *g;
         int i;
  
+       kfree(lio->glist_lock);
+       lio->glist_lock = NULL;
+
         if (!lio->glist)
                 return;
  
@@ -497,17 +502,26 @@ static void delete_glists(struct lio *lio)
                 do {
                         g = (struct octnic_gather *)
                             list_delete_head(&lio->glist[i]);
-                       if (g) {
-                               if (g->sg)
-                                       kfree((void *)((unsigned long)g->sg -
-                                                       g->adjust));
+                       if (g)
                                 kfree(g);
-                       }
                 } while (g);
+
+               if (lio->glists_virt_base && lio->glists_virt_base[i]) {
+                       lio_dma_free(lio->oct_dev,
+                                    lio->glist_entry_size * lio->tx_qsize,
+                                    lio->glists_virt_base[i],
+                                    lio->glists_dma_base[i]);
+               }
         }
  
+       kfree(lio->glists_virt_base);
+       lio->glists_virt_base = NULL;
+
+       kfree(lio->glists_dma_base);
+       lio->glists_dma_base = NULL;
+
         kfree(lio->glist);
-       kfree(lio->glist_lock);
+       lio->glist = NULL;
  }
  
  /**
@@ -522,13 +536,30 @@ static int setup_glists(struct lio *lio, int num_iqs)
         lio->glist_lock =
             kzalloc(sizeof(*lio->glist_lock) * num_iqs, GFP_KERNEL);
         if (!lio->glist_lock)
-               return 1;
+               return -ENOMEM;
  
         lio->glist =
             kzalloc(sizeof(*lio->glist) * num_iqs, GFP_KERNEL);
         if (!lio->glist) {
                 kfree(lio->glist_lock);
-               return 1;
+               lio->glist_lock = NULL;
+               return -ENOMEM;
+       }
+
+       lio->glist_entry_size =
+               ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE);
+
+       /* allocate memory to store virtual and dma base address of
+        * per glist consistent memory
+        */
+       lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base),
+                                       GFP_KERNEL);
+       lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base),
+                                      GFP_KERNEL);
+
+       if (!lio->glists_virt_base || !lio->glists_dma_base) {
+               delete_glists(lio);
+               return -ENOMEM;
         }
  
         for (i = 0; i < num_iqs; i++) {
@@ -536,34 +567,33 @@ static int setup_glists(struct lio *lio, int num_iqs)
  
                 INIT_LIST_HEAD(&lio->glist[i]);
  
+               lio->glists_virt_base[i] =
+                       lio_dma_alloc(lio->oct_dev,
+                                     lio->glist_entry_size * lio->tx_qsize,
+                                     &lio->glists_dma_base[i]);
+
+               if (!lio->glists_virt_base[i]) {
+                       delete_glists(lio);
+                       return -ENOMEM;
+               }
+
                 for (j = 0; j < lio->tx_qsize; j++) {
                         g = kzalloc(sizeof(*g), GFP_KERNEL);
                         if (!g)
                                 break;
  
-                       g->sg_size = ((ROUNDUP4(OCTNIC_MAX_SG) >> 2) *
-                                     OCT_SG_ENTRY_SIZE);
+                       g->sg = lio->glists_virt_base[i] +
+                               (j * lio->glist_entry_size);
  
-                       g->sg = kmalloc(g->sg_size + 8, GFP_KERNEL);
-                       if (!g->sg) {
-                               kfree(g);
-                               break;
-                       }
+                       g->sg_dma_ptr = lio->glists_dma_base[i] +
+                                       (j * lio->glist_entry_size);
  
-                       /* The gather component should be aligned on 64-bit
-                        * boundary
-                        */
-                       if (((unsigned long)g->sg) & 7) {
-                               g->adjust = 8 - (((unsigned long)g->sg) & 7);
-                               g->sg = (struct octeon_sg_entry *)
-                                       ((unsigned long)g->sg + g->adjust);
-                       }
                         list_add_tail(&g->list, &lio->glist[i]);
                 }
  
                 if (j != lio->tx_qsize) {
                         delete_glists(lio);
-                       return 1;
+                       return -ENOMEM;
                 }
         }
  
@@ -1324,10 +1354,6 @@ static void free_netsgbuf(void *buf)
                 i++;
         }
  
-       dma_unmap_single(&lio->oct_dev->pci_dev->dev,
-                        finfo->dptr, g->sg_size,
-                        DMA_TO_DEVICE);
-
         iq = skb_iq(lio, skb);
  
         spin_lock(&lio->glist_lock[iq]);
@@ -1374,10 +1400,6 @@ static void free_netsgbuf_with_resp(void *buf)
                 i++;
         }
  
-       dma_unmap_single(&lio->oct_dev->pci_dev->dev,
-                        finfo->dptr, g->sg_size,
-                        DMA_TO_DEVICE);
-
         iq = skb_iq(lio, skb);
  
         spin_lock(&lio->glist_lock[iq]);
@@ -2382,23 +2404,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
                         i++;
                 }
  
-               dptr = dma_map_single(&oct->pci_dev->dev,
-                                     g->sg, g->sg_size,
-                                     DMA_TO_DEVICE);
-               if (dma_mapping_error(&oct->pci_dev->dev, dptr)) {
-                       dev_err(&oct->pci_dev->dev, "%s DMA mapping error 4\n",
-                               __func__);
-                       dma_unmap_single(&oct->pci_dev->dev, g->sg[0].ptr[0],
-                                        skb->len - skb->data_len,
-                                        DMA_TO_DEVICE);
-                       for (j = 1; j <= frags; j++) {
-                               frag = &skb_shinfo(skb)->frags[j - 1];
-                               dma_unmap_page(&oct->pci_dev->dev,
-                                              g->sg[j >> 2].ptr[j & 3],
-                                              frag->size, DMA_TO_DEVICE);
-                       }
-                       return NETDEV_TX_BUSY;
-               }
+               dptr = g->sg_dma_ptr;
  
                 ndata.cmd.cmd3.dptr = dptr;
                 finfo->dptr = dptr;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_config.h b/drivers/net/ethernet/cavium/liquidio/octeon_config.h

index b3dc2e9651a8e205d7e6e451109f98e96065de2c..d29ebc531151f0fe85cb83826d3af9b069d75f52 100644 (file)
--- a/drivers/net/ethernet/cavium/liquidio/octeon_config.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_config.h
@@ -71,17 +71,17 @@
  #define   CN23XX_MAX_RINGS_PER_VF          8
  
  #define   CN23XX_MAX_INPUT_QUEUES      CN23XX_MAX_RINGS_PER_PF
-#define   CN23XX_MAX_IQ_DESCRIPTORS    2048
+#define   CN23XX_MAX_IQ_DESCRIPTORS    512
  #define   CN23XX_DB_MIN                 1
  #define   CN23XX_DB_MAX                 8
  #define   CN23XX_DB_TIMEOUT             1
  
  #define   CN23XX_MAX_OUTPUT_QUEUES     CN23XX_MAX_RINGS_PER_PF
-#define   CN23XX_MAX_OQ_DESCRIPTORS    2048
+#define   CN23XX_MAX_OQ_DESCRIPTORS    512
  #define   CN23XX_OQ_BUF_SIZE           1536
  #define   CN23XX_OQ_PKTSPER_INTR       128
  /*#define CAVIUM_ONLY_CN23XX_RX_PERF*/
-#define   CN23XX_OQ_REFIL_THRESHOLD    128
+#define   CN23XX_OQ_REFIL_THRESHOLD    16
  
  #define   CN23XX_OQ_INTR_PKT           64
  #define   CN23XX_OQ_INTR_TIME          100
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c

index 0be87d119a979ea70117e13b2213987460da2a81..79f809479af6e7d865cc7c280c84232622af982e 100644 (file)
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
@@ -155,11 +155,6 @@ octeon_droq_destroy_ring_buffers(struct octeon_device *oct,
                         recv_buffer_destroy(droq->recv_buf_list[i].buffer,
                                             pg_info);
  
-               if (droq->desc_ring && droq->desc_ring[i].info_ptr)
-                       lio_unmap_ring_info(oct->pci_dev,
-                                           (u64)droq->
-                                           desc_ring[i].info_ptr,
-                                           OCT_DROQ_INFO_SIZE);
                 droq->recv_buf_list[i].buffer = NULL;
         }
  
@@ -211,10 +206,7 @@ int octeon_delete_droq(struct octeon_device *oct, u32 q_no)
         vfree(droq->recv_buf_list);
  
         if (droq->info_base_addr)
-               cnnic_free_aligned_dma(oct->pci_dev, droq->info_list,
-                                      droq->info_alloc_size,
-                                      droq->info_base_addr,
-                                      droq->info_list_dma);
+               lio_free_info_buffer(oct, droq);
  
         if (droq->desc_ring)
                 lio_dma_free(oct, (droq->max_count * OCT_DROQ_DESC_SIZE),
@@ -294,12 +286,7 @@ int octeon_init_droq(struct octeon_device *oct,
         dev_dbg(&oct->pci_dev->dev, "droq[%d]: num_desc: %d\n", q_no,
                 droq->max_count);
  
-       droq->info_list =
-               cnnic_numa_alloc_aligned_dma((droq->max_count *
-                                             OCT_DROQ_INFO_SIZE),
-                                            &droq->info_alloc_size,
-                                            &droq->info_base_addr,
-                                            numa_node);
+       droq->info_list = lio_alloc_info_buffer(oct, droq);
         if (!droq->info_list) {
                 dev_err(&oct->pci_dev->dev, "Cannot allocate memory for info list.\n");
                 lio_dma_free(oct, (droq->max_count * OCT_DROQ_DESC_SIZE),
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h

index e62074090681d3597f973f54fadb2133b21b2931..6982c0af5eccb7129123fcbb4ba8363bb7f9710a 100644 (file)
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
@@ -325,10 +325,10 @@ struct octeon_droq {
         size_t desc_ring_dma;
  
         /** Info ptr list are allocated at this virtual address. */
-       size_t info_base_addr;
+       void *info_base_addr;
  
         /** DMA mapped address of the info list */
-       size_t info_list_dma;
+       dma_addr_t info_list_dma;
  
         /** Allocated size of info list. */
         u32 info_alloc_size;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h

index aa36e9ae7676556e562a8bb4c9cb46aacc709890..bed9ef17bc26b4526cf3c57dcb4823cdc2586491 100644 (file)
--- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
@@ -140,48 +140,6 @@ err_release_region:
         return 1;
  }
  
-static inline void *
-cnnic_numa_alloc_aligned_dma(u32 size,
-                            u32 *alloc_size,
-                            size_t *orig_ptr,
-                            int numa_node)
-{
-       int retries = 0;
-       void *ptr = NULL;
-
-#define OCTEON_MAX_ALLOC_RETRIES     1
-       do {
-               struct page *page = NULL;
-
-               page = alloc_pages_node(numa_node,
-                                       GFP_KERNEL,
-                                       get_order(size));
-               if (!page)
-                       page = alloc_pages(GFP_KERNEL,
-                                          get_order(size));
-               ptr = (void *)page_address(page);
-               if ((unsigned long)ptr & 0x07) {
-                       __free_pages(page, get_order(size));
-                       ptr = NULL;
-                       /* Increment the size required if the first
-                        * attempt failed.
-                        */
-                       if (!retries)
-                               size += 7;
-               }
-               retries++;
-       } while ((retries <= OCTEON_MAX_ALLOC_RETRIES) && !ptr);
-
-       *alloc_size = size;
-       *orig_ptr = (unsigned long)ptr;
-       if ((unsigned long)ptr & 0x07)
-               ptr = (void *)(((unsigned long)ptr + 7) & ~(7UL));
-       return ptr;
-}
-
-#define cnnic_free_aligned_dma(pci_dev, ptr, size, orig_ptr, dma_addr) \
-               free_pages(orig_ptr, get_order(size))
-
  static inline int
  sleep_cond(wait_queue_head_t *wait_queue, int *condition)
  {
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h

index 6bb89419006eb5635cc65c415ad6183f5296f6d3..eef2a1e8a7e3f96b26f004ec0eec93e447a5d61f 100644 (file)
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -62,6 +62,9 @@ struct lio {
  
         /** Array of gather component linked lists */
         struct list_head *glist;
+       void **glists_virt_base;
+       dma_addr_t *glists_dma_base;
+       u32 glist_entry_size;
  
         /** Pointer to the NIC properties for the Octeon device this network
          *  interface is associated with.
@@ -344,6 +347,29 @@ static inline void tx_buffer_free(void *buffer)
  #define lio_dma_free(oct, size, virt_addr, dma_addr) \
         dma_free_coherent(&(oct)->pci_dev->dev, size, virt_addr, dma_addr)
  
+static inline void *
+lio_alloc_info_buffer(struct octeon_device *oct,
+                     struct octeon_droq *droq)
+{
+       void *virt_ptr;
+
+       virt_ptr = lio_dma_alloc(oct, (droq->max_count * OCT_DROQ_INFO_SIZE),
+                                &droq->info_list_dma);
+       if (virt_ptr) {
+               droq->info_alloc_size = droq->max_count * OCT_DROQ_INFO_SIZE;
+               droq->info_base_addr = virt_ptr;
+       }
+
+       return virt_ptr;
+}
+
+static inline void lio_free_info_buffer(struct octeon_device *oct,
+                                       struct octeon_droq *droq)
+{
+       lio_dma_free(oct, droq->info_alloc_size, droq->info_base_addr,
+                    droq->info_list_dma);
+}
+
  static inline
  void *get_rbd(struct sk_buff *skb)
  {
@@ -359,22 +385,7 @@ void *get_rbd(struct sk_buff *skb)
  static inline u64
  lio_map_ring_info(struct octeon_droq *droq, u32 i)
  {
-       dma_addr_t dma_addr;
-       struct octeon_device *oct = droq->oct_dev;
-
-       dma_addr = dma_map_single(&oct->pci_dev->dev, &droq->info_list[i],
-                                 OCT_DROQ_INFO_SIZE, DMA_FROM_DEVICE);
-
-       WARN_ON(dma_mapping_error(&oct->pci_dev->dev, dma_addr));
-
-       return (u64)dma_addr;
-}
-
-static inline void
-lio_unmap_ring_info(struct pci_dev *pci_dev,
-                   u64 info_ptr, u32 size)
-{
-       dma_unmap_single(&pci_dev->dev, info_ptr, size, DMA_FROM_DEVICE);
+       return droq->info_list_dma + (i * sizeof(struct octeon_droq_info));
  }
  
  static inline u64
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h

index e739c715356283553f4ace131a251bc4b30d6de2..2269ff562d9562eede1d1d02c2e66ee3dcfed89b 100644 (file)
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -269,6 +269,7 @@ struct nicvf {
  #define        MAX_QUEUES_PER_QSET                     8
         struct queue_set        *qs;
         struct nicvf_cq_poll    *napi[8];
+       void                    *iommu_domain;
         u8                      vf_id;
         u8                      sqs_id;
         bool                    sqs_mode;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c

index 6feaa24bcfd42bb9647298a0b665e6bf3b11d496..24017588f5317107142897ca77aa0668046532d5 100644 (file)
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -16,6 +16,7 @@
  #include <linux/log2.h>
  #include <linux/prefetch.h>
  #include <linux/irq.h>
+#include <linux/iommu.h>
  
  #include "nic_reg.h"
  #include "nic.h"
@@ -525,7 +526,12 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev,
                         /* Get actual TSO descriptors and free them */
                         tso_sqe =
                          (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2);
+                       nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2,
+                                                tso_sqe->subdesc_cnt);
                         nicvf_put_sq_desc(sq, tso_sqe->subdesc_cnt + 1);
+               } else {
+                       nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr,
+                                                hdr->subdesc_cnt);
                 }
                 nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
                 prefetch(skb);
@@ -576,6 +582,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
  {
         struct sk_buff *skb;
         struct nicvf *nic = netdev_priv(netdev);
+       struct nicvf *snic = nic;
         int err = 0;
         int rq_idx;
  
@@ -592,7 +599,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
         if (err && !cqe_rx->rb_cnt)
                 return;
  
-       skb = nicvf_get_rcv_skb(nic, cqe_rx);
+       skb = nicvf_get_rcv_skb(snic, cqe_rx);
         if (!skb) {
                 netdev_dbg(nic->netdev, "Packet not received\n");
                 return;
@@ -1643,6 +1650,9 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
         if (!pass1_silicon(nic->pdev))
                 nic->hw_tso = true;
  
+       /* Get iommu domain for iova to physical addr conversion */
+       nic->iommu_domain = iommu_get_domain_for_dev(dev);
+
         pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
         if (sdevid == 0xA134)
                 nic->t88 = true;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c

index ac0390be3b126e957071bde64daebdd29b536c34..f13289f0d2386d09e348b13c310346d42e20b74d 100644 (file)
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -10,6 +10,7 @@
  #include <linux/netdevice.h>
  #include <linux/ip.h>
  #include <linux/etherdevice.h>
+#include <linux/iommu.h>
  #include <net/ip.h>
  #include <net/tso.h>
  
@@ -18,6 +19,16 @@
  #include "q_struct.h"
  #include "nicvf_queues.h"
  
+#define NICVF_PAGE_ORDER ((PAGE_SIZE <= 4096) ?  PAGE_ALLOC_COSTLY_ORDER : 0)
+
+static inline u64 nicvf_iova_to_phys(struct nicvf *nic, dma_addr_t dma_addr)
+{
+       /* Translation is installed only when IOMMU is present */
+       if (nic->iommu_domain)
+               return iommu_iova_to_phys(nic->iommu_domain, dma_addr);
+       return dma_addr;
+}
+
  static void nicvf_get_page(struct nicvf *nic)
  {
         if (!nic->rb_pageref || !nic->rb_page)
@@ -87,7 +98,7 @@ static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem)
  static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp,
                                          u32 buf_len, u64 **rbuf)
  {
-       int order = (PAGE_SIZE <= 4096) ?  PAGE_ALLOC_COSTLY_ORDER : 0;
+       int order = NICVF_PAGE_ORDER;
  
         /* Check if request can be accomodated in previous allocated page */
         if (nic->rb_page &&
@@ -97,22 +108,27 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp,
         }
  
         nicvf_get_page(nic);
-       nic->rb_page = NULL;
  
         /* Allocate a new page */
+       nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
+                                  order);
         if (!nic->rb_page) {
-               nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
-                                          order);
-               if (!nic->rb_page) {
-                       this_cpu_inc(nic->pnicvf->drv_stats->
-                                    rcv_buffer_alloc_failures);
-                       return -ENOMEM;
-               }
-               nic->rb_page_offset = 0;
+               this_cpu_inc(nic->pnicvf->drv_stats->rcv_buffer_alloc_failures);
+               return -ENOMEM;
         }
-
+       nic->rb_page_offset = 0;
  ret:
-       *rbuf = (u64 *)((u64)page_address(nic->rb_page) + nic->rb_page_offset);
+       /* HW will ensure data coherency, CPU sync not required */
+       *rbuf = (u64 *)((u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page,
+                                               nic->rb_page_offset, buf_len,
+                                               DMA_FROM_DEVICE,
+                                               DMA_ATTR_SKIP_CPU_SYNC));
+       if (dma_mapping_error(&nic->pdev->dev, (dma_addr_t)*rbuf)) {
+               if (!nic->rb_page_offset)
+                       __free_pages(nic->rb_page, order);
+               nic->rb_page = NULL;
+               return -ENOMEM;
+       }
         nic->rb_page_offset += buf_len;
  
         return 0;
@@ -158,16 +174,21 @@ static int  nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr,
         rbdr->dma_size = buf_size;
         rbdr->enable = true;
         rbdr->thresh = RBDR_THRESH;
+       rbdr->head = 0;
+       rbdr->tail = 0;
  
         nic->rb_page = NULL;
         for (idx = 0; idx < ring_len; idx++) {
                 err = nicvf_alloc_rcv_buffer(nic, GFP_KERNEL, RCV_FRAG_LEN,
                                              &rbuf);
-               if (err)
+               if (err) {
+                       /* To free already allocated and mapped ones */
+                       rbdr->tail = idx - 1;
                         return err;
+               }
  
                 desc = GET_RBDR_DESC(rbdr, idx);
-               desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN;
+               desc->buf_addr = (u64)rbuf >> NICVF_RCV_BUF_ALIGN;
         }
  
         nicvf_get_page(nic);
@@ -179,7 +200,7 @@ static int  nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr,
  static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
  {
         int head, tail;
-       u64 buf_addr;
+       u64 buf_addr, phys_addr;
         struct rbdr_entry_t *desc;
  
         if (!rbdr)
@@ -192,18 +213,26 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
         head = rbdr->head;
         tail = rbdr->tail;
  
-       /* Free SKBs */
+       /* Release page references */
         while (head != tail) {
                 desc = GET_RBDR_DESC(rbdr, head);
-               buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN;
-               put_page(virt_to_page(phys_to_virt(buf_addr)));
+               buf_addr = ((u64)desc->buf_addr) << NICVF_RCV_BUF_ALIGN;
+               phys_addr = nicvf_iova_to_phys(nic, buf_addr);
+               dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN,
+                                    DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+               if (phys_addr)
+                       put_page(virt_to_page(phys_to_virt(phys_addr)));
                 head++;
                 head &= (rbdr->dmem.q_len - 1);
         }
-       /* Free SKB of tail desc */
+       /* Release buffer of tail desc */
         desc = GET_RBDR_DESC(rbdr, tail);
-       buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN;
-       put_page(virt_to_page(phys_to_virt(buf_addr)));
+       buf_addr = ((u64)desc->buf_addr) << NICVF_RCV_BUF_ALIGN;
+       phys_addr = nicvf_iova_to_phys(nic, buf_addr);
+       dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN,
+                            DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+       if (phys_addr)
+               put_page(virt_to_page(phys_to_virt(phys_addr)));
  
         /* Free RBDR ring */
         nicvf_free_q_desc_mem(nic, &rbdr->dmem);
@@ -250,7 +279,7 @@ refill:
                         break;
  
                 desc = GET_RBDR_DESC(rbdr, tail);
-               desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN;
+               desc->buf_addr = (u64)rbuf >> NICVF_RCV_BUF_ALIGN;
                 refill_rb_cnt--;
                 new_rb++;
         }
@@ -361,9 +390,29 @@ static int nicvf_init_snd_queue(struct nicvf *nic,
         return 0;
  }
  
+void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq,
+                             int hdr_sqe, u8 subdesc_cnt)
+{
+       u8 idx;
+       struct sq_gather_subdesc *gather;
+
+       /* Unmap DMA mapped skb data buffers */
+       for (idx = 0; idx < subdesc_cnt; idx++) {
+               hdr_sqe++;
+               hdr_sqe &= (sq->dmem.q_len - 1);
+               gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, hdr_sqe);
+               /* HW will ensure data coherency, CPU sync not required */
+               dma_unmap_page_attrs(&nic->pdev->dev, gather->addr,
+                                    gather->size, DMA_TO_DEVICE,
+                                    DMA_ATTR_SKIP_CPU_SYNC);
+       }
+}
+
  static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
  {
         struct sk_buff *skb;
+       struct sq_hdr_subdesc *hdr;
+       struct sq_hdr_subdesc *tso_sqe;
  
         if (!sq)
                 return;
@@ -379,8 +428,22 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
         smp_rmb();
         while (sq->head != sq->tail) {
                 skb = (struct sk_buff *)sq->skbuff[sq->head];
-               if (skb)
-                       dev_kfree_skb_any(skb);
+               if (!skb)
+                       goto next;
+               hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head);
+               /* Check for dummy descriptor used for HW TSO offload on 88xx */
+               if (hdr->dont_send) {
+                       /* Get actual TSO descriptors and unmap them */
+                       tso_sqe =
+                        (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2);
+                       nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2,
+                                                tso_sqe->subdesc_cnt);
+               } else {
+                       nicvf_unmap_sndq_buffers(nic, sq, sq->head,
+                                                hdr->subdesc_cnt);
+               }
+               dev_kfree_skb_any(skb);
+next:
                 sq->head++;
                 sq->head &= (sq->dmem.q_len - 1);
         }
@@ -559,9 +622,11 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
         nicvf_send_msg_to_pf(nic, &mbx);
  
         if (!nic->sqs_mode && (qidx == 0)) {
-               /* Enable checking L3/L4 length and TCP/UDP checksums */
+               /* Enable checking L3/L4 length and TCP/UDP checksums
+                * Also allow IPv6 pkts with zero UDP checksum.
+                */
                 nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0,
-                                     (BIT(24) | BIT(23) | BIT(21)));
+                                     (BIT(24) | BIT(23) | BIT(21) | BIT(20)));
                 nicvf_config_vlan_stripping(nic, nic->netdev->features);
         }
  
@@ -882,6 +947,14 @@ static inline int nicvf_get_sq_desc(struct snd_queue *sq, int desc_cnt)
         return qentry;
  }
  
+/* Rollback to previous tail pointer when descriptors not used */
+static inline void nicvf_rollback_sq_desc(struct snd_queue *sq,
+                                         int qentry, int desc_cnt)
+{
+       sq->tail = qentry;
+       atomic_add(desc_cnt, &sq->free_cnt);
+}
+
  /* Free descriptor back to SQ for future use */
  void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt)
  {
@@ -1207,8 +1280,9 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
                         struct sk_buff *skb, u8 sq_num)
  {
         int i, size;
-       int subdesc_cnt, tso_sqe = 0;
+       int subdesc_cnt, hdr_sqe = 0;
         int qentry;
+       u64 dma_addr;
  
         subdesc_cnt = nicvf_sq_subdesc_required(nic, skb);
         if (subdesc_cnt > atomic_read(&sq->free_cnt))
@@ -1223,12 +1297,21 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
         /* Add SQ header subdesc */
         nicvf_sq_add_hdr_subdesc(nic, sq, qentry, subdesc_cnt - 1,
                                  skb, skb->len);
-       tso_sqe = qentry;
+       hdr_sqe = qentry;
  
         /* Add SQ gather subdescs */
         qentry = nicvf_get_nxt_sqentry(sq, qentry);
         size = skb_is_nonlinear(skb) ? skb_headlen(skb) : skb->len;
-       nicvf_sq_add_gather_subdesc(sq, qentry, size, virt_to_phys(skb->data));
+       /* HW will ensure data coherency, CPU sync not required */
+       dma_addr = dma_map_page_attrs(&nic->pdev->dev, virt_to_page(skb->data),
+                                     offset_in_page(skb->data), size,
+                                     DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+       if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
+               nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt);
+               return 0;
+       }
+
+       nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr);
  
         /* Check for scattered buffer */
         if (!skb_is_nonlinear(skb))
@@ -1241,15 +1324,26 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
  
                 qentry = nicvf_get_nxt_sqentry(sq, qentry);
                 size = skb_frag_size(frag);
-               nicvf_sq_add_gather_subdesc(sq, qentry, size,
-                                           virt_to_phys(
-                                           skb_frag_address(frag)));
+               dma_addr = dma_map_page_attrs(&nic->pdev->dev,
+                                             skb_frag_page(frag),
+                                             frag->page_offset, size,
+                                             DMA_TO_DEVICE,
+                                             DMA_ATTR_SKIP_CPU_SYNC);
+               if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
+                       /* Free entire chain of mapped buffers
+                        * here 'i' = frags mapped + above mapped skb->data
+                        */
+                       nicvf_unmap_sndq_buffers(nic, sq, hdr_sqe, i);
+                       nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt);
+                       return 0;
+               }
+               nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr);
         }
  
  doorbell:
         if (nic->t88 && skb_shinfo(skb)->gso_size) {
                 qentry = nicvf_get_nxt_sqentry(sq, qentry);
-               nicvf_sq_add_cqe_subdesc(sq, qentry, tso_sqe, skb);
+               nicvf_sq_add_cqe_subdesc(sq, qentry, hdr_sqe, skb);
         }
  
         nicvf_sq_doorbell(nic, skb, sq_num, subdesc_cnt);
@@ -1282,6 +1376,7 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
         int offset;
         u16 *rb_lens = NULL;
         u64 *rb_ptrs = NULL;
+       u64 phys_addr;
  
         rb_lens = (void *)cqe_rx + (3 * sizeof(u64));
         /* Except 88xx pass1 on all other chips CQE_RX2_S is added to
@@ -1296,15 +1391,23 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
         else
                 rb_ptrs = (void *)cqe_rx + (7 * sizeof(u64));
  
-       netdev_dbg(nic->netdev, "%s rb_cnt %d rb0_ptr %llx rb0_sz %d\n",
-                  __func__, cqe_rx->rb_cnt, cqe_rx->rb0_ptr, cqe_rx->rb0_sz);
-
         for (frag = 0; frag < cqe_rx->rb_cnt; frag++) {
                 payload_len = rb_lens[frag_num(frag)];
+               phys_addr = nicvf_iova_to_phys(nic, *rb_ptrs);
+               if (!phys_addr) {
+                       if (skb)
+                               dev_kfree_skb_any(skb);
+                       return NULL;
+               }
+
                 if (!frag) {
                         /* First fragment */
+                       dma_unmap_page_attrs(&nic->pdev->dev,
+                                            *rb_ptrs - cqe_rx->align_pad,
+                                            RCV_FRAG_LEN, DMA_FROM_DEVICE,
+                                            DMA_ATTR_SKIP_CPU_SYNC);
                         skb = nicvf_rb_ptr_to_skb(nic,
-                                                 *rb_ptrs - cqe_rx->align_pad,
+                                                 phys_addr - cqe_rx->align_pad,
                                                   payload_len);
                         if (!skb)
                                 return NULL;
@@ -1312,8 +1415,11 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
                         skb_put(skb, payload_len);
                 } else {
                         /* Add fragments */
-                       page = virt_to_page(phys_to_virt(*rb_ptrs));
-                       offset = phys_to_virt(*rb_ptrs) - page_address(page);
+                       dma_unmap_page_attrs(&nic->pdev->dev, *rb_ptrs,
+                                            RCV_FRAG_LEN, DMA_FROM_DEVICE,
+                                            DMA_ATTR_SKIP_CPU_SYNC);
+                       page = virt_to_page(phys_to_virt(phys_addr));
+                       offset = phys_to_virt(phys_addr) - page_address(page);
                         skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
                                         offset, payload_len, RCV_FRAG_LEN);
                 }
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h

index 5cb84da99a2de5bc594464db8759c8359d20447f..10cb4b84625b14a0446996776689ae6733f4ccee 100644 (file)
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -87,7 +87,7 @@
  #define RCV_BUF_COUNT          (1ULL << (RBDR_SIZE + 13))
  #define MAX_RCV_BUF_COUNT      (1ULL << (RBDR_SIZE6 + 13))
  #define RBDR_THRESH            (RCV_BUF_COUNT / 2)
-#define DMA_BUFFER_LEN         2048 /* In multiples of 128bytes */
+#define DMA_BUFFER_LEN         1536 /* In multiples of 128bytes */
  #define RCV_FRAG_LEN    (SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \
                          SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
  
@@ -301,6 +301,8 @@ struct queue_set {
  
  #define        CQ_ERR_MASK     (CQ_WR_FULL | CQ_WR_DISABLE | CQ_WR_FAULT)
  
+void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq,
+                             int hdr_sqe, u8 subdesc_cnt);
  void nicvf_config_vlan_stripping(struct nicvf *nic,
                                  netdev_features_t features);
  int nicvf_set_qset_resources(struct nicvf *nic);
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c

index 4c8e8cf730bbc2ee1d488d42d9d42163d442fb75..a0ca68ce3fbb164ea6e6a2c75a36a8c1ae54172d 100644 (file)
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -123,14 +123,45 @@ static int bgx_poll_reg(struct bgx *bgx, u8 lmac, u64 reg, u64 mask, bool zero)
         return 1;
  }
  
+static int max_bgx_per_node;
+static void set_max_bgx_per_node(struct pci_dev *pdev)
+{
+       u16 sdevid;
+
+       if (max_bgx_per_node)
+               return;
+
+       pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid);
+       switch (sdevid) {
+       case PCI_SUBSYS_DEVID_81XX_BGX:
+       case PCI_SUBSYS_DEVID_81XX_RGX:
+               max_bgx_per_node = MAX_BGX_PER_CN81XX;
+               break;
+       case PCI_SUBSYS_DEVID_83XX_BGX:
+               max_bgx_per_node = MAX_BGX_PER_CN83XX;
+               break;
+       case PCI_SUBSYS_DEVID_88XX_BGX:
+       default:
+               max_bgx_per_node = MAX_BGX_PER_CN88XX;
+               break;
+       }
+}
+
+static struct bgx *get_bgx(int node, int bgx_idx)
+{
+       int idx = (node * max_bgx_per_node) + bgx_idx;
+
+       return bgx_vnic[idx];
+}
+
  /* Return number of BGX present in HW */
  unsigned bgx_get_map(int node)
  {
         int i;
         unsigned map = 0;
  
-       for (i = 0; i < MAX_BGX_PER_NODE; i++) {
-               if (bgx_vnic[(node * MAX_BGX_PER_NODE) + i])
+       for (i = 0; i < max_bgx_per_node; i++) {
+               if (bgx_vnic[(node * max_bgx_per_node) + i])
                         map |= (1 << i);
         }
  
@@ -143,7 +174,7 @@ int bgx_get_lmac_count(int node, int bgx_idx)
  {
         struct bgx *bgx;
  
-       bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+       bgx = get_bgx(node, bgx_idx);
         if (bgx)
                 return bgx->lmac_count;
  
@@ -158,7 +189,7 @@ void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status)
         struct bgx *bgx;
         struct lmac *lmac;
  
-       bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+       bgx = get_bgx(node, bgx_idx);
         if (!bgx)
                 return;
  
@@ -172,7 +203,7 @@ EXPORT_SYMBOL(bgx_get_lmac_link_state);
  
  const u8 *bgx_get_lmac_mac(int node, int bgx_idx, int lmacid)
  {
-       struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+       struct bgx *bgx = get_bgx(node, bgx_idx);
  
         if (bgx)
                 return bgx->lmac[lmacid].mac;
@@ -183,7 +214,7 @@ EXPORT_SYMBOL(bgx_get_lmac_mac);
  
  void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac)
  {
-       struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+       struct bgx *bgx = get_bgx(node, bgx_idx);
  
         if (!bgx)
                 return;
@@ -194,7 +225,7 @@ EXPORT_SYMBOL(bgx_set_lmac_mac);
  
  void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
  {
-       struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+       struct bgx *bgx = get_bgx(node, bgx_idx);
         struct lmac *lmac;
         u64 cfg;
  
@@ -217,7 +248,7 @@ EXPORT_SYMBOL(bgx_lmac_rx_tx_enable);
  void bgx_lmac_get_pfc(int node, int bgx_idx, int lmacid, void *pause)
  {
         struct pfc *pfc = (struct pfc *)pause;
-       struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+       struct bgx *bgx = get_bgx(node, bgx_idx);
         struct lmac *lmac;
         u64 cfg;
  
@@ -237,7 +268,7 @@ EXPORT_SYMBOL(bgx_lmac_get_pfc);
  void bgx_lmac_set_pfc(int node, int bgx_idx, int lmacid, void *pause)
  {
         struct pfc *pfc = (struct pfc *)pause;
-       struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+       struct bgx *bgx = get_bgx(node, bgx_idx);
         struct lmac *lmac;
         u64 cfg;
  
@@ -369,7 +400,7 @@ u64 bgx_get_rx_stats(int node, int bgx_idx, int lmac, int idx)
  {
         struct bgx *bgx;
  
-       bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+       bgx = get_bgx(node, bgx_idx);
         if (!bgx)
                 return 0;
  
@@ -383,7 +414,7 @@ u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx)
  {
         struct bgx *bgx;
  
-       bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+       bgx = get_bgx(node, bgx_idx);
         if (!bgx)
                 return 0;
  
@@ -411,7 +442,7 @@ void bgx_lmac_internal_loopback(int node, int bgx_idx,
         struct lmac *lmac;
         u64    cfg;
  
-       bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+       bgx = get_bgx(node, bgx_idx);
         if (!bgx)
                 return;
  
@@ -1011,12 +1042,6 @@ static void bgx_print_qlm_mode(struct bgx *bgx, u8 lmacid)
                         dev_info(dev, "%s: 40G_KR4\n", (char *)str);
                 break;
         case BGX_MODE_QSGMII:
-               if ((lmacid == 0) &&
-                   (bgx_get_lane2sds_cfg(bgx, lmac) != lmacid))
-                       return;
-               if ((lmacid == 2) &&
-                   (bgx_get_lane2sds_cfg(bgx, lmac) == lmacid))
-                       return;
                 dev_info(dev, "%s: QSGMII\n", (char *)str);
                 break;
         case BGX_MODE_RGMII:
@@ -1334,11 +1359,13 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                 goto err_release_regions;
         }
  
+       set_max_bgx_per_node(pdev);
+
         pci_read_config_word(pdev, PCI_DEVICE_ID, &sdevid);
         if (sdevid != PCI_DEVICE_ID_THUNDER_RGX) {
                 bgx->bgx_id = (pci_resource_start(pdev,
                         PCI_CFG_REG_BAR_NUM) >> 24) & BGX_ID_MASK;
-               bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_NODE;
+               bgx->bgx_id += nic_get_node_id(pdev) * max_bgx_per_node;
                 bgx->max_lmac = MAX_LMAC_PER_BGX;
                 bgx_vnic[bgx->bgx_id] = bgx;
         } else {
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h

index a60f189429bb658cb5ab8383982f86ddd9090fc3..6b7fe6fdd13b9b27b4a1573e8c7b3869b17bcc19 100644 (file)
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -16,13 +16,13 @@
  /* Subsystem device IDs */
  #define PCI_SUBSYS_DEVID_88XX_BGX              0xA126
  #define PCI_SUBSYS_DEVID_81XX_BGX              0xA226
+#define PCI_SUBSYS_DEVID_81XX_RGX              0xA254
  #define PCI_SUBSYS_DEVID_83XX_BGX              0xA326
  
  #define    MAX_BGX_THUNDER                     8 /* Max 2 nodes, 4 per node */
  #define    MAX_BGX_PER_CN88XX                  2
  #define    MAX_BGX_PER_CN81XX                  3 /* 2 BGXs + 1 RGX */
  #define    MAX_BGX_PER_CN83XX                  4
-#define    MAX_BGX_PER_NODE                    4
  #define    MAX_LMAC_PER_BGX                    4
  #define    MAX_BGX_CHANS_PER_LMAC              16
  #define    MAX_DMAC_PER_LMAC                   8
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c

index 30e855004c57592f9ab6c0cea2eb73f63b59b7ca..02dd5246dfae9a99b20f2bb4b2b13185d3239a3c 100644 (file)
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -4939,8 +4939,9 @@ static int
  __be_cmd_set_logical_link_config(struct be_adapter *adapter,
                                  int link_state, int version, u8 domain)
  {
-       struct be_mcc_wrb *wrb;
         struct be_cmd_req_set_ll_link *req;
+       struct be_mcc_wrb *wrb;
+       u32 link_config = 0;
         int status;
  
         mutex_lock(&adapter->mcc_lock);
@@ -4962,10 +4963,12 @@ __be_cmd_set_logical_link_config(struct be_adapter *adapter,
  
         if (link_state == IFLA_VF_LINK_STATE_ENABLE ||
             link_state == IFLA_VF_LINK_STATE_AUTO)
-               req->link_config |= PLINK_ENABLE;
+               link_config |= PLINK_ENABLE;
  
         if (link_state == IFLA_VF_LINK_STATE_AUTO)
-               req->link_config |= PLINK_TRACK;
+               link_config |= PLINK_TRACK;
+
+       req->link_config = cpu_to_le32(link_config);
  
         status = be_mcc_notify_wait(adapter);
  err:
diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c

index 992ebe973d25bfbccff7b5c42dc1801ea41fc9ea..f819843e2bae73564e090b8fd9c7a8dfcec1fa12 100644 (file)
--- a/drivers/net/ethernet/ezchip/nps_enet.c
+++ b/drivers/net/ethernet/ezchip/nps_enet.c
@@ -189,11 +189,9 @@ static int nps_enet_poll(struct napi_struct *napi, int budget)
  
         nps_enet_tx_handler(ndev);
         work_done = nps_enet_rx_handler(ndev);
-       if (work_done < budget) {
+       if ((work_done < budget) && napi_complete_done(napi, work_done)) {
                 u32 buf_int_enable_value = 0;
  
-               napi_complete_done(napi, work_done);
-
                 /* set tx_done and rx_rdy bits */
                 buf_int_enable_value |= NPS_ENET_ENABLE << RX_RDY_SHIFT;
                 buf_int_enable_value |= NPS_ENET_ENABLE << TX_DONE_SHIFT;
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c

index 928b0df2b8e033e2b784759e32a0218e0b7e16f2..ade6b3e4ed1326a42aa39d52b2465f1b271c02f8 100644 (file)
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -28,8 +28,10 @@
  #include <linux/io.h>
  #include <linux/module.h>
  #include <linux/netdevice.h>
+#include <linux/of.h>
  #include <linux/phy.h>
  #include <linux/platform_device.h>
+#include <linux/property.h>
  #include <net/ip.h>
  #include <net/ncsi.h>
  
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c

index 3239d27143b935dc0056490b32f700093163c74a..bdd8cdd732fb588930f2cc085b7a0fddd9f1a263 100644 (file)
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -82,9 +82,12 @@ void hns_mac_get_link_status(struct hns_mac_cb *mac_cb, u32 *link_status)
         else
                 *link_status = 0;
  
-       ret = mac_cb->dsaf_dev->misc_op->get_sfp_prsnt(mac_cb, &sfp_prsnt);
-       if (!ret)
-               *link_status = *link_status && sfp_prsnt;
+       if (mac_cb->media_type == HNAE_MEDIA_TYPE_FIBER) {
+               ret = mac_cb->dsaf_dev->misc_op->get_sfp_prsnt(mac_cb,
+                                                              &sfp_prsnt);
+               if (!ret)
+                       *link_status = *link_status && sfp_prsnt;
+       }
  
         mac_cb->link = *link_status;
  }
@@ -855,7 +858,7 @@ static int  hns_mac_get_info(struct hns_mac_cb *mac_cb)
                 of_node_put(np);
  
                 np = of_parse_phandle(to_of_node(mac_cb->fw_port),
-                                       "serdes-syscon", 0);
+                                     "serdes-syscon", 0);
                 syscon = syscon_node_to_regmap(np);
                 of_node_put(np);
                 if (IS_ERR_OR_NULL(syscon)) {
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c

index 90dbda7926144a41120d18c28a2c7d033f245f8c..403ea9db6dbd15a6384ed845480fe8cf18c52163 100644 (file)
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -1519,6 +1519,7 @@ static void hns_dsaf_set_mac_key(
         mac_key->high.bits.mac_3 = addr[3];
         mac_key->low.bits.mac_4 = addr[4];
         mac_key->low.bits.mac_5 = addr[5];
+       mac_key->low.bits.port_vlan = 0;
         dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_VLAN_M,
                        DSAF_TBL_TCAM_KEY_VLAN_S, vlan_id);
         dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_PORT_M,
@@ -2924,10 +2925,11 @@ void hns_dsaf_set_promisc_tcam(struct dsaf_device *dsaf_dev,
         /* find the tcam entry index for promisc */
         entry_index = dsaf_promisc_tcam_entry(port);
  
+       memset(&tbl_tcam_data, 0, sizeof(tbl_tcam_data));
+       memset(&tbl_tcam_mask, 0, sizeof(tbl_tcam_mask));
+
         /* config key mask */
         if (enable) {
-               memset(&tbl_tcam_data, 0, sizeof(tbl_tcam_data));
-               memset(&tbl_tcam_mask, 0, sizeof(tbl_tcam_mask));
                 dsaf_set_field(tbl_tcam_data.low.bits.port_vlan,
                                DSAF_TBL_TCAM_KEY_PORT_M,
                                DSAF_TBL_TCAM_KEY_PORT_S, port);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c

index a2c22d084ce90cb03337ee09e4b4f1b723046ef4..e13aa064a8e943da7c9538e88bc425f299e944ca 100644 (file)
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -461,6 +461,32 @@ int hns_mac_get_sfp_prsnt(struct hns_mac_cb *mac_cb, int *sfp_prsnt)
         return 0;
  }
  
+int hns_mac_get_sfp_prsnt_acpi(struct hns_mac_cb *mac_cb, int *sfp_prsnt)
+{
+       union acpi_object *obj;
+       union acpi_object obj_args, argv4;
+
+       obj_args.integer.type = ACPI_TYPE_INTEGER;
+       obj_args.integer.value = mac_cb->mac_id;
+
+       argv4.type = ACPI_TYPE_PACKAGE,
+       argv4.package.count = 1,
+       argv4.package.elements = &obj_args,
+
+       obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dev),
+                               hns_dsaf_acpi_dsm_uuid, 0,
+                               HNS_OP_GET_SFP_STAT_FUNC, &argv4);
+
+       if (!obj || obj->type != ACPI_TYPE_INTEGER)
+               return -ENODEV;
+
+       *sfp_prsnt = obj->integer.value;
+
+       ACPI_FREE(obj);
+
+       return 0;
+}
+
  /**
   * hns_mac_config_sds_loopback - set loop back for serdes
   * @mac_cb: mac control block
@@ -592,7 +618,7 @@ struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device *dsaf_dev)
                 misc_op->hns_dsaf_roce_srst = hns_dsaf_roce_srst_acpi;
  
                 misc_op->get_phy_if = hns_mac_get_phy_if_acpi;
-               misc_op->get_sfp_prsnt = hns_mac_get_sfp_prsnt;
+               misc_op->get_sfp_prsnt = hns_mac_get_sfp_prsnt_acpi;
  
                 misc_op->cfg_serdes_loopback = hns_mac_config_sds_loopback_acpi;
         } else {
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c

index 275c2e2349ad92de224df1939769238d83ea5f3b..c44036d5761a4cbec301afb63393d49b47bac867 100644 (file)
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -2589,8 +2589,6 @@ static int emac_dt_mdio_probe(struct emac_instance *dev)
  static int emac_dt_phy_connect(struct emac_instance *dev,
                                struct device_node *phy_handle)
  {
-       int res;
-
         dev->phy.def = devm_kzalloc(&dev->ofdev->dev, sizeof(*dev->phy.def),
                                     GFP_KERNEL);
         if (!dev->phy.def)
@@ -2617,7 +2615,7 @@ static int emac_dt_phy_probe(struct emac_instance *dev)
  {
         struct device_node *np = dev->ofdev->dev.of_node;
         struct device_node *phy_handle;
-       int res = 0;
+       int res = 1;
  
         phy_handle = of_parse_phandle(np, "phy-handle", 0);
  
@@ -2714,13 +2712,24 @@ static int emac_init_phy(struct emac_instance *dev)
         if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) {
                 int res = emac_dt_phy_probe(dev);
  
-               mutex_unlock(&emac_phy_map_lock);
-               if (!res)
+               switch (res) {
+               case 1:
+                       /* No phy-handle property configured.
+                        * Continue with the existing phy probe
+                        * and setup code.
+                        */
+                       break;
+
+               case 0:
+                       mutex_unlock(&emac_phy_map_lock);
                         goto init_phy;
  
-               dev_err(&dev->ofdev->dev, "failed to attach dt phy (%d).\n",
-                       res);
-               return res;
+               default:
+                       mutex_unlock(&emac_phy_map_lock);
+                       dev_err(&dev->ofdev->dev, "failed to attach dt phy (%d).\n",
+                               res);
+                       return res;
+               }
         }
  
         if (dev->phy_address != 0xffffffff)
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c

index 9198e6bd5160f9559075f348a6d595252ea88819..b23d6545f83562b8d9d12b4a885514dfd5257743 100644 (file)
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -404,7 +404,7 @@ static int ibmvnic_open(struct net_device *netdev)
         send_map_query(adapter);
         for (i = 0; i < rxadd_subcrqs; i++) {
                 init_rx_pool(adapter, &adapter->rx_pool[i],
-                            IBMVNIC_BUFFS_PER_POOL, i,
+                            adapter->req_rx_add_entries_per_subcrq, i,
                              be64_to_cpu(size_array[i]), 1);
                 if (alloc_rx_pool(adapter, &adapter->rx_pool[i])) {
                         dev_err(dev, "Couldn't alloc rx pool\n");
@@ -419,23 +419,23 @@ static int ibmvnic_open(struct net_device *netdev)
         for (i = 0; i < tx_subcrqs; i++) {
                 tx_pool = &adapter->tx_pool[i];
                 tx_pool->tx_buff =
-                   kcalloc(adapter->max_tx_entries_per_subcrq,
+                   kcalloc(adapter->req_tx_entries_per_subcrq,
                             sizeof(struct ibmvnic_tx_buff), GFP_KERNEL);
                 if (!tx_pool->tx_buff)
                         goto tx_pool_alloc_failed;
  
                 if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff,
-                                        adapter->max_tx_entries_per_subcrq *
+                                        adapter->req_tx_entries_per_subcrq *
                                          adapter->req_mtu))
                         goto tx_ltb_alloc_failed;
  
                 tx_pool->free_map =
-                   kcalloc(adapter->max_tx_entries_per_subcrq,
+                   kcalloc(adapter->req_tx_entries_per_subcrq,
                             sizeof(int), GFP_KERNEL);
                 if (!tx_pool->free_map)
                         goto tx_fm_alloc_failed;
  
-               for (j = 0; j < adapter->max_tx_entries_per_subcrq; j++)
+               for (j = 0; j < adapter->req_tx_entries_per_subcrq; j++)
                         tx_pool->free_map[j] = j;
  
                 tx_pool->consumer_index = 0;
@@ -705,6 +705,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
         u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req;
         struct device *dev = &adapter->vdev->dev;
         struct ibmvnic_tx_buff *tx_buff = NULL;
+       struct ibmvnic_sub_crq_queue *tx_scrq;
         struct ibmvnic_tx_pool *tx_pool;
         unsigned int tx_send_failed = 0;
         unsigned int tx_map_failed = 0;
@@ -724,6 +725,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
         int ret = 0;
  
         tx_pool = &adapter->tx_pool[queue_num];
+       tx_scrq = adapter->tx_scrq[queue_num];
         txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb));
         handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
                                    be32_to_cpu(adapter->login_rsp_buf->
@@ -744,7 +746,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
  
         tx_pool->consumer_index =
             (tx_pool->consumer_index + 1) %
-               adapter->max_tx_entries_per_subcrq;
+               adapter->req_tx_entries_per_subcrq;
  
         tx_buff = &tx_pool->tx_buff[index];
         tx_buff->skb = skb;
@@ -817,7 +819,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
  
                 if (tx_pool->consumer_index == 0)
                         tx_pool->consumer_index =
-                               adapter->max_tx_entries_per_subcrq - 1;
+                               adapter->req_tx_entries_per_subcrq - 1;
                 else
                         tx_pool->consumer_index--;
  
@@ -826,6 +828,14 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
                 ret = NETDEV_TX_BUSY;
                 goto out;
         }
+
+       atomic_inc(&tx_scrq->used);
+
+       if (atomic_read(&tx_scrq->used) >= adapter->req_tx_entries_per_subcrq) {
+               netdev_info(netdev, "Stopping queue %d\n", queue_num);
+               netif_stop_subqueue(netdev, queue_num);
+       }
+
         tx_packets++;
         tx_bytes += skb->len;
         txq->trans_start = jiffies;
@@ -1213,6 +1223,7 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter
         scrq->adapter = adapter;
         scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs);
         scrq->cur = 0;
+       atomic_set(&scrq->used, 0);
         scrq->rx_skb_top = NULL;
         spin_lock_init(&scrq->lock);
  
@@ -1246,6 +1257,7 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
                                 release_sub_crq_queue(adapter,
                                                       adapter->tx_scrq[i]);
                         }
+               kfree(adapter->tx_scrq);
                 adapter->tx_scrq = NULL;
         }
  
@@ -1258,6 +1270,7 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
                                 release_sub_crq_queue(adapter,
                                                       adapter->rx_scrq[i]);
                         }
+               kfree(adapter->rx_scrq);
                 adapter->rx_scrq = NULL;
         }
  }
@@ -1355,14 +1368,28 @@ restart_loop:
                                                  DMA_TO_DEVICE);
                         }
  
-                       if (txbuff->last_frag)
+                       if (txbuff->last_frag) {
+                               atomic_dec(&scrq->used);
+
+                               if (atomic_read(&scrq->used) <=
+                                   (adapter->req_tx_entries_per_subcrq / 2) &&
+                                   netif_subqueue_stopped(adapter->netdev,
+                                                          txbuff->skb)) {
+                                       netif_wake_subqueue(adapter->netdev,
+                                                           scrq->pool_index);
+                                       netdev_dbg(adapter->netdev,
+                                                  "Started queue %d\n",
+                                                  scrq->pool_index);
+                               }
+
                                 dev_kfree_skb_any(txbuff->skb);
+                       }
  
                         adapter->tx_pool[pool].free_map[adapter->tx_pool[pool].
                                                      producer_index] = index;
                         adapter->tx_pool[pool].producer_index =
                             (adapter->tx_pool[pool].producer_index + 1) %
-                           adapter->max_tx_entries_per_subcrq;
+                           adapter->req_tx_entries_per_subcrq;
                 }
                 /* remove tx_comp scrq*/
                 next->tx_comp.first = 0;
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h

index 422824f1f42a8accdbbe7a97c70baa9882fbcce8..1993b42666f73d659773b6b88bcd8e8552ac97b7 100644 (file)
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -863,6 +863,7 @@ struct ibmvnic_sub_crq_queue {
         spinlock_t lock;
         struct sk_buff *rx_skb_top;
         struct ibmvnic_adapter *adapter;
+       atomic_t used;
  };
  
  struct ibmvnic_long_term_buff {
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c

index 2175cced402f7fe84dd260eecc5f3f0ab712132a..e9af89ad039c6f0e227878b9de85ea7819cd19d9 100644 (file)
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -6274,8 +6274,8 @@ static int e1000e_pm_freeze(struct device *dev)
                 /* Quiesce the device without resetting the hardware */
                 e1000e_down(adapter, false);
                 e1000_free_irq(adapter);
-               e1000e_reset_interrupt_capability(adapter);
         }
+       e1000e_reset_interrupt_capability(adapter);
  
         /* Allow time for pending master requests to run */
         e1000e_disable_pcie_master(&adapter->hw);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c

index e8a8351c8ea998a141bd8fb5f27d619b5b477b67..82a95cc2c8ee386c725dfd01e5367bc95e26ca0a 100644 (file)
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -4438,8 +4438,12 @@ static void i40e_napi_enable_all(struct i40e_vsi *vsi)
         if (!vsi->netdev)
                 return;
  
-       for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++)
-               napi_enable(&vsi->q_vectors[q_idx]->napi);
+       for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) {
+               struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+               if (q_vector->rx.ring || q_vector->tx.ring)
+                       napi_enable(&q_vector->napi);
+       }
  }
  
  /**
@@ -4453,8 +4457,12 @@ static void i40e_napi_disable_all(struct i40e_vsi *vsi)
         if (!vsi->netdev)
                 return;
  
-       for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++)
-               napi_disable(&vsi->q_vectors[q_idx]->napi);
+       for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) {
+               struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+               if (q_vector->rx.ring || q_vector->tx.ring)
+                       napi_disable(&q_vector->napi);
+       }
  }
  
  /**
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c

index 9e757684816d48b903f62cdac2d6a1123e6c3305..93949139e62cf92a7e593b36d6a7399edbd1b6e6 100644 (file)
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -613,7 +613,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
         struct mtk_mac *mac = netdev_priv(dev);
         struct mtk_eth *eth = mac->hw;
         struct mtk_tx_dma *itxd, *txd;
-       struct mtk_tx_buf *tx_buf;
+       struct mtk_tx_buf *itx_buf, *tx_buf;
         dma_addr_t mapped_addr;
         unsigned int nr_frags;
         int i, n_desc = 1;
@@ -627,8 +627,8 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
         fport = (mac->id + 1) << TX_DMA_FPORT_SHIFT;
         txd4 |= fport;
  
-       tx_buf = mtk_desc_to_tx_buf(ring, itxd);
-       memset(tx_buf, 0, sizeof(*tx_buf));
+       itx_buf = mtk_desc_to_tx_buf(ring, itxd);
+       memset(itx_buf, 0, sizeof(*itx_buf));
  
         if (gso)
                 txd4 |= TX_DMA_TSO;
@@ -647,9 +647,11 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
                 return -ENOMEM;
  
         WRITE_ONCE(itxd->txd1, mapped_addr);
-       tx_buf->flags |= MTK_TX_FLAGS_SINGLE0;
-       dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
-       dma_unmap_len_set(tx_buf, dma_len0, skb_headlen(skb));
+       itx_buf->flags |= MTK_TX_FLAGS_SINGLE0;
+       itx_buf->flags |= (!mac->id) ? MTK_TX_FLAGS_FPORT0 :
+                         MTK_TX_FLAGS_FPORT1;
+       dma_unmap_addr_set(itx_buf, dma_addr0, mapped_addr);
+       dma_unmap_len_set(itx_buf, dma_len0, skb_headlen(skb));
  
         /* TX SG offload */
         txd = itxd;
@@ -685,11 +687,13 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
                                                last_frag * TX_DMA_LS0));
                         WRITE_ONCE(txd->txd4, fport);
  
-                       tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC;
                         tx_buf = mtk_desc_to_tx_buf(ring, txd);
                         memset(tx_buf, 0, sizeof(*tx_buf));
-
+                       tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC;
                         tx_buf->flags |= MTK_TX_FLAGS_PAGE0;
+                       tx_buf->flags |= (!mac->id) ? MTK_TX_FLAGS_FPORT0 :
+                                        MTK_TX_FLAGS_FPORT1;
+
                         dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
                         dma_unmap_len_set(tx_buf, dma_len0, frag_map_size);
                         frag_size -= frag_map_size;
@@ -698,7 +702,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
         }
  
         /* store skb to cleanup */
-       tx_buf->skb = skb;
+       itx_buf->skb = skb;
  
         WRITE_ONCE(itxd->txd4, txd4);
         WRITE_ONCE(itxd->txd3, (TX_DMA_SWC | TX_DMA_PLEN0(skb_headlen(skb)) |
@@ -1012,17 +1016,16 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget)
  
         while ((cpu != dma) && budget) {
                 u32 next_cpu = desc->txd2;
-               int mac;
+               int mac = 0;
  
                 desc = mtk_qdma_phys_to_virt(ring, desc->txd2);
                 if ((desc->txd3 & TX_DMA_OWNER_CPU) == 0)
                         break;
  
-               mac = (desc->txd4 >> TX_DMA_FPORT_SHIFT) &
-                      TX_DMA_FPORT_MASK;
-               mac--;
-
                 tx_buf = mtk_desc_to_tx_buf(ring, desc);
+               if (tx_buf->flags & MTK_TX_FLAGS_FPORT1)
+                       mac = 1;
+
                 skb = tx_buf->skb;
                 if (!skb) {
                         condition = 1;
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h

index 99b1c8e9f16f981a0603f906280dcd98f7fa1b54..08285a96ff7077f83b5ac530e8f59266922819d1 100644 (file)
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -406,12 +406,18 @@ struct mtk_hw_stats {
         struct u64_stats_sync   syncp;
  };
  
-/* PDMA descriptor can point at 1-2 segments. This enum allows us to track how
- * memory was allocated so that it can be freed properly
- */
  enum mtk_tx_flags {
+       /* PDMA descriptor can point at 1-2 segments. This enum allows us to
+        * track how memory was allocated so that it can be freed properly.
+        */
         MTK_TX_FLAGS_SINGLE0    = 0x01,
         MTK_TX_FLAGS_PAGE0      = 0x02,
+
+       /* MTK_TX_FLAGS_FPORTx allows tracking which port the transmitted
+        * SKB out instead of looking up through hardware TX descriptor.
+        */
+       MTK_TX_FLAGS_FPORT0     = 0x04,
+       MTK_TX_FLAGS_FPORT1     = 0x08,
  };
  
  /* This enum allows us to identify how the clock is defined on the array of the
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c

index e8c105164931f31ff0cf5ed12acef455d0010eda..0e0fa70305659521ed50d1cf1bc40fd38aa3ad04 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -2305,6 +2305,17 @@ static int sync_toggles(struct mlx4_dev *dev)
                 rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read));
                 if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) {
                         /* PCI might be offline */
+
+                       /* If device removal has been requested,
+                        * do not continue retrying.
+                        */
+                       if (dev->persist->interface_state &
+                           MLX4_INTERFACE_STATE_NOWAIT) {
+                               mlx4_warn(dev,
+                                         "communication channel is offline\n");
+                               return -EIO;
+                       }
+
                         msleep(100);
                         wr_toggle = swab32(readl(&priv->mfunc.comm->
                                            slave_write));
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c

index 21377c315083b686d8db25033583dd020d7e50a6..703205475524d689cd2762f2d2ce3abfd2b6ebcb 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1940,6 +1940,14 @@ static int mlx4_comm_check_offline(struct mlx4_dev *dev)
                                (u32)(1 << COMM_CHAN_OFFLINE_OFFSET));
                 if (!offline_bit)
                         return 0;
+
+               /* If device removal has been requested,
+                * do not continue retrying.
+                */
+               if (dev->persist->interface_state &
+                   MLX4_INTERFACE_STATE_NOWAIT)
+                       break;
+
                 /* There are cases as part of AER/Reset flow that PF needs
                  * around 100 msec to load. We therefore sleep for 100 msec
                  * to allow other tasks to make use of that CPU during this
@@ -3955,6 +3963,9 @@ static void mlx4_remove_one(struct pci_dev *pdev)
         struct devlink *devlink = priv_to_devlink(priv);
         int active_vfs = 0;
  
+       if (mlx4_is_slave(dev))
+               persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT;
+
         mutex_lock(&persist->interface_state_mutex);
         persist->interface_state |= MLX4_INTERFACE_STATE_DELETION;
         mutex_unlock(&persist->interface_state_mutex);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig

index ddb4ca4ff930a74b38a97b04b98ad54262da1a7b..117170014e8897f0f91cfc25464e3a03aba044ec 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -14,6 +14,7 @@ config MLX5_CORE
  config MLX5_CORE_EN
         bool "Mellanox Technologies ConnectX-4 Ethernet support"
         depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
+       depends on IPV6=y || IPV6=n || MLX5_CORE=m
         imply PTP_1588_CLOCK
         default n
         ---help---
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c

index caa837e5e2b991fc3666776d2050fe20b1c6c7f6..a380353a78c2d349291e33aa3a1ef7af837111c2 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -361,6 +361,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
         case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
         case MLX5_CMD_OP_ALLOC_Q_COUNTER:
         case MLX5_CMD_OP_QUERY_Q_COUNTER:
+       case MLX5_CMD_OP_SET_RATE_LIMIT:
+       case MLX5_CMD_OP_QUERY_RATE_LIMIT:
         case MLX5_CMD_OP_ALLOC_PD:
         case MLX5_CMD_OP_ALLOC_UAR:
         case MLX5_CMD_OP_CONFIG_INT_MODERATION:
@@ -497,6 +499,8 @@ const char *mlx5_command_str(int command)
         MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
         MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
         MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
+       MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT);
+       MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
         MLX5_COMMAND_STR_CASE(ALLOC_PD);
         MLX5_COMMAND_STR_CASE(DEALLOC_PD);
         MLX5_COMMAND_STR_CASE(ALLOC_UAR);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h

index f6a6ded204f61cda53c6233d80b3db7cde678c6e..3d9490cd2db19720d2b06f90d4cd322a3c87f4b0 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -90,7 +90,7 @@
  #define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) - 1 <= U16_MAX)
  
  #define MLX5_UMR_ALIGN                         (2048)
-#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD      (128)
+#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD      (256)
  
  #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ                 (64 * 1024)
  #define MLX5E_DEFAULT_LRO_TIMEOUT                       32
@@ -928,10 +928,6 @@ void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv);
  int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
  void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
  u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout);
-void mlx5e_add_vxlan_port(struct net_device *netdev,
-                         struct udp_tunnel_info *ti);
-void mlx5e_del_vxlan_port(struct net_device *netdev,
-                         struct udp_tunnel_info *ti);
  
  int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
                             void *sp);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c

index 0523ed47f597c715296c5ea843245625bf3dac62..8fa23f6a1f67f6494168455a58c5a7b1ee35cae5 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -302,6 +302,9 @@ static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode)
         struct mlx5e_priv *priv = netdev_priv(dev);
         struct mlx5e_dcbx *dcbx = &priv->dcbx;
  
+       if (mode & DCB_CAP_DCBX_LLD_MANAGED)
+               return 1;
+
         if ((!mode) && MLX5_CAP_GEN(priv->mdev, dcbx)) {
                 if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_AUTO)
                         return 0;
@@ -315,13 +318,10 @@ static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode)
                 return 1;
         }
  
-       if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev)))
+       if (!(mode & DCB_CAP_DCBX_HOST))
                 return 1;
  
-       if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
-           !(mode & DCB_CAP_DCBX_VER_CEE) ||
-           !(mode & DCB_CAP_DCBX_VER_IEEE) ||
-           !(mode & DCB_CAP_DCBX_HOST))
+       if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev)))
                 return 1;
  
         return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c

index d55fff0ba388f746809ac601fc3863e94309fc12..26fc77e80f7b38d45e52911233d5678da6334eb1 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -564,6 +564,7 @@ int mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv, struct ethtool_rxnfc *i
         int idx = 0;
         int err = 0;
  
+       info->data = MAX_NUM_OF_ETHTOOL_RULES;
         while ((!err || err == -ENOENT) && idx < info->rule_cnt) {
                 err = mlx5e_ethtool_get_flow(priv, info, location);
                 if (!err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c

index 8ef64c4db2c21ad6a752338cb32b054a5e5f3968..15cc7b469d2ed9c066ee11a94a17a8ea5c9709f5 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -174,7 +174,7 @@ unlock:
  
  static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
  {
-       struct mlx5e_sw_stats *s = &priv->stats.sw;
+       struct mlx5e_sw_stats temp, *s = &temp;
         struct mlx5e_rq_stats *rq_stats;
         struct mlx5e_sq_stats *sq_stats;
         u64 tx_offload_none = 0;
@@ -229,6 +229,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
         s->link_down_events_phy = MLX5_GET(ppcnt_reg,
                                 priv->stats.pport.phy_counters,
                                 counter_set.phys_layer_cntrs.link_down_events);
+       memcpy(&priv->stats.sw, s, sizeof(*s));
  }
  
  static void mlx5e_update_vport_counters(struct mlx5e_priv *priv)
@@ -243,7 +244,6 @@ static void mlx5e_update_vport_counters(struct mlx5e_priv *priv)
         MLX5_SET(query_vport_counter_in, in, op_mod, 0);
         MLX5_SET(query_vport_counter_in, in, other_vport, 0);
  
-       memset(out, 0, outlen);
         mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
  }
  
@@ -3100,8 +3100,8 @@ static int mlx5e_get_vf_stats(struct net_device *dev,
                                             vf_stats);
  }
  
-void mlx5e_add_vxlan_port(struct net_device *netdev,
-                         struct udp_tunnel_info *ti)
+static void mlx5e_add_vxlan_port(struct net_device *netdev,
+                                struct udp_tunnel_info *ti)
  {
         struct mlx5e_priv *priv = netdev_priv(netdev);
  
@@ -3114,8 +3114,8 @@ void mlx5e_add_vxlan_port(struct net_device *netdev,
         mlx5e_vxlan_queue_work(priv, ti->sa_family, be16_to_cpu(ti->port), 1);
  }
  
-void mlx5e_del_vxlan_port(struct net_device *netdev,
-                         struct udp_tunnel_info *ti)
+static void mlx5e_del_vxlan_port(struct net_device *netdev,
+                                struct udp_tunnel_info *ti)
  {
         struct mlx5e_priv *priv = netdev_priv(netdev);
  
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c

index 2c864574a9d5faeaa3b329f3bc0ab0d4e0cc7b55..f621373bd7a564aca3ddd1247117467daffc72df 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -393,8 +393,6 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = {
         .ndo_get_phys_port_name  = mlx5e_rep_get_phys_port_name,
         .ndo_setup_tc            = mlx5e_rep_ndo_setup_tc,
         .ndo_get_stats64         = mlx5e_rep_get_stats,
-       .ndo_udp_tunnel_add      = mlx5e_add_vxlan_port,
-       .ndo_udp_tunnel_del      = mlx5e_del_vxlan_port,
         .ndo_has_offload_stats   = mlx5e_has_offload_stats,
         .ndo_get_offload_stats   = mlx5e_get_offload_stats,
  };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

index 3d371688fbbbf3544170468840829e15095ea3a0..bafcb349a50c6d3809aa329d43382e51dbd3ffe4 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -601,6 +601,10 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
         if (lro_num_seg > 1) {
                 mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt);
                 skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg);
+               /* Subtract one since we already counted this as one
+                * "regular" packet in mlx5e_complete_rx_cqe()
+                */
+               rq->stats.packets += lro_num_seg - 1;
                 rq->stats.lro_packets++;
                 rq->stats.lro_bytes += cqe_bcnt;
         }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c

index 31e3cb7ee5feeb35ce383c5582bc800733b81643..5621dcfda4f1868c6bccdff9cd220b7a43d46dd3 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -204,9 +204,6 @@ mlx5e_test_loopback_validate(struct sk_buff *skb,
         struct iphdr *iph;
  
         /* We are only going to peek, no need to clone the SKB */
-       if (skb->protocol != htons(ETH_P_IP))
-               goto out;
-
         if (MLX5E_TEST_PKT_SIZE - ETH_HLEN > skb_headlen(skb))
                 goto out;
  
@@ -249,7 +246,7 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv,
         lbtp->loopback_ok = false;
         init_completion(&lbtp->comp);
  
-       lbtp->pt.type = htons(ETH_P_ALL);
+       lbtp->pt.type = htons(ETH_P_IP);
         lbtp->pt.func = mlx5e_test_loopback_validate;
         lbtp->pt.dev = priv->netdev;
         lbtp->pt.af_packet_priv = lbtp;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c

index 44406a5ec15d96a6ca45d30b609864f8cccb07e1..5436866798f447eef43dac9af242decae4ed6017 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -48,9 +48,14 @@
  #include "eswitch.h"
  #include "vxlan.h"
  
+enum {
+       MLX5E_TC_FLOW_ESWITCH   = BIT(0),
+};
+
  struct mlx5e_tc_flow {
         struct rhash_head       node;
         u64                     cookie;
+       u8                      flags;
         struct mlx5_flow_handle *rule;
         struct list_head        encap; /* flows sharing the same encap */
         struct mlx5_esw_flow_attr *attr;
@@ -128,6 +133,23 @@ err_create_ft:
         return rule;
  }
  
+static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
+                                 struct mlx5e_tc_flow *flow)
+{
+       struct mlx5_fc *counter = NULL;
+
+       if (!IS_ERR(flow->rule)) {
+               counter = mlx5_flow_rule_counter(flow->rule);
+               mlx5_del_flow_rules(flow->rule);
+               mlx5_fc_destroy(priv->mdev, counter);
+       }
+
+       if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
+               mlx5_destroy_flow_table(priv->fs.tc.t);
+               priv->fs.tc.t = NULL;
+       }
+}
+
  static struct mlx5_flow_handle *
  mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
                       struct mlx5_flow_spec *spec,
@@ -144,7 +166,24 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
  }
  
  static void mlx5e_detach_encap(struct mlx5e_priv *priv,
-                              struct mlx5e_tc_flow *flow) {
+                              struct mlx5e_tc_flow *flow);
+
+static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
+                                 struct mlx5e_tc_flow *flow)
+{
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+       mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->attr);
+
+       mlx5_eswitch_del_vlan_action(esw, flow->attr);
+
+       if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
+               mlx5e_detach_encap(priv, flow);
+}
+
+static void mlx5e_detach_encap(struct mlx5e_priv *priv,
+                              struct mlx5e_tc_flow *flow)
+{
         struct list_head *next = flow->encap.next;
  
         list_del(&flow->encap);
@@ -168,25 +207,10 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
  static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
                               struct mlx5e_tc_flow *flow)
  {
-       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-       struct mlx5_fc *counter = NULL;
-
-       if (!IS_ERR(flow->rule)) {
-               counter = mlx5_flow_rule_counter(flow->rule);
-               mlx5_del_flow_rules(flow->rule);
-               mlx5_fc_destroy(priv->mdev, counter);
-       }
-
-       if (esw && esw->mode == SRIOV_OFFLOADS) {
-               mlx5_eswitch_del_vlan_action(esw, flow->attr);
-               if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
-                       mlx5e_detach_encap(priv, flow);
-       }
-
-       if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
-               mlx5_destroy_flow_table(priv->fs.tc.t);
-               priv->fs.tc.t = NULL;
-       }
+       if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+               mlx5e_tc_del_fdb_flow(priv, flow);
+       else
+               mlx5e_tc_del_nic_flow(priv, flow);
  }
  
  static void parse_vxlan_attr(struct mlx5_flow_spec *spec,
@@ -243,12 +267,15 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
                         skb_flow_dissector_target(f->dissector,
                                                   FLOW_DISSECTOR_KEY_ENC_PORTS,
                                                   f->mask);
+               struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+               struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw);
+               struct mlx5e_priv *up_priv = netdev_priv(up_dev);
  
                 /* Full udp dst port must be given */
                 if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
                         goto vxlan_match_offload_err;
  
-               if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->dst)) &&
+               if (mlx5e_vxlan_lookup_port(up_priv, be16_to_cpu(key->dst)) &&
                     MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
                         parse_vxlan_attr(spec, f);
                 else {
@@ -598,6 +625,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
  }
  
  static int parse_cls_flower(struct mlx5e_priv *priv,
+                           struct mlx5e_tc_flow *flow,
                             struct mlx5_flow_spec *spec,
                             struct tc_cls_flower_offload *f)
  {
@@ -609,9 +637,10 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
  
         err = __parse_cls_flower(priv, spec, f, &min_inline);
  
-       if (!err && esw->mode == SRIOV_OFFLOADS &&
+       if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH) &&
             rep->vport != FDB_UPLINK_VPORT) {
-               if (min_inline > esw->offloads.inline_mode) {
+               if (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
+                   esw->offloads.inline_mode < min_inline) {
                         netdev_warn(priv->netdev,
                                     "Flow is not offloaded due to min inline setting, required %d actual %d\n",
                                     min_inline, esw->offloads.inline_mode);
@@ -757,16 +786,15 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
         return 0;
  }
  
-static int gen_vxlan_header_ipv4(struct net_device *out_dev,
-                                char buf[],
-                                unsigned char h_dest[ETH_ALEN],
-                                int ttl,
-                                __be32 daddr,
-                                __be32 saddr,
-                                __be16 udp_dst_port,
-                                __be32 vx_vni)
+static void gen_vxlan_header_ipv4(struct net_device *out_dev,
+                                 char buf[], int encap_size,
+                                 unsigned char h_dest[ETH_ALEN],
+                                 int ttl,
+                                 __be32 daddr,
+                                 __be32 saddr,
+                                 __be16 udp_dst_port,
+                                 __be32 vx_vni)
  {
-       int encap_size = VXLAN_HLEN + sizeof(struct iphdr) + ETH_HLEN;
         struct ethhdr *eth = (struct ethhdr *)buf;
         struct iphdr  *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr));
         struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr));
@@ -789,20 +817,17 @@ static int gen_vxlan_header_ipv4(struct net_device *out_dev,
         udp->dest = udp_dst_port;
         vxh->vx_flags = VXLAN_HF_VNI;
         vxh->vx_vni = vxlan_vni_field(vx_vni);
-
-       return encap_size;
  }
  
-static int gen_vxlan_header_ipv6(struct net_device *out_dev,
-                                char buf[],
-                                unsigned char h_dest[ETH_ALEN],
-                                int ttl,
-                                struct in6_addr *daddr,
-                                struct in6_addr *saddr,
-                                __be16 udp_dst_port,
-                                __be32 vx_vni)
+static void gen_vxlan_header_ipv6(struct net_device *out_dev,
+                                 char buf[], int encap_size,
+                                 unsigned char h_dest[ETH_ALEN],
+                                 int ttl,
+                                 struct in6_addr *daddr,
+                                 struct in6_addr *saddr,
+                                 __be16 udp_dst_port,
+                                 __be32 vx_vni)
  {
-       int encap_size = VXLAN_HLEN + sizeof(struct ipv6hdr) + ETH_HLEN;
         struct ethhdr *eth = (struct ethhdr *)buf;
         struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr));
         struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr));
@@ -824,8 +849,6 @@ static int gen_vxlan_header_ipv6(struct net_device *out_dev,
         udp->dest = udp_dst_port;
         vxh->vx_flags = VXLAN_HF_VNI;
         vxh->vx_vni = vxlan_vni_field(vx_vni);
-
-       return encap_size;
  }
  
  static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
@@ -834,13 +857,20 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
                                           struct net_device **out_dev)
  {
         int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+       int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN;
         struct ip_tunnel_key *tun_key = &e->tun_info.key;
-       int encap_size, ttl, err;
         struct neighbour *n = NULL;
         struct flowi4 fl4 = {};
         char *encap_header;
+       int ttl, err;
  
-       encap_header = kzalloc(max_encap_size, GFP_KERNEL);
+       if (max_encap_size < ipv4_encap_size) {
+               mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+                              ipv4_encap_size, max_encap_size);
+               return -EOPNOTSUPP;
+       }
+
+       encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
         if (!encap_header)
                 return -ENOMEM;
  
@@ -875,11 +905,11 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
  
         switch (e->tunnel_type) {
         case MLX5_HEADER_TYPE_VXLAN:
-               encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header,
-                                                  e->h_dest, ttl,
-                                                  fl4.daddr,
-                                                  fl4.saddr, tun_key->tp_dst,
-                                                  tunnel_id_to_key32(tun_key->tun_id));
+               gen_vxlan_header_ipv4(*out_dev, encap_header,
+                                     ipv4_encap_size, e->h_dest, ttl,
+                                     fl4.daddr,
+                                     fl4.saddr, tun_key->tp_dst,
+                                     tunnel_id_to_key32(tun_key->tun_id));
                 break;
         default:
                 err = -EOPNOTSUPP;
@@ -887,7 +917,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
         }
  
         err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
-                              encap_size, encap_header, &e->encap_id);
+                              ipv4_encap_size, encap_header, &e->encap_id);
  out:
         if (err && n)
                 neigh_release(n);
@@ -902,13 +932,20 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
  
  {
         int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+       int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN;
         struct ip_tunnel_key *tun_key = &e->tun_info.key;
-       int encap_size, err, ttl = 0;
         struct neighbour *n = NULL;
         struct flowi6 fl6 = {};
         char *encap_header;
+       int err, ttl = 0;
+
+       if (max_encap_size < ipv6_encap_size) {
+               mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+                              ipv6_encap_size, max_encap_size);
+               return -EOPNOTSUPP;
+       }
  
-       encap_header = kzalloc(max_encap_size, GFP_KERNEL);
+       encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
         if (!encap_header)
                 return -ENOMEM;
  
@@ -944,11 +981,11 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
  
         switch (e->tunnel_type) {
         case MLX5_HEADER_TYPE_VXLAN:
-               encap_size = gen_vxlan_header_ipv6(*out_dev, encap_header,
-                                                  e->h_dest, ttl,
-                                                  &fl6.daddr,
-                                                  &fl6.saddr, tun_key->tp_dst,
-                                                  tunnel_id_to_key32(tun_key->tun_id));
+               gen_vxlan_header_ipv6(*out_dev, encap_header,
+                                     ipv6_encap_size, e->h_dest, ttl,
+                                     &fl6.daddr,
+                                     &fl6.saddr, tun_key->tp_dst,
+                                     tunnel_id_to_key32(tun_key->tun_id));
                 break;
         default:
                 err = -EOPNOTSUPP;
@@ -956,7 +993,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
         }
  
         err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
-                              encap_size, encap_header, &e->encap_id);
+                              ipv6_encap_size, encap_header, &e->encap_id);
  out:
         if (err && n)
                 neigh_release(n);
@@ -970,6 +1007,8 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
                               struct mlx5_esw_flow_attr *attr)
  {
         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+       struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw);
+       struct mlx5e_priv *up_priv = netdev_priv(up_dev);
         unsigned short family = ip_tunnel_info_af(tun_info);
         struct ip_tunnel_key *key = &tun_info->key;
         struct mlx5_encap_entry *e;
@@ -990,7 +1029,7 @@ vxlan_encap_offload_err:
                 return -EOPNOTSUPP;
         }
  
-       if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) &&
+       if (mlx5e_vxlan_lookup_port(up_priv, be16_to_cpu(key->tp_dst)) &&
             MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
                 tunnel_type = MLX5_HEADER_TYPE_VXLAN;
         } else {
@@ -1106,14 +1145,16 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
                 }
  
                 if (is_tcf_vlan(a)) {
-                       if (tcf_vlan_action(a) == VLAN_F_POP) {
+                       if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
                                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
-                       } else if (tcf_vlan_action(a) == VLAN_F_PUSH) {
+                       } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
                                 if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q))
                                         return -EOPNOTSUPP;
  
                                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
                                 attr->vlan = tcf_vlan_push_vid(a);
+                       } else { /* action is TCA_VLAN_ACT_MODIFY */
+                               return -EOPNOTSUPP;
                         }
                         continue;
                 }
@@ -1132,23 +1173,19 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
                            struct tc_cls_flower_offload *f)
  {
         struct mlx5e_tc_table *tc = &priv->fs.tc;
-       int err = 0;
-       bool fdb_flow = false;
+       int err, attr_size = 0;
         u32 flow_tag, action;
         struct mlx5e_tc_flow *flow;
         struct mlx5_flow_spec *spec;
         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+       u8 flow_flags = 0;
  
-       if (esw && esw->mode == SRIOV_OFFLOADS)
-               fdb_flow = true;
-
-       if (fdb_flow)
-               flow = kzalloc(sizeof(*flow) +
-                              sizeof(struct mlx5_esw_flow_attr),
-                              GFP_KERNEL);
-       else
-               flow = kzalloc(sizeof(*flow), GFP_KERNEL);
+       if (esw && esw->mode == SRIOV_OFFLOADS) {
+               flow_flags = MLX5E_TC_FLOW_ESWITCH;
+               attr_size  = sizeof(struct mlx5_esw_flow_attr);
+       }
  
+       flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
         spec = mlx5_vzalloc(sizeof(*spec));
         if (!spec || !flow) {
                 err = -ENOMEM;
@@ -1156,12 +1193,13 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
         }
  
         flow->cookie = f->cookie;
+       flow->flags = flow_flags;
  
-       err = parse_cls_flower(priv, spec, f);
+       err = parse_cls_flower(priv, flow, spec, f);
         if (err < 0)
                 goto err_free;
  
-       if (fdb_flow) {
+       if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
                 flow->attr  = (struct mlx5_esw_flow_attr *)(flow + 1);
                 err = parse_tc_fdb_actions(priv, f->exts, flow);
                 if (err < 0)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c

index f193128bac4b8c18504ec1f5905def3baa5c4633..57f5e2d7ebd1a91a1a1cf618e60caf1f015914a3 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -274,15 +274,18 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
                         sq->stats.tso_bytes += skb->len - ihs;
                 }
  
+               sq->stats.packets += skb_shinfo(skb)->gso_segs;
                 num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
         } else {
                 bf = sq->bf_budget &&
                      !skb->xmit_more &&
                      !skb_shinfo(skb)->nr_frags;
                 ihs = mlx5e_get_inline_hdr_size(sq, skb, bf);
+               sq->stats.packets++;
                 num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
         }
  
+       sq->stats.bytes += num_bytes;
         wi->num_bytes = num_bytes;
  
         ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
@@ -381,8 +384,6 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
         if (bf)
                 sq->bf_budget--;
  
-       sq->stats.packets++;
-       sq->stats.bytes += num_bytes;
         return NETDEV_TX_OK;
  
  dma_unmap_wqe_err:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h

index 5b78883d565413ec59a00ecba4ddb483e4eecd3f..ad329b1680b455ddd4cdfda56bf512bfbea37529 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -209,6 +209,7 @@ struct mlx5_esw_offload {
         struct mlx5_eswitch_rep *vport_reps;
         DECLARE_HASHTABLE(encap_tbl, 8);
         u8 inline_mode;
+       u64 num_flows;
  };
  
  struct mlx5_eswitch {
@@ -271,6 +272,11 @@ struct mlx5_flow_handle *
  mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
                                 struct mlx5_flow_spec *spec,
                                 struct mlx5_esw_flow_attr *attr);
+void
+mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
+                               struct mlx5_flow_handle *rule,
+                               struct mlx5_esw_flow_attr *attr);
+
  struct mlx5_flow_handle *
  mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn);
  
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c

index 4f5b0d47d5f38237129a7c90a1240b8615615d32..d111cebca9f1ea57d57a70b108a436af1adcc6aa 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -93,10 +93,27 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
                                    spec, &flow_act, dest, i);
         if (IS_ERR(rule))
                 mlx5_fc_destroy(esw->dev, counter);
+       else
+               esw->offloads.num_flows++;
  
         return rule;
  }
  
+void
+mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
+                               struct mlx5_flow_handle *rule,
+                               struct mlx5_esw_flow_attr *attr)
+{
+       struct mlx5_fc *counter = NULL;
+
+       if (!IS_ERR(rule)) {
+               counter = mlx5_flow_rule_counter(rule);
+               mlx5_del_flow_rules(rule);
+               mlx5_fc_destroy(esw->dev, counter);
+               esw->offloads.num_flows--;
+       }
+}
+
  static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
  {
         struct mlx5_eswitch_rep *rep;
@@ -894,8 +911,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
         struct mlx5_core_dev *dev = devlink_priv(devlink);
         struct mlx5_eswitch *esw = dev->priv.eswitch;
         int num_vports = esw->enabled_vports;
-       int err;
-       int vport;
+       int err, vport;
         u8 mlx5_mode;
  
         if (!MLX5_CAP_GEN(dev, vport_group_manager))
@@ -904,9 +920,22 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
         if (esw->mode == SRIOV_NONE)
                 return -EOPNOTSUPP;
  
-       if (MLX5_CAP_ETH(dev, wqe_inline_mode) !=
-           MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
+       switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
+       case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
+               if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE)
+                       return 0;
+               /* fall through */
+       case MLX5_CAP_INLINE_MODE_L2:
+               esw_warn(dev, "Inline mode can't be set\n");
                 return -EOPNOTSUPP;
+       case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+               break;
+       }
+
+       if (esw->offloads.num_flows > 0) {
+               esw_warn(dev, "Can't set inline mode when flows are configured\n");
+               return -EOPNOTSUPP;
+       }
  
         err = esw_inline_mode_from_devlink(mode, &mlx5_mode);
         if (err)
@@ -944,18 +973,14 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
         if (esw->mode == SRIOV_NONE)
                 return -EOPNOTSUPP;
  
-       if (MLX5_CAP_ETH(dev, wqe_inline_mode) !=
-           MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
-               return -EOPNOTSUPP;
-
         return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
  }
  
  int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
  {
+       u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
         struct mlx5_core_dev *dev = esw->dev;
         int vport;
-       u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
  
         if (!MLX5_CAP_GEN(dev, vport_group_manager))
                 return -EOPNOTSUPP;
@@ -963,10 +988,18 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
         if (esw->mode == SRIOV_NONE)
                 return -EOPNOTSUPP;
  
-       if (MLX5_CAP_ETH(dev, wqe_inline_mode) !=
-           MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
-               return -EOPNOTSUPP;
+       switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
+       case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
+               mlx5_mode = MLX5_INLINE_MODE_NONE;
+               goto out;
+       case MLX5_CAP_INLINE_MODE_L2:
+               mlx5_mode = MLX5_INLINE_MODE_L2;
+               goto out;
+       case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+               goto query_vports;
+       }
  
+query_vports:
         for (vport = 1; vport <= nvfs; vport++) {
                 mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode);
                 if (vport > 1 && prev_mlx5_mode != mlx5_mode)
@@ -974,6 +1007,7 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
                 prev_mlx5_mode = mlx5_mode;
         }
  
+out:
         *mode = mlx5_mode;
         return 0;
  }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c

index 2478516a61e2ea547f5ae8af0c3aae7228e64db9..ded27bb9a3b6049ff4bad1606443dbeff53be8f1 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1136,7 +1136,7 @@ static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft,
                                                 u32 *match_criteria)
  {
         int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
-       struct list_head *prev = ft->node.children.prev;
+       struct list_head *prev = &ft->node.children;
         unsigned int candidate_index = 0;
         struct mlx5_flow_group *fg;
         void *match_criteria_addr;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c

index 55957246c0e844826a5a7f18c42c4678fb6c5be5..b5d5519542e87380b064de5578e327f0d55ba9cf 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -294,7 +294,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
                                          struct netdev_notifier_changeupper_info *info)
  {
         struct net_device *upper = info->upper_dev, *ndev_tmp;
-       struct netdev_lag_upper_info *lag_upper_info;
+       struct netdev_lag_upper_info *lag_upper_info = NULL;
         bool is_bonded;
         int bond_status = 0;
         int num_slaves = 0;
@@ -303,7 +303,8 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
         if (!netif_is_lag_master(upper))
                 return 0;
  
-       lag_upper_info = info->upper_info;
+       if (info->linking)
+               lag_upper_info = info->upper_info;
  
         /* The event may still be of interest if the slave does not belong to
          * us, but is enslaved to a master which has one or more of our netdevs
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c

index c4242a4e81309f0d90a0cae8bdfc09fd39da5649..0ad66324247f71c212f44f98679c80f05a2650d3 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -87,7 +87,7 @@ static struct mlx5_profile profile[] = {
         [2] = {
                 .mask           = MLX5_PROF_MASK_QP_SIZE |
                                   MLX5_PROF_MASK_MR_CACHE,
-               .log_max_qp     = 17,
+               .log_max_qp     = 18,
                 .mr_cache[0]    = {
                         .size   = 500,
                         .limit  = 250
@@ -1029,7 +1029,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
         if (err) {
                 dev_err(&dev->pdev->dev, "Firmware over %d MS in initializing state, aborting\n",
                         FW_INIT_TIMEOUT_MILI);
-               goto out_err;
+               goto err_cmd_cleanup;
         }
  
         err = mlx5_core_enable_hca(dev, 0);
@@ -1352,6 +1352,7 @@ static int init_one(struct pci_dev *pdev,
         if (err)
                 goto clean_load;
  
+       pci_save_state(pdev);
         return 0;
  
  clean_load:
@@ -1407,9 +1408,8 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
  
         mlx5_enter_error_state(dev);
         mlx5_unload_one(dev, priv, false);
-       /* In case of kernel call save the pci state and drain the health wq */
+       /* In case of kernel call drain the health wq */
         if (state) {
-               pci_save_state(pdev);
                 mlx5_drain_health_wq(dev);
                 mlx5_pci_disable_device(dev);
         }
@@ -1461,6 +1461,7 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
  
         pci_set_master(pdev);
         pci_restore_state(pdev);
+       pci_save_state(pdev);
  
         if (wait_vital(pdev)) {
                 dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c

index 2e6b0f290ddc2cbf3beeb2f5c1fe813378691c69..222b25908d012614639b4e9ef2a8a7727a82cd18 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -87,6 +87,7 @@ static void up_rel_func(struct kref *kref)
         struct mlx5_uars_page *up = container_of(kref, struct mlx5_uars_page, ref_count);
  
         list_del(&up->list);
+       iounmap(up->map);
         if (mlx5_cmd_free_uar(up->mdev, up->index))
                 mlx5_core_warn(up->mdev, "failed to free uar index %d\n", up->index);
         kfree(up->reg_bitmap);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h

index 0899e2d310e26269a5c3d025b7afeeb1516bf21e..d9616daf8a705645eb5b14e5af889e0b5020ab0d 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -769,7 +769,7 @@ static inline void mlxsw_reg_spvid_pack(char *payload, u8 local_port, u16 pvid)
  #define MLXSW_REG_SPVM_ID 0x200F
  #define MLXSW_REG_SPVM_BASE_LEN 0x04 /* base length, without records */
  #define MLXSW_REG_SPVM_REC_LEN 0x04 /* record length */
-#define MLXSW_REG_SPVM_REC_MAX_COUNT 256
+#define MLXSW_REG_SPVM_REC_MAX_COUNT 255
  #define MLXSW_REG_SPVM_LEN (MLXSW_REG_SPVM_BASE_LEN +  \
                     MLXSW_REG_SPVM_REC_LEN * MLXSW_REG_SPVM_REC_MAX_COUNT)
  
@@ -1702,7 +1702,7 @@ static inline void mlxsw_reg_sfmr_pack(char *payload,
  #define MLXSW_REG_SPVMLR_ID 0x2020
  #define MLXSW_REG_SPVMLR_BASE_LEN 0x04 /* base length, without records */
  #define MLXSW_REG_SPVMLR_REC_LEN 0x04 /* record length */
-#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 256
+#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 255
  #define MLXSW_REG_SPVMLR_LEN (MLXSW_REG_SPVMLR_BASE_LEN + \
                               MLXSW_REG_SPVMLR_REC_LEN * \
                               MLXSW_REG_SPVMLR_REC_MAX_COUNT)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c

index 22ab429253778d2a22e4c59c742f8f6778e57f40..ae6cccc666e4619bbb1a8360b5cc090d303da60b 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -303,11 +303,11 @@ void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
         ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, mlxsw_sp_port->dev,
                                            ingress,
                                            MLXSW_SP_ACL_PROFILE_FLOWER);
-       if (WARN_ON(IS_ERR(ruleset)))
+       if (IS_ERR(ruleset))
                 return;
  
         rule = mlxsw_sp_acl_rule_lookup(mlxsw_sp, ruleset, f->cookie);
-       if (!WARN_ON(!rule)) {
+       if (rule) {
                 mlxsw_sp_acl_rule_del(mlxsw_sp, rule);
                 mlxsw_sp_acl_rule_destroy(mlxsw_sp, rule);
         }
diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c

index 06c9f4100cb9bd8c0abecada5fa922c7e779fc51..6ad44be08b3307ac147b6f25c8d04bc0706f65a5 100644 (file)
--- a/drivers/net/ethernet/moxa/moxart_ether.c
+++ b/drivers/net/ethernet/moxa/moxart_ether.c
@@ -25,6 +25,7 @@
  #include <linux/of_irq.h>
  #include <linux/crc32.h>
  #include <linux/crc32c.h>
+#include <linux/circ_buf.h>
  
  #include "moxart_ether.h"
  
@@ -278,6 +279,13 @@ rx_next:
         return rx;
  }
  
+static int moxart_tx_queue_space(struct net_device *ndev)
+{
+       struct moxart_mac_priv_t *priv = netdev_priv(ndev);
+
+       return CIRC_SPACE(priv->tx_head, priv->tx_tail, TX_DESC_NUM);
+}
+
  static void moxart_tx_finished(struct net_device *ndev)
  {
         struct moxart_mac_priv_t *priv = netdev_priv(ndev);
@@ -297,6 +305,9 @@ static void moxart_tx_finished(struct net_device *ndev)
                 tx_tail = TX_NEXT(tx_tail);
         }
         priv->tx_tail = tx_tail;
+       if (netif_queue_stopped(ndev) &&
+           moxart_tx_queue_space(ndev) >= TX_WAKE_THRESHOLD)
+               netif_wake_queue(ndev);
  }
  
  static irqreturn_t moxart_mac_interrupt(int irq, void *dev_id)
@@ -324,13 +335,18 @@ static int moxart_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
         struct moxart_mac_priv_t *priv = netdev_priv(ndev);
         void *desc;
         unsigned int len;
-       unsigned int tx_head = priv->tx_head;
+       unsigned int tx_head;
         u32 txdes1;
         int ret = NETDEV_TX_BUSY;
  
+       spin_lock_irq(&priv->txlock);
+
+       tx_head = priv->tx_head;
         desc = priv->tx_desc_base + (TX_REG_DESC_SIZE * tx_head);
  
-       spin_lock_irq(&priv->txlock);
+       if (moxart_tx_queue_space(ndev) == 1)
+               netif_stop_queue(ndev);
+
         if (moxart_desc_read(desc + TX_REG_OFFSET_DESC0) & TX_DESC0_DMA_OWN) {
                 net_dbg_ratelimited("no TX space for packet\n");
                 priv->stats.tx_dropped++;
diff --git a/drivers/net/ethernet/moxa/moxart_ether.h b/drivers/net/ethernet/moxa/moxart_ether.h

index 93a9563ac7c6730eec8240ac187a86b9831c9741..afc32ec998c043957f0b472080d22d01600edf2a 100644 (file)
--- a/drivers/net/ethernet/moxa/moxart_ether.h
+++ b/drivers/net/ethernet/moxa/moxart_ether.h
@@ -59,6 +59,7 @@
  #define TX_NEXT(N)             (((N) + 1) & (TX_DESC_NUM_MASK))
  #define TX_BUF_SIZE            1600
  #define TX_BUF_SIZE_MAX                (TX_DESC1_BUF_SIZE_MASK+1)
+#define TX_WAKE_THRESHOLD      16
  
  #define RX_DESC_NUM            64
  #define RX_DESC_NUM_MASK       (RX_DESC_NUM-1)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c

index 9179a99563afa86f4ed7bbcb41b045c2568243de..a41377e26c07d038aa6c8716ef52d7152e92bcf0 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -3275,9 +3275,10 @@ void nfp_net_netdev_clean(struct net_device *netdev)
  {
         struct nfp_net *nn = netdev_priv(netdev);
  
+       unregister_netdev(nn->netdev);
+
         if (nn->xdp_prog)
                 bpf_prog_put(nn->xdp_prog);
         if (nn->bpf_offload_xdp)
                 nfp_net_xdp_offload(nn, NULL);
-       unregister_netdev(nn->netdev);
  }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c

index d42d03df751acbb32dd725c769bbf5d56aeb72e4..7e3a6fed3da6d94fe47139aef697563b56726950 100644 (file)
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -422,8 +422,9 @@ static void qed_cxt_set_proto_cid_count(struct qed_hwfn *p_hwfn,
                 u32 page_sz = p_mgr->clients[ILT_CLI_CDUC].p_size.val;
                 u32 cxt_size = CONN_CXT_SIZE(p_hwfn);
                 u32 elems_per_page = ILT_PAGE_IN_BYTES(page_sz) / cxt_size;
+               u32 align = elems_per_page * DQ_RANGE_ALIGN;
  
-               p_conn->cid_count = roundup(p_conn->cid_count, elems_per_page);
+               p_conn->cid_count = roundup(p_conn->cid_count, align);
         }
  }
  
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c

index 5bd36a4a8fcdfd201b40321c7fefb82776cd347d..cfdadb658ade0fe73551f6f822ec8ae1805f83de 100644 (file)
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -64,11 +64,11 @@
         ((u32)(prio_tc_tbl >> ((7 - prio) * 4)) & 0x7)
  
  static const struct qed_dcbx_app_metadata qed_dcbx_app_update[] = {
-       {DCBX_PROTOCOL_ISCSI, "ISCSI", QED_PCI_DEFAULT},
-       {DCBX_PROTOCOL_FCOE, "FCOE", QED_PCI_DEFAULT},
-       {DCBX_PROTOCOL_ROCE, "ROCE", QED_PCI_DEFAULT},
-       {DCBX_PROTOCOL_ROCE_V2, "ROCE_V2", QED_PCI_DEFAULT},
-       {DCBX_PROTOCOL_ETH, "ETH", QED_PCI_ETH}
+       {DCBX_PROTOCOL_ISCSI, "ISCSI", QED_PCI_ISCSI},
+       {DCBX_PROTOCOL_FCOE, "FCOE", QED_PCI_FCOE},
+       {DCBX_PROTOCOL_ROCE, "ROCE", QED_PCI_ETH_ROCE},
+       {DCBX_PROTOCOL_ROCE_V2, "ROCE_V2", QED_PCI_ETH_ROCE},
+       {DCBX_PROTOCOL_ETH, "ETH", QED_PCI_ETH},
  };
  
  static bool qed_dcbx_app_ethtype(u32 app_info_bitmap)
@@ -583,6 +583,13 @@ qed_dcbx_get_ets_data(struct qed_hwfn *p_hwfn,
                    p_params->ets_cbs,
                    p_ets->pri_tc_tbl[0], p_params->max_ets_tc);
  
+       if (p_params->ets_enabled && !p_params->max_ets_tc) {
+               p_params->max_ets_tc = QED_MAX_PFC_PRIORITIES;
+               DP_VERBOSE(p_hwfn, QED_MSG_DCB,
+                          "ETS params: max_ets_tc is forced to %d\n",
+               p_params->max_ets_tc);
+       }
+
         /* 8 bit tsa and bw data corresponding to each of the 8 TC's are
          * encoded in a type u32 array of size 2.
          */
@@ -1001,6 +1008,8 @@ qed_dcbx_set_pfc_data(struct qed_hwfn *p_hwfn,
         u8 pfc_map = 0;
         int i;
  
+       *pfc &= ~DCBX_PFC_ERROR_MASK;
+
         if (p_params->pfc.willing)
                 *pfc |= DCBX_PFC_WILLING_MASK;
         else
@@ -1255,7 +1264,7 @@ static struct qed_dcbx_get *qed_dcbnl_get_dcbx(struct qed_hwfn *hwfn,
  {
         struct qed_dcbx_get *dcbx_info;
  
-       dcbx_info = kzalloc(sizeof(*dcbx_info), GFP_KERNEL);
+       dcbx_info = kmalloc(sizeof(*dcbx_info), GFP_ATOMIC);
         if (!dcbx_info)
                 return NULL;
  
@@ -2073,6 +2082,8 @@ static int qed_dcbnl_ieee_setpfc(struct qed_dev *cdev, struct ieee_pfc *pfc)
         for (i = 0; i < QED_MAX_PFC_PRIORITIES; i++)
                 dcbx_set.config.params.pfc.prio[i] = !!(pfc->pfc_en & BIT(i));
  
+       dcbx_set.config.params.pfc.max_tc = pfc->pfc_cap;
+
         ptt = qed_ptt_acquire(hwfn);
         if (!ptt)
                 return -EINVAL;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c

index e2a081ceaf520c429b90e1fcc1e2b6cb7d3b10aa..e518f914eab13f52d8f82a8e1a29a5a80a2f2b24 100644 (file)
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -2389,9 +2389,8 @@ qed_chain_alloc_sanity_check(struct qed_dev *cdev,
          * size/capacity fields are of a u32 type.
          */
         if ((cnt_type == QED_CHAIN_CNT_TYPE_U16 &&
-            chain_size > 0x10000) ||
-           (cnt_type == QED_CHAIN_CNT_TYPE_U32 &&
-            chain_size > 0x100000000ULL)) {
+            chain_size > ((u32)U16_MAX + 1)) ||
+           (cnt_type == QED_CHAIN_CNT_TYPE_U32 && chain_size > U32_MAX)) {
                 DP_NOTICE(cdev,
                           "The actual chain size (0x%llx) is larger than the maximal possible value\n",
                           chain_size);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c

index 3a44d6b395fac9500841f5ac1bc73b4c11d4d188..098766f7fe88a6e0a131712330cfa3b144c32738 100644 (file)
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
@@ -190,6 +190,9 @@ qed_sp_iscsi_func_start(struct qed_hwfn *p_hwfn,
         p_init->num_sq_pages_in_ring = p_params->num_sq_pages_in_ring;
         p_init->num_r2tq_pages_in_ring = p_params->num_r2tq_pages_in_ring;
         p_init->num_uhq_pages_in_ring = p_params->num_uhq_pages_in_ring;
+       p_init->ooo_enable = p_params->ooo_enable;
+       p_init->ll2_rx_queue_id = p_hwfn->hw_info.resc_start[QED_LL2_QUEUE] +
+                                 p_params->ll2_ooo_queue_id;
         p_init->func_params.log_page_size = p_params->log_page_size;
         val = p_params->num_tasks;
         p_init->func_params.num_tasks = cpu_to_le16(val);
@@ -786,6 +789,23 @@ static void qed_iscsi_release_connection(struct qed_hwfn *p_hwfn,
         spin_unlock_bh(&p_hwfn->p_iscsi_info->lock);
  }
  
+void qed_iscsi_free_connection(struct qed_hwfn *p_hwfn,
+                              struct qed_iscsi_conn *p_conn)
+{
+       qed_chain_free(p_hwfn->cdev, &p_conn->xhq);
+       qed_chain_free(p_hwfn->cdev, &p_conn->uhq);
+       qed_chain_free(p_hwfn->cdev, &p_conn->r2tq);
+       dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+                         sizeof(struct tcp_upload_params),
+                         p_conn->tcp_upload_params_virt_addr,
+                         p_conn->tcp_upload_params_phys_addr);
+       dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+                         sizeof(struct scsi_terminate_extra_params),
+                         p_conn->queue_cnts_virt_addr,
+                         p_conn->queue_cnts_phys_addr);
+       kfree(p_conn);
+}
+
  struct qed_iscsi_info *qed_iscsi_alloc(struct qed_hwfn *p_hwfn)
  {
         struct qed_iscsi_info *p_iscsi_info;
@@ -807,6 +827,17 @@ void qed_iscsi_setup(struct qed_hwfn *p_hwfn,
  void qed_iscsi_free(struct qed_hwfn *p_hwfn,
                     struct qed_iscsi_info *p_iscsi_info)
  {
+       struct qed_iscsi_conn *p_conn = NULL;
+
+       while (!list_empty(&p_hwfn->p_iscsi_info->free_list)) {
+               p_conn = list_first_entry(&p_hwfn->p_iscsi_info->free_list,
+                                         struct qed_iscsi_conn, list_entry);
+               if (p_conn) {
+                       list_del(&p_conn->list_entry);
+                       qed_iscsi_free_connection(p_hwfn, p_conn);
+               }
+       }
+
         kfree(p_iscsi_info);
  }
  
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c

index 9a0b9af10a572f3e3c2a5d086e9e731b040e4d6b..0d3cef409c96d0849c7860e8f03a920b3b8966f1 100644 (file)
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -211,6 +211,8 @@ static void qed_ll2b_complete_rx_packet(struct qed_hwfn *p_hwfn,
         /* If need to reuse or there's no replacement buffer, repost this */
         if (rc)
                 goto out_post;
+       dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr,
+                        cdev->ll2->rx_size, DMA_FROM_DEVICE);
  
         skb = build_skb(buffer->data, 0);
         if (!skb) {
@@ -474,7 +476,7 @@ qed_ll2_rxq_completion_gsi(struct qed_hwfn *p_hwfn,
  static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn,
                                       struct qed_ll2_info *p_ll2_conn,
                                       union core_rx_cqe_union *p_cqe,
-                                     unsigned long lock_flags,
+                                     unsigned long *p_lock_flags,
                                       bool b_last_cqe)
  {
         struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue;
@@ -495,10 +497,10 @@ static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn,
                           "Mismatch between active_descq and the LL2 Rx chain\n");
         list_add_tail(&p_pkt->list_entry, &p_rx->free_descq);
  
-       spin_unlock_irqrestore(&p_rx->lock, lock_flags);
+       spin_unlock_irqrestore(&p_rx->lock, *p_lock_flags);
         qed_ll2b_complete_rx_packet(p_hwfn, p_ll2_conn->my_id,
                                     p_pkt, &p_cqe->rx_cqe_fp, b_last_cqe);
-       spin_lock_irqsave(&p_rx->lock, lock_flags);
+       spin_lock_irqsave(&p_rx->lock, *p_lock_flags);
  
         return 0;
  }
@@ -538,7 +540,8 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie)
                         break;
                 case CORE_RX_CQE_TYPE_REGULAR:
                         rc = qed_ll2_rxq_completion_reg(p_hwfn, p_ll2_conn,
-                                                       cqe, flags, b_last_cqe);
+                                                       cqe, &flags,
+                                                       b_last_cqe);
                         break;
                 default:
                         rc = -EIO;
@@ -968,7 +971,7 @@ static int qed_ll2_start_ooo(struct qed_dev *cdev,
  {
         struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
         u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id;
-       struct qed_ll2_conn ll2_info;
+       struct qed_ll2_conn ll2_info = { 0 };
         int rc;
  
         ll2_info.conn_type = QED_LL2_TYPE_ISCSI_OOO;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ooo.c b/drivers/net/ethernet/qlogic/qed/qed_ooo.c

index 7d731c6cb8923dd927a7bbaafa3a3a97237ba652..378afce58b3f0abd4c2a3bd43f403a50e1a9a1da 100644 (file)
--- a/drivers/net/ethernet/qlogic/qed/qed_ooo.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ooo.c
@@ -159,6 +159,8 @@ struct qed_ooo_info *qed_ooo_alloc(struct qed_hwfn *p_hwfn)
         if (!p_ooo_info->ooo_history.p_cqes)
                 goto no_history_mem;
  
+       p_ooo_info->ooo_history.num_of_cqes = QED_MAX_NUM_OOO_HISTORY_ENTRIES;
+
         return p_ooo_info;
  
  no_history_mem:
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h

index 6d31f92ef2b6340642eca02039266aace70aefdd..84ac50f92c9c5167adfc5e295139a7a2d42a1eb3 100644 (file)
--- a/drivers/net/ethernet/qlogic/qlge/qlge.h
+++ b/drivers/net/ethernet/qlogic/qlge/qlge.h
@@ -1162,8 +1162,8 @@ struct ob_mac_tso_iocb_rsp {
  struct ib_mac_iocb_rsp {
         u8 opcode;              /* 0x20 */
         u8 flags1;
-#define IB_MAC_IOCB_RSP_OI     0x01    /* Overide intr delay */
-#define IB_MAC_IOCB_RSP_I      0x02    /* Disble Intr Generation */
+#define IB_MAC_IOCB_RSP_OI     0x01    /* Override intr delay */
+#define IB_MAC_IOCB_RSP_I      0x02    /* Disable Intr Generation */
  #define IB_MAC_CSUM_ERR_MASK 0x1c      /* A mask to use for csum errs */
  #define IB_MAC_IOCB_RSP_TE     0x04    /* Checksum error */
  #define IB_MAC_IOCB_RSP_NU     0x08    /* No checksum rcvd */
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c

index 8cfc4a54f2dc69240ae1fc42195ef854e0b1c2c9..3cd7989c007dfe46947e2ddb366a904f1af90198 100644 (file)
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1516,11 +1516,12 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev)
                 spin_unlock_irqrestore(&priv->lock, flags);
                 return NETDEV_TX_BUSY;
         }
-       entry = priv->cur_tx[q] % (priv->num_tx_ring[q] * NUM_TX_DESC);
-       priv->tx_skb[q][entry / NUM_TX_DESC] = skb;
  
         if (skb_put_padto(skb, ETH_ZLEN))
-               goto drop;
+               goto exit;
+
+       entry = priv->cur_tx[q] % (priv->num_tx_ring[q] * NUM_TX_DESC);
+       priv->tx_skb[q][entry / NUM_TX_DESC] = skb;
  
         buffer = PTR_ALIGN(priv->tx_align[q], DPTR_ALIGN) +
                  entry / NUM_TX_DESC * DPTR_ALIGN;
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c

index 54248775f227b062addf85044f486ad4512039f5..f68c4db656eda84691b411cee940528a01a2bb62 100644 (file)
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -1127,12 +1127,70 @@ static struct mdiobb_ops bb_ops = {
         .get_mdio_data = sh_get_mdio,
  };
  
+/* free Tx skb function */
+static int sh_eth_tx_free(struct net_device *ndev, bool sent_only)
+{
+       struct sh_eth_private *mdp = netdev_priv(ndev);
+       struct sh_eth_txdesc *txdesc;
+       int free_num = 0;
+       int entry;
+       bool sent;
+
+       for (; mdp->cur_tx - mdp->dirty_tx > 0; mdp->dirty_tx++) {
+               entry = mdp->dirty_tx % mdp->num_tx_ring;
+               txdesc = &mdp->tx_ring[entry];
+               sent = !(txdesc->status & cpu_to_le32(TD_TACT));
+               if (sent_only && !sent)
+                       break;
+               /* TACT bit must be checked before all the following reads */
+               dma_rmb();
+               netif_info(mdp, tx_done, ndev,
+                          "tx entry %d status 0x%08x\n",
+                          entry, le32_to_cpu(txdesc->status));
+               /* Free the original skb. */
+               if (mdp->tx_skbuff[entry]) {
+                       dma_unmap_single(&ndev->dev, le32_to_cpu(txdesc->addr),
+                                        le32_to_cpu(txdesc->len) >> 16,
+                                        DMA_TO_DEVICE);
+                       dev_kfree_skb_irq(mdp->tx_skbuff[entry]);
+                       mdp->tx_skbuff[entry] = NULL;
+                       free_num++;
+               }
+               txdesc->status = cpu_to_le32(TD_TFP);
+               if (entry >= mdp->num_tx_ring - 1)
+                       txdesc->status |= cpu_to_le32(TD_TDLE);
+
+               if (sent) {
+                       ndev->stats.tx_packets++;
+                       ndev->stats.tx_bytes += le32_to_cpu(txdesc->len) >> 16;
+               }
+       }
+       return free_num;
+}
+
  /* free skb and descriptor buffer */
  static void sh_eth_ring_free(struct net_device *ndev)
  {
         struct sh_eth_private *mdp = netdev_priv(ndev);
         int ringsize, i;
  
+       if (mdp->rx_ring) {
+               for (i = 0; i < mdp->num_rx_ring; i++) {
+                       if (mdp->rx_skbuff[i]) {
+                               struct sh_eth_rxdesc *rxdesc = &mdp->rx_ring[i];
+
+                               dma_unmap_single(&ndev->dev,
+                                                le32_to_cpu(rxdesc->addr),
+                                                ALIGN(mdp->rx_buf_sz, 32),
+                                                DMA_FROM_DEVICE);
+                       }
+               }
+               ringsize = sizeof(struct sh_eth_rxdesc) * mdp->num_rx_ring;
+               dma_free_coherent(NULL, ringsize, mdp->rx_ring,
+                                 mdp->rx_desc_dma);
+               mdp->rx_ring = NULL;
+       }
+
         /* Free Rx skb ringbuffer */
         if (mdp->rx_skbuff) {
                 for (i = 0; i < mdp->num_rx_ring; i++)
@@ -1141,27 +1199,18 @@ static void sh_eth_ring_free(struct net_device *ndev)
         kfree(mdp->rx_skbuff);
         mdp->rx_skbuff = NULL;
  
-       /* Free Tx skb ringbuffer */
-       if (mdp->tx_skbuff) {
-               for (i = 0; i < mdp->num_tx_ring; i++)
-                       dev_kfree_skb(mdp->tx_skbuff[i]);
-       }
-       kfree(mdp->tx_skbuff);
-       mdp->tx_skbuff = NULL;
-
-       if (mdp->rx_ring) {
-               ringsize = sizeof(struct sh_eth_rxdesc) * mdp->num_rx_ring;
-               dma_free_coherent(NULL, ringsize, mdp->rx_ring,
-                                 mdp->rx_desc_dma);
-               mdp->rx_ring = NULL;
-       }
-
         if (mdp->tx_ring) {
+               sh_eth_tx_free(ndev, false);
+
                 ringsize = sizeof(struct sh_eth_txdesc) * mdp->num_tx_ring;
                 dma_free_coherent(NULL, ringsize, mdp->tx_ring,
                                   mdp->tx_desc_dma);
                 mdp->tx_ring = NULL;
         }
+
+       /* Free Tx skb ringbuffer */
+       kfree(mdp->tx_skbuff);
+       mdp->tx_skbuff = NULL;
  }
  
  /* format skb and descriptor buffer */
@@ -1409,43 +1458,6 @@ static void sh_eth_dev_exit(struct net_device *ndev)
         update_mac_address(ndev);
  }
  
-/* free Tx skb function */
-static int sh_eth_txfree(struct net_device *ndev)
-{
-       struct sh_eth_private *mdp = netdev_priv(ndev);
-       struct sh_eth_txdesc *txdesc;
-       int free_num = 0;
-       int entry;
-
-       for (; mdp->cur_tx - mdp->dirty_tx > 0; mdp->dirty_tx++) {
-               entry = mdp->dirty_tx % mdp->num_tx_ring;
-               txdesc = &mdp->tx_ring[entry];
-               if (txdesc->status & cpu_to_le32(TD_TACT))
-                       break;
-               /* TACT bit must be checked before all the following reads */
-               dma_rmb();
-               netif_info(mdp, tx_done, ndev,
-                          "tx entry %d status 0x%08x\n",
-                          entry, le32_to_cpu(txdesc->status));
-               /* Free the original skb. */
-               if (mdp->tx_skbuff[entry]) {
-                       dma_unmap_single(&ndev->dev, le32_to_cpu(txdesc->addr),
-                                        le32_to_cpu(txdesc->len) >> 16,
-                                        DMA_TO_DEVICE);
-                       dev_kfree_skb_irq(mdp->tx_skbuff[entry]);
-                       mdp->tx_skbuff[entry] = NULL;
-                       free_num++;
-               }
-               txdesc->status = cpu_to_le32(TD_TFP);
-               if (entry >= mdp->num_tx_ring - 1)
-                       txdesc->status |= cpu_to_le32(TD_TDLE);
-
-               ndev->stats.tx_packets++;
-               ndev->stats.tx_bytes += le32_to_cpu(txdesc->len) >> 16;
-       }
-       return free_num;
-}
-
  /* Packet receive function */
  static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota)
  {
@@ -1690,7 +1702,7 @@ static void sh_eth_error(struct net_device *ndev, u32 intr_status)
                            intr_status, mdp->cur_tx, mdp->dirty_tx,
                            (u32)ndev->state, edtrr);
                 /* dirty buffer free */
-               sh_eth_txfree(ndev);
+               sh_eth_tx_free(ndev, true);
  
                 /* SH7712 BUG */
                 if (edtrr ^ sh_eth_get_edtrr_trns(mdp)) {
@@ -1751,7 +1763,7 @@ static irqreturn_t sh_eth_interrupt(int irq, void *netdev)
                 /* Clear Tx interrupts */
                 sh_eth_write(ndev, intr_status & cd->tx_check, EESR);
  
-               sh_eth_txfree(ndev);
+               sh_eth_tx_free(ndev, true);
                 netif_wake_queue(ndev);
         }
  
@@ -2412,7 +2424,7 @@ static int sh_eth_start_xmit(struct sk_buff *skb, struct net_device *ndev)
  
         spin_lock_irqsave(&mdp->lock, flags);
         if ((mdp->cur_tx - mdp->dirty_tx) >= (mdp->num_tx_ring - 4)) {
-               if (!sh_eth_txfree(ndev)) {
+               if (!sh_eth_tx_free(ndev, true)) {
                         netif_warn(mdp, tx_queued, ndev, "TxFD exhausted.\n");
                         netif_stop_queue(ndev);
                         spin_unlock_irqrestore(&mdp->lock, flags);
diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c

index 7cd76b6b5cb9f6c1c05f09b509be7e11a79b0478..2ae85245478087d2d640617bd79bfbfabd5f0763 100644 (file)
--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
+++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
@@ -2216,18 +2216,15 @@ static int ofdpa_port_stp_update(struct ofdpa_port *ofdpa_port,
  {
         bool want[OFDPA_CTRL_MAX] = { 0, };
         bool prev_ctrls[OFDPA_CTRL_MAX];
-       u8 uninitialized_var(prev_state);
+       u8 prev_state;
         int err;
         int i;
  
-       if (switchdev_trans_ph_prepare(trans)) {
-               memcpy(prev_ctrls, ofdpa_port->ctrls, sizeof(prev_ctrls));
-               prev_state = ofdpa_port->stp_state;
-       }
-
-       if (ofdpa_port->stp_state == state)
+       prev_state = ofdpa_port->stp_state;
+       if (prev_state == state)
                 return 0;
  
+       memcpy(prev_ctrls, ofdpa_port->ctrls, sizeof(prev_ctrls));
         ofdpa_port->stp_state = state;
  
         switch (state) {
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c

index 334bcc6df6b2ba90a43da4baf7b44cc5ebfa1bac..b9cb697b281847a83aa511c577a6c790516f8012 100644 (file)
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -1371,6 +1371,13 @@ static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
                 free_cpumask_var(thread_mask);
         }
  
+       if (count > EFX_MAX_RX_QUEUES) {
+               netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
+                              "Reducing number of rx queues from %u to %u.\n",
+                              count, EFX_MAX_RX_QUEUES);
+               count = EFX_MAX_RX_QUEUES;
+       }
+
         /* If RSS is requested for the PF *and* VFs then we can't write RSS
          * table entries that are inaccessible to VFs
          */
@@ -2404,7 +2411,7 @@ static void efx_udp_tunnel_del(struct net_device *dev, struct udp_tunnel_info *t
         tnl.type = (u16)efx_tunnel_type;
         tnl.port = ti->port;
  
-       if (efx->type->udp_tnl_add_port)
+       if (efx->type->udp_tnl_del_port)
                 (void)efx->type->udp_tnl_del_port(efx, tnl);
  }
  
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h

index ee14662415c5dfc827a02cb577794af495b0433c..a0c52e3281024b566dfe4b58e3014f26aadb0eaf 100644 (file)
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -74,7 +74,10 @@ void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue);
  #define EFX_RXQ_MIN_ENT                128U
  #define EFX_TXQ_MIN_ENT(efx)   (2 * efx_tx_max_skb_descs(efx))
  
-#define EFX_TXQ_MAX_ENT(efx)   (EFX_WORKAROUND_35388(efx) ? \
+/* All EF10 architecture NICs steal one bit of the DMAQ size for various
+ * other purposes when counting TxQ entries, so we halve the queue size.
+ */
+#define EFX_TXQ_MAX_ENT(efx)   (EFX_WORKAROUND_EF10(efx) ? \
                                  EFX_MAX_DMAQ_SIZE / 2 : EFX_MAX_DMAQ_SIZE)
  
  static inline bool efx_rss_enabled(struct efx_nic *efx)
diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c

index f5e5cd1659a148fb63ce2078ef13a5ae12d048bc..29614da91cbf919f91841d8e644ab4b246741ec7 100644 (file)
--- a/drivers/net/ethernet/sfc/falcon/efx.c
+++ b/drivers/net/ethernet/sfc/falcon/efx.c
@@ -1354,6 +1354,13 @@ static unsigned int ef4_wanted_parallelism(struct ef4_nic *efx)
                 free_cpumask_var(thread_mask);
         }
  
+       if (count > EF4_MAX_RX_QUEUES) {
+               netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
+                              "Reducing number of rx queues from %u to %u.\n",
+                              count, EF4_MAX_RX_QUEUES);
+               count = EF4_MAX_RX_QUEUES;
+       }
+
         return count;
  }
  
diff --git a/drivers/net/ethernet/sfc/workarounds.h b/drivers/net/ethernet/sfc/workarounds.h

index 103f827a16231e058160ea8e283adc51823a1928..c67fa18b8121091de9396b29eea68f9134206ce2 100644 (file)
--- a/drivers/net/ethernet/sfc/workarounds.h
+++ b/drivers/net/ethernet/sfc/workarounds.h
@@ -16,6 +16,7 @@
   */
  
  #define EFX_WORKAROUND_SIENA(efx) (efx_nic_rev(efx) == EFX_REV_SIENA_A0)
+#define EFX_WORKAROUND_EF10(efx) (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
  #define EFX_WORKAROUND_10G(efx) 1
  
  /* Bit-bashed I2C reads cause performance drop */
diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c

index 65077c77082a2f042117a0889c2b15099c58eae5..91e9bd7159ab37cab5731fef122345cb59341ba2 100644 (file)
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -1535,32 +1535,33 @@ static int smc_close(struct net_device *dev)
   * Ethtool support
   */
  static int
-smc_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd)
+smc_ethtool_get_link_ksettings(struct net_device *dev,
+                              struct ethtool_link_ksettings *cmd)
  {
         struct smc_local *lp = netdev_priv(dev);
         int ret;
  
-       cmd->maxtxpkt = 1;
-       cmd->maxrxpkt = 1;
-
         if (lp->phy_type != 0) {
                 spin_lock_irq(&lp->lock);
-               ret = mii_ethtool_gset(&lp->mii, cmd);
+               ret = mii_ethtool_get_link_ksettings(&lp->mii, cmd);
                 spin_unlock_irq(&lp->lock);
         } else {
-               cmd->supported = SUPPORTED_10baseT_Half |
+               u32 supported = SUPPORTED_10baseT_Half |
                                  SUPPORTED_10baseT_Full |
                                  SUPPORTED_TP | SUPPORTED_AUI;
  
                 if (lp->ctl_rspeed == 10)
-                       ethtool_cmd_speed_set(cmd, SPEED_10);
+                       cmd->base.speed = SPEED_10;
                 else if (lp->ctl_rspeed == 100)
-                       ethtool_cmd_speed_set(cmd, SPEED_100);
+                       cmd->base.speed = SPEED_100;
+
+               cmd->base.autoneg = AUTONEG_DISABLE;
+               cmd->base.port = 0;
+               cmd->base.duplex = lp->tcr_cur_mode & TCR_SWFDUP ?
+                       DUPLEX_FULL : DUPLEX_HALF;
  
-               cmd->autoneg = AUTONEG_DISABLE;
-               cmd->transceiver = XCVR_INTERNAL;
-               cmd->port = 0;
-               cmd->duplex = lp->tcr_cur_mode & TCR_SWFDUP ? DUPLEX_FULL : DUPLEX_HALF;
+               ethtool_convert_legacy_u32_to_link_mode(
+                       cmd->link_modes.supported, supported);
  
                 ret = 0;
         }
@@ -1569,24 +1570,26 @@ smc_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd)
  }
  
  static int
-smc_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd)
+smc_ethtool_set_link_ksettings(struct net_device *dev,
+                              const struct ethtool_link_ksettings *cmd)
  {
         struct smc_local *lp = netdev_priv(dev);
         int ret;
  
         if (lp->phy_type != 0) {
                 spin_lock_irq(&lp->lock);
-               ret = mii_ethtool_sset(&lp->mii, cmd);
+               ret = mii_ethtool_set_link_ksettings(&lp->mii, cmd);
                 spin_unlock_irq(&lp->lock);
         } else {
-               if (cmd->autoneg != AUTONEG_DISABLE ||
-                   cmd->speed != SPEED_10 ||
-                   (cmd->duplex != DUPLEX_HALF && cmd->duplex != DUPLEX_FULL) ||
-                   (cmd->port != PORT_TP && cmd->port != PORT_AUI))
+               if (cmd->base.autoneg != AUTONEG_DISABLE ||
+                   cmd->base.speed != SPEED_10 ||
+                   (cmd->base.duplex != DUPLEX_HALF &&
+                    cmd->base.duplex != DUPLEX_FULL) ||
+                   (cmd->base.port != PORT_TP && cmd->base.port != PORT_AUI))
                         return -EINVAL;
  
-//             lp->port = cmd->port;
-               lp->ctl_rfduplx = cmd->duplex == DUPLEX_FULL;
+//             lp->port = cmd->base.port;
+               lp->ctl_rfduplx = cmd->base.duplex == DUPLEX_FULL;
  
  //             if (netif_running(dev))
  //                     smc_set_port(dev);
@@ -1744,8 +1747,6 @@ static int smc_ethtool_seteeprom(struct net_device *dev,
  
  
  static const struct ethtool_ops smc_ethtool_ops = {
-       .get_settings   = smc_ethtool_getsettings,
-       .set_settings   = smc_ethtool_setsettings,
         .get_drvinfo    = smc_ethtool_getdrvinfo,
  
         .get_msglevel   = smc_ethtool_getmsglevel,
@@ -1755,6 +1756,8 @@ static const struct ethtool_ops smc_ethtool_ops = {
         .get_eeprom_len = smc_ethtool_geteeprom_len,
         .get_eeprom     = smc_ethtool_geteeprom,
         .set_eeprom     = smc_ethtool_seteeprom,
+       .get_link_ksettings     = smc_ethtool_get_link_ksettings,
+       .set_link_ksettings     = smc_ethtool_set_link_ksettings,
  };
  
  static const struct net_device_ops smc_netdev_ops = {
diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig

index 296c8efd0038c8f66f41e9a58e30920462272192..48a541eb0af20f9a0db46c0b7a94cdcc709b0b04 100644 (file)
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -74,15 +74,23 @@ config TI_CPSW
           will be called cpsw.
  
  config TI_CPTS
-       tristate "TI Common Platform Time Sync (CPTS) Support"
+       bool "TI Common Platform Time Sync (CPTS) Support"
         depends on TI_CPSW || TI_KEYSTONE_NETCP
-       imply PTP_1588_CLOCK
+       depends on POSIX_TIMERS
         ---help---
           This driver supports the Common Platform Time Sync unit of
           the CPSW Ethernet Switch and Keystone 2 1g/10g Switch Subsystem.
           The unit can time stamp PTP UDP/IPv4 and Layer 2 packets, and the
           driver offers a PTP Hardware Clock.
  
+config TI_CPTS_MOD
+       tristate
+       depends on TI_CPTS
+       default y if TI_CPSW=y || TI_KEYSTONE_NETCP=y
+       select NET_PTP_CLASSIFY
+       imply PTP_1588_CLOCK
+       default m
+
  config TI_KEYSTONE_NETCP
         tristate "TI Keystone NETCP Core Support"
         select TI_CPSW_ALE
diff --git a/drivers/net/ethernet/ti/Makefile b/drivers/net/ethernet/ti/Makefile

index 1e7c10bf87132cda8e9c7ef2afc118ba1725388e..10e6b0ce51baf3115b8c72d40e933f10873186f0 100644 (file)
--- a/drivers/net/ethernet/ti/Makefile
+++ b/drivers/net/ethernet/ti/Makefile
@@ -12,7 +12,7 @@ obj-$(CONFIG_TI_DAVINCI_MDIO) += davinci_mdio.o
  obj-$(CONFIG_TI_DAVINCI_CPDMA) += davinci_cpdma.o
  obj-$(CONFIG_TI_CPSW_PHY_SEL) += cpsw-phy-sel.o
  obj-$(CONFIG_TI_CPSW_ALE) += cpsw_ale.o
-obj-$(CONFIG_TI_CPTS) += cpts.o
+obj-$(CONFIG_TI_CPTS_MOD) += cpts.o
  obj-$(CONFIG_TI_CPSW) += ti_cpsw.o
  ti_cpsw-y := cpsw.o
  
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c

index 9f3d9c67e3fe0f50b2d1119e74b7eac4b93e8bae..fa674a8bda0c8ff43d19699fefdd0ba718e75c90 100644 (file)
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1267,6 +1267,7 @@ static void soft_reset_slave(struct cpsw_slave *slave)
  static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
  {
         u32 slave_port;
+       struct phy_device *phy;
         struct cpsw_common *cpsw = priv->cpsw;
  
         soft_reset_slave(slave);
@@ -1300,27 +1301,28 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
                                    1 << slave_port, 0, 0, ALE_MCAST_FWD_2);
  
         if (slave->data->phy_node) {
-               slave->phy = of_phy_connect(priv->ndev, slave->data->phy_node,
+               phy = of_phy_connect(priv->ndev, slave->data->phy_node,
                                  &cpsw_adjust_link, 0, slave->data->phy_if);
-               if (!slave->phy) {
+               if (!phy) {
                         dev_err(priv->dev, "phy \"%s\" not found on slave %d\n",
                                 slave->data->phy_node->full_name,
                                 slave->slave_num);
                         return;
                 }
         } else {
-               slave->phy = phy_connect(priv->ndev, slave->data->phy_id,
+               phy = phy_connect(priv->ndev, slave->data->phy_id,
                                  &cpsw_adjust_link, slave->data->phy_if);
-               if (IS_ERR(slave->phy)) {
+               if (IS_ERR(phy)) {
                         dev_err(priv->dev,
                                 "phy \"%s\" not found on slave %d, err %ld\n",
                                 slave->data->phy_id, slave->slave_num,
-                               PTR_ERR(slave->phy));
-                       slave->phy = NULL;
+                               PTR_ERR(phy));
                         return;
                 }
         }
  
+       slave->phy = phy;
+
         phy_attached_info(slave->phy);
  
         phy_start(slave->phy);
@@ -1817,6 +1819,8 @@ static void cpsw_ndo_tx_timeout(struct net_device *ndev)
         }
  
         cpsw_intr_enable(cpsw);
+       netif_trans_update(ndev);
+       netif_tx_wake_all_queues(ndev);
  }
  
  static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p)
diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c

index a45f98fa4aa70a6ce0c693bef2fda248754a313b..3dadee1080b9e2e541d4b1a335671eb40d8c8205 100644 (file)
--- a/drivers/net/ethernet/toshiba/tc35815.c
+++ b/drivers/net/ethernet/toshiba/tc35815.c
@@ -1017,8 +1017,8 @@ tc35815_free_queues(struct net_device *dev)
                         BUG_ON(lp->tx_skbs[i].skb != skb);
  #endif
                         if (skb) {
-                               dev_kfree_skb(skb);
                                 pci_unmap_single(lp->pci_dev, lp->tx_skbs[i].skb_dma, skb->len, PCI_DMA_TODEVICE);
+                               dev_kfree_skb(skb);
                                 lp->tx_skbs[i].skb = NULL;
                                 lp->tx_skbs[i].skb_dma = 0;
                         }
diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c

index b75d9cdcfb0c415c7abeaa97cebeacbc917e1abd..ae48c809bac9fe13b0a92e086f0a1c6a4cf6feaf 100644 (file)
--- a/drivers/net/fjes/fjes_main.c
+++ b/drivers/net/fjes/fjes_main.c
@@ -45,6 +45,8 @@ MODULE_DESCRIPTION("FUJITSU Extended Socket Network Device Driver");
  MODULE_LICENSE("GPL");
  MODULE_VERSION(DRV_VERSION);
  
+#define ACPI_MOTHERBOARD_RESOURCE_HID "PNP0C02"
+
  static int fjes_request_irq(struct fjes_adapter *);
  static void fjes_free_irq(struct fjes_adapter *);
  
@@ -78,7 +80,7 @@ static void fjes_rx_irq(struct fjes_adapter *, int);
  static int fjes_poll(struct napi_struct *, int);
  
  static const struct acpi_device_id fjes_acpi_ids[] = {
-       {"PNP0C02", 0},
+       {ACPI_MOTHERBOARD_RESOURCE_HID, 0},
         {"", 0},
  };
  MODULE_DEVICE_TABLE(acpi, fjes_acpi_ids);
@@ -115,18 +117,17 @@ static struct resource fjes_resource[] = {
         },
  };
  
-static int fjes_acpi_add(struct acpi_device *device)
+static bool is_extended_socket_device(struct acpi_device *device)
  {
         struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL};
         char str_buf[sizeof(FJES_ACPI_SYMBOL) + 1];
-       struct platform_device *plat_dev;
         union acpi_object *str;
         acpi_status status;
         int result;
  
         status = acpi_evaluate_object(device->handle, "_STR", NULL, &buffer);
         if (ACPI_FAILURE(status))
-               return -ENODEV;
+               return false;
  
         str = buffer.pointer;
         result = utf16s_to_utf8s((wchar_t *)str->string.pointer,
@@ -136,10 +137,42 @@ static int fjes_acpi_add(struct acpi_device *device)
  
         if (strncmp(FJES_ACPI_SYMBOL, str_buf, strlen(FJES_ACPI_SYMBOL)) != 0) {
                 kfree(buffer.pointer);
-               return -ENODEV;
+               return false;
         }
         kfree(buffer.pointer);
  
+       return true;
+}
+
+static int acpi_check_extended_socket_status(struct acpi_device *device)
+{
+       unsigned long long sta;
+       acpi_status status;
+
+       status = acpi_evaluate_integer(device->handle, "_STA", NULL, &sta);
+       if (ACPI_FAILURE(status))
+               return -ENODEV;
+
+       if (!((sta & ACPI_STA_DEVICE_PRESENT) &&
+             (sta & ACPI_STA_DEVICE_ENABLED) &&
+             (sta & ACPI_STA_DEVICE_UI) &&
+             (sta & ACPI_STA_DEVICE_FUNCTIONING)))
+               return -ENODEV;
+
+       return 0;
+}
+
+static int fjes_acpi_add(struct acpi_device *device)
+{
+       struct platform_device *plat_dev;
+       acpi_status status;
+
+       if (!is_extended_socket_device(device))
+               return -ENODEV;
+
+       if (acpi_check_extended_socket_status(device))
+               return -ENODEV;
+
         status = acpi_walk_resources(device->handle, METHOD_NAME__CRS,
                                      fjes_get_acpi_resource, fjes_resource);
         if (ACPI_FAILURE(status))
@@ -1316,7 +1349,7 @@ static void fjes_netdev_setup(struct net_device *netdev)
         netdev->min_mtu = fjes_support_mtu[0];
         netdev->max_mtu = fjes_support_mtu[3];
         netdev->flags |= IFF_BROADCAST;
-       netdev->features |= NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_FILTER;
+       netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
  }
  
  static void fjes_irq_watch_task(struct work_struct *work)
@@ -1473,11 +1506,44 @@ static void fjes_watch_unshare_task(struct work_struct *work)
         }
  }
  
+static acpi_status
+acpi_find_extended_socket_device(acpi_handle obj_handle, u32 level,
+                                void *context, void **return_value)
+{
+       struct acpi_device *device;
+       bool *found = context;
+       int result;
+
+       result = acpi_bus_get_device(obj_handle, &device);
+       if (result)
+               return AE_OK;
+
+       if (strcmp(acpi_device_hid(device), ACPI_MOTHERBOARD_RESOURCE_HID))
+               return AE_OK;
+
+       if (!is_extended_socket_device(device))
+               return AE_OK;
+
+       if (acpi_check_extended_socket_status(device))
+               return AE_OK;
+
+       *found = true;
+       return AE_CTRL_TERMINATE;
+}
+
  /* fjes_init_module - Driver Registration Routine */
  static int __init fjes_init_module(void)
  {
+       bool found = false;
         int result;
  
+       acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX,
+                           acpi_find_extended_socket_device, NULL, &found,
+                           NULL);
+
+       if (!found)
+               return -ENODEV;
+
         pr_info("%s - version %s - %s\n",
                 fjes_driver_string, fjes_driver_version, fjes_copyright);
  
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h

index d3e73ac158aee6d3958b18618b85f0180f22abf7..db23cb36ae5cb9ec50493b00329f276ecdd22ae9 100644 (file)
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -700,6 +700,8 @@ struct net_device_context {
  
         u32 tx_checksum_mask;
  
+       u32 tx_send_table[VRSS_SEND_TAB_SIZE];
+
         /* Ethtool settings */
         u8 duplex;
         u32 speed;
@@ -749,7 +751,6 @@ struct netvsc_device {
         u32 send_section_cnt;
         u32 send_section_size;
         unsigned long *send_section_map;
-       int map_words;
  
         /* Used for NetVSP initialization protocol */
         struct completion channel_init_wait;
@@ -757,7 +758,6 @@ struct netvsc_device {
  
         struct nvsp_message revoke_packet;
  
-       u32 send_table[VRSS_SEND_TAB_SIZE];
         u32 max_chn;
         u32 num_chn;
         spinlock_t sc_lock; /* Protects num_sc_offered variable */
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c

index d35ebd993b385255eaa441fb67272c117e328e7f..15ef713d96c0887ec7929ff8d4be3ec3a6cac291 100644 (file)
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -236,6 +236,7 @@ static int netvsc_init_buf(struct hv_device *device)
         struct netvsc_device *net_device;
         struct nvsp_message *init_packet;
         struct net_device *ndev;
+       size_t map_words;
         int node;
  
         net_device = get_outbound_net_device(device);
@@ -401,11 +402,9 @@ static int netvsc_init_buf(struct hv_device *device)
                    net_device->send_section_size, net_device->send_section_cnt);
  
         /* Setup state for managing the send buffer. */
-       net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt,
-                                            BITS_PER_LONG);
+       map_words = DIV_ROUND_UP(net_device->send_section_cnt, BITS_PER_LONG);
  
-       net_device->send_section_map = kcalloc(net_device->map_words,
-                                              sizeof(ulong), GFP_KERNEL);
+       net_device->send_section_map = kcalloc(map_words, sizeof(ulong), GFP_KERNEL);
         if (net_device->send_section_map == NULL) {
                 ret = -ENOMEM;
                 goto cleanup;
@@ -683,7 +682,7 @@ static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
         unsigned long *map_addr = net_device->send_section_map;
         unsigned int i;
  
-       for_each_clear_bit(i, map_addr, net_device->map_words) {
+       for_each_clear_bit(i, map_addr, net_device->send_section_cnt) {
                 if (sync_test_and_set_bit(i, map_addr) == 0)
                         return i;
         }
@@ -1136,15 +1135,11 @@ static void netvsc_receive(struct net_device *ndev,
  static void netvsc_send_table(struct hv_device *hdev,
                               struct nvsp_message *nvmsg)
  {
-       struct netvsc_device *nvscdev;
         struct net_device *ndev = hv_get_drvdata(hdev);
+       struct net_device_context *net_device_ctx = netdev_priv(ndev);
         int i;
         u32 count, *tab;
  
-       nvscdev = get_outbound_net_device(hdev);
-       if (!nvscdev)
-               return;
-
         count = nvmsg->msg.v5_msg.send_table.count;
         if (count != VRSS_SEND_TAB_SIZE) {
                 netdev_err(ndev, "Received wrong send-table size:%u\n", count);
@@ -1155,7 +1150,7 @@ static void netvsc_send_table(struct hv_device *hdev,
                       nvmsg->msg.v5_msg.send_table.offset);
  
         for (i = 0; i < count; i++)
-               nvscdev->send_table[i] = tab[i];
+               net_device_ctx->tx_send_table[i] = tab[i];
  }
  
  static void netvsc_send_vf(struct net_device_context *net_device_ctx,
@@ -1235,8 +1230,11 @@ void netvsc_channel_cb(void *context)
                 return;
  
         net_device = net_device_to_netvsc_device(ndev);
-       if (unlikely(net_device->destroy) &&
-           netvsc_channel_idle(net_device, q_idx))
+       if (unlikely(!net_device))
+               return;
+
+       if (unlikely(net_device->destroy &&
+                    netvsc_channel_idle(net_device, q_idx)))
                 return;
  
         /* commit_rd_index() -> hv_signal_on_read() needs this. */
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c

index bc05c895d9589deccd24f1013831036da75e4d1b..5ede87f30463e8211ef2828a8f74d4951c4166a6 100644 (file)
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -206,17 +206,15 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
                         void *accel_priv, select_queue_fallback_t fallback)
  {
         struct net_device_context *net_device_ctx = netdev_priv(ndev);
-       struct netvsc_device *nvsc_dev = net_device_ctx->nvdev;
+       unsigned int num_tx_queues = ndev->real_num_tx_queues;
         struct sock *sk = skb->sk;
         int q_idx = sk_tx_queue_get(sk);
  
-       if (q_idx < 0 || skb->ooo_okay ||
-           q_idx >= ndev->real_num_tx_queues) {
+       if (q_idx < 0 || skb->ooo_okay || q_idx >= num_tx_queues) {
                 u16 hash = __skb_tx_hash(ndev, skb, VRSS_SEND_TAB_SIZE);
                 int new_idx;
  
-               new_idx = nvsc_dev->send_table[hash]
-                       % nvsc_dev->num_chn;
+               new_idx = net_device_ctx->tx_send_table[hash] % num_tx_queues;
  
                 if (q_idx != new_idx && sk &&
                     sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache))
@@ -225,9 +223,6 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
                 q_idx = new_idx;
         }
  
-       if (unlikely(!nvsc_dev->chan_table[q_idx].channel))
-               q_idx = 0;
-
         return q_idx;
  }
  
diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c

index ffedad2a360afb8154f0e1e8b9be0c1d388fa5d9..15b920086251633e2ee0ee0b26ba0046859e5299 100644 (file)
--- a/drivers/net/irda/vlsi_ir.c
+++ b/drivers/net/irda/vlsi_ir.c
@@ -418,8 +418,9 @@ static struct vlsi_ring *vlsi_alloc_ring(struct pci_dev *pdev, struct ring_descr
                 memset(rd, 0, sizeof(*rd));
                 rd->hw = hwmap + i;
                 rd->buf = kmalloc(len, GFP_KERNEL|GFP_DMA);
-               if (rd->buf == NULL ||
-                   !(busaddr = pci_map_single(pdev, rd->buf, len, dir))) {
+               if (rd->buf)
+                       busaddr = pci_map_single(pdev, rd->buf, len, dir);
+               if (rd->buf == NULL || pci_dma_mapping_error(pdev, busaddr)) {
                         if (rd->buf) {
                                 net_err_ratelimited("%s: failed to create PCI-MAP for %p\n",
                                                     __func__, rd->buf);
@@ -430,8 +431,7 @@ static struct vlsi_ring *vlsi_alloc_ring(struct pci_dev *pdev, struct ring_descr
                                 rd = r->rd + j;
                                 busaddr = rd_get_addr(rd);
                                 rd_set_addr_status(rd, 0, 0);
-                               if (busaddr)
-                                       pci_unmap_single(pdev, busaddr, len, dir);
+                               pci_unmap_single(pdev, busaddr, len, dir);
                                 kfree(rd->buf);
                                 rd->buf = NULL;
                         }
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c

index ff0a5ed3ca803551a0350303af44976d0f47dcfc..49ce4e9f4a0f387dac1792252ecd5044b6373cd4 100644 (file)
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -617,7 +617,8 @@ static void macsec_encrypt_done(struct crypto_async_request *base, int err)
  
  static struct aead_request *macsec_alloc_req(struct crypto_aead *tfm,
                                              unsigned char **iv,
-                                            struct scatterlist **sg)
+                                            struct scatterlist **sg,
+                                            int num_frags)
  {
         size_t size, iv_offset, sg_offset;
         struct aead_request *req;
@@ -629,7 +630,7 @@ static struct aead_request *macsec_alloc_req(struct crypto_aead *tfm,
  
         size = ALIGN(size, __alignof__(struct scatterlist));
         sg_offset = size;
-       size += sizeof(struct scatterlist) * (MAX_SKB_FRAGS + 1);
+       size += sizeof(struct scatterlist) * num_frags;
  
         tmp = kmalloc(size, GFP_ATOMIC);
         if (!tmp)
@@ -649,6 +650,7 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
  {
         int ret;
         struct scatterlist *sg;
+       struct sk_buff *trailer;
         unsigned char *iv;
         struct ethhdr *eth;
         struct macsec_eth_header *hh;
@@ -723,7 +725,14 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
                 return ERR_PTR(-EINVAL);
         }
  
-       req = macsec_alloc_req(tx_sa->key.tfm, &iv, &sg);
+       ret = skb_cow_data(skb, 0, &trailer);
+       if (unlikely(ret < 0)) {
+               macsec_txsa_put(tx_sa);
+               kfree_skb(skb);
+               return ERR_PTR(ret);
+       }
+
+       req = macsec_alloc_req(tx_sa->key.tfm, &iv, &sg, ret);
         if (!req) {
                 macsec_txsa_put(tx_sa);
                 kfree_skb(skb);
@@ -732,7 +741,7 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
  
         macsec_fill_iv(iv, secy->sci, pn);
  
-       sg_init_table(sg, MAX_SKB_FRAGS + 1);
+       sg_init_table(sg, ret);
         skb_to_sgvec(skb, sg, 0, skb->len);
  
         if (tx_sc->encrypt) {
@@ -917,6 +926,7 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb,
  {
         int ret;
         struct scatterlist *sg;
+       struct sk_buff *trailer;
         unsigned char *iv;
         struct aead_request *req;
         struct macsec_eth_header *hdr;
@@ -927,7 +937,12 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb,
         if (!skb)
                 return ERR_PTR(-ENOMEM);
  
-       req = macsec_alloc_req(rx_sa->key.tfm, &iv, &sg);
+       ret = skb_cow_data(skb, 0, &trailer);
+       if (unlikely(ret < 0)) {
+               kfree_skb(skb);
+               return ERR_PTR(ret);
+       }
+       req = macsec_alloc_req(rx_sa->key.tfm, &iv, &sg, ret);
         if (!req) {
                 kfree_skb(skb);
                 return ERR_PTR(-ENOMEM);
@@ -936,7 +951,7 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb,
         hdr = (struct macsec_eth_header *)skb->data;
         macsec_fill_iv(iv, sci, ntohl(hdr->packet_number));
  
-       sg_init_table(sg, MAX_SKB_FRAGS + 1);
+       sg_init_table(sg, ret);
         skb_to_sgvec(skb, sg, 0, skb->len);
  
         if (hdr->tci_an & MACSEC_TCI_E) {
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c

index 9261722960a719a8e6d46f4ea5b39f500a0817e8..b34eaaae03fd3f289aab4a90b11c05c587858ad2 100644 (file)
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1139,6 +1139,7 @@ static int macvlan_port_create(struct net_device *dev)
  static void macvlan_port_destroy(struct net_device *dev)
  {
         struct macvlan_port *port = macvlan_port_get_rtnl(dev);
+       struct sk_buff *skb;
  
         dev->priv_flags &= ~IFF_MACVLAN_PORT;
         netdev_rx_handler_unregister(dev);
@@ -1147,7 +1148,15 @@ static void macvlan_port_destroy(struct net_device *dev)
          * but we need to cancel it and purge left skbs if any.
          */
         cancel_work_sync(&port->bc_work);
-       __skb_queue_purge(&port->bc_queue);
+
+       while ((skb = __skb_dequeue(&port->bc_queue))) {
+               const struct macvlan_dev *src = MACVLAN_SKB_CB(skb)->src;
+
+               if (src)
+                       dev_put(src->dev);
+
+               kfree_skb(skb);
+       }
  
         kfree(port);
  }
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c

index e2460a57e4b1105ed398e207aa8cdfd84d03707d..ed0d10f54f2607533868dfd10e6bc9d0e09050de 100644 (file)
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -1438,8 +1438,6 @@ static bool dp83640_rxtstamp(struct phy_device *phydev,
                 skb_info->tmo = jiffies + SKB_TIMESTAMP_TIMEOUT;
                 skb_queue_tail(&dp83640->rx_queue, skb);
                 schedule_delayed_work(&dp83640->ts_work, SKB_TIMESTAMP_TIMEOUT);
-       } else {
-               netif_rx_ni(skb);
         }
  
         return true;
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c

index f9d0fa315a47624409cb054e762a6c8b6537a7b6..272b051a019975110aa1d117da993cf18cb98816 100644 (file)
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -1883,17 +1883,6 @@ static int m88e1510_probe(struct phy_device *phydev)
         return m88e1510_hwmon_probe(phydev);
  }
  
-static void marvell_remove(struct phy_device *phydev)
-{
-#ifdef CONFIG_HWMON
-
-       struct marvell_priv *priv = phydev->priv;
-
-       if (priv && priv->hwmon_dev)
-               hwmon_device_unregister(priv->hwmon_dev);
-#endif
-}
-
  static struct phy_driver marvell_drivers[] = {
         {
                 .phy_id = MARVELL_PHY_ID_88E1101,
@@ -1974,7 +1963,6 @@ static struct phy_driver marvell_drivers[] = {
                 .features = PHY_GBIT_FEATURES,
                 .flags = PHY_HAS_INTERRUPT,
                 .probe = &m88e1121_probe,
-               .remove = &marvell_remove,
                 .config_init = &m88e1121_config_init,
                 .config_aneg = &m88e1121_config_aneg,
                 .read_status = &marvell_read_status,
@@ -2087,7 +2075,6 @@ static struct phy_driver marvell_drivers[] = {
                 .features = PHY_GBIT_FEATURES | SUPPORTED_FIBRE,
                 .flags = PHY_HAS_INTERRUPT,
                 .probe = &m88e1510_probe,
-               .remove = &marvell_remove,
                 .config_init = &m88e1510_config_init,
                 .config_aneg = &m88e1510_config_aneg,
                 .read_status = &marvell_read_status,
@@ -2109,7 +2096,6 @@ static struct phy_driver marvell_drivers[] = {
                 .features = PHY_GBIT_FEATURES,
                 .flags = PHY_HAS_INTERRUPT,
                 .probe = m88e1510_probe,
-               .remove = &marvell_remove,
                 .config_init = &marvell_config_init,
                 .config_aneg = &m88e1510_config_aneg,
                 .read_status = &marvell_read_status,
@@ -2127,7 +2113,6 @@ static struct phy_driver marvell_drivers[] = {
                 .phy_id_mask = MARVELL_PHY_ID_MASK,
                 .name = "Marvell 88E1545",
                 .probe = m88e1510_probe,
-               .remove = &marvell_remove,
                 .features = PHY_GBIT_FEATURES,
                 .flags = PHY_HAS_INTERRUPT,
                 .config_init = &marvell_config_init,
diff --git a/drivers/net/phy/mdio-boardinfo.c b/drivers/net/phy/mdio-boardinfo.c

index 6b988f77da08fca5ba9e7efec8c4af354ad51ecc..61941e29daae85abe7cc8a5726d669eae3d29137 100644 (file)
--- a/drivers/net/phy/mdio-boardinfo.c
+++ b/drivers/net/phy/mdio-boardinfo.c
@@ -84,3 +84,4 @@ int mdiobus_register_board_info(const struct mdio_board_info *info,
  
         return 0;
  }
+EXPORT_SYMBOL(mdiobus_register_board_info);
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c

index 6742070ca676f57694a9a6cb11364941deb520a0..da5b392683703b9ece67a24ebcb59aadeba7cc8e 100644 (file)
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -297,17 +297,6 @@ static int kszphy_config_init(struct phy_device *phydev)
         if (priv->led_mode >= 0)
                 kszphy_setup_led(phydev, type->led_mode_reg, priv->led_mode);
  
-       if (phy_interrupt_is_valid(phydev)) {
-               int ctl = phy_read(phydev, MII_BMCR);
-
-               if (ctl < 0)
-                       return ctl;
-
-               ret = phy_write(phydev, MII_BMCR, ctl & ~BMCR_ANENABLE);
-               if (ret < 0)
-                       return ret;
-       }
-
         return 0;
  }
  
@@ -798,9 +787,6 @@ static struct phy_driver ksphy_driver[] = {
         .read_status    = genphy_read_status,
         .ack_interrupt  = kszphy_ack_interrupt,
         .config_intr    = kszphy_config_intr,
-       .get_sset_count = kszphy_get_sset_count,
-       .get_strings    = kszphy_get_strings,
-       .get_stats      = kszphy_get_stats,
         .suspend        = genphy_suspend,
         .resume         = genphy_resume,
  }, {
@@ -940,9 +926,6 @@ static struct phy_driver ksphy_driver[] = {
         .read_status    = genphy_read_status,
         .ack_interrupt  = kszphy_ack_interrupt,
         .config_intr    = kszphy_config_intr,
-       .get_sset_count = kszphy_get_sset_count,
-       .get_strings    = kszphy_get_strings,
-       .get_stats      = kszphy_get_stats,
         .suspend        = genphy_suspend,
         .resume         = genphy_resume,
  }, {
@@ -952,6 +935,7 @@ static struct phy_driver ksphy_driver[] = {
         .features       = PHY_GBIT_FEATURES,
         .flags          = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
         .driver_data    = &ksz9021_type,
+       .probe          = kszphy_probe,
         .config_init    = ksz9021_config_init,
         .config_aneg    = genphy_config_aneg,
         .read_status    = genphy_read_status,
@@ -971,6 +955,7 @@ static struct phy_driver ksphy_driver[] = {
         .features       = PHY_GBIT_FEATURES,
         .flags          = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
         .driver_data    = &ksz9021_type,
+       .probe          = kszphy_probe,
         .config_init    = ksz9031_config_init,
         .config_aneg    = genphy_config_aneg,
         .read_status    = ksz9031_read_status,
@@ -989,9 +974,6 @@ static struct phy_driver ksphy_driver[] = {
         .config_init    = kszphy_config_init,
         .config_aneg    = ksz8873mll_config_aneg,
         .read_status    = ksz8873mll_read_status,
-       .get_sset_count = kszphy_get_sset_count,
-       .get_strings    = kszphy_get_strings,
-       .get_stats      = kszphy_get_stats,
         .suspend        = genphy_suspend,
         .resume         = genphy_resume,
  }, {
@@ -1003,9 +985,6 @@ static struct phy_driver ksphy_driver[] = {
         .config_init    = kszphy_config_init,
         .config_aneg    = genphy_config_aneg,
         .read_status    = genphy_read_status,
-       .get_sset_count = kszphy_get_sset_count,
-       .get_strings    = kszphy_get_strings,
-       .get_stats      = kszphy_get_stats,
         .suspend        = genphy_suspend,
         .resume         = genphy_resume,
  }, {
@@ -1017,9 +996,6 @@ static struct phy_driver ksphy_driver[] = {
         .config_init    = kszphy_config_init,
         .config_aneg    = ksz8873mll_config_aneg,
         .read_status    = ksz8873mll_read_status,
-       .get_sset_count = kszphy_get_sset_count,
-       .get_strings    = kszphy_get_strings,
-       .get_stats      = kszphy_get_stats,
         .suspend        = genphy_suspend,
         .resume         = genphy_resume,
  } };
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c

index 1be69d8bc90948e82f92736b8f7ee9d274b9bd2b..97ff1278167bc455af890e9b6c16d7b9d659ea57 100644 (file)
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -591,16 +591,18 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd)
  EXPORT_SYMBOL(phy_mii_ioctl);
  
  /**
- * phy_start_aneg - start auto-negotiation for this PHY device
+ * phy_start_aneg_priv - start auto-negotiation for this PHY device
   * @phydev: the phy_device struct
+ * @sync: indicate whether we should wait for the workqueue cancelation
   *
   * Description: Sanitizes the settings (if we're not autonegotiating
   *   them), and then calls the driver's config_aneg function.
   *   If the PHYCONTROL Layer is operating, we change the state to
   *   reflect the beginning of Auto-negotiation or forcing.
   */
-int phy_start_aneg(struct phy_device *phydev)
+static int phy_start_aneg_priv(struct phy_device *phydev, bool sync)
  {
+       bool trigger = 0;
         int err;
  
         if (!phydev->drv)
@@ -628,10 +630,40 @@ int phy_start_aneg(struct phy_device *phydev)
                 }
         }
  
+       /* Re-schedule a PHY state machine to check PHY status because
+        * negotiation may already be done and aneg interrupt may not be
+        * generated.
+        */
+       if (phy_interrupt_is_valid(phydev) && (phydev->state == PHY_AN)) {
+               err = phy_aneg_done(phydev);
+               if (err > 0) {
+                       trigger = true;
+                       err = 0;
+               }
+       }
+
  out_unlock:
         mutex_unlock(&phydev->lock);
+
+       if (trigger)
+               phy_trigger_machine(phydev, sync);
+
         return err;
  }
+
+/**
+ * phy_start_aneg - start auto-negotiation for this PHY device
+ * @phydev: the phy_device struct
+ *
+ * Description: Sanitizes the settings (if we're not autonegotiating
+ *   them), and then calls the driver's config_aneg function.
+ *   If the PHYCONTROL Layer is operating, we change the state to
+ *   reflect the beginning of Auto-negotiation or forcing.
+ */
+int phy_start_aneg(struct phy_device *phydev)
+{
+       return phy_start_aneg_priv(phydev, true);
+}
  EXPORT_SYMBOL(phy_start_aneg);
  
  /**
@@ -659,7 +691,7 @@ void phy_start_machine(struct phy_device *phydev)
   *   state machine runs.
   */
  
-static void phy_trigger_machine(struct phy_device *phydev, bool sync)
+void phy_trigger_machine(struct phy_device *phydev, bool sync)
  {
         if (sync)
                 cancel_delayed_work_sync(&phydev->state_queue);
@@ -681,7 +713,7 @@ void phy_stop_machine(struct phy_device *phydev)
         cancel_delayed_work_sync(&phydev->state_queue);
  
         mutex_lock(&phydev->lock);
-       if (phydev->state > PHY_UP)
+       if (phydev->state > PHY_UP && phydev->state != PHY_HALTED)
                 phydev->state = PHY_UP;
         mutex_unlock(&phydev->lock);
  }
@@ -1154,7 +1186,7 @@ void phy_state_machine(struct work_struct *work)
         mutex_unlock(&phydev->lock);
  
         if (needs_aneg)
-               err = phy_start_aneg(phydev);
+               err = phy_start_aneg_priv(phydev, false);
         else if (do_suspend)
                 phy_suspend(phydev);
  
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c

index daec6555f3b10889f786912d286b316a9331480b..5198ccfa347f8b4bfb5ee5e0c69ee12fb44ec681 100644 (file)
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1864,7 +1864,7 @@ static struct phy_driver genphy_driver[] = {
         .phy_id         = 0xffffffff,
         .phy_id_mask    = 0xffffffff,
         .name           = "Generic PHY",
-       .soft_reset     = genphy_soft_reset,
+       .soft_reset     = genphy_no_soft_reset,
         .config_init    = genphy_config_init,
         .features       = PHY_GBIT_FEATURES | SUPPORTED_MII |
                           SUPPORTED_AUI | SUPPORTED_FIBRE |
diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c

index 93ffedfa299412f78af2c72fedc991101a52a451..1e2d4f1179da31ed1e458af5e29cc77314f875ad 100644 (file)
--- a/drivers/net/phy/spi_ks8995.c
+++ b/drivers/net/phy/spi_ks8995.c
@@ -491,13 +491,14 @@ static int ks8995_probe(struct spi_device *spi)
         if (err)
                 return err;
  
-       ks->regs_attr.size = ks->chip->regs_size;
         memcpy(&ks->regs_attr, &ks8995_registers_attr, sizeof(ks->regs_attr));
+       ks->regs_attr.size = ks->chip->regs_size;
  
         err = ks8995_reset(ks);
         if (err)
                 return err;
  
+       sysfs_attr_init(&ks->regs_attr.attr);
         err = sysfs_create_bin_file(&spi->dev.kobj, &ks->regs_attr);
         if (err) {
                 dev_err(&spi->dev, "unable to create sysfs file, err=%d\n",
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c

index 4a24b5d15f5a5dfe770d184533f70f7140d9e145..85c01247f2e3858f33e901de1a37d3eaae0941d7 100644 (file)
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -990,7 +990,7 @@ static void team_port_disable(struct team *team,
  #define TEAM_ENC_FEATURES      (NETIF_F_HW_CSUM | NETIF_F_SG | \
                                  NETIF_F_RXCSUM | NETIF_F_ALL_TSO)
  
-static void ___team_compute_features(struct team *team)
+static void __team_compute_features(struct team *team)
  {
         struct team_port *port;
         u32 vlan_features = TEAM_VLAN_FEATURES & NETIF_F_ALL_FOR_ALL;
@@ -1023,16 +1023,10 @@ static void ___team_compute_features(struct team *team)
                 team->dev->priv_flags |= IFF_XMIT_DST_RELEASE;
  }
  
-static void __team_compute_features(struct team *team)
-{
-       ___team_compute_features(team);
-       netdev_change_features(team->dev);
-}
-
  static void team_compute_features(struct team *team)
  {
         mutex_lock(&team->lock);
-       ___team_compute_features(team);
+       __team_compute_features(team);
         mutex_unlock(&team->lock);
         netdev_change_features(team->dev);
  }
@@ -1641,6 +1635,7 @@ static void team_uninit(struct net_device *dev)
         team_notify_peers_fini(team);
         team_queue_override_fini(team);
         mutex_unlock(&team->lock);
+       netdev_change_features(dev);
  }
  
  static void team_destructor(struct net_device *dev)
@@ -1928,6 +1923,10 @@ static int team_add_slave(struct net_device *dev, struct net_device *port_dev)
         mutex_lock(&team->lock);
         err = team_port_add(team, port_dev);
         mutex_unlock(&team->lock);
+
+       if (!err)
+               netdev_change_features(dev);
+
         return err;
  }
  
@@ -1939,6 +1938,10 @@ static int team_del_slave(struct net_device *dev, struct net_device *port_dev)
         mutex_lock(&team->lock);
         err = team_port_del(team, port_dev);
         mutex_unlock(&team->lock);
+
+       if (!err)
+               netdev_change_features(dev);
+
         return err;
  }
  
@@ -2072,6 +2075,7 @@ static int team_dev_type_check_change(struct net_device *dev,
  static void team_setup(struct net_device *dev)
  {
         ether_setup(dev);
+       dev->max_mtu = ETH_MAX_MTU;
  
         dev->netdev_ops = &team_netdev_ops;
         dev->ethtool_ops = &team_ethtool_ops;
@@ -2357,8 +2361,10 @@ start_again:
  
         hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags | NLM_F_MULTI,
                           TEAM_CMD_OPTIONS_GET);
-       if (!hdr)
+       if (!hdr) {
+               nlmsg_free(skb);
                 return -EMSGSIZE;
+       }
  
         if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex))
                 goto nla_put_failure;
@@ -2630,8 +2636,10 @@ start_again:
  
         hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags | NLM_F_MULTI,
                           TEAM_CMD_PORT_LIST_GET);
-       if (!hdr)
+       if (!hdr) {
+               nlmsg_free(skb);
                 return -EMSGSIZE;
+       }
  
         if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex))
                 goto nla_put_failure;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c

index dc1b1dd9157c16d1bbd3505751a8782e020ab71a..cc88cd7856f5e5ec4d3e3e309cbefe196c5b27ec 100644 (file)
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -822,7 +822,18 @@ static void tun_net_uninit(struct net_device *dev)
  /* Net device open. */
  static int tun_net_open(struct net_device *dev)
  {
+       struct tun_struct *tun = netdev_priv(dev);
+       int i;
+
         netif_tx_start_all_queues(dev);
+
+       for (i = 0; i < tun->numqueues; i++) {
+               struct tun_file *tfile;
+
+               tfile = rtnl_dereference(tun->tfiles[i]);
+               tfile->socket.sk->sk_write_space(tfile->socket.sk);
+       }
+
         return 0;
  }
  
@@ -1103,9 +1114,10 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
         if (!skb_array_empty(&tfile->tx_array))
                 mask |= POLLIN | POLLRDNORM;
  
-       if (sock_writeable(sk) ||
-           (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
-            sock_writeable(sk)))
+       if (tun->dev->flags & IFF_UP &&
+           (sock_writeable(sk) ||
+            (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
+             sock_writeable(sk))))
                 mask |= POLLOUT | POLLWRNORM;
  
         if (tun->dev->reg_state != NETREG_REGISTERED)
@@ -1919,6 +1931,8 @@ static int set_offload(struct tun_struct *tun, unsigned long arg)
                 return -EINVAL;
  
         tun->set_features = features;
+       tun->dev->wanted_features &= ~TUN_USER_FEATURES;
+       tun->dev->wanted_features |= features;
         netdev_update_features(tun->dev);
  
         return 0;
@@ -2570,7 +2584,6 @@ static int __init tun_init(void)
         int ret = 0;
  
         pr_info("%s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
-       pr_info("%s\n", DRV_COPYRIGHT);
  
         ret = rtnl_link_register(&tun_link_ops);
         if (ret) {
diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig

index 3dd490f53e485a399a4531c6681c030b1f5dcde4..f28bd74ac275a039a000cc01b0448d8866f14d1f 100644 (file)
--- a/drivers/net/usb/Kconfig
+++ b/drivers/net/usb/Kconfig
@@ -369,7 +369,7 @@ config USB_NET_NET1080
           optionally with LEDs that indicate traffic
  
  config USB_NET_PLUSB
-       tristate "Prolific PL-2301/2302/25A1 based cables"
+       tristate "Prolific PL-2301/2302/25A1/27A1 based cables"
         # if the handshake/init/reset problems, from original 'plusb',
         # are ever resolved ... then remove "experimental"
         depends on USB_USBNET
diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c

index f5552aaaa77a59bf558da6c22218a919bf99ec94..f3ae88fdf332e890ac8273e3df1ca7dd53092c07 100644 (file)
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -532,6 +532,7 @@ static const struct driver_info wwan_info = {
  #define LENOVO_VENDOR_ID       0x17ef
  #define NVIDIA_VENDOR_ID       0x0955
  #define HP_VENDOR_ID           0x03f0
+#define MICROSOFT_VENDOR_ID    0x045e
  
  static const struct usb_device_id      products[] = {
  /* BLACKLIST !!
@@ -761,6 +762,20 @@ static const struct usb_device_id  products[] = {
         .driver_info = 0,
  },
  
+/* Microsoft Surface 2 dock (based on Realtek RTL8152) */
+{
+       USB_DEVICE_AND_INTERFACE_INFO(MICROSOFT_VENDOR_ID, 0x07ab, USB_CLASS_COMM,
+                       USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE),
+       .driver_info = 0,
+},
+
+/* Microsoft Surface 3 dock (based on Realtek RTL8153) */
+{
+       USB_DEVICE_AND_INTERFACE_INFO(MICROSOFT_VENDOR_ID, 0x07c6, USB_CLASS_COMM,
+                       USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE),
+       .driver_info = 0,
+},
+
  /* WHITELIST!!!
   *
   * CDC Ether uses two interfaces, not necessarily consecutive.
diff --git a/drivers/net/usb/ch9200.c b/drivers/net/usb/ch9200.c

index 8a40202c0a1732850da1b2eb64af21470b9c85b4..c4f1c363e24b89404c6834312074f8a4451ded50 100644 (file)
--- a/drivers/net/usb/ch9200.c
+++ b/drivers/net/usb/ch9200.c
@@ -254,14 +254,9 @@ static struct sk_buff *ch9200_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
         tx_overhead = 0x40;
  
         len = skb->len;
-       if (skb_headroom(skb) < tx_overhead) {
-               struct sk_buff *skb2;
-
-               skb2 = skb_copy_expand(skb, tx_overhead, 0, flags);
+       if (skb_cow_head(skb, tx_overhead)) {
                 dev_kfree_skb_any(skb);
-               skb = skb2;
-               if (!skb)
-                       return NULL;
+               return NULL;
         }
  
         __skb_push(skb, tx_overhead);
diff --git a/drivers/net/usb/cx82310_eth.c b/drivers/net/usb/cx82310_eth.c

index e221bfcee76b40a3ad7ba60ec4d348f4b8f4cc73..947bea81d924124c3827e87f75e732e35adb2acd 100644 (file)
--- a/drivers/net/usb/cx82310_eth.c
+++ b/drivers/net/usb/cx82310_eth.c
@@ -293,12 +293,9 @@ static struct sk_buff *cx82310_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
  {
         int len = skb->len;
  
-       if (skb_headroom(skb) < 2) {
-               struct sk_buff *skb2 = skb_copy_expand(skb, 2, 0, flags);
+       if (skb_cow_head(skb, 2)) {
                 dev_kfree_skb_any(skb);
-               skb = skb2;
-               if (!skb)
-                       return NULL;
+               return NULL;
         }
         skb_push(skb, 2);
  
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c

index 4f2e8141dbe2e53eb23a2b60124e2821b2897fce..00067a0c51ca45b736920a3c56ee42cb24b08271 100644 (file)
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -2534,13 +2534,6 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface,
         SET_NETDEV_DEV(net, &interface->dev);
         SET_NETDEV_DEVTYPE(net, &hso_type);
  
-       /* registering our net device */
-       result = register_netdev(net);
-       if (result) {
-               dev_err(&interface->dev, "Failed to register device\n");
-               goto exit;
-       }
-
         /* start allocating */
         for (i = 0; i < MUX_BULK_RX_BUF_COUNT; i++) {
                 hso_net->mux_bulk_rx_urb_pool[i] = usb_alloc_urb(0, GFP_KERNEL);
@@ -2560,6 +2553,13 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface,
  
         add_net_device(hso_dev);
  
+       /* registering our net device */
+       result = register_netdev(net);
+       if (result) {
+               dev_err(&interface->dev, "Failed to register device\n");
+               goto exit;
+       }
+
         hso_log_port(hso_dev);
  
         hso_create_rfkill(hso_dev, interface);
@@ -3279,9 +3279,9 @@ static void __exit hso_exit(void)
         pr_info("unloaded\n");
  
         tty_unregister_driver(tty_drv);
-       put_tty_driver(tty_drv);
         /* deregister the usb driver */
         usb_deregister(&hso_driver);
+       put_tty_driver(tty_drv);
  }
  
  /* Module definitions */
diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c

index 876f02f4945eafdc2fb5cfa0f9dcb54d9b498af4..2a2c3edb6bad0b3bd257c3a101d100ad3b00cc59 100644 (file)
--- a/drivers/net/usb/kaweth.c
+++ b/drivers/net/usb/kaweth.c
@@ -803,18 +803,12 @@ static netdev_tx_t kaweth_start_xmit(struct sk_buff *skb,
         }
  
         /* We now decide whether we can put our special header into the sk_buff */
-       if (skb_cloned(skb) || skb_headroom(skb) < 2) {
-               /* no such luck - we make our own */
-               struct sk_buff *copied_skb;
-               copied_skb = skb_copy_expand(skb, 2, 0, GFP_ATOMIC);
-               dev_kfree_skb_irq(skb);
-               skb = copied_skb;
-               if (!copied_skb) {
-                       kaweth->stats.tx_errors++;
-                       netif_start_queue(net);
-                       spin_unlock_irq(&kaweth->device_lock);
-                       return NETDEV_TX_OK;
-               }
+       if (skb_cow_head(skb, 2)) {
+               kaweth->stats.tx_errors++;
+               netif_start_queue(net);
+               spin_unlock_irq(&kaweth->device_lock);
+               dev_kfree_skb_any(skb);
+               return NETDEV_TX_OK;
         }
  
         private_header = (__le16 *)__skb_push(skb, 2);
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c

index 9889a70ff4f6fece5bfabbfb45a3470f721a5a32..636f48f19d1eacae67c050de4fc3e651bffdf825 100644 (file)
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -2607,14 +2607,9 @@ static struct sk_buff *lan78xx_tx_prep(struct lan78xx_net *dev,
  {
         u32 tx_cmd_a, tx_cmd_b;
  
-       if (skb_headroom(skb) < TX_OVERHEAD) {
-               struct sk_buff *skb2;
-
-               skb2 = skb_copy_expand(skb, TX_OVERHEAD, 0, flags);
+       if (skb_cow_head(skb, TX_OVERHEAD)) {
                 dev_kfree_skb_any(skb);
-               skb = skb2;
-               if (!skb)
-                       return NULL;
+               return NULL;
         }
  
         if (lan78xx_linearize(skb) < 0)
diff --git a/drivers/net/usb/plusb.c b/drivers/net/usb/plusb.c

index 22e1a9a99a7d8cad77b22410973575ee17699b2b..6fe59373cba9b8bd1afce514265171dbbd43aa9e 100644 (file)
--- a/drivers/net/usb/plusb.c
+++ b/drivers/net/usb/plusb.c
@@ -102,7 +102,7 @@ static int pl_reset(struct usbnet *dev)
  }
  
  static const struct driver_info        prolific_info = {
-       .description =  "Prolific PL-2301/PL-2302/PL-25A1",
+       .description =  "Prolific PL-2301/PL-2302/PL-25A1/PL-27A1",
         .flags =        FLAG_POINTTOPOINT | FLAG_NO_SETINT,
                 /* some PL-2302 versions seem to fail usb_set_interface() */
         .reset =        pl_reset,
@@ -139,6 +139,17 @@ static const struct usb_device_id  products [] = {
                                          * Host-to-Host Cable
                                          */
         .driver_info =  (unsigned long) &prolific_info,
+
+},
+
+/* super speed cables */
+{
+       USB_DEVICE(0x067b, 0x27a1),     /* PL-27A1, no eeprom
+                                        * also: goobay Active USB 3.0
+                                        * Data Link,
+                                        * Unitek Y-3501
+                                        */
+       .driver_info =  (unsigned long) &prolific_info,
  },
  
         { },            // END
@@ -158,5 +169,5 @@ static struct usb_driver plusb_driver = {
  module_usb_driver(plusb_driver);
  
  MODULE_AUTHOR("David Brownell");
-MODULE_DESCRIPTION("Prolific PL-2301/2302/25A1 USB Host to Host Link Driver");
+MODULE_DESCRIPTION("Prolific PL-2301/2302/25A1/27A1 USB Host to Host Link Driver");
  MODULE_LICENSE("GPL");
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c

index 8056745506832867165f03ae0b24c2f1a578d849..2474618404f5e592c0fe56d38c30c8988e1ed8ef 100644 (file)
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -580,6 +580,10 @@ static const struct usb_device_id products[] = {
                 USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, USB_CLASS_VENDOR_SPEC, 0x01, 0x69),
                 .driver_info        = (unsigned long)&qmi_wwan_info,
         },
+       {       /* Motorola Mapphone devices with MDM6600 */
+               USB_VENDOR_AND_INTERFACE_INFO(0x22b8, USB_CLASS_VENDOR_SPEC, 0xfb, 0xff),
+               .driver_info        = (unsigned long)&qmi_wwan_info,
+       },
  
         /* 2. Combined interface devices matching on class+protocol */
         {       /* Huawei E367 and possibly others in "Windows mode" */
@@ -904,7 +908,7 @@ static const struct usb_device_id products[] = {
         {QMI_FIXED_INTF(0x2357, 0x9000, 4)},    /* TP-LINK MA260 */
         {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */
         {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)},    /* Telit LE920 */
-       {QMI_FIXED_INTF(0x1bc7, 0x1201, 2)},    /* Telit LE920 */
+       {QMI_QUIRK_SET_DTR(0x1bc7, 0x1201, 2)}, /* Telit LE920, LE920A4 */
         {QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)},    /* XS Stick W100-2 from 4G Systems */
         {QMI_FIXED_INTF(0x0b3c, 0xc000, 4)},    /* Olivetti Olicard 100 */
         {QMI_FIXED_INTF(0x0b3c, 0xc001, 4)},    /* Olivetti Olicard 120 */
@@ -925,6 +929,8 @@ static const struct usb_device_id products[] = {
         {QMI_FIXED_INTF(0x413c, 0x81a9, 8)},    /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */
         {QMI_FIXED_INTF(0x413c, 0x81b1, 8)},    /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */
         {QMI_FIXED_INTF(0x413c, 0x81b3, 8)},    /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */
+       {QMI_FIXED_INTF(0x413c, 0x81b6, 8)},    /* Dell Wireless 5811e */
+       {QMI_FIXED_INTF(0x413c, 0x81b6, 10)},   /* Dell Wireless 5811e */
         {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)},    /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */
         {QMI_FIXED_INTF(0x22de, 0x9061, 3)},    /* WeTelecom WPD-600N */
         {QMI_FIXED_INTF(0x1e0e, 0x9001, 5)},    /* SIMCom 7230E */
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c

index 986243c932ccd6fe19c592805c1c63274f5e5555..07f788c49d573fe9d4dc15e24b8f29449b4ecbe2 100644 (file)
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -32,7 +32,7 @@
  #define NETNEXT_VERSION                "08"
  
  /* Information for net */
-#define NET_VERSION            "8"
+#define NET_VERSION            "9"
  
  #define DRIVER_VERSION         "v1." NETNEXT_VERSION "." NET_VERSION
  #define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
@@ -501,6 +501,8 @@ enum rtl_register_content {
  #define RTL8153_RMS            RTL8153_MAX_PACKET
  #define RTL8152_TX_TIMEOUT     (5 * HZ)
  #define RTL8152_NAPI_WEIGHT    64
+#define rx_reserved_size(x)    ((x) + VLAN_ETH_HLEN + CRC_SIZE + \
+                                sizeof(struct rx_desc) + RX_ALIGN)
  
  /* rtl8152 flags */
  enum rtl8152_flags {
@@ -515,6 +517,7 @@ enum rtl8152_flags {
  
  /* Define these values to match your device */
  #define VENDOR_ID_REALTEK              0x0bda
+#define VENDOR_ID_MICROSOFT            0x045e
  #define VENDOR_ID_SAMSUNG              0x04e8
  #define VENDOR_ID_LENOVO               0x17ef
  #define VENDOR_ID_NVIDIA               0x0955
@@ -1292,6 +1295,7 @@ static void intr_callback(struct urb *urb)
                 }
         } else {
                 if (netif_carrier_ok(tp->netdev)) {
+                       netif_stop_queue(tp->netdev);
                         set_bit(RTL8152_LINK_CHG, &tp->flags);
                         schedule_delayed_work(&tp->schedule, 0);
                 }
@@ -1362,6 +1366,7 @@ static int alloc_all_mem(struct r8152 *tp)
         spin_lock_init(&tp->rx_lock);
         spin_lock_init(&tp->tx_lock);
         INIT_LIST_HEAD(&tp->tx_free);
+       INIT_LIST_HEAD(&tp->rx_done);
         skb_queue_head_init(&tp->tx_queue);
         skb_queue_head_init(&tp->rx_queue);
  
@@ -2252,8 +2257,7 @@ static void r8153_set_rx_early_timeout(struct r8152 *tp)
  
  static void r8153_set_rx_early_size(struct r8152 *tp)
  {
-       u32 mtu = tp->netdev->mtu;
-       u32 ocp_data = (agg_buf_sz - mtu - VLAN_ETH_HLEN - VLAN_HLEN) / 8;
+       u32 ocp_data = (agg_buf_sz - rx_reserved_size(tp->netdev->mtu)) / 4;
  
         ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_SIZE, ocp_data);
  }
@@ -2898,7 +2902,8 @@ static void r8153_first_init(struct r8152 *tp)
  
         rtl_rx_vlan_en(tp, tp->netdev->features & NETIF_F_HW_VLAN_CTAG_RX);
  
-       ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, RTL8153_RMS);
+       ocp_data = tp->netdev->mtu + VLAN_ETH_HLEN + CRC_SIZE;
+       ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, ocp_data);
         ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_JUMBO);
  
         ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TCR0);
@@ -2950,7 +2955,8 @@ static void r8153_enter_oob(struct r8152 *tp)
                 usleep_range(1000, 2000);
         }
  
-       ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, RTL8153_RMS);
+       ocp_data = tp->netdev->mtu + VLAN_ETH_HLEN + CRC_SIZE;
+       ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, ocp_data);
  
         ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TEREDO_CFG);
         ocp_data &= ~TEREDO_WAKE_MASK;
@@ -3165,6 +3171,9 @@ static void set_carrier(struct r8152 *tp)
                         napi_enable(&tp->napi);
                         netif_wake_queue(netdev);
                         netif_info(tp, link, netdev, "carrier on\n");
+               } else if (netif_queue_stopped(netdev) &&
+                          skb_queue_len(&tp->tx_queue) < tp->tx_qlen) {
+                       netif_wake_queue(netdev);
                 }
         } else {
                 if (netif_carrier_ok(netdev)) {
@@ -3698,8 +3707,18 @@ static int rtl8152_resume(struct usb_interface *intf)
                         tp->rtl_ops.autosuspend_en(tp, false);
                         napi_disable(&tp->napi);
                         set_bit(WORK_ENABLE, &tp->flags);
-                       if (netif_carrier_ok(tp->netdev))
-                               rtl_start_rx(tp);
+
+                       if (netif_carrier_ok(tp->netdev)) {
+                               if (rtl8152_get_speed(tp) & LINK_STATUS) {
+                                       rtl_start_rx(tp);
+                               } else {
+                                       netif_carrier_off(tp->netdev);
+                                       tp->rtl_ops.disable(tp);
+                                       netif_info(tp, link, tp->netdev,
+                                                  "linking down\n");
+                               }
+                       }
+
                         napi_enable(&tp->napi);
                         clear_bit(SELECTIVE_SUSPEND, &tp->flags);
                         smp_mb__after_atomic();
@@ -4200,8 +4219,14 @@ static int rtl8152_change_mtu(struct net_device *dev, int new_mtu)
  
         dev->mtu = new_mtu;
  
-       if (netif_running(dev) && netif_carrier_ok(dev))
-               r8153_set_rx_early_size(tp);
+       if (netif_running(dev)) {
+               u32 rms = new_mtu + VLAN_ETH_HLEN + CRC_SIZE;
+
+               ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, rms);
+
+               if (netif_carrier_ok(dev))
+                       r8153_set_rx_early_size(tp);
+       }
  
         mutex_unlock(&tp->control);
  
@@ -4497,6 +4522,8 @@ static void rtl8152_disconnect(struct usb_interface *intf)
  static struct usb_device_id rtl8152_table[] = {
         {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8152)},
         {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8153)},
+       {REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07ab)},
+       {REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07c6)},
         {REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101)},
         {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x304f)},
         {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x3062)},
diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c

index 0b17b40d7a4fa2653caf21406c4a6b3b45d868b0..190de9a90f7387c5070c7f589aa18bb7d05ac5d7 100644 (file)
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -2203,13 +2203,9 @@ static struct sk_buff *smsc75xx_tx_fixup(struct usbnet *dev,
  {
         u32 tx_cmd_a, tx_cmd_b;
  
-       if (skb_headroom(skb) < SMSC75XX_TX_OVERHEAD) {
-               struct sk_buff *skb2 =
-                       skb_copy_expand(skb, SMSC75XX_TX_OVERHEAD, 0, flags);
+       if (skb_cow_head(skb, SMSC75XX_TX_OVERHEAD)) {
                 dev_kfree_skb_any(skb);
-               skb = skb2;
-               if (!skb)
-                       return NULL;
+               return NULL;
         }
  
         tx_cmd_a = (u32)(skb->len & TX_CMD_A_LEN) | TX_CMD_A_FCS;
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c

index 831aa33d078ae7d2dd57fdded5de71d1eb915f99..5f19fb0f025d9449d0ba20958610e0d1f083f032 100644 (file)
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -2001,13 +2001,13 @@ static struct sk_buff *smsc95xx_tx_fixup(struct usbnet *dev,
         /* We do not advertise SG, so skbs should be already linearized */
         BUG_ON(skb_shinfo(skb)->nr_frags);
  
-       if (skb_headroom(skb) < overhead) {
-               struct sk_buff *skb2 = skb_copy_expand(skb,
-                       overhead, 0, flags);
+       /* Make writable and expand header space by overhead if required */
+       if (skb_cow_head(skb, overhead)) {
+               /* Must deallocate here as returning NULL to indicate error
+                * means the skb won't be deallocated in the caller.
+                */
                 dev_kfree_skb_any(skb);
-               skb = skb2;
-               if (!skb)
-                       return NULL;
+               return NULL;
         }
  
         if (csum) {
diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c

index 4a1e9c489f1f455388ffee289d65e1d6b36cba42..aadfe1d1c37ee67e2a7d17c759db12dad248c41d 100644 (file)
--- a/drivers/net/usb/sr9700.c
+++ b/drivers/net/usb/sr9700.c
@@ -456,14 +456,9 @@ static struct sk_buff *sr9700_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
  
         len = skb->len;
  
-       if (skb_headroom(skb) < SR_TX_OVERHEAD) {
-               struct sk_buff *skb2;
-
-               skb2 = skb_copy_expand(skb, SR_TX_OVERHEAD, 0, flags);
+       if (skb_cow_head(skb, SR_TX_OVERHEAD)) {
                 dev_kfree_skb_any(skb);
-               skb = skb2;
-               if (!skb)
-                       return NULL;
+               return NULL;
         }
  
         __skb_push(skb, SR_TX_OVERHEAD);
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c

index 3de65ea6531a8add927c0a2d7c74e8923c0f3274..453244805c52e570394673d7a3ebd71cc62fd5ca 100644 (file)
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1929,7 +1929,7 @@ static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
                    " value=0x%04x index=0x%04x size=%d\n",
                    cmd, reqtype, value, index, size);
  
-       if (data) {
+       if (size) {
                 buf = kmalloc(size, GFP_KERNEL);
                 if (!buf)
                         goto out;
@@ -1938,8 +1938,13 @@ static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
         err = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0),
                               cmd, reqtype, value, index, buf, size,
                               USB_CTRL_GET_TIMEOUT);
-       if (err > 0 && err <= size)
-               memcpy(data, buf, err);
+       if (err > 0 && err <= size) {
+        if (data)
+            memcpy(data, buf, err);
+        else
+            netdev_dbg(dev->net,
+                "Huh? Data requested but thrown away.\n");
+    }
         kfree(buf);
  out:
         return err;
@@ -1960,7 +1965,13 @@ static int __usbnet_write_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
                 buf = kmemdup(data, size, GFP_KERNEL);
                 if (!buf)
                         goto out;
-       }
+       } else {
+        if (size) {
+            WARN_ON_ONCE(1);
+            err = -EINVAL;
+            goto out;
+        }
+    }
  
         err = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0),
                               cmd, reqtype, value, index, buf, size,
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c

index ea9890d619670e1abfba75fe608c2925d824cb1c..f36584616e7d6825c7e69137b4a31a3d55779688 100644 (file)
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2230,14 +2230,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev)
  #define MIN_MTU ETH_MIN_MTU
  #define MAX_MTU ETH_MAX_MTU
  
-static int virtnet_probe(struct virtio_device *vdev)
+static int virtnet_validate(struct virtio_device *vdev)
  {
-       int i, err;
-       struct net_device *dev;
-       struct virtnet_info *vi;
-       u16 max_queue_pairs;
-       int mtu;
-
         if (!vdev->config->get) {
                 dev_err(&vdev->dev, "%s failure: config access disabled\n",
                         __func__);
@@ -2247,6 +2241,25 @@ static int virtnet_probe(struct virtio_device *vdev)
         if (!virtnet_validate_features(vdev))
                 return -EINVAL;
  
+       if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
+               int mtu = virtio_cread16(vdev,
+                                        offsetof(struct virtio_net_config,
+                                                 mtu));
+               if (mtu < MIN_MTU)
+                       __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
+       }
+
+       return 0;
+}
+
+static int virtnet_probe(struct virtio_device *vdev)
+{
+       int i, err;
+       struct net_device *dev;
+       struct virtnet_info *vi;
+       u16 max_queue_pairs;
+       int mtu;
+
         /* Find if host supports multiqueue virtio_net device */
         err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ,
                                    struct virtio_net_config,
@@ -2362,11 +2375,20 @@ static int virtnet_probe(struct virtio_device *vdev)
                                      offsetof(struct virtio_net_config,
                                               mtu));
                 if (mtu < dev->min_mtu) {
-                       __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
-               } else {
-                       dev->mtu = mtu;
-                       dev->max_mtu = mtu;
+                       /* Should never trigger: MTU was previously validated
+                        * in virtnet_validate.
+                        */
+                       dev_err(&vdev->dev, "device MTU appears to have changed "
+                               "it is now %d < %d", mtu, dev->min_mtu);
+                       goto free_stats;
                 }
+
+               dev->mtu = mtu;
+               dev->max_mtu = mtu;
+
+               /* TODO: size buffers correctly in this case. */
+               if (dev->mtu > ETH_DATA_LEN)
+                       vi->big_packets = true;
         }
  
         if (vi->any_header_sg)
@@ -2544,6 +2566,7 @@ static struct virtio_driver virtio_net_driver = {
         .driver.name =  KBUILD_MODNAME,
         .driver.owner = THIS_MODULE,
         .id_table =     id_table,
+       .validate =     virtnet_validate,
         .probe =        virtnet_probe,
         .remove =       virtnet_remove,
         .config_changed = virtnet_config_changed,
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c

index 22379da63400776ff70994097de6d472232ca908..7d909c8183e95a62b6f8a3182d3ce645a264e909 100644 (file)
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -340,6 +340,7 @@ static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev)
  
  static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
  {
+       int len = skb->len;
         netdev_tx_t ret = is_ip_tx_frame(skb, dev);
  
         if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
@@ -347,7 +348,7 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
  
                 u64_stats_update_begin(&dstats->syncp);
                 dstats->tx_pkts++;
-               dstats->tx_bytes += skb->len;
+               dstats->tx_bytes += len;
                 u64_stats_update_end(&dstats->syncp);
         } else {
                 this_cpu_inc(dev->dstats->tx_drps);
@@ -461,8 +462,10 @@ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
         }
  
         if (rt6_local) {
-               if (rt6_local->rt6i_idev)
+               if (rt6_local->rt6i_idev) {
                         in6_dev_put(rt6_local->rt6i_idev);
+                       rt6_local->rt6i_idev = NULL;
+               }
  
                 dst = &rt6_local->dst;
                 dev_put(dst->dev);
@@ -1125,7 +1128,7 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it)
                 goto nla_put_failure;
  
         /* rule only needs to appear once */
-       nlh->nlmsg_flags &= NLM_F_EXCL;
+       nlh->nlmsg_flags |= NLM_F_EXCL;
  
         frh = nlmsg_data(nlh);
         memset(frh, 0, sizeof(*frh));
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c

index e375560cc74e5ffc09553ddab5c6b657fe1cb6f0..bdb6ae16d4a85bf9539199e189011bce104ba51a 100644 (file)
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2976,6 +2976,44 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
         return 0;
  }
  
+static int __vxlan_dev_create(struct net *net, struct net_device *dev,
+                             struct vxlan_config *conf)
+{
+       struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+       struct vxlan_dev *vxlan = netdev_priv(dev);
+       int err;
+
+       err = vxlan_dev_configure(net, dev, conf, false);
+       if (err)
+               return err;
+
+       dev->ethtool_ops = &vxlan_ethtool_ops;
+
+       /* create an fdb entry for a valid default destination */
+       if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
+               err = vxlan_fdb_create(vxlan, all_zeros_mac,
+                                      &vxlan->default_dst.remote_ip,
+                                      NUD_REACHABLE | NUD_PERMANENT,
+                                      NLM_F_EXCL | NLM_F_CREATE,
+                                      vxlan->cfg.dst_port,
+                                      vxlan->default_dst.remote_vni,
+                                      vxlan->default_dst.remote_vni,
+                                      vxlan->default_dst.remote_ifindex,
+                                      NTF_SELF);
+               if (err)
+                       return err;
+       }
+
+       err = register_netdevice(dev);
+       if (err) {
+               vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni);
+               return err;
+       }
+
+       list_add(&vxlan->next, &vn->vxlan_list);
+       return 0;
+}
+
  static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
                          struct net_device *dev, struct vxlan_config *conf,
                          bool changelink)
@@ -3172,8 +3210,6 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
  static int vxlan_newlink(struct net *src_net, struct net_device *dev,
                          struct nlattr *tb[], struct nlattr *data[])
  {
-       struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
-       struct vxlan_dev *vxlan = netdev_priv(dev);
         struct vxlan_config conf;
         int err;
  
@@ -3181,36 +3217,7 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
         if (err)
                 return err;
  
-       err = vxlan_dev_configure(src_net, dev, &conf, false);
-       if (err)
-               return err;
-
-       dev->ethtool_ops = &vxlan_ethtool_ops;
-
-       /* create an fdb entry for a valid default destination */
-       if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
-               err = vxlan_fdb_create(vxlan, all_zeros_mac,
-                                      &vxlan->default_dst.remote_ip,
-                                      NUD_REACHABLE | NUD_PERMANENT,
-                                      NLM_F_EXCL | NLM_F_CREATE,
-                                      vxlan->cfg.dst_port,
-                                      vxlan->default_dst.remote_vni,
-                                      vxlan->default_dst.remote_vni,
-                                      vxlan->default_dst.remote_ifindex,
-                                      NTF_SELF);
-               if (err)
-                       return err;
-       }
-
-       err = register_netdevice(dev);
-       if (err) {
-               vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni);
-               return err;
-       }
-
-       list_add(&vxlan->next, &vn->vxlan_list);
-
-       return 0;
+       return __vxlan_dev_create(src_net, dev, &conf);
  }
  
  static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
@@ -3440,7 +3447,7 @@ struct net_device *vxlan_dev_create(struct net *net, const char *name,
         if (IS_ERR(dev))
                 return dev;
  
-       err = vxlan_dev_configure(net, dev, conf, false);
+       err = __vxlan_dev_create(net, dev, conf);
         if (err < 0) {
                 free_netdev(dev);
                 return ERR_PTR(err);
diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c

index a5045b5279d70a92c827424be3ff7869c6193dc8..6742ae605660454e19406d11f5aff7e84a2527f3 100644 (file)
--- a/drivers/net/wan/fsl_ucc_hdlc.c
+++ b/drivers/net/wan/fsl_ucc_hdlc.c
@@ -381,8 +381,8 @@ static netdev_tx_t ucc_hdlc_tx(struct sk_buff *skb, struct net_device *dev)
         /* set bd status and length */
         bd_status = (bd_status & T_W_S) | T_R_S | T_I_S | T_L_S | T_TC_S;
  
-       iowrite16be(bd_status, &bd->status);
         iowrite16be(skb->len, &bd->length);
+       iowrite16be(bd_status, &bd->status);
  
         /* Move to next BD in the ring */
         if (!(bd_status & T_W_S))
@@ -457,7 +457,7 @@ static int hdlc_rx_done(struct ucc_hdlc_private *priv, int rx_work_limit)
         struct sk_buff *skb;
         hdlc_device *hdlc = dev_to_hdlc(dev);
         struct qe_bd *bd;
-       u32 bd_status;
+       u16 bd_status;
         u16 length, howmany = 0;
         u8 *bdbuffer;
         int i;
diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c

index e7f5910a65191f4f013ae53db73d2a77510ae9e8..f8eb66ef2944ea9630237455cae4faf483b4f609 100644 (file)
--- a/drivers/net/wimax/i2400m/usb.c
+++ b/drivers/net/wimax/i2400m/usb.c
@@ -467,6 +467,9 @@ int i2400mu_probe(struct usb_interface *iface,
         struct i2400mu *i2400mu;
         struct usb_device *usb_dev = interface_to_usbdev(iface);
  
+       if (iface->cur_altsetting->desc.bNumEndpoints < 4)
+               return -ENODEV;
+
         if (usb_dev->speed != USB_SPEED_HIGH)
                 dev_err(dev, "device not connected as high speed\n");
  
diff --git a/drivers/net/wireless/ath/ath10k/hw.c b/drivers/net/wireless/ath/ath10k/hw.c

index 33fb26833cd0a6b259a131d66972bf7620606f5b..d9f37ee4bfdd3eee9e0a0c11e30165a8b6a7d5ea 100644 (file)
--- a/drivers/net/wireless/ath/ath10k/hw.c
+++ b/drivers/net/wireless/ath/ath10k/hw.c
@@ -51,7 +51,7 @@ const struct ath10k_hw_regs qca6174_regs = {
         .rtc_soc_base_address                   = 0x00000800,
         .rtc_wmac_base_address                  = 0x00001000,
         .soc_core_base_address                  = 0x0003a000,
-       .wlan_mac_base_address                  = 0x00020000,
+       .wlan_mac_base_address                  = 0x00010000,
         .ce_wrapper_base_address                = 0x00034000,
         .ce0_base_address                       = 0x00034400,
         .ce1_base_address                       = 0x00034800,
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c

index de19c7c92bc6c095b3b111abfc280fe228e588ac..85d949e03f79f7c9566c7b00a9bbe99853df7f16 100644 (file)
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
@@ -2238,14 +2238,16 @@ int brcmf_p2p_del_vif(struct wiphy *wiphy, struct wireless_dev *wdev)
         struct brcmf_cfg80211_info *cfg = wiphy_priv(wiphy);
         struct brcmf_p2p_info *p2p = &cfg->p2p;
         struct brcmf_cfg80211_vif *vif;
+       enum nl80211_iftype iftype;
         bool wait_for_disable = false;
         int err;
  
         brcmf_dbg(TRACE, "delete P2P vif\n");
         vif = container_of(wdev, struct brcmf_cfg80211_vif, wdev);
  
+       iftype = vif->wdev.iftype;
         brcmf_cfg80211_arm_vif_event(cfg, vif);
-       switch (vif->wdev.iftype) {
+       switch (iftype) {
         case NL80211_IFTYPE_P2P_CLIENT:
                 if (test_bit(BRCMF_VIF_STATUS_DISCONNECTING, &vif->sme_state))
                         wait_for_disable = true;
@@ -2275,7 +2277,7 @@ int brcmf_p2p_del_vif(struct wiphy *wiphy, struct wireless_dev *wdev)
                                             BRCMF_P2P_DISABLE_TIMEOUT);
  
         err = 0;
-       if (vif->wdev.iftype != NL80211_IFTYPE_P2P_DEVICE) {
+       if (iftype != NL80211_IFTYPE_P2P_DEVICE) {
                 brcmf_vif_clear_mgmt_ies(vif);
                 err = brcmf_p2p_release_p2p_if(vif);
         }
@@ -2291,7 +2293,7 @@ int brcmf_p2p_del_vif(struct wiphy *wiphy, struct wireless_dev *wdev)
         brcmf_remove_interface(vif->ifp, true);
  
         brcmf_cfg80211_arm_vif_event(cfg, NULL);
-       if (vif->wdev.iftype != NL80211_IFTYPE_P2P_DEVICE)
+       if (iftype != NL80211_IFTYPE_P2P_DEVICE)
                 p2p->bss_idx[P2PAPI_BSSCFG_CONNECTION].vif = NULL;
  
         return err;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c

index a260cd5032005bcbf520e98f8be188750c987439..077bfd8f4c0cd85505a3ef7de375bfbaf4cddef5 100644 (file)
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
@@ -1056,6 +1056,8 @@ static ssize_t iwl_dbgfs_fw_dbg_collect_write(struct iwl_mvm *mvm,
  
         if (ret)
                 return ret;
+       if (count == 0)
+               return 0;
  
         iwl_mvm_fw_dbg_collect(mvm, FW_DBG_TRIGGER_USER, buf,
                                (count - 1), NULL);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c

index 99132ea16ede08e0e7ebd5f8734eeb0ab204e0fa..c5734e1a02d27ee3d15ec59ef6dd19430e6aed71 100644 (file)
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
@@ -216,7 +216,8 @@ u32 iwl_mvm_mac_get_queues_mask(struct ieee80211_vif *vif)
                         qmask |= BIT(vif->hw_queue[ac]);
         }
  
-       if (vif->type == NL80211_IFTYPE_AP)
+       if (vif->type == NL80211_IFTYPE_AP ||
+           vif->type == NL80211_IFTYPE_ADHOC)
                 qmask |= BIT(vif->cab_queue);
  
         return qmask;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c

index d37b1695c64eac9096cfe7c11b3ef06764d4626a..486dcceed17a4f3a5b1ff0084d8a1f6b768199cd 100644 (file)
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -2319,7 +2319,7 @@ iwl_mvm_mac_release_buffered_frames(struct ieee80211_hw *hw,
  {
         struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
  
-       /* Called when we need to transmit (a) frame(s) from agg queue */
+       /* Called when we need to transmit (a) frame(s) from agg or dqa queue */
  
         iwl_mvm_sta_modify_sleep_tx_count(mvm, sta, reason, num_frames,
                                           tids, more_data, true);
@@ -2338,7 +2338,8 @@ static void __iwl_mvm_mac_sta_notify(struct ieee80211_hw *hw,
         for (tid = 0; tid < IWL_MAX_TID_COUNT; tid++) {
                 struct iwl_mvm_tid_data *tid_data = &mvmsta->tid_data[tid];
  
-               if (tid_data->state != IWL_AGG_ON &&
+               if (!iwl_mvm_is_dqa_supported(mvm) &&
+                   tid_data->state != IWL_AGG_ON &&
                     tid_data->state != IWL_EMPTYING_HW_QUEUE_DELBA)
                         continue;
  
@@ -2400,7 +2401,7 @@ void iwl_mvm_sta_pm_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb)
                 return;
  
         rcu_read_lock();
-       sta = mvm->fw_id_to_mac_id[notif->sta_id];
+       sta = rcu_dereference(mvm->fw_id_to_mac_id[notif->sta_id]);
         if (WARN_ON(IS_ERR_OR_NULL(sta))) {
                 rcu_read_unlock();
                 return;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c

index bd1dcc863d8f338df994a9b177d498df2eef49cb..9d28db7f56aa2404825c530559df62afbca5d21a 100644 (file)
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -1806,7 +1806,8 @@ int iwl_mvm_send_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
                         iwl_mvm_get_wd_timeout(mvm, vif, false, false);
                 int queue;
  
-               if (vif->type == NL80211_IFTYPE_AP)
+               if (vif->type == NL80211_IFTYPE_AP ||
+                   vif->type == NL80211_IFTYPE_ADHOC)
                         queue = IWL_MVM_DQA_AP_PROBE_RESP_QUEUE;
                 else if (vif->type == NL80211_IFTYPE_P2P_DEVICE)
                         queue = IWL_MVM_DQA_P2P_DEVICE_QUEUE;
@@ -1837,7 +1838,8 @@ int iwl_mvm_send_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
          * enabled-cab_queue to the mask)
          */
         if (iwl_mvm_is_dqa_supported(mvm) &&
-           vif->type == NL80211_IFTYPE_AP) {
+           (vif->type == NL80211_IFTYPE_AP ||
+            vif->type == NL80211_IFTYPE_ADHOC)) {
                 struct iwl_trans_txq_scd_cfg cfg = {
                         .fifo = IWL_MVM_TX_FIFO_MCAST,
                         .sta_id = mvmvif->bcast_sta.sta_id,
@@ -1862,7 +1864,8 @@ static void iwl_mvm_free_bcast_sta_queues(struct iwl_mvm *mvm,
  
         lockdep_assert_held(&mvm->mutex);
  
-       if (vif->type == NL80211_IFTYPE_AP)
+       if (vif->type == NL80211_IFTYPE_AP ||
+           vif->type == NL80211_IFTYPE_ADHOC)
                 iwl_mvm_disable_txq(mvm, vif->cab_queue, vif->cab_queue,
                                     IWL_MAX_TID_COUNT, 0);
  
@@ -3135,7 +3138,7 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm,
                                        struct ieee80211_sta *sta,
                                        enum ieee80211_frame_release_type reason,
                                        u16 cnt, u16 tids, bool more_data,
-                                      bool agg)
+                                      bool single_sta_queue)
  {
         struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
         struct iwl_mvm_add_sta_cmd cmd = {
@@ -3155,14 +3158,14 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm,
         for_each_set_bit(tid, &_tids, IWL_MAX_TID_COUNT)
                 cmd.awake_acs |= BIT(tid_to_ucode_ac[tid]);
  
-       /* If we're releasing frames from aggregation queues then check if the
-        * all queues combined that we're releasing frames from have
+       /* If we're releasing frames from aggregation or dqa queues then check
+        * if all the queues that we're releasing frames from, combined, have:
          *  - more frames than the service period, in which case more_data
          *    needs to be set
          *  - fewer than 'cnt' frames, in which case we need to adjust the
          *    firmware command (but do that unconditionally)
          */
-       if (agg) {
+       if (single_sta_queue) {
                 int remaining = cnt;
                 int sleep_tx_count;
  
@@ -3172,7 +3175,8 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm,
                         u16 n_queued;
  
                         tid_data = &mvmsta->tid_data[tid];
-                       if (WARN(tid_data->state != IWL_AGG_ON &&
+                       if (WARN(!iwl_mvm_is_dqa_supported(mvm) &&
+                                tid_data->state != IWL_AGG_ON &&
                                  tid_data->state != IWL_EMPTYING_HW_QUEUE_DELBA,
                                  "TID %d state is %d\n",
                                  tid, tid_data->state)) {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h

index 4be34f902278c8bb36521346dfcd7980976e8786..1927ce6077984fff4cde0f767ee97ea42de49761 100644 (file)
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
@@ -547,7 +547,7 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm,
                                        struct ieee80211_sta *sta,
                                        enum ieee80211_frame_release_type reason,
                                        u16 cnt, u16 tids, bool more_data,
-                                      bool agg);
+                                      bool single_sta_queue);
  int iwl_mvm_drain_sta(struct iwl_mvm *mvm, struct iwl_mvm_sta *mvmsta,
                       bool drain);
  void iwl_mvm_sta_modify_disable_tx(struct iwl_mvm *mvm,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c

index dd2b4a30081993823634e18752187c576a656b79..1ba0a6f55503665d14b1c1e9a129e37503b9f33f 100644 (file)
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -7,7 +7,7 @@
   *
   * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
   * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
- * Copyright(c) 2016        Intel Deutschland GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of version 2 of the GNU General Public License as
@@ -34,6 +34,7 @@
   *
   * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
   * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
@@ -505,6 +506,7 @@ static int iwl_mvm_get_ctrl_vif_queue(struct iwl_mvm *mvm,
  
         switch (info->control.vif->type) {
         case NL80211_IFTYPE_AP:
+       case NL80211_IFTYPE_ADHOC:
                 /*
                  * Handle legacy hostapd as well, where station may be added
                  * only after assoc. Take care of the case where we send a
@@ -516,7 +518,8 @@ static int iwl_mvm_get_ctrl_vif_queue(struct iwl_mvm *mvm,
                 if (info->hw_queue == info->control.vif->cab_queue)
                         return info->hw_queue;
  
-               WARN_ONCE(1, "fc=0x%02x", le16_to_cpu(fc));
+               WARN_ONCE(info->control.vif->type != NL80211_IFTYPE_ADHOC,
+                         "fc=0x%02x", le16_to_cpu(fc));
                 return IWL_MVM_DQA_AP_PROBE_RESP_QUEUE;
         case NL80211_IFTYPE_P2P_DEVICE:
                 if (ieee80211_is_mgmt(fc))
@@ -583,7 +586,8 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb)
                         iwl_mvm_vif_from_mac80211(info.control.vif);
  
                 if (info.control.vif->type == NL80211_IFTYPE_P2P_DEVICE ||
-                   info.control.vif->type == NL80211_IFTYPE_AP) {
+                   info.control.vif->type == NL80211_IFTYPE_AP ||
+                   info.control.vif->type == NL80211_IFTYPE_ADHOC) {
                         sta_id = mvmvif->bcast_sta.sta_id;
                         queue = iwl_mvm_get_ctrl_vif_queue(mvm, &info,
                                                            hdr->frame_control);
@@ -628,8 +632,10 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb)
          * values.
          * Note that we don't need to make sure it isn't agg'd, since we're
          * TXing non-sta
+        * For DQA mode - we shouldn't increase it though
          */
-       atomic_inc(&mvm->pending_frames[sta_id]);
+       if (!iwl_mvm_is_dqa_supported(mvm))
+               atomic_inc(&mvm->pending_frames[sta_id]);
  
         return 0;
  }
@@ -1005,11 +1011,8 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb,
  
         spin_unlock(&mvmsta->lock);
  
-       /* Increase pending frames count if this isn't AMPDU */
-       if ((iwl_mvm_is_dqa_supported(mvm) &&
-            mvmsta->tid_data[tx_cmd->tid_tspec].state != IWL_AGG_ON &&
-            mvmsta->tid_data[tx_cmd->tid_tspec].state != IWL_AGG_STARTING) ||
-           (!iwl_mvm_is_dqa_supported(mvm) && !is_ampdu))
+       /* Increase pending frames count if this isn't AMPDU or DQA queue */
+       if (!iwl_mvm_is_dqa_supported(mvm) && !is_ampdu)
                 atomic_inc(&mvm->pending_frames[mvmsta->sta_id]);
  
         return 0;
@@ -1079,12 +1082,13 @@ static void iwl_mvm_check_ratid_empty(struct iwl_mvm *mvm,
         lockdep_assert_held(&mvmsta->lock);
  
         if ((tid_data->state == IWL_AGG_ON ||
-            tid_data->state == IWL_EMPTYING_HW_QUEUE_DELBA) &&
+            tid_data->state == IWL_EMPTYING_HW_QUEUE_DELBA ||
+            iwl_mvm_is_dqa_supported(mvm)) &&
             iwl_mvm_tid_queued(tid_data) == 0) {
                 /*
-                * Now that this aggregation queue is empty tell mac80211 so it
-                * knows we no longer have frames buffered for the station on
-                * this TID (for the TIM bitmap calculation.)
+                * Now that this aggregation or DQA queue is empty tell
+                * mac80211 so it knows we no longer have frames buffered for
+                * the station on this TID (for the TIM bitmap calculation.)
                  */
                 ieee80211_sta_set_buffered(sta, tid, false);
         }
@@ -1257,7 +1261,6 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm,
         u8 skb_freed = 0;
         u16 next_reclaimed, seq_ctl;
         bool is_ndp = false;
-       bool txq_agg = false; /* Is this TXQ aggregated */
  
         __skb_queue_head_init(&skbs);
  
@@ -1283,6 +1286,10 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm,
                         info->flags |= IEEE80211_TX_STAT_ACK;
                         break;
                 case TX_STATUS_FAIL_DEST_PS:
+                       /* In DQA, the FW should have stopped the queue and not
+                        * return this status
+                        */
+                       WARN_ON(iwl_mvm_is_dqa_supported(mvm));
                         info->flags |= IEEE80211_TX_STAT_TX_FILTERED;
                         break;
                 default:
@@ -1387,15 +1394,6 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm,
                         bool send_eosp_ndp = false;
  
                         spin_lock_bh(&mvmsta->lock);
-                       if (iwl_mvm_is_dqa_supported(mvm)) {
-                               enum iwl_mvm_agg_state state;
-
-                               state = mvmsta->tid_data[tid].state;
-                               txq_agg = (state == IWL_AGG_ON ||
-                                       state == IWL_EMPTYING_HW_QUEUE_DELBA);
-                       } else {
-                               txq_agg = txq_id >= mvm->first_agg_queue;
-                       }
  
                         if (!is_ndp) {
                                 tid_data->next_reclaimed = next_reclaimed;
@@ -1452,11 +1450,11 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm,
          * If the txq is not an AMPDU queue, there is no chance we freed
          * several skbs. Check that out...
          */
-       if (txq_agg)
+       if (iwl_mvm_is_dqa_supported(mvm) || txq_id >= mvm->first_agg_queue)
                 goto out;
  
         /* We can't free more than one frame at once on a shared queue */
-       WARN_ON(!iwl_mvm_is_dqa_supported(mvm) && (skb_freed > 1));
+       WARN_ON(skb_freed > 1);
  
         /* If we have still frames for this STA nothing to do here */
         if (!atomic_sub_and_test(skb_freed, &mvm->pending_frames[sta_id]))
diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c

index 5ebca1d0cfc750969793c26ac5f37e858e897fc4..b62e03d11c2e27c240d02a4e7813958da562aca8 100644 (file)
--- a/drivers/net/wireless/marvell/mwifiex/main.c
+++ b/drivers/net/wireless/marvell/mwifiex/main.c
@@ -57,8 +57,8 @@ MODULE_PARM_DESC(mfg_mode, "manufacturing mode enable:1, disable:0");
   * In case of any errors during inittialization, this function also ensures
   * proper cleanup before exiting.
   */
-static int mwifiex_register(void *card, struct mwifiex_if_ops *if_ops,
-                           void **padapter)
+static int mwifiex_register(void *card, struct device *dev,
+                           struct mwifiex_if_ops *if_ops, void **padapter)
  {
         struct mwifiex_adapter *adapter;
         int i;
@@ -68,6 +68,7 @@ static int mwifiex_register(void *card, struct mwifiex_if_ops *if_ops,
                 return -ENOMEM;
  
         *padapter = adapter;
+       adapter->dev = dev;
         adapter->card = card;
  
         /* Save interface specific operations in adapter */
@@ -1568,12 +1569,11 @@ mwifiex_add_card(void *card, struct completion *fw_done,
  {
         struct mwifiex_adapter *adapter;
  
-       if (mwifiex_register(card, if_ops, (void **)&adapter)) {
+       if (mwifiex_register(card, dev, if_ops, (void **)&adapter)) {
                 pr_err("%s: software init failed\n", __func__);
                 goto err_init_sw;
         }
  
-       adapter->dev = dev;
         mwifiex_probe_of(adapter);
  
         adapter->iface_type = iface_type;
@@ -1718,6 +1718,9 @@ int mwifiex_remove_card(struct mwifiex_adapter *adapter)
         wiphy_unregister(adapter->wiphy);
         wiphy_free(adapter->wiphy);
  
+       if (adapter->irq_wakeup >= 0)
+               device_init_wakeup(adapter->dev, false);
+
         /* Unregister device */
         mwifiex_dbg(adapter, INFO,
                     "info: unregister device\n");
diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c

index a0d918094889df6cd9de14046b773d6112b2006b..b8c990d10d6ecb11491cbf5ed345ecbdd7dc0359 100644 (file)
--- a/drivers/net/wireless/marvell/mwifiex/pcie.c
+++ b/drivers/net/wireless/marvell/mwifiex/pcie.c
@@ -2739,6 +2739,21 @@ static void mwifiex_pcie_device_dump(struct mwifiex_adapter *adapter)
         schedule_work(&card->work);
  }
  
+static void mwifiex_pcie_free_buffers(struct mwifiex_adapter *adapter)
+{
+       struct pcie_service_card *card = adapter->card;
+       const struct mwifiex_pcie_card_reg *reg = card->pcie.reg;
+
+       if (reg->sleep_cookie)
+               mwifiex_pcie_delete_sleep_cookie_buf(adapter);
+
+       mwifiex_pcie_delete_cmdrsp_buf(adapter);
+       mwifiex_pcie_delete_evtbd_ring(adapter);
+       mwifiex_pcie_delete_rxbd_ring(adapter);
+       mwifiex_pcie_delete_txbd_ring(adapter);
+       card->cmdrsp_buf = NULL;
+}
+
  /*
   * This function initializes the PCI-E host memory space, WCB rings, etc.
   *
@@ -2850,13 +2865,6 @@ err_enable_dev:
  
  /*
   * This function cleans up the allocated card buffers.
- *
- * The following are freed by this function -
- *      - TXBD ring buffers
- *      - RXBD ring buffers
- *      - Event BD ring buffers
- *      - Command response ring buffer
- *      - Sleep cookie buffer
   */
  static void mwifiex_cleanup_pcie(struct mwifiex_adapter *adapter)
  {
@@ -2875,6 +2883,8 @@ static void mwifiex_cleanup_pcie(struct mwifiex_adapter *adapter)
                                     "Failed to write driver not-ready signature\n");
         }
  
+       mwifiex_pcie_free_buffers(adapter);
+
         if (pdev) {
                 pci_iounmap(pdev, card->pci_mmap);
                 pci_iounmap(pdev, card->pci_mmap1);
@@ -3126,10 +3136,7 @@ err_cre_txbd:
         pci_iounmap(pdev, card->pci_mmap1);
  }
  
-/* This function cleans up the PCI-E host memory space.
- * Some code is extracted from mwifiex_unregister_dev()
- *
- */
+/* This function cleans up the PCI-E host memory space. */
  static void mwifiex_pcie_down_dev(struct mwifiex_adapter *adapter)
  {
         struct pcie_service_card *card = adapter->card;
@@ -3140,14 +3147,7 @@ static void mwifiex_pcie_down_dev(struct mwifiex_adapter *adapter)
  
         adapter->seq_num = 0;
  
-       if (reg->sleep_cookie)
-               mwifiex_pcie_delete_sleep_cookie_buf(adapter);
-
-       mwifiex_pcie_delete_cmdrsp_buf(adapter);
-       mwifiex_pcie_delete_evtbd_ring(adapter);
-       mwifiex_pcie_delete_rxbd_ring(adapter);
-       mwifiex_pcie_delete_txbd_ring(adapter);
-       card->cmdrsp_buf = NULL;
+       mwifiex_pcie_free_buffers(adapter);
  }
  
  static struct mwifiex_if_ops pcie_ops = {
diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c

index caea350f05aac7b2e3dc7137b0b4363abcd8c2d4..bdc379178e87955c5456028a43657f97862670af 100644 (file)
--- a/drivers/net/wireless/realtek/rtlwifi/base.c
+++ b/drivers/net/wireless/realtek/rtlwifi/base.c
@@ -1742,12 +1742,14 @@ void rtl_c2hcmd_enqueue(struct ieee80211_hw *hw, u8 tag, u8 len, u8 *val)
         unsigned long flags;
         struct rtl_c2hcmd *c2hcmd;
  
-       c2hcmd = kmalloc(sizeof(*c2hcmd), GFP_KERNEL);
+       c2hcmd = kmalloc(sizeof(*c2hcmd),
+                        in_interrupt() ? GFP_ATOMIC : GFP_KERNEL);
  
         if (!c2hcmd)
                 goto label_err;
  
-       c2hcmd->val = kmalloc(len, GFP_KERNEL);
+       c2hcmd->val = kmalloc(len,
+                             in_interrupt() ? GFP_ATOMIC : GFP_KERNEL);
  
         if (!c2hcmd->val)
                 goto label_err2;
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c

index 829b26cd4549a4e07ccdf30ea87d902424ce737b..8397f6c9245158e8b3ff005bc58a419e4250169d 100644 (file)
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -165,13 +165,17 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
  {
         struct xenvif *vif = netdev_priv(dev);
         struct xenvif_queue *queue = NULL;
-       unsigned int num_queues = vif->num_queues;
+       unsigned int num_queues;
         u16 index;
         struct xenvif_rx_cb *cb;
  
         BUG_ON(skb->dev != dev);
  
-       /* Drop the packet if queues are not set up */
+       /* Drop the packet if queues are not set up.
+        * This handler should be called inside an RCU read section
+        * so we don't need to enter it here explicitly.
+        */
+       num_queues = READ_ONCE(vif->num_queues);
         if (num_queues < 1)
                 goto drop;
  
@@ -222,18 +226,18 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
  {
         struct xenvif *vif = netdev_priv(dev);
         struct xenvif_queue *queue = NULL;
+       unsigned int num_queues;
         u64 rx_bytes = 0;
         u64 rx_packets = 0;
         u64 tx_bytes = 0;
         u64 tx_packets = 0;
         unsigned int index;
  
-       spin_lock(&vif->lock);
-       if (vif->queues == NULL)
-               goto out;
+       rcu_read_lock();
+       num_queues = READ_ONCE(vif->num_queues);
  
         /* Aggregate tx and rx stats from each queue */
-       for (index = 0; index < vif->num_queues; ++index) {
+       for (index = 0; index < num_queues; ++index) {
                 queue = &vif->queues[index];
                 rx_bytes += queue->stats.rx_bytes;
                 rx_packets += queue->stats.rx_packets;
@@ -241,8 +245,7 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
                 tx_packets += queue->stats.tx_packets;
         }
  
-out:
-       spin_unlock(&vif->lock);
+       rcu_read_unlock();
  
         vif->dev->stats.rx_bytes = rx_bytes;
         vif->dev->stats.rx_packets = rx_packets;
@@ -378,10 +381,13 @@ static void xenvif_get_ethtool_stats(struct net_device *dev,
                                      struct ethtool_stats *stats, u64 * data)
  {
         struct xenvif *vif = netdev_priv(dev);
-       unsigned int num_queues = vif->num_queues;
+       unsigned int num_queues;
         int i;
         unsigned int queue_index;
  
+       rcu_read_lock();
+       num_queues = READ_ONCE(vif->num_queues);
+
         for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++) {
                 unsigned long accum = 0;
                 for (queue_index = 0; queue_index < num_queues; ++queue_index) {
@@ -390,6 +396,8 @@ static void xenvif_get_ethtool_stats(struct net_device *dev,
                 }
                 data[i] = accum;
         }
+
+       rcu_read_unlock();
  }
  
  static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 * data)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c

index f9bcf4a665bcaebc4f33bd28849cef2dadc4f698..602d408fa25e98a4651716b1390d2507bced4605 100644 (file)
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -214,7 +214,7 @@ static void xenvif_fatal_tx_err(struct xenvif *vif)
         netdev_err(vif->dev, "fatal error; disabling device\n");
         vif->disabled = true;
         /* Disable the vif from queue 0's kthread */
-       if (vif->queues)
+       if (vif->num_queues)
                 xenvif_kick_thread(&vif->queues[0]);
  }
  
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c

index d2d7cd9145b1c259a1f0f11414acca706e67bb7a..a56d3eab35dd650c4acfcda9e981c0220cba9e61 100644 (file)
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -495,26 +495,26 @@ static void backend_disconnect(struct backend_info *be)
         struct xenvif *vif = be->vif;
  
         if (vif) {
+               unsigned int num_queues = vif->num_queues;
                 unsigned int queue_index;
-               struct xenvif_queue *queues;
  
                 xen_unregister_watchers(vif);
  #ifdef CONFIG_DEBUG_FS
                 xenvif_debugfs_delif(vif);
  #endif /* CONFIG_DEBUG_FS */
                 xenvif_disconnect_data(vif);
-               for (queue_index = 0;
-                    queue_index < vif->num_queues;
-                    ++queue_index)
-                       xenvif_deinit_queue(&vif->queues[queue_index]);
  
-               spin_lock(&vif->lock);
-               queues = vif->queues;
+               /* At this point some of the handlers may still be active
+                * so we need to have additional synchronization here.
+                */
                 vif->num_queues = 0;
-               vif->queues = NULL;
-               spin_unlock(&vif->lock);
+               synchronize_net();
  
-               vfree(queues);
+               for (queue_index = 0; queue_index < num_queues; ++queue_index)
+                       xenvif_deinit_queue(&vif->queues[queue_index]);
+
+               vfree(vif->queues);
+               vif->queues = NULL;
  
                 xenvif_disconnect_ctrl(vif);
         }
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c

index 23d4a1728cdfa4993903b6b4504c6a70f5a7fff4..351bac8f65031edf831741159c01e860c89bb4a5 100644 (file)
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -934,8 +934,14 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
         rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len, NULL);
         if (rc < 0)
                 goto out_unlock;
+       nvdimm_bus_unlock(&nvdimm_bus->dev);
+
         if (copy_to_user(p, buf, buf_len))
                 rc = -EFAULT;
+
+       vfree(buf);
+       return rc;
+
   out_unlock:
         nvdimm_bus_unlock(&nvdimm_bus->dev);
   out:
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c

index b3323c0697f6239ebbfe757137cde8352fe3c480..ca6d572c48fcb62d7c8a83e4874f18b625caaeb8 100644 (file)
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -243,7 +243,15 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
         }
  
         if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) {
-               if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)) {
+               /*
+                * FIXME: nsio_rw_bytes() may be called from atomic
+                * context in the btt case and nvdimm_clear_poison()
+                * takes a sleeping lock. Until the locking can be
+                * reworked this capability requires that the namespace
+                * is not claimed by btt.
+                */
+               if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)
+                               && (!ndns->claim || !is_nd_btt(ndns->claim))) {
                         long cleared;
  
                         cleared = nvdimm_clear_poison(&ndns->dev, offset, size);
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c

index 0eedc49e0d473ed36b5ef9832760aa8498b9f146..8b721321be5b1cb291e780ae9a5ed7ea5ad67e09 100644 (file)
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -395,7 +395,7 @@ EXPORT_SYMBOL_GPL(nvdimm_create);
  
  int alias_dpa_busy(struct device *dev, void *data)
  {
-       resource_size_t map_end, blk_start, new, busy;
+       resource_size_t map_end, blk_start, new;
         struct blk_alloc_info *info = data;
         struct nd_mapping *nd_mapping;
         struct nd_region *nd_region;
@@ -436,29 +436,19 @@ int alias_dpa_busy(struct device *dev, void *data)
   retry:
         /*
          * Find the free dpa from the end of the last pmem allocation to
-        * the end of the interleave-set mapping that is not already
-        * covered by a blk allocation.
+        * the end of the interleave-set mapping.
          */
-       busy = 0;
         for_each_dpa_resource(ndd, res) {
+               if (strncmp(res->name, "pmem", 4) != 0)
+                       continue;
                 if ((res->start >= blk_start && res->start < map_end)
                                 || (res->end >= blk_start
                                         && res->end <= map_end)) {
-                       if (strncmp(res->name, "pmem", 4) == 0) {
-                               new = max(blk_start, min(map_end + 1,
-                                                       res->end + 1));
-                               if (new != blk_start) {
-                                       blk_start = new;
-                                       goto retry;
-                               }
-                       } else
-                               busy += min(map_end, res->end)
-                                       - max(nd_mapping->start, res->start) + 1;
-               } else if (nd_mapping->start > res->start
-                               && map_end < res->end) {
-                       /* total eclipse of the PMEM region mapping */
-                       busy += nd_mapping->size;
-                       break;
+                       new = max(blk_start, min(map_end + 1, res->end + 1));
+                       if (new != blk_start) {
+                               blk_start = new;
+                               goto retry;
+                       }
                 }
         }
  
@@ -470,52 +460,11 @@ int alias_dpa_busy(struct device *dev, void *data)
                 return 1;
         }
  
-       info->available -= blk_start - nd_mapping->start + busy;
+       info->available -= blk_start - nd_mapping->start;
  
         return 0;
  }
  
-static int blk_dpa_busy(struct device *dev, void *data)
-{
-       struct blk_alloc_info *info = data;
-       struct nd_mapping *nd_mapping;
-       struct nd_region *nd_region;
-       resource_size_t map_end;
-       int i;
-
-       if (!is_nd_pmem(dev))
-               return 0;
-
-       nd_region = to_nd_region(dev);
-       for (i = 0; i < nd_region->ndr_mappings; i++) {
-               nd_mapping  = &nd_region->mapping[i];
-               if (nd_mapping->nvdimm == info->nd_mapping->nvdimm)
-                       break;
-       }
-
-       if (i >= nd_region->ndr_mappings)
-               return 0;
-
-       map_end = nd_mapping->start + nd_mapping->size - 1;
-       if (info->res->start >= nd_mapping->start
-                       && info->res->start < map_end) {
-               if (info->res->end <= map_end) {
-                       info->busy = 0;
-                       return 1;
-               } else {
-                       info->busy -= info->res->end - map_end;
-                       return 0;
-               }
-       } else if (info->res->end >= nd_mapping->start
-                       && info->res->end <= map_end) {
-               info->busy -= nd_mapping->start - info->res->start;
-               return 0;
-       } else {
-               info->busy -= nd_mapping->size;
-               return 0;
-       }
-}
-
  /**
   * nd_blk_available_dpa - account the unused dpa of BLK region
   * @nd_mapping: container of dpa-resource-root + labels
@@ -545,11 +494,7 @@ resource_size_t nd_blk_available_dpa(struct nd_region *nd_region)
         for_each_dpa_resource(ndd, res) {
                 if (strncmp(res->name, "blk", 3) != 0)
                         continue;
-
-               info.res = res;
-               info.busy = resource_size(res);
-               device_for_each_child(&nvdimm_bus->dev, &info, blk_dpa_busy);
-               info.available -= info.busy;
+               info.available -= resource_size(res);
         }
  
         return info.available;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c

index 9b3b57fef446dc753c966c90fc2529bc9f846dd8..d5e0906262ead5dd9f202385d7ace81444c25a0f 100644 (file)
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -49,10 +49,9 @@ unsigned char shutdown_timeout = 5;
  module_param(shutdown_timeout, byte, 0644);
  MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
  
-unsigned int nvme_max_retries = 5;
-module_param_named(max_retries, nvme_max_retries, uint, 0644);
+static u8 nvme_max_retries = 5;
+module_param_named(max_retries, nvme_max_retries, byte, 0644);
  MODULE_PARM_DESC(max_retries, "max number of retries a command may have");
-EXPORT_SYMBOL_GPL(nvme_max_retries);
  
  static int nvme_char_major;
  module_param(nvme_char_major, int, 0);
@@ -62,11 +61,66 @@ module_param(default_ps_max_latency_us, ulong, 0644);
  MODULE_PARM_DESC(default_ps_max_latency_us,
                  "max power saving latency for new devices; use PM QOS to change per device");
  
+static bool force_apst;
+module_param(force_apst, bool, 0644);
+MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off");
+
  static LIST_HEAD(nvme_ctrl_list);
  static DEFINE_SPINLOCK(dev_list_lock);
  
  static struct class *nvme_class;
  
+static int nvme_error_status(struct request *req)
+{
+       switch (nvme_req(req)->status & 0x7ff) {
+       case NVME_SC_SUCCESS:
+               return 0;
+       case NVME_SC_CAP_EXCEEDED:
+               return -ENOSPC;
+       default:
+               return -EIO;
+
+       /*
+        * XXX: these errors are a nasty side-band protocol to
+        * drivers/md/dm-mpath.c:noretry_error() that aren't documented
+        * anywhere..
+        */
+       case NVME_SC_CMD_SEQ_ERROR:
+               return -EILSEQ;
+       case NVME_SC_ONCS_NOT_SUPPORTED:
+               return -EOPNOTSUPP;
+       case NVME_SC_WRITE_FAULT:
+       case NVME_SC_READ_ERROR:
+       case NVME_SC_UNWRITTEN_BLOCK:
+               return -ENODATA;
+       }
+}
+
+static inline bool nvme_req_needs_retry(struct request *req)
+{
+       if (blk_noretry_request(req))
+               return false;
+       if (nvme_req(req)->status & NVME_SC_DNR)
+               return false;
+       if (jiffies - req->start_time >= req->timeout)
+               return false;
+       if (nvme_req(req)->retries >= nvme_max_retries)
+               return false;
+       return true;
+}
+
+void nvme_complete_rq(struct request *req)
+{
+       if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) {
+               nvme_req(req)->retries++;
+               blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q));
+               return;
+       }
+
+       blk_mq_end_request(req, nvme_error_status(req));
+}
+EXPORT_SYMBOL_GPL(nvme_complete_rq);
+
  void nvme_cancel_request(struct request *req, void *data, bool reserved)
  {
         int status;
@@ -80,7 +134,9 @@ void nvme_cancel_request(struct request *req, void *data, bool reserved)
         status = NVME_SC_ABORT_REQ;
         if (blk_queue_dying(req->q))
                 status |= NVME_SC_DNR;
-       blk_mq_complete_request(req, status);
+       nvme_req(req)->status = status;
+       blk_mq_complete_request(req);
+
  }
  EXPORT_SYMBOL_GPL(nvme_cancel_request);
  
@@ -205,12 +261,6 @@ fail:
         return NULL;
  }
  
-void nvme_requeue_req(struct request *req)
-{
-       blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q));
-}
-EXPORT_SYMBOL_GPL(nvme_requeue_req);
-
  struct request *nvme_alloc_request(struct request_queue *q,
                 struct nvme_command *cmd, unsigned int flags, int qid)
  {
@@ -270,7 +320,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
         memset(cmnd, 0, sizeof(*cmnd));
         cmnd->dsm.opcode = nvme_cmd_dsm;
         cmnd->dsm.nsid = cpu_to_le32(ns->ns_id);
-       cmnd->dsm.nr = segments - 1;
+       cmnd->dsm.nr = cpu_to_le32(segments - 1);
         cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
  
         req->special_vec.bv_page = virt_to_page(range);
@@ -327,6 +377,12 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
  {
         int ret = BLK_MQ_RQ_QUEUE_OK;
  
+       if (!(req->rq_flags & RQF_DONTPREP)) {
+               nvme_req(req)->retries = 0;
+               nvme_req(req)->flags = 0;
+               req->rq_flags |= RQF_DONTPREP;
+       }
+
         switch (req_op(req)) {
         case REQ_OP_DRV_IN:
         case REQ_OP_DRV_OUT:
@@ -335,6 +391,8 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
         case REQ_OP_FLUSH:
                 nvme_setup_flush(ns, cmd);
                 break;
+       case REQ_OP_WRITE_ZEROES:
+               /* currently only aliased to deallocate for a few ctrls: */
         case REQ_OP_DISCARD:
                 ret = nvme_setup_discard(ns, req, cmd);
                 break;
@@ -378,7 +436,10 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
         blk_execute_rq(req->q, NULL, req, at_head);
         if (result)
                 *result = nvme_req(req)->result;
-       ret = req->errors;
+       if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
+               ret = -EINTR;
+       else
+               ret = nvme_req(req)->status;
   out:
         blk_mq_free_request(req);
         return ret;
@@ -463,7 +524,10 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
         }
   submit:
         blk_execute_rq(req->q, disk, req, 0);
-       ret = req->errors;
+       if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
+               ret = -EINTR;
+       else
+               ret = nvme_req(req)->status;
         if (result)
                 *result = le32_to_cpu(nvme_req(req)->result.u32);
         if (meta && !ret && !write) {
@@ -900,16 +964,14 @@ static void nvme_config_discard(struct nvme_ns *ns)
         BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
                         NVME_DSM_MAX_RANGES);
  
-       if (ctrl->quirks & NVME_QUIRK_DISCARD_ZEROES)
-               ns->queue->limits.discard_zeroes_data = 1;
-       else
-               ns->queue->limits.discard_zeroes_data = 0;
-
         ns->queue->limits.discard_alignment = logical_block_size;
         ns->queue->limits.discard_granularity = logical_block_size;
         blk_queue_max_discard_sectors(ns->queue, UINT_MAX);
         blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES);
         queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
+
+       if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
+               blk_queue_max_write_zeroes_sectors(ns->queue, UINT_MAX);
  }
  
  static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
@@ -1267,7 +1329,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
          * heuristic: we are willing to spend at most 2% of the time
          * transitioning between power states.  Therefore, when running
          * in any given state, we will enter the next lower-power
-        * non-operational state after waiting 100 * (enlat + exlat)
+        * non-operational state after waiting 50 * (enlat + exlat)
          * microseconds, as long as that state's total latency is under
          * the requested maximum latency.
          *
@@ -1278,6 +1340,8 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
  
         unsigned apste;
         struct nvme_feat_auto_pst *table;
+       u64 max_lat_us = 0;
+       int max_ps = -1;
         int ret;
  
         /*
@@ -1299,6 +1363,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
         if (ctrl->ps_max_latency_us == 0) {
                 /* Turn off APST. */
                 apste = 0;
+               dev_dbg(ctrl->device, "APST disabled\n");
         } else {
                 __le64 target = cpu_to_le64(0);
                 int state;
@@ -1315,6 +1380,14 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
                         if (target)
                                 table->entries[state] = target;
  
+                       /*
+                        * Don't allow transitions to the deepest state
+                        * if it's quirked off.
+                        */
+                       if (state == ctrl->npss &&
+                           (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS))
+                               continue;
+
                         /*
                          * Is this state a useful non-operational state for
                          * higher-power states to autonomously transition to?
@@ -1340,9 +1413,22 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
  
                         target = cpu_to_le64((state << 3) |
                                              (transition_ms << 8));
+
+                       if (max_ps == -1)
+                               max_ps = state;
+
+                       if (total_latency_us > max_lat_us)
+                               max_lat_us = total_latency_us;
                 }
  
                 apste = 1;
+
+               if (max_ps == -1) {
+                       dev_dbg(ctrl->device, "APST enabled but no non-operational states are available\n");
+               } else {
+                       dev_dbg(ctrl->device, "APST enabled: max PS = %d, max round-trip latency = %lluus, table = %*phN\n",
+                               max_ps, max_lat_us, (int)sizeof(*table), table);
+               }
         }
  
         ret = nvme_set_features(ctrl, NVME_FEAT_AUTO_PST, apste,
@@ -1387,16 +1473,15 @@ struct nvme_core_quirk_entry {
  };
  
  static const struct nvme_core_quirk_entry core_quirks[] = {
-       /*
-        * Seen on a Samsung "SM951 NVMe SAMSUNG 256GB": using APST causes
-        * the controller to go out to lunch.  It dies when the watchdog
-        * timer reads CSTS and gets 0xffffffff.
-        */
         {
-               .vid = 0x144d,
-               .fr = "BXW75D0Q",
+               /*
+                * This Toshiba device seems to die using any APST states.  See:
+                * https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1678184/comments/11
+                */
+               .vid = 0x1179,
+               .mn = "THNSF5256GPUK TOSHIBA",
                 .quirks = NVME_QUIRK_NO_APST,
-       },
+       }
  };
  
  /* match is null-terminated but idstr is space-padded. */
@@ -1481,6 +1566,11 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
                 }
         }
  
+       if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) {
+               dev_warn(ctrl->dev, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
+               ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS;
+       }
+
         ctrl->oacs = le16_to_cpu(id->oacs);
         ctrl->vid = le16_to_cpu(id->vid);
         ctrl->oncs = le16_to_cpup(&id->oncs);
@@ -1503,7 +1593,16 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
  
         ctrl->npss = id->npss;
         prev_apsta = ctrl->apsta;
-       ctrl->apsta = (ctrl->quirks & NVME_QUIRK_NO_APST) ? 0 : id->apsta;
+       if (ctrl->quirks & NVME_QUIRK_NO_APST) {
+               if (force_apst && id->apsta) {
+                       dev_warn(ctrl->dev, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n");
+                       ctrl->apsta = 1;
+               } else {
+                       ctrl->apsta = 0;
+               }
+       } else {
+               ctrl->apsta = id->apsta;
+       }
         memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd));
  
         if (ctrl->ops->is_fabrics) {
@@ -2386,7 +2485,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl)
  
         mutex_lock(&ctrl->namespaces_mutex);
         list_for_each_entry(ns, &ctrl->namespaces, list)
-               blk_mq_freeze_queue_start(ns->queue);
+               blk_freeze_queue_start(ns->queue);
         mutex_unlock(&ctrl->namespaces_mutex);
  }
  EXPORT_SYMBOL_GPL(nvme_start_freeze);
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c

index 5b7386f69f4de5571112bcc504134c8d99744793..990e6fb32a636201078da585372d6ddfed97c9bd 100644 (file)
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -471,6 +471,16 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
  }
  EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);
  
+/* True while @ctrl may reconnect; max_reconnects == -1 means forever. */
+bool nvmf_should_reconnect(struct nvme_ctrl *ctrl)
+{
+       if (ctrl->opts->max_reconnects == -1 ||
+           ctrl->opts->nr_reconnects < ctrl->opts->max_reconnects)
+               return true;
+       return false;
+}
+EXPORT_SYMBOL_GPL(nvmf_should_reconnect);
+
  /**
   * nvmf_register_transport() - NVMe Fabrics Library registration function.
   * @ops:       Transport ops instance to be registered to the
@@ -533,6 +543,7 @@ static const match_table_t opt_tokens = {
         { NVMF_OPT_QUEUE_SIZE,          "queue_size=%d"         },
         { NVMF_OPT_NR_IO_QUEUES,        "nr_io_queues=%d"       },
         { NVMF_OPT_RECONNECT_DELAY,     "reconnect_delay=%d"    },
+       { NVMF_OPT_CTRL_LOSS_TMO,       "ctrl_loss_tmo=%d"      },
         { NVMF_OPT_KATO,                "keep_alive_tmo=%d"     },
         { NVMF_OPT_HOSTNQN,             "hostnqn=%s"            },
         { NVMF_OPT_HOST_TRADDR,         "host_traddr=%s"        },
@@ -546,6 +557,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
         char *options, *o, *p;
         int token, ret = 0;
         size_t nqnlen  = 0;
+       int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO;
  
         /* Set defaults */
         opts->queue_size = NVMF_DEF_QUEUE_SIZE;
@@ -655,6 +667,16 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
                         }
                         opts->kato = token;
                         break;
+               case NVMF_OPT_CTRL_LOSS_TMO:
+                       if (match_int(args, &token)) {
+                               ret = -EINVAL;
+                               goto out;
+                       }
+
+                       if (token < 0)
+                               pr_warn("ctrl_loss_tmo < 0 will reconnect forever\n");
+                       ctrl_loss_tmo = token;
+                       break;
                 case NVMF_OPT_HOSTNQN:
                         if (opts->host) {
                                 pr_err("hostnqn already user-assigned: %s\n",
@@ -710,6 +732,12 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
                 }
         }
  
+       if (ctrl_loss_tmo < 0)
+               opts->max_reconnects = -1;
+       else
+               opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
+                                               opts->reconnect_delay);
+
         if (!opts->host) {
                 kref_get(&nvmf_default_host->ref);
                 opts->host = nvmf_default_host;
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h

index 156018182ce43bbf70fe34fa1ff71b6df2e4456b..f5a9c1fb186f2d5278ecd72eeaf93b1b7d75e6e8 100644 (file)
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -21,6 +21,8 @@
  #define NVMF_MAX_QUEUE_SIZE    1024
  #define NVMF_DEF_QUEUE_SIZE    128
  #define NVMF_DEF_RECONNECT_DELAY       10
+/* default to 600 seconds of reconnect attempts before giving up */
+#define NVMF_DEF_CTRL_LOSS_TMO         600
  
  /*
   * Define a host as seen by the target.  We allocate one at boot, but also
@@ -53,6 +55,7 @@ enum {
         NVMF_OPT_HOSTNQN        = 1 << 8,
         NVMF_OPT_RECONNECT_DELAY = 1 << 9,
         NVMF_OPT_HOST_TRADDR    = 1 << 10,
+       NVMF_OPT_CTRL_LOSS_TMO  = 1 << 11,
  };
  
  /**
@@ -77,6 +80,10 @@ enum {
   * @discovery_nqn: indicates if the subsysnqn is the well-known discovery NQN.
   * @kato:      Keep-alive timeout.
   * @host:      Virtual NVMe host, contains the NQN and Host ID.
+ * @nr_reconnects: number of reconnects attempted since the last ctrl failure
+ * @max_reconnects: maximum number of allowed reconnect attempts before removing
+ *              the controller; (-1) means reconnect forever, zero means remove
+ *              immediately.
   */
  struct nvmf_ctrl_options {
         unsigned                mask;
@@ -91,6 +98,8 @@ struct nvmf_ctrl_options {
         bool                    discovery_nqn;
         unsigned int            kato;
         struct nvmf_host        *host;
+       int                     nr_reconnects;
+       int                     max_reconnects;
  };
  
  /*
@@ -133,5 +142,6 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
  void nvmf_free_options(struct nvmf_ctrl_options *opts);
  const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl);
  int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
+bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
  
  #endif /* _NVME_FABRICS_H */
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c

index 9690beb15e69ab47bb04345da5f142ec56141035..4976db56e351901dcc2da7a56312105389b82276 100644 (file)
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -19,6 +19,7 @@
  #include <linux/parser.h>
  #include <uapi/scsi/fc/fc_fs.h>
  #include <uapi/scsi/fc/fc_els.h>
+#include <linux/delay.h>
  
  #include "nvme.h"
  #include "fabrics.h"
@@ -44,6 +45,8 @@ enum nvme_fc_queue_flags {
  
  #define NVMEFC_QUEUE_DELAY     3               /* ms units */
  
+#define NVME_FC_MAX_CONNECT_ATTEMPTS   1
+
  struct nvme_fc_queue {
         struct nvme_fc_ctrl     *ctrl;
         struct device           *dev;
@@ -61,16 +64,24 @@ struct nvme_fc_queue {
         unsigned long           flags;
  } __aligned(sizeof(u64));      /* alignment for other things alloc'd with */
  
+enum nvme_fcop_flags {
+       FCOP_FLAGS_TERMIO       = (1 << 0),     /* op has been marked for termination/abort */
+       FCOP_FLAGS_RELEASED     = (1 << 1),     /* tested at completion time; NOTE(review): setter is not in this excerpt */
+       FCOP_FLAGS_COMPLETE     = (1 << 2),     /* completion ran while RELEASED was not yet set */
+       FCOP_FLAGS_AEN          = (1 << 3),     /* op is one of ctrl->aen_ops (async event command) */
+};
+
  struct nvmefc_ls_req_op {
         struct nvmefc_ls_req    ls_req;
  
-       struct nvme_fc_ctrl     *ctrl;
+       struct nvme_fc_rport    *rport;
         struct nvme_fc_queue    *queue;
         struct request          *rq;
+       u32                     flags;
  
         int                     ls_error;
         struct completion       ls_done;
-       struct list_head        lsreq_list;     /* ctrl->ls_req_list */
+       struct list_head        lsreq_list;     /* rport->ls_req_list */
         bool                    req_queued;
  };
  
@@ -79,6 +90,7 @@ enum nvme_fcpop_state {
         FCPOP_STATE_IDLE        = 1,
         FCPOP_STATE_ACTIVE      = 2,
         FCPOP_STATE_ABORTED     = 3,
+       FCPOP_STATE_COMPLETE    = 4,
  };
  
  struct nvme_fc_fcp_op {
@@ -97,6 +109,7 @@ struct nvme_fc_fcp_op {
         struct request          *rq;
  
         atomic_t                state;
+       u32                     flags;
         u32                     rqno;
         u32                     nents;
  
@@ -120,23 +133,24 @@ struct nvme_fc_rport {
  
         struct list_head                endp_list; /* for lport->endp_list */
         struct list_head                ctrl_list;
+       struct list_head                ls_req_list;
+       struct device                   *dev;   /* physical device for dma */
+       struct nvme_fc_lport            *lport;
         spinlock_t                      lock;
         struct kref                     ref;
  } __aligned(sizeof(u64));      /* alignment for other things alloc'd with */
  
-enum nvme_fcctrl_state {
-       FCCTRL_INIT             = 0,
-       FCCTRL_ACTIVE           = 1,
+enum nvme_fcctrl_flags {
+       FCCTRL_TERMIO           = (1 << 0),
  };
  
  struct nvme_fc_ctrl {
         spinlock_t              lock;
         struct nvme_fc_queue    *queues;
-       u32                     queue_count;
-
         struct device           *dev;
         struct nvme_fc_lport    *lport;
         struct nvme_fc_rport    *rport;
+       u32                     queue_count;
         u32                     cnum;
  
         u64                     association_id;
@@ -144,14 +158,19 @@ struct nvme_fc_ctrl {
         u64                     cap;
  
         struct list_head        ctrl_list;      /* rport->ctrl_list */
-       struct list_head        ls_req_list;
  
         struct blk_mq_tag_set   admin_tag_set;
         struct blk_mq_tag_set   tag_set;
  
         struct work_struct      delete_work;
+       struct work_struct      reset_work;
+       struct delayed_work     connect_work;
+       int                     reconnect_delay;
+       int                     connect_attempts;
+
         struct kref             ref;
-       int                     state;
+       u32                     flags;
+       u32                     iocnt;
  
         struct nvme_fc_fcp_op   aen_ops[NVME_FC_NR_AEN_COMMANDS];
  
@@ -419,9 +438,12 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
  
         INIT_LIST_HEAD(&newrec->endp_list);
         INIT_LIST_HEAD(&newrec->ctrl_list);
+       INIT_LIST_HEAD(&newrec->ls_req_list);
         kref_init(&newrec->ref);
         spin_lock_init(&newrec->lock);
         newrec->remoteport.localport = &lport->localport;
+       newrec->dev = lport->dev;
+       newrec->lport = lport;
         newrec->remoteport.private = &newrec[1];
         newrec->remoteport.port_role = pinfo->port_role;
         newrec->remoteport.node_name = pinfo->node_name;
@@ -444,7 +466,6 @@ out_kfree_rport:
  out_reghost_failed:
         *portptr = NULL;
         return ret;
-
  }
  EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport);
  
@@ -487,6 +508,30 @@ nvme_fc_rport_get(struct nvme_fc_rport *rport)
         return kref_get_unless_zero(&rport->ref);
  }
  
+static int
+nvme_fc_abort_lsops(struct nvme_fc_rport *rport)
+{
+       struct nvmefc_ls_req_op *lsop;
+       unsigned long flags;
+       /* Call the LLDD ls_abort op for every LS request queued on @rport. */
+restart:
+       spin_lock_irqsave(&rport->lock, flags);
+       /* TERMIO marks entries already passed to ls_abort, so each is aborted once */
+       list_for_each_entry(lsop, &rport->ls_req_list, lsreq_list) {
+               if (!(lsop->flags & FCOP_FLAGS_TERMIO)) {
+                       lsop->flags |= FCOP_FLAGS_TERMIO;
+                       spin_unlock_irqrestore(&rport->lock, flags);    /* ls_abort is called unlocked */
+                       rport->lport->ops->ls_abort(&rport->lport->localport,
+                                               &rport->remoteport,
+                                               &lsop->ls_req);
+                       goto restart;   /* lock was dropped: rescan the list from its head */
+               }
+       }
+       spin_unlock_irqrestore(&rport->lock, flags);
+
+       return 0;       /* no failure path: the return value is always 0 */
+}
+
  /**
   * nvme_fc_unregister_remoteport - transport entry point called by an
   *                              LLDD to deregister/remove a previously
@@ -522,6 +567,8 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr)
  
         spin_unlock_irqrestore(&rport->lock, flags);
  
+       nvme_fc_abort_lsops(rport);
+
         nvme_fc_rport_put(rport);
         return 0;
  }
@@ -624,16 +671,16 @@ static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *);
  
  
  static void
-__nvme_fc_finish_ls_req(struct nvme_fc_ctrl *ctrl,
-               struct nvmefc_ls_req_op *lsop)
+__nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop)
  {
+       struct nvme_fc_rport *rport = lsop->rport;
         struct nvmefc_ls_req *lsreq = &lsop->ls_req;
         unsigned long flags;
  
-       spin_lock_irqsave(&ctrl->lock, flags);
+       spin_lock_irqsave(&rport->lock, flags);
  
         if (!lsop->req_queued) {
-               spin_unlock_irqrestore(&ctrl->lock, flags);
+               spin_unlock_irqrestore(&rport->lock, flags);
                 return;
         }
  
@@ -641,56 +688,71 @@ __nvme_fc_finish_ls_req(struct nvme_fc_ctrl *ctrl,
  
         lsop->req_queued = false;
  
-       spin_unlock_irqrestore(&ctrl->lock, flags);
+       spin_unlock_irqrestore(&rport->lock, flags);
  
-       fc_dma_unmap_single(ctrl->dev, lsreq->rqstdma,
+       fc_dma_unmap_single(rport->dev, lsreq->rqstdma,
                                   (lsreq->rqstlen + lsreq->rsplen),
                                   DMA_BIDIRECTIONAL);
  
-       nvme_fc_ctrl_put(ctrl);
+       nvme_fc_rport_put(rport);
  }
  
  static int
-__nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl,
+__nvme_fc_send_ls_req(struct nvme_fc_rport *rport,
                 struct nvmefc_ls_req_op *lsop,
                 void (*done)(struct nvmefc_ls_req *req, int status))
  {
         struct nvmefc_ls_req *lsreq = &lsop->ls_req;
         unsigned long flags;
-       int ret;
+       int ret = 0;
  
-       if (!nvme_fc_ctrl_get(ctrl))
+       if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
+               return -ECONNREFUSED;
+
+       if (!nvme_fc_rport_get(rport))
                 return -ESHUTDOWN;
  
         lsreq->done = done;
-       lsop->ctrl = ctrl;
+       lsop->rport = rport;
         lsop->req_queued = false;
         INIT_LIST_HEAD(&lsop->lsreq_list);
         init_completion(&lsop->ls_done);
  
-       lsreq->rqstdma = fc_dma_map_single(ctrl->dev, lsreq->rqstaddr,
+       lsreq->rqstdma = fc_dma_map_single(rport->dev, lsreq->rqstaddr,
                                   lsreq->rqstlen + lsreq->rsplen,
                                   DMA_BIDIRECTIONAL);
-       if (fc_dma_mapping_error(ctrl->dev, lsreq->rqstdma)) {
-               nvme_fc_ctrl_put(ctrl);
-               dev_err(ctrl->dev,
-                       "els request command failed EFAULT.\n");
-               return -EFAULT;
+       if (fc_dma_mapping_error(rport->dev, lsreq->rqstdma)) {
+               ret = -EFAULT;
+               goto out_putrport;
         }
         lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen;
  
-       spin_lock_irqsave(&ctrl->lock, flags);
+       spin_lock_irqsave(&rport->lock, flags);
  
-       list_add_tail(&lsop->lsreq_list, &ctrl->ls_req_list);
+       list_add_tail(&lsop->lsreq_list, &rport->ls_req_list);
  
         lsop->req_queued = true;
  
-       spin_unlock_irqrestore(&ctrl->lock, flags);
+       spin_unlock_irqrestore(&rport->lock, flags);
  
-       ret = ctrl->lport->ops->ls_req(&ctrl->lport->localport,
-                                       &ctrl->rport->remoteport, lsreq);
+       ret = rport->lport->ops->ls_req(&rport->lport->localport,
+                                       &rport->remoteport, lsreq);
         if (ret)
-               lsop->ls_error = ret;
+               goto out_unlink;
+
+       return 0;
+
+out_unlink:
+       lsop->ls_error = ret;
+       spin_lock_irqsave(&rport->lock, flags);
+       lsop->req_queued = false;
+       list_del(&lsop->lsreq_list);
+       spin_unlock_irqrestore(&rport->lock, flags);
+       fc_dma_unmap_single(rport->dev, lsreq->rqstdma,
+                                 (lsreq->rqstlen + lsreq->rsplen),
+                                 DMA_BIDIRECTIONAL);
+out_putrport:
+       nvme_fc_rport_put(rport);
  
         return ret;
  }
@@ -705,15 +767,15 @@ nvme_fc_send_ls_req_done(struct nvmefc_ls_req *lsreq, int status)
  }
  
  static int
-nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req_op *lsop)
+nvme_fc_send_ls_req(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop)
  {
         struct nvmefc_ls_req *lsreq = &lsop->ls_req;
         struct fcnvme_ls_rjt *rjt = lsreq->rspaddr;
         int ret;
  
-       ret = __nvme_fc_send_ls_req(ctrl, lsop, nvme_fc_send_ls_req_done);
+       ret = __nvme_fc_send_ls_req(rport, lsop, nvme_fc_send_ls_req_done);
  
-       if (!ret)
+       if (!ret) {
                 /*
                  * No timeout/not interruptible as we need the struct
                  * to exist until the lldd calls us back. Thus mandate
@@ -722,14 +784,14 @@ nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req_op *lsop)
                  */
                 wait_for_completion(&lsop->ls_done);
  
-       __nvme_fc_finish_ls_req(ctrl, lsop);
+               __nvme_fc_finish_ls_req(lsop);
  
-       if (ret) {
-               dev_err(ctrl->dev,
-                       "ls request command failed (%d).\n", ret);
-               return ret;
+               ret = lsop->ls_error;
         }
  
+       if (ret)
+               return ret;
+
         /* ACC or RJT payload ? */
         if (rjt->w0.ls_cmd == FCNVME_LS_RJT)
                 return -ENXIO;
@@ -737,19 +799,14 @@ nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req_op *lsop)
         return 0;
  }
  
-static void
-nvme_fc_send_ls_req_async(struct nvme_fc_ctrl *ctrl,
+static int
+nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport,
                 struct nvmefc_ls_req_op *lsop,
                 void (*done)(struct nvmefc_ls_req *req, int status))
  {
-       int ret;
-
-       ret = __nvme_fc_send_ls_req(ctrl, lsop, done);
-
         /* don't wait for completion */
  
-       if (ret)
-               done(&lsop->ls_req, ret);
+       return __nvme_fc_send_ls_req(rport, lsop, done);
  }
  
  /* Validation Error indexes into the string table below */
@@ -839,7 +896,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
         lsreq->rsplen = sizeof(*assoc_acc);
         lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
  
-       ret = nvme_fc_send_ls_req(ctrl, lsop);
+       ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
         if (ret)
                 goto out_free_buffer;
  
@@ -848,11 +905,12 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
         /* validate the ACC response */
         if (assoc_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC)
                 fcret = VERR_LSACC;
-       if (assoc_acc->hdr.desc_list_len !=
+       else if (assoc_acc->hdr.desc_list_len !=
                         fcnvme_lsdesc_len(
                                 sizeof(struct fcnvme_ls_cr_assoc_acc)))
                 fcret = VERR_CR_ASSOC_ACC_LEN;
-       if (assoc_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST))
+       else if (assoc_acc->hdr.rqst.desc_tag !=
+                       cpu_to_be32(FCNVME_LSDESC_RQST))
                 fcret = VERR_LSDESC_RQST;
         else if (assoc_acc->hdr.rqst.desc_len !=
                         fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)))
@@ -946,7 +1004,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
         lsreq->rsplen = sizeof(*conn_acc);
         lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
  
-       ret = nvme_fc_send_ls_req(ctrl, lsop);
+       ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
         if (ret)
                 goto out_free_buffer;
  
@@ -955,10 +1013,10 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
         /* validate the ACC response */
         if (conn_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC)
                 fcret = VERR_LSACC;
-       if (conn_acc->hdr.desc_list_len !=
+       else if (conn_acc->hdr.desc_list_len !=
                         fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc)))
                 fcret = VERR_CR_CONN_ACC_LEN;
-       if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST))
+       else if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST))
                 fcret = VERR_LSDESC_RQST;
         else if (conn_acc->hdr.rqst.desc_len !=
                         fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)))
@@ -997,14 +1055,8 @@ static void
  nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status)
  {
         struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq);
-       struct nvme_fc_ctrl *ctrl = lsop->ctrl;
  
-       __nvme_fc_finish_ls_req(ctrl, lsop);
-
-       if (status)
-               dev_err(ctrl->dev,
-                       "disconnect assoc ls request command failed (%d).\n",
-                       status);
+       __nvme_fc_finish_ls_req(lsop);
  
         /* fc-nvme iniator doesn't care about success or failure of cmd */
  
@@ -1035,6 +1087,7 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
         struct fcnvme_ls_disconnect_acc *discon_acc;
         struct nvmefc_ls_req_op *lsop;
         struct nvmefc_ls_req *lsreq;
+       int ret;
  
         lsop = kzalloc((sizeof(*lsop) +
                          ctrl->lport->ops->lsrqst_priv_sz +
@@ -1077,7 +1130,10 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
         lsreq->rsplen = sizeof(*discon_acc);
         lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
  
-       nvme_fc_send_ls_req_async(ctrl, lsop, nvme_fc_disconnect_assoc_done);
+       ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop,
+                               nvme_fc_disconnect_assoc_done);
+       if (ret)
+               kfree(lsop);
  
         /* only meaningful part to terminating the association */
         ctrl->association_id = 0;
@@ -1086,6 +1142,7 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
  
  /* *********************** NVME Ctrl Routines **************************** */
  
+static void __nvme_fc_final_op_cleanup(struct request *rq);
  
  static int
  nvme_fc_reinit_request(void *data, struct request *rq)
@@ -1123,21 +1180,84 @@ nvme_fc_exit_request(void *data, struct request *rq,
         return __nvme_fc_exit_request(data, op);
  }
  
+static int
+__nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
+{
+       int state;
+       /* Abort @op through the LLDD if it is ACTIVE; otherwise return -ECANCELED. */
+       state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
+       if (state != FCPOP_STATE_ACTIVE) {
+               atomic_set(&op->state, state);  /* op was not active: put the previous state back */
+               return -ECANCELED;
+       }
+       /* op was ACTIVE and is now marked ABORTED: hand it to the LLDD abort handler */
+       ctrl->lport->ops->fcp_abort(&ctrl->lport->localport,
+                                       &ctrl->rport->remoteport,
+                                       op->queue->lldd_handle,
+                                       &op->fcp_req);
+
+       return 0;
+}
+
  static void
-nvme_fc_exit_aen_ops(struct nvme_fc_ctrl *ctrl)
+nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl)
  {
         struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops;
-       int i;
+       unsigned long flags;
+       int i, ret;
  
         for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) {
-               if (atomic_read(&aen_op->state) == FCPOP_STATE_UNINIT)
+               if (atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE)
                         continue;
-               __nvme_fc_exit_request(ctrl, aen_op);
-               nvme_fc_ctrl_put(ctrl);
+
+               spin_lock_irqsave(&ctrl->lock, flags);
+               if (ctrl->flags & FCCTRL_TERMIO) {
+                       ctrl->iocnt++;
+                       aen_op->flags |= FCOP_FLAGS_TERMIO;
+               }
+               spin_unlock_irqrestore(&ctrl->lock, flags);
+
+               ret = __nvme_fc_abort_op(ctrl, aen_op);
+               if (ret) {
+                       /*
+                        * if __nvme_fc_abort_op failed the io wasn't
+                        * active. Thus this call path is running in
+                        * parallel to the io complete. Treat as non-error.
+                        */
+
+                       /* back out the flags/counters */
+                       spin_lock_irqsave(&ctrl->lock, flags);
+                       if (ctrl->flags & FCCTRL_TERMIO)
+                               ctrl->iocnt--;
+                       aen_op->flags &= ~FCOP_FLAGS_TERMIO;
+                       spin_unlock_irqrestore(&ctrl->lock, flags);
+                       return;
+               }
+       }
+}
+
+static inline int
+__nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
+               struct nvme_fc_fcp_op *op)
+{
+       unsigned long flags;
+       bool complete_rq = false;
+       /* Completion-time bookkeeping for @op, done under ctrl->lock. */
+       spin_lock_irqsave(&ctrl->lock, flags);
+       if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
+               if (ctrl->flags & FCCTRL_TERMIO)
+                       ctrl->iocnt--;  /* drop the count taken when the op was marked TERMIO */
         }
+       if (op->flags & FCOP_FLAGS_RELEASED)
+               complete_rq = true;
+       else
+               op->flags |= FCOP_FLAGS_COMPLETE;       /* record that completion ran before RELEASED was set */
+       spin_unlock_irqrestore(&ctrl->lock, flags);
+
+       return complete_rq;     /* true only if RELEASED was already set; bool carried in an int */
+}
  
-void
+static void
  nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
  {
         struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(req);
@@ -1146,7 +1266,10 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
         struct nvme_fc_ctrl *ctrl = op->ctrl;
         struct nvme_fc_queue *queue = op->queue;
         struct nvme_completion *cqe = &op->rsp_iu.cqe;
-       u16 status;
+       struct nvme_command *sqe = &op->cmd_iu.sqe;
+       __le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
+       union nvme_result result;
+       bool complete_rq;
  
         /*
          * WARNING:
@@ -1181,9 +1304,9 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
                                 sizeof(op->rsp_iu), DMA_FROM_DEVICE);
  
         if (atomic_read(&op->state) == FCPOP_STATE_ABORTED)
-               status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
-       else
-               status = freq->status;
+               status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1);
+       else if (freq->status)
+               status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
  
         /*
          * For the linux implementation, if we have an unsuccesful
@@ -1211,10 +1334,10 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
                  */
                 if (freq->transferred_length !=
                         be32_to_cpu(op->cmd_iu.data_len)) {
-                       status = -EIO;
+                       status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
                         goto done;
                 }
-               op->nreq.result.u64 = 0;
+               result.u64 = 0;
                 break;
  
         case sizeof(struct nvme_fc_ersp_iu):
@@ -1226,28 +1349,40 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
                                         (freq->rcv_rsplen / 4) ||
                              be32_to_cpu(op->rsp_iu.xfrd_len) !=
                                         freq->transferred_length ||
-                            op->rqno != le16_to_cpu(cqe->command_id))) {
-                       status = -EIO;
+                            op->rsp_iu.status_code ||
+                            sqe->common.command_id != cqe->command_id)) {
+                       status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
                         goto done;
                 }
-               op->nreq.result = cqe->result;
-               status = le16_to_cpu(cqe->status) >> 1;
+               result = cqe->result;
+               status = cqe->status;
                 break;
  
         default:
-               status = -EIO;
+               status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
                 goto done;
         }
  
  done:
-       if (!queue->qnum && op->rqno >= AEN_CMDID_BASE) {
-               nvme_complete_async_event(&queue->ctrl->ctrl, status,
-                                       &op->nreq.result);
+       if (op->flags & FCOP_FLAGS_AEN) {
+               nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
+               complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
+               atomic_set(&op->state, FCPOP_STATE_IDLE);
+               op->flags = FCOP_FLAGS_AEN;     /* clear other flags */
                 nvme_fc_ctrl_put(ctrl);
                 return;
         }
  
-       blk_mq_complete_request(rq, status);
+       complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
+       if (!complete_rq) {
+               if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
+                       status = cpu_to_le16(NVME_SC_ABORT_REQ);
+                       if (blk_queue_dying(rq->q))
+                               status |= cpu_to_le16(NVME_SC_DNR);
+               }
+               nvme_end_request(rq, status, result);
+       } else
+               __nvme_fc_final_op_cleanup(rq);
  }
  
  static int
@@ -1328,25 +1463,55 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl)
         struct nvme_fc_fcp_op *aen_op;
         struct nvme_fc_cmd_iu *cmdiu;
         struct nvme_command *sqe;
+       void *private;
         int i, ret;
  
         aen_op = ctrl->aen_ops;
         for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) {
+               private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz,
+                                               GFP_KERNEL);
+               if (!private)
+                       return -ENOMEM;
+
                 cmdiu = &aen_op->cmd_iu;
                 sqe = &cmdiu->sqe;
                 ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0],
                                 aen_op, (struct request *)NULL,
                                 (AEN_CMDID_BASE + i));
-               if (ret)
+               if (ret) {
+                       kfree(private);
                         return ret;
+               }
+
+               aen_op->flags = FCOP_FLAGS_AEN;
+               aen_op->fcp_req.first_sgl = NULL; /* no sg list */
+               aen_op->fcp_req.private = private;
  
                 memset(sqe, 0, sizeof(*sqe));
                 sqe->common.opcode = nvme_admin_async_event;
+               /* Note: core layer may overwrite the sqe.command_id value */
                 sqe->common.command_id = AEN_CMDID_BASE + i;
         }
         return 0;
  }
  
+static void
+nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl)
+{
+       struct nvme_fc_fcp_op *aen_op;
+       int i;
+       /* Tear down every AEN op that still owns an fcp_req.private buffer. */
+       aen_op = ctrl->aen_ops;
+       for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) {
+               if (!aen_op->fcp_req.private)
+                       continue;       /* no private buffer: nothing to release for this slot */
+
+               __nvme_fc_exit_request(ctrl, aen_op);
+
+               kfree(aen_op->fcp_req.private);
+               aen_op->fcp_req.private = NULL; /* a repeated call then skips this op */
+       }
+}
  
  static inline void
  __nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl,
@@ -1445,15 +1610,6 @@ __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *ctrl,
         queue->lldd_handle = NULL;
  }
  
-static void
-nvme_fc_destroy_admin_queue(struct nvme_fc_ctrl *ctrl)
-{
-       __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
-       blk_cleanup_queue(ctrl->ctrl.admin_q);
-       blk_mq_free_tag_set(&ctrl->admin_tag_set);
-       nvme_fc_free_queue(&ctrl->queues[0]);
-}
-
  static void
  nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl)
  {
@@ -1541,19 +1697,27 @@ nvme_fc_ctrl_free(struct kref *ref)
                 container_of(ref, struct nvme_fc_ctrl, ref);
         unsigned long flags;
  
-       if (ctrl->state != FCCTRL_INIT) {
-               /* remove from rport list */
-               spin_lock_irqsave(&ctrl->rport->lock, flags);
-               list_del(&ctrl->ctrl_list);
-               spin_unlock_irqrestore(&ctrl->rport->lock, flags);
+       if (ctrl->ctrl.tagset) {
+               blk_cleanup_queue(ctrl->ctrl.connect_q);
+               blk_mq_free_tag_set(&ctrl->tag_set);
         }
  
+       /* remove from rport list */
+       spin_lock_irqsave(&ctrl->rport->lock, flags);
+       list_del(&ctrl->ctrl_list);
+       spin_unlock_irqrestore(&ctrl->rport->lock, flags);
+
+       blk_cleanup_queue(ctrl->ctrl.admin_q);
+       blk_mq_free_tag_set(&ctrl->admin_tag_set);
+
+       kfree(ctrl->queues);
+
         put_device(ctrl->dev);
         nvme_fc_rport_put(ctrl->rport);
  
-       kfree(ctrl->queues);
         ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum);
-       nvmf_free_options(ctrl->ctrl.opts);
+       if (ctrl->ctrl.opts)
+               nvmf_free_options(ctrl->ctrl.opts);
         kfree(ctrl);
  }
  
@@ -1574,57 +1738,38 @@ nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl)
   * controller. Called after last nvme_put_ctrl() call
   */
  static void
-nvme_fc_free_nvme_ctrl(struct nvme_ctrl *nctrl)
+nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl)
  {
         struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
  
         WARN_ON(nctrl != &ctrl->ctrl);
  
-       /*
-        * Tear down the association, which will generate link
-        * traffic to terminate connections
-        */
-
-       if (ctrl->state != FCCTRL_INIT) {
-               /* send a Disconnect(association) LS to fc-nvme target */
-               nvme_fc_xmt_disconnect_assoc(ctrl);
-
-               if (ctrl->ctrl.tagset) {
-                       blk_cleanup_queue(ctrl->ctrl.connect_q);
-                       blk_mq_free_tag_set(&ctrl->tag_set);
-                       nvme_fc_delete_hw_io_queues(ctrl);
-                       nvme_fc_free_io_queues(ctrl);
-               }
-
-               nvme_fc_exit_aen_ops(ctrl);
-
-               nvme_fc_destroy_admin_queue(ctrl);
-       }
-
         nvme_fc_ctrl_put(ctrl);
  }
  
-
-static int
-__nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
+static void
+nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
  {
-       int state;
+       dev_warn(ctrl->ctrl.device,
+               "NVME-FC{%d}: transport association error detected: %s\n",
+               ctrl->cnum, errmsg);
+       dev_info(ctrl->ctrl.device,
+               "NVME-FC{%d}: resetting controller\n", ctrl->cnum);
  
-       state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
-       if (state != FCPOP_STATE_ACTIVE) {
-               atomic_set(&op->state, state);
-               return -ECANCELED; /* fail */
+       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+               dev_err(ctrl->ctrl.device,
+                       "NVME-FC{%d}: error_recovery: Couldn't change state "
+                       "to RECONNECTING\n", ctrl->cnum);
+               return;
         }
  
-       ctrl->lport->ops->fcp_abort(&ctrl->lport->localport,
-                                       &ctrl->rport->remoteport,
-                                       op->queue->lldd_handle,
-                                       &op->fcp_req);
-
-       return 0;
+       if (!queue_work(nvme_fc_wq, &ctrl->reset_work))
+               dev_err(ctrl->ctrl.device,
+                       "NVME-FC{%d}: error_recovery: Failed to schedule "
+                       "reset work\n", ctrl->cnum);
  }
  
-enum blk_eh_timer_return
+static enum blk_eh_timer_return
  nvme_fc_timeout(struct request *rq, bool reserved)
  {
         struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
@@ -1640,11 +1785,13 @@ nvme_fc_timeout(struct request *rq, bool reserved)
                 return BLK_EH_HANDLED;
  
         /*
-        * TODO: force a controller reset
-        *   when that happens, queues will be torn down and outstanding
-        *   ios will be terminated, and the above abort, on a single io
-        *   will no longer be needed.
+        * we can't individually ABTS an io without affecting the queue,
+        * thus killing the queue, and thus the association.
+        * So resolve by performing a controller reset, which will stop
+        * the host/io stack, terminate the association on the link,
+        * and recreate an association on the link.
          */
+       nvme_fc_error_recovery(ctrl, "io timeout error");
  
         return BLK_EH_HANDLED;
  }
@@ -1738,6 +1885,13 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
         u32 csn;
         int ret;
  
+       /*
+        * before attempting to send the io, check to see if we believe
+        * the target device is present
+        */
+       if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
+               return BLK_MQ_RQ_QUEUE_ERROR;
+
         if (!nvme_fc_ctrl_get(ctrl))
                 return BLK_MQ_RQ_QUEUE_ERROR;
  
@@ -1761,7 +1915,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
         op->fcp_req.io_dir = io_dir;
         op->fcp_req.transferred_length = 0;
         op->fcp_req.rcv_rsplen = 0;
-       op->fcp_req.status = 0;
+       op->fcp_req.status = NVME_SC_SUCCESS;
         op->fcp_req.sqid = cpu_to_le16(queue->qnum);
  
         /*
@@ -1782,14 +1936,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
         sqe->rw.dptr.sgl.length = cpu_to_le32(data_len);
         sqe->rw.dptr.sgl.addr = 0;
  
-       /* odd that we set the command_id - should come from nvme-fabrics */
-       WARN_ON_ONCE(sqe->common.command_id != cpu_to_le16(op->rqno));
-
-       if (op->rq) {                           /* skipped on aens */
+       if (!(op->flags & FCOP_FLAGS_AEN)) {
                 ret = nvme_fc_map_data(ctrl, op->rq, op);
                 if (ret < 0) {
-                       dev_err(queue->ctrl->ctrl.device,
-                            "Failed to map data (%d)\n", ret);
                         nvme_cleanup_cmd(op->rq);
                         nvme_fc_ctrl_put(ctrl);
                         return (ret == -ENOMEM || ret == -EAGAIN) ?
@@ -1802,7 +1951,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
  
         atomic_set(&op->state, FCPOP_STATE_ACTIVE);
  
-       if (op->rq)
+       if (!(op->flags & FCOP_FLAGS_AEN))
                 blk_mq_start_request(op->rq);
  
         ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport,
@@ -1810,9 +1959,6 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
                                         queue->lldd_handle, &op->fcp_req);
  
         if (ret) {
-               dev_err(ctrl->dev,
-                       "Send nvme command failed - lldd returned %d.\n", ret);
-
                 if (op->rq) {                   /* normal request */
                         nvme_fc_unmap_data(ctrl, op->rq, op);
                         nvme_cleanup_cmd(op->rq);
@@ -1882,12 +2028,8 @@ nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
         struct nvme_fc_fcp_op *op;
  
         req = blk_mq_tag_to_rq(nvme_fc_tagset(queue), tag);
-       if (!req) {
-               dev_err(queue->ctrl->ctrl.device,
-                        "tag 0x%x on QNum %#x not found\n",
-                       tag, queue->qnum);
+       if (!req)
                 return 0;
-       }
  
         op = blk_mq_rq_to_pdu(req);
  
@@ -1904,11 +2046,21 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
  {
         struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg);
         struct nvme_fc_fcp_op *aen_op;
+       unsigned long flags;
+       bool terminating = false;
         int ret;
  
         if (aer_idx > NVME_FC_NR_AEN_COMMANDS)
                 return;
  
+       spin_lock_irqsave(&ctrl->lock, flags);
+       if (ctrl->flags & FCCTRL_TERMIO)
+               terminating = true;
+       spin_unlock_irqrestore(&ctrl->lock, flags);
+
+       if (terminating)
+               return;
+
         aen_op = &ctrl->aen_ops[aer_idx];
  
         ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0,
@@ -1919,36 +2071,101 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
  }
  
  static void
-nvme_fc_complete_rq(struct request *rq)
+__nvme_fc_final_op_cleanup(struct request *rq)
  {
         struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
         struct nvme_fc_ctrl *ctrl = op->ctrl;
-       int error = 0, state;
  
-       state = atomic_xchg(&op->state, FCPOP_STATE_IDLE);
+       atomic_set(&op->state, FCPOP_STATE_IDLE);
+       op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED |
+                       FCOP_FLAGS_COMPLETE);
  
         nvme_cleanup_cmd(rq);
-
         nvme_fc_unmap_data(ctrl, rq, op);
+       nvme_complete_rq(rq);
+       nvme_fc_ctrl_put(ctrl);
  
-       if (unlikely(rq->errors)) {
-               if (nvme_req_needs_retry(rq, rq->errors)) {
-                       nvme_requeue_req(rq);
-                       return;
-               }
+}
  
-               if (blk_rq_is_passthrough(rq))
-                       error = rq->errors;
-               else
-                       error = nvme_error_status(rq->errors);
+static void
+nvme_fc_complete_rq(struct request *rq)
+{
+       struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
+       struct nvme_fc_ctrl *ctrl = op->ctrl;
+       unsigned long flags;
+       bool completed = false;
+
+       /*
+        * the core layer, on controller resets after calling
+        * nvme_shutdown_ctrl(), calls complete_rq without our
+        * calling blk_mq_complete_request(), thus there may still
+        * be live i/o outstanding with the LLDD. Means transport has
+        * to track complete calls vs fcpio_done calls to know what
+        * path to take on completes and dones.
+        */
+       spin_lock_irqsave(&ctrl->lock, flags);
+       if (op->flags & FCOP_FLAGS_COMPLETE)
+               completed = true;
+       else
+               op->flags |= FCOP_FLAGS_RELEASED;
+       spin_unlock_irqrestore(&ctrl->lock, flags);
+
+       if (completed)
+               __nvme_fc_final_op_cleanup(rq);
+}
+
+/*
+ * This routine is used by the transport when it needs to find active
+ * io on a queue that is to be terminated. The transport uses
+ * blk_mq_tagset_busy_iter() to find the busy requests, which then invoke
+ * this routine to kill them on a 1 by 1 basis.
+ *
+ * As FC allocates FC exchange for each io, the transport must contact
+ * the LLDD to terminate the exchange, thus releasing the FC exchange.
+ * After terminating the exchange the LLDD will call the transport's
+ * normal io done path for the request, but it will have an aborted
+ * status. The done path will return the io request back to the block
+ * layer with an error status.
+ */
+static void
+nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
+{
+       struct nvme_ctrl *nctrl = data;
+       struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
+       struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
+       unsigned long flags;
+       int status;
+
+       if (!blk_mq_request_started(req))
+               return;
+
+       spin_lock_irqsave(&ctrl->lock, flags);
+       if (ctrl->flags & FCCTRL_TERMIO) {
+               ctrl->iocnt++;
+               op->flags |= FCOP_FLAGS_TERMIO;
         }
+       spin_unlock_irqrestore(&ctrl->lock, flags);
  
-       nvme_fc_ctrl_put(ctrl);
+       status = __nvme_fc_abort_op(ctrl, op);
+       if (status) {
+               /*
+                * if __nvme_fc_abort_op failed the io wasn't
+                * active. Thus this call path is running in
+                * parallel to the io complete. Treat as non-error.
+                */
  
-       blk_mq_end_request(rq, error);
+               /* back out the flags/counters */
+               spin_lock_irqsave(&ctrl->lock, flags);
+               if (ctrl->flags & FCCTRL_TERMIO)
+                       ctrl->iocnt--;
+               op->flags &= ~FCOP_FLAGS_TERMIO;
+               spin_unlock_irqrestore(&ctrl->lock, flags);
+               return;
+       }
  }
  
-static struct blk_mq_ops nvme_fc_mq_ops = {
+
+static const struct blk_mq_ops nvme_fc_mq_ops = {
         .queue_rq       = nvme_fc_queue_rq,
         .complete       = nvme_fc_complete_rq,
         .init_request   = nvme_fc_init_request,
@@ -1959,145 +2176,275 @@ static struct blk_mq_ops nvme_fc_mq_ops = {
         .timeout        = nvme_fc_timeout,
  };
  
-static struct blk_mq_ops nvme_fc_admin_mq_ops = {
-       .queue_rq       = nvme_fc_queue_rq,
-       .complete       = nvme_fc_complete_rq,
-       .init_request   = nvme_fc_init_admin_request,
-       .exit_request   = nvme_fc_exit_request,
-       .reinit_request = nvme_fc_reinit_request,
-       .init_hctx      = nvme_fc_init_admin_hctx,
-       .timeout        = nvme_fc_timeout,
-};
-
  static int
-nvme_fc_configure_admin_queue(struct nvme_fc_ctrl *ctrl)
+nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
  {
-       u32 segs;
-       int error;
+       struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+       int ret;
  
-       nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH);
+       ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
+       if (ret) {
+               dev_info(ctrl->ctrl.device,
+                       "set_queue_count failed: %d\n", ret);
+               return ret;
+       }
  
-       error = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0],
-                               NVME_FC_AQ_BLKMQ_DEPTH,
-                               (NVME_FC_AQ_BLKMQ_DEPTH / 4));
-       if (error)
-               return error;
+       ctrl->queue_count = opts->nr_io_queues + 1;
+       if (!opts->nr_io_queues)
+               return 0;
  
-       memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
-       ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
-       ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH;
-       ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */
-       ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
-       ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
+       dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n",
+                       opts->nr_io_queues);
+
+       nvme_fc_init_io_queues(ctrl);
+
+       memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
+       ctrl->tag_set.ops = &nvme_fc_mq_ops;
+       ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
+       ctrl->tag_set.reserved_tags = 1; /* fabric connect */
+       ctrl->tag_set.numa_node = NUMA_NO_NODE;
+       ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
+       ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
                                         (SG_CHUNK_SIZE *
                                                 sizeof(struct scatterlist)) +
                                         ctrl->lport->ops->fcprqst_priv_sz;
-       ctrl->admin_tag_set.driver_data = ctrl;
-       ctrl->admin_tag_set.nr_hw_queues = 1;
-       ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
+       ctrl->tag_set.driver_data = ctrl;
+       ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1;
+       ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
  
-       error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
-       if (error)
-               goto out_free_queue;
+       ret = blk_mq_alloc_tag_set(&ctrl->tag_set);
+       if (ret)
+               return ret;
  
-       ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
-       if (IS_ERR(ctrl->ctrl.admin_q)) {
-               error = PTR_ERR(ctrl->ctrl.admin_q);
-               goto out_free_tagset;
+       ctrl->ctrl.tagset = &ctrl->tag_set;
+
+       ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
+       if (IS_ERR(ctrl->ctrl.connect_q)) {
+               ret = PTR_ERR(ctrl->ctrl.connect_q);
+               goto out_free_tag_set;
         }
  
-       error = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
+       ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+       if (ret)
+               goto out_cleanup_blk_queue;
+
+       ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+       if (ret)
+               goto out_delete_hw_queues;
+
+       return 0;
+
+out_delete_hw_queues:
+       nvme_fc_delete_hw_io_queues(ctrl);
+out_cleanup_blk_queue:
+       nvme_stop_keep_alive(&ctrl->ctrl);
+       blk_cleanup_queue(ctrl->ctrl.connect_q);
+out_free_tag_set:
+       blk_mq_free_tag_set(&ctrl->tag_set);
+       nvme_fc_free_io_queues(ctrl);
+
+       /* force put free routine to ignore io queues */
+       ctrl->ctrl.tagset = NULL;
+
+       return ret;
+}
+
+static int
+nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
+{
+       struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+       int ret;
+
+       ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
+       if (ret) {
+               dev_info(ctrl->ctrl.device,
+                       "set_queue_count failed: %d\n", ret);
+               return ret;
+       }
+
+       /* check for io queues existing */
+       if (ctrl->queue_count == 1)
+               return 0;
+
+       dev_info(ctrl->ctrl.device, "Recreating %d I/O queues.\n",
+                       opts->nr_io_queues);
+
+       nvme_fc_init_io_queues(ctrl);
+
+       ret = blk_mq_reinit_tagset(&ctrl->tag_set);
+       if (ret)
+               goto out_free_io_queues;
+
+       ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+       if (ret)
+               goto out_free_io_queues;
+
+       ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+       if (ret)
+               goto out_delete_hw_queues;
+
+       return 0;
+
+out_delete_hw_queues:
+       nvme_fc_delete_hw_io_queues(ctrl);
+out_free_io_queues:
+       nvme_fc_free_io_queues(ctrl);
+       return ret;
+}
+
+/*
+ * This routine restarts the controller on the host side, and
+ * on the link side, recreates the controller association.
+ */
+static int
+nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
+{
+       struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+       u32 segs;
+       int ret;
+       bool changed;
+
+       ctrl->connect_attempts++;
+
+       /*
+        * Create the admin queue
+        */
+
+       nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH);
+
+       ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
                                 NVME_FC_AQ_BLKMQ_DEPTH);
-       if (error)
-               goto out_cleanup_queue;
+       if (ret)
+               goto out_free_queue;
  
-       error = nvmf_connect_admin_queue(&ctrl->ctrl);
-       if (error)
+       ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0],
+                               NVME_FC_AQ_BLKMQ_DEPTH,
+                               (NVME_FC_AQ_BLKMQ_DEPTH / 4));
+       if (ret)
                 goto out_delete_hw_queue;
  
-       error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap);
-       if (error) {
+       if (ctrl->ctrl.state != NVME_CTRL_NEW)
+               blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
+
+       ret = nvmf_connect_admin_queue(&ctrl->ctrl);
+       if (ret)
+               goto out_disconnect_admin_queue;
+
+       /*
+        * Check controller capabilities
+        *
+        * todo:- add code to check if ctrl attributes changed from
+        * prior connection values
+        */
+
+       ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap);
+       if (ret) {
                 dev_err(ctrl->ctrl.device,
                         "prop_get NVME_REG_CAP failed\n");
-               goto out_delete_hw_queue;
+               goto out_disconnect_admin_queue;
         }
  
         ctrl->ctrl.sqsize =
                 min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize);
  
-       error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
-       if (error)
-               goto out_delete_hw_queue;
+       ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
+       if (ret)
+               goto out_disconnect_admin_queue;
  
         segs = min_t(u32, NVME_FC_MAX_SEGMENTS,
                         ctrl->lport->ops->max_sgl_segments);
         ctrl->ctrl.max_hw_sectors = (segs - 1) << (PAGE_SHIFT - 9);
  
-       error = nvme_init_identify(&ctrl->ctrl);
-       if (error)
-               goto out_delete_hw_queue;
+       ret = nvme_init_identify(&ctrl->ctrl);
+       if (ret)
+               goto out_disconnect_admin_queue;
+
+       /* sanity checks */
+
+       /* FC-NVME does not have other data in the capsule */
+       if (ctrl->ctrl.icdoff) {
+               dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n",
+                               ctrl->ctrl.icdoff);
+               goto out_disconnect_admin_queue;
+       }
  
         nvme_start_keep_alive(&ctrl->ctrl);
  
-       return 0;
+       /* FC-NVME supports normal SGL Data Block Descriptors */
+
+       if (opts->queue_size > ctrl->ctrl.maxcmd) {
+               /* warn if maxcmd is lower than queue_size */
+               dev_warn(ctrl->ctrl.device,
+                       "queue_size %zu > ctrl maxcmd %u, reducing "
+                       "to queue_size\n",
+                       opts->queue_size, ctrl->ctrl.maxcmd);
+               opts->queue_size = ctrl->ctrl.maxcmd;
+       }
+
+       ret = nvme_fc_init_aen_ops(ctrl);
+       if (ret)
+               goto out_term_aen_ops;
+
+       /*
+        * Create the io queues
+        */
+
+       if (ctrl->queue_count > 1) {
+               if (ctrl->ctrl.state == NVME_CTRL_NEW)
+                       ret = nvme_fc_create_io_queues(ctrl);
+               else
+                       ret = nvme_fc_reinit_io_queues(ctrl);
+               if (ret)
+                       goto out_term_aen_ops;
+       }
+
+       changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
+       WARN_ON_ONCE(!changed);
  
+       ctrl->connect_attempts = 0;
+
+       kref_get(&ctrl->ctrl.kref);
+
+       if (ctrl->queue_count > 1) {
+               nvme_start_queues(&ctrl->ctrl);
+               nvme_queue_scan(&ctrl->ctrl);
+               nvme_queue_async_events(&ctrl->ctrl);
+       }
+
+       return 0;       /* Success */
+
+out_term_aen_ops:
+       nvme_fc_term_aen_ops(ctrl);
+       nvme_stop_keep_alive(&ctrl->ctrl);
+out_disconnect_admin_queue:
+       /* send a Disconnect(association) LS to fc-nvme target */
+       nvme_fc_xmt_disconnect_assoc(ctrl);
  out_delete_hw_queue:
         __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
-out_cleanup_queue:
-       blk_cleanup_queue(ctrl->ctrl.admin_q);
-out_free_tagset:
-       blk_mq_free_tag_set(&ctrl->admin_tag_set);
  out_free_queue:
         nvme_fc_free_queue(&ctrl->queues[0]);
-       return error;
+
+       return ret;
  }
  
  /*
- * This routine is used by the transport when it needs to find active
- * io on a queue that is to be terminated. The transport uses
- * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke
- * this routine to kill them on a 1 by 1 basis.
- *
- * As FC allocates FC exchange for each io, the transport must contact
- * the LLDD to terminate the exchange, thus releasing the FC exchange.
- * After terminating the exchange the LLDD will call the transport's
- * normal io done path for the request, but it will have an aborted
- * status. The done path will return the io request back to the block
- * layer with an error status.
+ * This routine stops operation of the controller on the host side.
+ * On the host os stack side: Admin and IO queues are stopped,
+ *   outstanding ios on them terminated via FC ABTS.
+ * On the link side: the association is terminated.
   */
  static void
-nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
+nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
  {
-       struct nvme_ctrl *nctrl = data;
-       struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
-       struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
-int status;
-
-       if (!blk_mq_request_started(req))
-               return;
+       unsigned long flags;
  
-       /* this performs an ABTS-LS on the FC exchange for the io */
-       status = __nvme_fc_abort_op(ctrl, op);
-       /*
-        * if __nvme_fc_abort_op failed: io wasn't active to abort
-        * consider it done. Assume completion path already completing
-        * in parallel
-        */
-       if (status)
-               /* io wasn't active to abort consider it done */
-               /* assume completion path already completing in parallel */
-               return;
-}
+       nvme_stop_keep_alive(&ctrl->ctrl);
  
+       spin_lock_irqsave(&ctrl->lock, flags);
+       ctrl->flags |= FCCTRL_TERMIO;
+       ctrl->iocnt = 0;
+       spin_unlock_irqrestore(&ctrl->lock, flags);
  
-/*
- * This routine stops operation of the controller. Admin and IO queues
- * are stopped, outstanding ios on them terminated, and the nvme ctrl
- * is shutdown.
- */
-static void
-nvme_fc_shutdown_ctrl(struct nvme_fc_ctrl *ctrl)
-{
         /*
          * If io queues are present, stop them and terminate all outstanding
          * ios on them. As FC allocates FC exchange for each io, the
@@ -2116,35 +2463,79 @@ nvme_fc_shutdown_ctrl(struct nvme_fc_ctrl *ctrl)
                                 nvme_fc_terminate_exchange, &ctrl->ctrl);
         }
  
-       if (ctrl->ctrl.state == NVME_CTRL_LIVE)
-               nvme_shutdown_ctrl(&ctrl->ctrl);
+       /*
+        * Other transports, which don't have link-level contexts bound
+        * to sqe's, would try to gracefully shutdown the controller by
+        * writing the registers for shutdown and polling (call
+        * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially
+        * just aborted and we will wait on those contexts, and given
+        * there was no indication of how live the controller is on the
+        * link, don't send more io to create more contexts for the
+        * shutdown. Let the controller fail via keepalive failure if
+        * it's still present.
+        */
  
         /*
-        * now clean up the admin queue. Same thing as above.
+        * clean up the admin queue. Same thing as above.
          * use blk_mq_tagset_busy_itr() and the transport routine to
          * terminate the exchanges.
          */
         blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
         blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
                                 nvme_fc_terminate_exchange, &ctrl->ctrl);
+
+       /* kill the aens as they are a separate path */
+       nvme_fc_abort_aen_ops(ctrl);
+
+       /* wait for all io that had to be aborted */
+       spin_lock_irqsave(&ctrl->lock, flags);
+       while (ctrl->iocnt) {
+               spin_unlock_irqrestore(&ctrl->lock, flags);
+               msleep(1000);
+               spin_lock_irqsave(&ctrl->lock, flags);
+       }
+       ctrl->flags &= ~FCCTRL_TERMIO;
+       spin_unlock_irqrestore(&ctrl->lock, flags);
+
+       nvme_fc_term_aen_ops(ctrl);
+
+       /*
+        * send a Disconnect(association) LS to fc-nvme target
+        * Note: could have been sent at top of process, but
+        * cleaner on link traffic if after the aborts complete.
+        * Note: if association doesn't exist, association_id will be 0
+        */
+       if (ctrl->association_id)
+               nvme_fc_xmt_disconnect_assoc(ctrl);
+
+       if (ctrl->ctrl.tagset) {
+               nvme_fc_delete_hw_io_queues(ctrl);
+               nvme_fc_free_io_queues(ctrl);
+       }
+
+       __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
+       nvme_fc_free_queue(&ctrl->queues[0]);
  }
  
-/*
- * Called to teardown an association.
- * May be called with association fully in place or partially in place.
- */
  static void
-__nvme_fc_remove_ctrl(struct nvme_fc_ctrl *ctrl)
+nvme_fc_delete_ctrl_work(struct work_struct *work)
  {
-       nvme_stop_keep_alive(&ctrl->ctrl);
+       struct nvme_fc_ctrl *ctrl =
+               container_of(work, struct nvme_fc_ctrl, delete_work);
  
-       /* stop and terminate ios on admin and io queues */
-       nvme_fc_shutdown_ctrl(ctrl);
+       cancel_work_sync(&ctrl->reset_work);
+       cancel_delayed_work_sync(&ctrl->connect_work);
+
+       /*
+        * kill the association on the link side.  this will block
+        * waiting for io to terminate
+        */
+       nvme_fc_delete_association(ctrl);
  
         /*
          * tear down the controller
          * This will result in the last reference on the nvme ctrl to
-        * expire, calling the transport nvme_fc_free_nvme_ctrl() callback.
+        * expire, calling the transport nvme_fc_nvme_ctrl_freed() callback.
          * From there, the transport will tear down it's logical queues and
          * association.
          */
@@ -2153,15 +2544,6 @@ __nvme_fc_remove_ctrl(struct nvme_fc_ctrl *ctrl)
         nvme_put_ctrl(&ctrl->ctrl);
  }
  
-static void
-nvme_fc_del_ctrl_work(struct work_struct *work)
-{
-       struct nvme_fc_ctrl *ctrl =
-                       container_of(work, struct nvme_fc_ctrl, delete_work);
-
-       __nvme_fc_remove_ctrl(ctrl);
-}
-
  static int
  __nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl)
  {
@@ -2181,25 +2563,85 @@ static int
  nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl)
  {
         struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
-       struct nvme_fc_rport *rport = ctrl->rport;
-       unsigned long flags;
         int ret;
  
-       spin_lock_irqsave(&rport->lock, flags);
+       if (!kref_get_unless_zero(&ctrl->ctrl.kref))
+               return -EBUSY;
+
         ret = __nvme_fc_del_ctrl(ctrl);
-       spin_unlock_irqrestore(&rport->lock, flags);
-       if (ret)
-               return ret;
  
-       flush_work(&ctrl->delete_work);
+       if (!ret)
+               flush_workqueue(nvme_fc_wq);
  
-       return 0;
+       nvme_put_ctrl(&ctrl->ctrl);
+
+       return ret;
  }
  
+static void
+nvme_fc_reset_ctrl_work(struct work_struct *work)
+{
+       struct nvme_fc_ctrl *ctrl =
+                       container_of(work, struct nvme_fc_ctrl, reset_work);
+       int ret;
+
+       /* will block while waiting for io to terminate */
+       nvme_fc_delete_association(ctrl);
+
+       ret = nvme_fc_create_association(ctrl);
+       if (ret) {
+               dev_warn(ctrl->ctrl.device,
+                       "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
+                       ctrl->cnum, ret);
+               if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) {
+                       dev_warn(ctrl->ctrl.device,
+                               "NVME-FC{%d}: Max reconnect attempts (%d) "
+                               "reached. Removing controller\n",
+                               ctrl->cnum, ctrl->connect_attempts);
+
+                       if (!nvme_change_ctrl_state(&ctrl->ctrl,
+                               NVME_CTRL_DELETING)) {
+                               dev_err(ctrl->ctrl.device,
+                                       "NVME-FC{%d}: failed to change state "
+                                       "to DELETING\n", ctrl->cnum);
+                               return;
+                       }
+
+                       WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work));
+                       return;
+               }
+
+               dev_warn(ctrl->ctrl.device,
+                       "NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
+                       ctrl->cnum, ctrl->reconnect_delay);
+               queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
+                               ctrl->reconnect_delay * HZ);
+       } else
+               dev_info(ctrl->ctrl.device,
+                       "NVME-FC{%d}: controller reset complete\n", ctrl->cnum);
+}
+
+/*
+ * called by the nvme core layer, for sysfs interface that requests
+ * a reset of the nvme controller
+ */
  static int
  nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl)
  {
-       return -EIO;
+       struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
+
+       dev_warn(ctrl->ctrl.device,
+               "NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum);
+
+       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
+               return -EBUSY;
+
+       if (!queue_work(nvme_fc_wq, &ctrl->reset_work))
+               return -EBUSY;
+
+       flush_work(&ctrl->reset_work);
+
+       return 0;
  }
  
  static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
@@ -2210,95 +2652,75 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
         .reg_read64             = nvmf_reg_read64,
         .reg_write32            = nvmf_reg_write32,
         .reset_ctrl             = nvme_fc_reset_nvme_ctrl,
-       .free_ctrl              = nvme_fc_free_nvme_ctrl,
+       .free_ctrl              = nvme_fc_nvme_ctrl_freed,
         .submit_async_event     = nvme_fc_submit_async_event,
         .delete_ctrl            = nvme_fc_del_nvme_ctrl,
         .get_subsysnqn          = nvmf_get_subsysnqn,
         .get_address            = nvmf_get_address,
  };
  
-static int
-nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
+static void
+nvme_fc_connect_ctrl_work(struct work_struct *work)
  {
-       struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
         int ret;
  
-       ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
-       if (ret) {
-               dev_info(ctrl->ctrl.device,
-                       "set_queue_count failed: %d\n", ret);
-               return ret;
-       }
-
-       ctrl->queue_count = opts->nr_io_queues + 1;
-       if (!opts->nr_io_queues)
-               return 0;
-
-       dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n",
-                       opts->nr_io_queues);
-
-       nvme_fc_init_io_queues(ctrl);
-
-       memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
-       ctrl->tag_set.ops = &nvme_fc_mq_ops;
-       ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
-       ctrl->tag_set.reserved_tags = 1; /* fabric connect */
-       ctrl->tag_set.numa_node = NUMA_NO_NODE;
-       ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
-       ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
-                                       (SG_CHUNK_SIZE *
-                                               sizeof(struct scatterlist)) +
-                                       ctrl->lport->ops->fcprqst_priv_sz;
-       ctrl->tag_set.driver_data = ctrl;
-       ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1;
-       ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
-
-       ret = blk_mq_alloc_tag_set(&ctrl->tag_set);
-       if (ret)
-               return ret;
-
-       ctrl->ctrl.tagset = &ctrl->tag_set;
-
-       ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
-       if (IS_ERR(ctrl->ctrl.connect_q)) {
-               ret = PTR_ERR(ctrl->ctrl.connect_q);
-               goto out_free_tag_set;
-       }
-
-       ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
-       if (ret)
-               goto out_cleanup_blk_queue;
+       struct nvme_fc_ctrl *ctrl =
+                       container_of(to_delayed_work(work),
+                               struct nvme_fc_ctrl, connect_work);
  
-       ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
-       if (ret)
-               goto out_delete_hw_queues;
+       ret = nvme_fc_create_association(ctrl);
+       if (ret) {
+               dev_warn(ctrl->ctrl.device,
+                       "NVME-FC{%d}: Reconnect attempt failed (%d)\n",
+                       ctrl->cnum, ret);
+               if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) {
+                       dev_warn(ctrl->ctrl.device,
+                               "NVME-FC{%d}: Max reconnect attempts (%d) "
+                               "reached. Removing controller\n",
+                               ctrl->cnum, ctrl->connect_attempts);
+
+                       if (!nvme_change_ctrl_state(&ctrl->ctrl,
+                               NVME_CTRL_DELETING)) {
+                               dev_err(ctrl->ctrl.device,
+                                       "NVME-FC{%d}: failed to change state "
+                                       "to DELETING\n", ctrl->cnum);
+                               return;
+                       }
  
-       return 0;
+                       WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work));
+                       return;
+               }
  
-out_delete_hw_queues:
-       nvme_fc_delete_hw_io_queues(ctrl);
-out_cleanup_blk_queue:
-       nvme_stop_keep_alive(&ctrl->ctrl);
-       blk_cleanup_queue(ctrl->ctrl.connect_q);
-out_free_tag_set:
-       blk_mq_free_tag_set(&ctrl->tag_set);
-       nvme_fc_free_io_queues(ctrl);
+               dev_warn(ctrl->ctrl.device,
+                       "NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
+                       ctrl->cnum, ctrl->reconnect_delay);
+               queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
+                               ctrl->reconnect_delay * HZ);
+       } else
+               dev_info(ctrl->ctrl.device,
+                       "NVME-FC{%d}: controller reconnect complete\n",
+                       ctrl->cnum);
+}
  
-       /* force put free routine to ignore io queues */
-       ctrl->ctrl.tagset = NULL;
  
-       return ret;
-}
+static const struct blk_mq_ops nvme_fc_admin_mq_ops = {
+       .queue_rq       = nvme_fc_queue_rq,
+       .complete       = nvme_fc_complete_rq,
+       .init_request   = nvme_fc_init_admin_request,
+       .exit_request   = nvme_fc_exit_request,
+       .reinit_request = nvme_fc_reinit_request,
+       .init_hctx      = nvme_fc_init_admin_hctx,
+       .timeout        = nvme_fc_timeout,
+};
  
  
  static struct nvme_ctrl *
-__nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
+nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
         struct nvme_fc_lport *lport, struct nvme_fc_rport *rport)
  {
         struct nvme_fc_ctrl *ctrl;
         unsigned long flags;
         int ret, idx;
-       bool changed;
  
         ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
         if (!ctrl) {
@@ -2314,21 +2736,18 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
  
         ctrl->ctrl.opts = opts;
         INIT_LIST_HEAD(&ctrl->ctrl_list);
-       INIT_LIST_HEAD(&ctrl->ls_req_list);
         ctrl->lport = lport;
         ctrl->rport = rport;
         ctrl->dev = lport->dev;
-       ctrl->state = FCCTRL_INIT;
         ctrl->cnum = idx;
  
-       ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0);
-       if (ret)
-               goto out_free_ida;
-
         get_device(ctrl->dev);
         kref_init(&ctrl->ref);
  
-       INIT_WORK(&ctrl->delete_work, nvme_fc_del_ctrl_work);
+       INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work);
+       INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work);
+       INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
+       ctrl->reconnect_delay = opts->reconnect_delay;
         spin_lock_init(&ctrl->lock);
  
         /* io queue count */
@@ -2345,87 +2764,87 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
         ctrl->queues = kcalloc(ctrl->queue_count, sizeof(struct nvme_fc_queue),
                                 GFP_KERNEL);
         if (!ctrl->queues)
-               goto out_uninit_ctrl;
-
-       ret = nvme_fc_configure_admin_queue(ctrl);
-       if (ret)
-               goto out_uninit_ctrl;
-
-       /* sanity checks */
-
-       /* FC-NVME does not have other data in the capsule */
-       if (ctrl->ctrl.icdoff) {
-               dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n",
-                               ctrl->ctrl.icdoff);
-               goto out_remove_admin_queue;
-       }
-
-       /* FC-NVME supports normal SGL Data Block Descriptors */
+               goto out_free_ida;
  
-       if (opts->queue_size > ctrl->ctrl.maxcmd) {
-               /* warn if maxcmd is lower than queue_size */
-               dev_warn(ctrl->ctrl.device,
-                       "queue_size %zu > ctrl maxcmd %u, reducing "
-                       "to queue_size\n",
-                       opts->queue_size, ctrl->ctrl.maxcmd);
-               opts->queue_size = ctrl->ctrl.maxcmd;
-       }
+       memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
+       ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
+       ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH;
+       ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */
+       ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
+       ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
+                                       (SG_CHUNK_SIZE *
+                                               sizeof(struct scatterlist)) +
+                                       ctrl->lport->ops->fcprqst_priv_sz;
+       ctrl->admin_tag_set.driver_data = ctrl;
+       ctrl->admin_tag_set.nr_hw_queues = 1;
+       ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
  
-       ret = nvme_fc_init_aen_ops(ctrl);
+       ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
         if (ret)
-               goto out_exit_aen_ops;
+               goto out_free_queues;
  
-       if (ctrl->queue_count > 1) {
-               ret = nvme_fc_create_io_queues(ctrl);
-               if (ret)
-                       goto out_exit_aen_ops;
+       ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
+       if (IS_ERR(ctrl->ctrl.admin_q)) {
+               ret = PTR_ERR(ctrl->ctrl.admin_q);
+               goto out_free_admin_tag_set;
         }
  
-       spin_lock_irqsave(&ctrl->lock, flags);
-       ctrl->state = FCCTRL_ACTIVE;
-       spin_unlock_irqrestore(&ctrl->lock, flags);
-
-       changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
-       WARN_ON_ONCE(!changed);
+       /*
+        * Would have been nice to init io queues tag set as well.
+        * However, we require interaction from the controller
+        * for max io queue count before we can do so.
+        * Defer this to the connect path.
+        */
  
-       dev_info(ctrl->ctrl.device,
-               "NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
-               ctrl->cnum, ctrl->ctrl.opts->subsysnqn);
+       ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0);
+       if (ret)
+               goto out_cleanup_admin_q;
  
-       kref_get(&ctrl->ctrl.kref);
+       /* at this point, teardown path changes to ref counting on nvme ctrl */
  
         spin_lock_irqsave(&rport->lock, flags);
         list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list);
         spin_unlock_irqrestore(&rport->lock, flags);
  
-       if (opts->nr_io_queues) {
-               nvme_queue_scan(&ctrl->ctrl);
-               nvme_queue_async_events(&ctrl->ctrl);
+       ret = nvme_fc_create_association(ctrl);
+       if (ret) {
+               ctrl->ctrl.opts = NULL;
+               /* initiate nvme ctrl ref counting teardown */
+               nvme_uninit_ctrl(&ctrl->ctrl);
+               nvme_put_ctrl(&ctrl->ctrl);
+
+               /* as we're past the point where we transition to the ref
+                * counting teardown path, if we return a bad pointer here,
+                * the calling routine, thinking it's prior to the
+                * transition, will do an rport put. Since the teardown
+                * path also does a rport put, we do an extra get here
+                * so proper order/teardown happens.
+                */
+               nvme_fc_rport_get(rport);
+
+               if (ret > 0)
+                       ret = -EIO;
+               return ERR_PTR(ret);
         }
  
-       return &ctrl->ctrl;
+       dev_info(ctrl->ctrl.device,
+               "NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
+               ctrl->cnum, ctrl->ctrl.opts->subsysnqn);
  
-out_exit_aen_ops:
-       nvme_fc_exit_aen_ops(ctrl);
-out_remove_admin_queue:
-       /* send a Disconnect(association) LS to fc-nvme target */
-       nvme_fc_xmt_disconnect_assoc(ctrl);
-       nvme_stop_keep_alive(&ctrl->ctrl);
-       nvme_fc_destroy_admin_queue(ctrl);
-out_uninit_ctrl:
-       nvme_uninit_ctrl(&ctrl->ctrl);
-       nvme_put_ctrl(&ctrl->ctrl);
-       if (ret > 0)
-               ret = -EIO;
-       /* exit via here will follow ctlr ref point callbacks to free */
-       return ERR_PTR(ret);
+       return &ctrl->ctrl;
  
+out_cleanup_admin_q:
+       blk_cleanup_queue(ctrl->ctrl.admin_q);
+out_free_admin_tag_set:
+       blk_mq_free_tag_set(&ctrl->admin_tag_set);
+out_free_queues:
+       kfree(ctrl->queues);
  out_free_ida:
+       put_device(ctrl->dev);
         ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum);
  out_free_ctrl:
         kfree(ctrl);
  out_fail:
-       nvme_fc_rport_put(rport);
         /* exit via here doesn't follow ctlr ref points */
         return ERR_PTR(ret);
  }
@@ -2497,6 +2916,7 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts)
  {
         struct nvme_fc_lport *lport;
         struct nvme_fc_rport *rport;
+       struct nvme_ctrl *ctrl;
         struct nvmet_fc_traddr laddr = { 0L, 0L };
         struct nvmet_fc_traddr raddr = { 0L, 0L };
         unsigned long flags;
@@ -2528,7 +2948,10 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts)
  
                         spin_unlock_irqrestore(&nvme_fc_lock, flags);
  
-                       return __nvme_fc_create_ctrl(dev, opts, lport, rport);
+                       ctrl = nvme_fc_init_ctrl(dev, opts, lport, rport);
+                       if (IS_ERR(ctrl))
+                               nvme_fc_rport_put(rport);
+                       return ctrl;
                 }
         }
         spin_unlock_irqrestore(&nvme_fc_lock, flags);
@@ -2546,11 +2969,20 @@ static struct nvmf_transport_ops nvme_fc_transport = {
  
  static int __init nvme_fc_init_module(void)
  {
+       int ret;
+
         nvme_fc_wq = create_workqueue("nvme_fc_wq");
         if (!nvme_fc_wq)
                 return -ENOMEM;
  
-       return nvmf_register_transport(&nvme_fc_transport);
+       ret = nvmf_register_transport(&nvme_fc_transport);
+       if (ret)
+               goto err;
+
+       return 0;
+err:
+       destroy_workqueue(nvme_fc_wq);
+       return ret;
  }
  
  static void __exit nvme_fc_exit_module(void)
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c

index 21cac8523bd8e335434ba4bdf73c61e8cdb34f9d..e4e4e60b1224f770ab89d0a4f0cde4e86d6eea42 100644 (file)
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -241,9 +241,9 @@ static inline void _nvme_nvm_check_size(void)
         BUILD_BUG_ON(sizeof(struct nvme_nvm_l2ptbl) != 64);
         BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
         BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
-       BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 128);
+       BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16);
         BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096);
-       BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 512);
+       BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64);
  }
  
  static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
@@ -324,7 +324,7 @@ static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id)
         nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap);
         nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom);
         memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf,
-                                       sizeof(struct nvme_nvm_addr_format));
+                                       sizeof(struct nvm_addr_format));
  
         ret = init_grps(nvm_id, nvme_nvm_id);
  out:
@@ -483,8 +483,8 @@ static void nvme_nvm_end_io(struct request *rq, int error)
  {
         struct nvm_rq *rqd = rq->end_io_data;
  
-       rqd->ppa_status = nvme_req(rq)->result.u64;
-       rqd->error = error;
+       rqd->ppa_status = le64_to_cpu(nvme_req(rq)->result.u64);
+       rqd->error = nvme_req(rq)->status;
         nvm_end_io(rqd);
  
         kfree(nvme_req(rq)->cmd);
@@ -510,12 +510,12 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
         }
         rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
  
-       rq->ioprio = bio_prio(bio);
-       if (bio_has_data(bio))
-               rq->nr_phys_segments = bio_phys_segments(q, bio);
-
-       rq->__data_len = bio->bi_iter.bi_size;
-       rq->bio = rq->biotail = bio;
+       if (bio) {
+               blk_init_request_from_bio(rq, bio);
+       } else {
+               rq->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
+               rq->__data_len = 0;
+       }
  
         nvme_nvm_rqtocmd(rq, rqd, ns, cmd);
  
@@ -526,21 +526,6 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
         return 0;
  }
  
-static int nvme_nvm_erase_block(struct nvm_dev *dev, struct nvm_rq *rqd)
-{
-       struct request_queue *q = dev->q;
-       struct nvme_ns *ns = q->queuedata;
-       struct nvme_nvm_command c = {};
-
-       c.erase.opcode = NVM_OP_ERASE;
-       c.erase.nsid = cpu_to_le32(ns->ns_id);
-       c.erase.spba = cpu_to_le64(rqd->ppa_addr.ppa);
-       c.erase.length = cpu_to_le16(rqd->nr_ppas - 1);
-       c.erase.control = cpu_to_le16(rqd->flags);
-
-       return nvme_submit_sync_cmd(q, (struct nvme_command *)&c, NULL, 0);
-}
-
  static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name)
  {
         struct nvme_ns *ns = nvmdev->q->queuedata;
@@ -576,7 +561,6 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
         .set_bb_tbl             = nvme_nvm_set_bb_tbl,
  
         .submit_io              = nvme_nvm_submit_io,
-       .erase_block            = nvme_nvm_erase_block,
  
         .create_dma_pool        = nvme_nvm_create_dma_pool,
         .destroy_dma_pool       = nvme_nvm_destroy_dma_pool,
@@ -611,7 +595,7 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q,
         __le64 *metadata = NULL;
         dma_addr_t metadata_dma;
         DECLARE_COMPLETION_ONSTACK(wait);
-       int ret;
+       int ret = 0;
  
         rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0,
                         NVME_QID_ANY);
@@ -681,9 +665,12 @@ submit:
  
         wait_for_completion_io(&wait);
  
-       ret = nvme_error_status(rq->errors);
+       if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
+               ret = -EINTR;
+       else if (nvme_req(rq)->status & 0x7ff)
+               ret = -EIO;
         if (result)
-               *result = rq->errors & 0x7ff;
+               *result = nvme_req(rq)->status & 0x7ff;
         if (status)
                 *status = le64_to_cpu(nvme_req(rq)->result.u64);
  
@@ -766,7 +753,7 @@ static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin,
         c.common.cdw2[1] = cpu_to_le32(vcmd.cdw3);
         /* cdw11-12 */
         c.ph_rw.length = cpu_to_le16(vcmd.nppas);
-       c.ph_rw.control  = cpu_to_le32(vcmd.control);
+       c.ph_rw.control  = cpu_to_le16(vcmd.control);
         c.common.cdw10[3] = cpu_to_le32(vcmd.cdw13);
         c.common.cdw10[4] = cpu_to_le32(vcmd.cdw14);
         c.common.cdw10[5] = cpu_to_le32(vcmd.cdw15);
@@ -809,6 +796,8 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
         struct request_queue *q = ns->queue;
         struct nvm_dev *dev;
  
+       _nvme_nvm_check_size();
+
         dev = nvm_alloc_dev(node);
         if (!dev)
                 return -ENOMEM;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h

index 2aa20e3e5675bf14a8aaf8784bafc344b970a052..29c708ca9621c4622ab3f32d153c02546e9d0ca6 100644 (file)
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -21,16 +21,6 @@
  #include <linux/lightnvm.h>
  #include <linux/sed-opal.h>
  
-enum {
-       /*
-        * Driver internal status code for commands that were cancelled due
-        * to timeouts or controller shutdown.  The value is negative so
-        * that it a) doesn't overlap with the unsigned hardware error codes,
-        * and b) can easily be tested for.
-        */
-       NVME_SC_CANCELLED               = -EINTR,
-};
-
  extern unsigned char nvme_io_timeout;
  #define NVME_IO_TIMEOUT        (nvme_io_timeout * HZ)
  
@@ -43,8 +33,6 @@ extern unsigned char shutdown_timeout;
  #define NVME_DEFAULT_KATO      5
  #define NVME_KATO_GRACE                10
  
-extern unsigned int nvme_max_retries;
-
  enum {
         NVME_NS_LBA             = 0,
         NVME_NS_LIGHTNVM        = 1,
@@ -68,10 +56,10 @@ enum nvme_quirks {
         NVME_QUIRK_IDENTIFY_CNS                 = (1 << 1),
  
         /*
-        * The controller deterministically returns O's on reads to discarded
-        * logical blocks.
+        * The controller deterministically returns zeroes on reads to
+        * logical blocks that deallocate was called on.
          */
-       NVME_QUIRK_DISCARD_ZEROES               = (1 << 2),
+       NVME_QUIRK_DEALLOCATE_ZEROES            = (1 << 2),
  
         /*
          * The controller needs a delay before starts checking the device
@@ -83,6 +71,11 @@ enum nvme_quirks {
          * APST should not be used.
          */
         NVME_QUIRK_NO_APST                      = (1 << 4),
+
+       /*
+        * The deepest sleep state should not be used.
+        */
+       NVME_QUIRK_NO_DEEPEST_PS                = (1 << 5),
  };
  
  /*
@@ -92,6 +85,13 @@ enum nvme_quirks {
  struct nvme_request {
         struct nvme_command     *cmd;
         union nvme_result       result;
+       u8                      retries;
+       u8                      flags;
+       u16                     status;
+};
+
+enum {
+       NVME_REQ_CANCELLED              = (1 << 0),
  };
  
  static inline struct nvme_request *nvme_req(struct request *req)
@@ -249,25 +249,17 @@ static inline void nvme_cleanup_cmd(struct request *req)
         }
  }
  
-static inline int nvme_error_status(u16 status)
+static inline void nvme_end_request(struct request *req, __le16 status,
+               union nvme_result result)
  {
-       switch (status & 0x7ff) {
-       case NVME_SC_SUCCESS:
-               return 0;
-       case NVME_SC_CAP_EXCEEDED:
-               return -ENOSPC;
-       default:
-               return -EIO;
-       }
-}
+       struct nvme_request *rq = nvme_req(req);
  
-static inline bool nvme_req_needs_retry(struct request *req, u16 status)
-{
-       return !(status & NVME_SC_DNR || blk_noretry_request(req)) &&
-               (jiffies - req->start_time) < req->timeout &&
-               req->retries < nvme_max_retries;
+       rq->status = le16_to_cpu(status) >> 1;
+       rq->result = result;
+       blk_mq_complete_request(req);
  }
  
+void nvme_complete_rq(struct request *req);
  void nvme_cancel_request(struct request *req, void *data, bool reserved);
  bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
                 enum nvme_ctrl_state new_state);
@@ -302,7 +294,6 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl);
  #define NVME_QID_ANY -1
  struct request *nvme_alloc_request(struct request_queue *q,
                 struct nvme_command *cmd, unsigned int flags, int qid);
-void nvme_requeue_req(struct request *req);
  int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
                 struct nvme_command *cmd);
  int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c

index 26a5fd05fe88aa003a00dc4ece6e9900bd95e618..c8541c3dcd19dbbf16e1f9b687a5a30fad0eb6d8 100644 (file)
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -19,6 +19,7 @@
  #include <linux/blk-mq-pci.h>
  #include <linux/cpu.h>
  #include <linux/delay.h>
+#include <linux/dmi.h>
  #include <linux/errno.h>
  #include <linux/fs.h>
  #include <linux/genhd.h>
@@ -103,8 +104,22 @@ struct nvme_dev {
         u32 cmbloc;
         struct nvme_ctrl ctrl;
         struct completion ioq_wait;
+       u32 *dbbuf_dbs;
+       dma_addr_t dbbuf_dbs_dma_addr;
+       u32 *dbbuf_eis;
+       dma_addr_t dbbuf_eis_dma_addr;
  };
  
+static inline unsigned int sq_idx(unsigned int qid, u32 stride)
+{
+       return qid * 2 * stride;
+}
+
+static inline unsigned int cq_idx(unsigned int qid, u32 stride)
+{
+       return (qid * 2 + 1) * stride;
+}
+
  static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
  {
         return container_of(ctrl, struct nvme_dev, ctrl);
@@ -133,6 +148,10 @@ struct nvme_queue {
         u16 qid;
         u8 cq_phase;
         u8 cqe_seen;
+       u32 *dbbuf_sq_db;
+       u32 *dbbuf_cq_db;
+       u32 *dbbuf_sq_ei;
+       u32 *dbbuf_cq_ei;
  };
  
  /*
@@ -171,6 +190,112 @@ static inline void _nvme_check_size(void)
         BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096);
         BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
         BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
+       BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
+}
+
+static inline unsigned int nvme_dbbuf_size(u32 stride)
+{
+       return ((num_possible_cpus() + 1) * 8 * stride);
+}
+
+static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev)
+{
+       unsigned int mem_size = nvme_dbbuf_size(dev->db_stride);
+
+       if (dev->dbbuf_dbs)
+               return 0;
+
+       dev->dbbuf_dbs = dma_alloc_coherent(dev->dev, mem_size,
+                                           &dev->dbbuf_dbs_dma_addr,
+                                           GFP_KERNEL);
+       if (!dev->dbbuf_dbs)
+               return -ENOMEM;
+       dev->dbbuf_eis = dma_alloc_coherent(dev->dev, mem_size,
+                                           &dev->dbbuf_eis_dma_addr,
+                                           GFP_KERNEL);
+       if (!dev->dbbuf_eis) {
+               dma_free_coherent(dev->dev, mem_size,
+                                 dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr);
+               dev->dbbuf_dbs = NULL;
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+static void nvme_dbbuf_dma_free(struct nvme_dev *dev)
+{
+       unsigned int mem_size = nvme_dbbuf_size(dev->db_stride);
+
+       if (dev->dbbuf_dbs) {
+               dma_free_coherent(dev->dev, mem_size,
+                                 dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr);
+               dev->dbbuf_dbs = NULL;
+       }
+       if (dev->dbbuf_eis) {
+               dma_free_coherent(dev->dev, mem_size,
+                                 dev->dbbuf_eis, dev->dbbuf_eis_dma_addr);
+               dev->dbbuf_eis = NULL;
+       }
+}
+
+static void nvme_dbbuf_init(struct nvme_dev *dev,
+                           struct nvme_queue *nvmeq, int qid)
+{
+       if (!dev->dbbuf_dbs || !qid)
+               return;
+
+       nvmeq->dbbuf_sq_db = &dev->dbbuf_dbs[sq_idx(qid, dev->db_stride)];
+       nvmeq->dbbuf_cq_db = &dev->dbbuf_dbs[cq_idx(qid, dev->db_stride)];
+       nvmeq->dbbuf_sq_ei = &dev->dbbuf_eis[sq_idx(qid, dev->db_stride)];
+       nvmeq->dbbuf_cq_ei = &dev->dbbuf_eis[cq_idx(qid, dev->db_stride)];
+}
+
+static void nvme_dbbuf_set(struct nvme_dev *dev)
+{
+       struct nvme_command c;
+
+       if (!dev->dbbuf_dbs)
+               return;
+
+       memset(&c, 0, sizeof(c));
+       c.dbbuf.opcode = nvme_admin_dbbuf;
+       c.dbbuf.prp1 = cpu_to_le64(dev->dbbuf_dbs_dma_addr);
+       c.dbbuf.prp2 = cpu_to_le64(dev->dbbuf_eis_dma_addr);
+
+       if (nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0)) {
+               dev_warn(dev->dev, "unable to set dbbuf\n");
+               /* Free memory and continue on */
+               nvme_dbbuf_dma_free(dev);
+       }
+}
+
+static inline int nvme_dbbuf_need_event(u16 event_idx, u16 new_idx, u16 old)
+{
+       return (u16)(new_idx - event_idx - 1) < (u16)(new_idx - old);
+}
+
+/* Update dbbuf and return true if an MMIO is required */
+static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db,
+                                             volatile u32 *dbbuf_ei)
+{
+       if (dbbuf_db) {
+               u16 old_value;
+
+               /*
+                * Ensure that the queue is written before updating
+                * the doorbell in memory
+                */
+               wmb();
+
+               old_value = *dbbuf_db;
+               *dbbuf_db = value;
+
+               if (!nvme_dbbuf_need_event(*dbbuf_ei, value, old_value))
+                       return false;
+       }
+
+       return true;
  }
  
  /*
@@ -297,7 +422,9 @@ static void __nvme_submit_cmd(struct nvme_queue *nvmeq,
  
         if (++tail == nvmeq->q_depth)
                 tail = 0;
-       writel(tail, nvmeq->q_db);
+       if (nvme_dbbuf_update_and_check_event(tail, nvmeq->dbbuf_sq_db,
+                                             nvmeq->dbbuf_sq_ei))
+               writel(tail, nvmeq->q_db);
         nvmeq->sq_tail = tail;
  }
  
@@ -326,10 +453,6 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
         iod->nents = 0;
         iod->length = size;
  
-       if (!(rq->rq_flags & RQF_DONTPREP)) {
-               rq->retries = 0;
-               rq->rq_flags |= RQF_DONTPREP;
-       }
         return BLK_MQ_RQ_QUEUE_OK;
  }
  
@@ -628,34 +751,12 @@ out_free_cmd:
         return ret;
  }
  
-static void nvme_complete_rq(struct request *req)
+static void nvme_pci_complete_rq(struct request *req)
  {
         struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-       struct nvme_dev *dev = iod->nvmeq->dev;
-       int error = 0;
-
-       nvme_unmap_data(dev, req);
  
-       if (unlikely(req->errors)) {
-               if (nvme_req_needs_retry(req, req->errors)) {
-                       req->retries++;
-                       nvme_requeue_req(req);
-                       return;
-               }
-
-               if (blk_rq_is_passthrough(req))
-                       error = req->errors;
-               else
-                       error = nvme_error_status(req->errors);
-       }
-
-       if (unlikely(iod->aborted)) {
-               dev_warn(dev->ctrl.device,
-                       "completing aborted command with status: %04x\n",
-                       req->errors);
-       }
-
-       blk_mq_end_request(req, error);
+       nvme_unmap_data(iod->nvmeq->dev, req);
+       nvme_complete_rq(req);
  }
  
  /* We read the CQE phase first to check if the rest of the entry is valid */
@@ -705,15 +806,16 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
                 }
  
                 req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
-               nvme_req(req)->result = cqe.result;
-               blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1);
+               nvme_end_request(req, cqe.status, cqe.result);
         }
  
         if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
                 return;
  
         if (likely(nvmeq->cq_vector >= 0))
-               writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
+               if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
+                                                     nvmeq->dbbuf_cq_ei))
+                       writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
         nvmeq->cq_head = head;
         nvmeq->cq_phase = phase;
  
@@ -745,10 +847,8 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
         return IRQ_NONE;
  }
  
-static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
+static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
  {
-       struct nvme_queue *nvmeq = hctx->driver_data;
-
         if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
                 spin_lock_irq(&nvmeq->q_lock);
                 __nvme_process_cq(nvmeq, &tag);
@@ -761,6 +861,13 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
         return 0;
  }
  
+static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
+{
+       struct nvme_queue *nvmeq = hctx->driver_data;
+
+       return __nvme_poll(nvmeq, tag);
+}
+
  static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl, int aer_idx)
  {
         struct nvme_dev *dev = to_nvme_dev(ctrl);
@@ -812,7 +919,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
                                                 struct nvme_queue *nvmeq)
  {
         struct nvme_command c;
-       int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;
+       int flags = NVME_QUEUE_PHYS_CONTIG;
  
         /*
          * Note: we (ab)use the fact the the prp fields survive if no data
@@ -843,9 +950,9 @@ static void abort_endio(struct request *req, int error)
  {
         struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
         struct nvme_queue *nvmeq = iod->nvmeq;
-       u16 status = req->errors;
  
-       dev_warn(nvmeq->dev->ctrl.device, "Abort status: 0x%x", status);
+       dev_warn(nvmeq->dev->ctrl.device,
+                "Abort status: 0x%x", nvme_req(req)->status);
         atomic_inc(&nvmeq->dev->ctrl.abort_limit);
         blk_mq_free_request(req);
  }
@@ -858,6 +965,16 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
         struct request *abort_req;
         struct nvme_command cmd;
  
+       /*
+        * Did we miss an interrupt?
+        */
+       if (__nvme_poll(nvmeq, req->tag)) {
+               dev_warn(dev->ctrl.device,
+                        "I/O %d QID %d timeout, completion polled\n",
+                        req->tag, nvmeq->qid);
+               return BLK_EH_HANDLED;
+       }
+
         /*
          * Shutdown immediately if controller times out while starting. The
          * reset work will see the pci device disabled when it gets the forced
@@ -869,7 +986,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
                          "I/O %d QID %d timeout, disable controller\n",
                          req->tag, nvmeq->qid);
                 nvme_dev_disable(dev, false);
-               req->errors = NVME_SC_CANCELLED;
+               nvme_req(req)->flags |= NVME_REQ_CANCELLED;
                 return BLK_EH_HANDLED;
         }
  
@@ -889,7 +1006,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
                  * Mark the request as handled, since the inline shutdown
                  * forces all outstanding requests to complete.
                  */
-               req->errors = NVME_SC_CANCELLED;
+               nvme_req(req)->flags |= NVME_REQ_CANCELLED;
                 return BLK_EH_HANDLED;
         }
  
@@ -1097,6 +1214,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
         nvmeq->cq_phase = 1;
         nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
         memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));
+       nvme_dbbuf_init(dev, nvmeq, qid);
         dev->online_queues++;
         spin_unlock_irq(&nvmeq->q_lock);
  }
@@ -1129,18 +1247,18 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
         return result;
  }
  
-static struct blk_mq_ops nvme_mq_admin_ops = {
+static const struct blk_mq_ops nvme_mq_admin_ops = {
         .queue_rq       = nvme_queue_rq,
-       .complete       = nvme_complete_rq,
+       .complete       = nvme_pci_complete_rq,
         .init_hctx      = nvme_admin_init_hctx,
         .exit_hctx      = nvme_admin_exit_hctx,
         .init_request   = nvme_admin_init_request,
         .timeout        = nvme_timeout,
  };
  
-static struct blk_mq_ops nvme_mq_ops = {
+static const struct blk_mq_ops nvme_mq_ops = {
         .queue_rq       = nvme_queue_rq,
-       .complete       = nvme_complete_rq,
+       .complete       = nvme_pci_complete_rq,
         .init_hctx      = nvme_init_hctx,
         .init_request   = nvme_init_request,
         .map_queues     = nvme_pci_map_queues,
@@ -1569,6 +1687,8 @@ static int nvme_dev_add(struct nvme_dev *dev)
                 if (blk_mq_alloc_tag_set(&dev->tagset))
                         return 0;
                 dev->ctrl.tagset = &dev->tagset;
+
+               nvme_dbbuf_set(dev);
         } else {
                 blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);
  
@@ -1755,6 +1875,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
  {
         struct nvme_dev *dev = to_nvme_dev(ctrl);
  
+       nvme_dbbuf_dma_free(dev);
         put_device(dev->dev);
         if (dev->tagset.tags)
                 blk_mq_free_tag_set(&dev->tagset);
@@ -1822,6 +1943,13 @@ static void nvme_reset_work(struct work_struct *work)
                 dev->ctrl.opal_dev = NULL;
         }
  
+       if (dev->ctrl.oacs & NVME_CTRL_OACS_DBBUF_SUPP) {
+               result = nvme_dbbuf_dma_alloc(dev);
+               if (result)
+                       dev_warn(dev->dev,
+                                "unable to allocate dma for dbbuf\n");
+       }
+
         result = nvme_setup_io_queues(dev);
         if (result)
                 goto out;
@@ -1943,10 +2071,31 @@ static int nvme_dev_map(struct nvme_dev *dev)
         return -ENODEV;
  }
  
+static unsigned long check_dell_samsung_bug(struct pci_dev *pdev)
+{
+       if (pdev->vendor == 0x144d && pdev->device == 0xa802) {
+               /*
+                * Several Samsung devices seem to drop off the PCIe bus
+                * randomly when APST is on and uses the deepest sleep state.
+                * This has been observed on a Samsung "SM951 NVMe SAMSUNG
+                * 256GB", a "PM951 NVMe SAMSUNG 512GB", and a "Samsung SSD
+                * 950 PRO 256GB", but it seems to be restricted to two Dell
+                * laptops.
+                */
+               if (dmi_match(DMI_SYS_VENDOR, "Dell Inc.") &&
+                   (dmi_match(DMI_PRODUCT_NAME, "XPS 15 9550") ||
+                    dmi_match(DMI_PRODUCT_NAME, "Precision 5510")))
+                       return NVME_QUIRK_NO_DEEPEST_PS;
+       }
+
+       return 0;
+}
+
  static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
  {
         int node, result = -ENOMEM;
         struct nvme_dev *dev;
+       unsigned long quirks = id->driver_data;
  
         node = dev_to_node(&pdev->dev);
         if (node == NUMA_NO_NODE)
@@ -1978,8 +2127,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
         if (result)
                 goto put_pci;
  
+       quirks |= check_dell_samsung_bug(pdev);
+
         result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops,
-                       id->driver_data);
+                       quirks);
         if (result)
                 goto release_pools;
  
@@ -2135,13 +2286,13 @@ static const struct pci_error_handlers nvme_err_handler = {
  static const struct pci_device_id nvme_id_table[] = {
         { PCI_VDEVICE(INTEL, 0x0953),
                 .driver_data = NVME_QUIRK_STRIPE_SIZE |
-                               NVME_QUIRK_DISCARD_ZEROES, },
+                               NVME_QUIRK_DEALLOCATE_ZEROES, },
         { PCI_VDEVICE(INTEL, 0x0a53),
                 .driver_data = NVME_QUIRK_STRIPE_SIZE |
-                               NVME_QUIRK_DISCARD_ZEROES, },
+                               NVME_QUIRK_DEALLOCATE_ZEROES, },
         { PCI_VDEVICE(INTEL, 0x0a54),
                 .driver_data = NVME_QUIRK_STRIPE_SIZE |
-                               NVME_QUIRK_DISCARD_ZEROES, },
+                               NVME_QUIRK_DEALLOCATE_ZEROES, },
         { PCI_VDEVICE(INTEL, 0x5845),   /* Qemu emulated controller */
                 .driver_data = NVME_QUIRK_IDENTIFY_CNS, },
         { PCI_DEVICE(0x1c58, 0x0003),   /* HGST adapter */
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c

index 779f516e7a4ec405ff919f9b248d21ff0f748b8b..29cf88ac3f61d65bdb972960ff4bddb2abe69ae5 100644 (file)
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -34,7 +34,7 @@
  #include "fabrics.h"
  
  
-#define NVME_RDMA_CONNECT_TIMEOUT_MS   1000            /* 1 second */
+#define NVME_RDMA_CONNECT_TIMEOUT_MS   3000            /* 3 seconds */
  
  #define NVME_RDMA_MAX_SEGMENT_SIZE     0xffffff        /* 24-bit SGL field */
  
@@ -118,7 +118,6 @@ struct nvme_rdma_ctrl {
  
         struct nvme_rdma_qe     async_event_sqe;
  
-       int                     reconnect_delay;
         struct delayed_work     reconnect_work;
  
         struct list_head        list;
@@ -129,14 +128,8 @@ struct nvme_rdma_ctrl {
         u64                     cap;
         u32                     max_fr_pages;
  
-       union {
-               struct sockaddr addr;
-               struct sockaddr_in addr_in;
-       };
-       union {
-               struct sockaddr src_addr;
-               struct sockaddr_in src_addr_in;
-       };
+       struct sockaddr_storage addr;
+       struct sockaddr_storage src_addr;
  
         struct nvme_ctrl        ctrl;
  };
@@ -343,8 +336,6 @@ static int __nvme_rdma_init_request(struct nvme_rdma_ctrl *ctrl,
         struct ib_device *ibdev = dev->dev;
         int ret;
  
-       BUG_ON(queue_idx >= ctrl->queue_count);
-
         ret = nvme_rdma_alloc_qe(ibdev, &req->sqe, sizeof(struct nvme_command),
                         DMA_TO_DEVICE);
         if (ret)
@@ -571,11 +562,12 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
                 return PTR_ERR(queue->cm_id);
         }
  
-       queue->cm_error = -ETIMEDOUT;
         if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)
-               src_addr = &ctrl->src_addr;
+               src_addr = (struct sockaddr *)&ctrl->src_addr;
  
-       ret = rdma_resolve_addr(queue->cm_id, src_addr, &ctrl->addr,
+       queue->cm_error = -ETIMEDOUT;
+       ret = rdma_resolve_addr(queue->cm_id, src_addr,
+                       (struct sockaddr *)&ctrl->addr,
                         NVME_RDMA_CONNECT_TIMEOUT_MS);
         if (ret) {
                 dev_info(ctrl->ctrl.device,
@@ -652,8 +644,22 @@ out_free_queues:
  
  static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl)
  {
+       struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+       unsigned int nr_io_queues;
         int i, ret;
  
+       nr_io_queues = min(opts->nr_io_queues, num_online_cpus());
+       ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
+       if (ret)
+               return ret;
+
+       ctrl->queue_count = nr_io_queues + 1;
+       if (ctrl->queue_count < 2)
+               return 0;
+
+       dev_info(ctrl->ctrl.device,
+               "creating %d I/O queues.\n", nr_io_queues);
+
         for (i = 1; i < ctrl->queue_count; i++) {
                 ret = nvme_rdma_init_queue(ctrl, i,
                                            ctrl->ctrl.opts->queue_size);
@@ -700,6 +706,26 @@ free_ctrl:
         kfree(ctrl);
  }
  
+static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
+{
+       /* If we are resetting/deleting then do nothing */
+       if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) {
+               WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
+                       ctrl->ctrl.state == NVME_CTRL_LIVE);
+               return;
+       }
+
+       if (nvmf_should_reconnect(&ctrl->ctrl)) {
+               dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
+                       ctrl->ctrl.opts->reconnect_delay);
+               queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work,
+                               ctrl->ctrl.opts->reconnect_delay * HZ);
+       } else {
+               dev_info(ctrl->ctrl.device, "Removing controller...\n");
+               queue_work(nvme_rdma_wq, &ctrl->delete_work);
+       }
+}
+
  static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
  {
         struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
@@ -707,6 +733,8 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
         bool changed;
         int ret;
  
+       ++ctrl->ctrl.opts->nr_reconnects;
+
         if (ctrl->queue_count > 1) {
                 nvme_rdma_free_io_queues(ctrl);
  
@@ -751,6 +779,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
  
         changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
         WARN_ON_ONCE(!changed);
+       ctrl->ctrl.opts->nr_reconnects = 0;
  
         if (ctrl->queue_count > 1) {
                 nvme_start_queues(&ctrl->ctrl);
@@ -765,13 +794,9 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
  stop_admin_q:
         blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
  requeue:
-       /* Make sure we are not resetting/deleting */
-       if (ctrl->ctrl.state == NVME_CTRL_RECONNECTING) {
-               dev_info(ctrl->ctrl.device,
-                       "Failed reconnect attempt, requeueing...\n");
-               queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work,
-                                       ctrl->reconnect_delay * HZ);
-       }
+       dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
+                       ctrl->ctrl.opts->nr_reconnects);
+       nvme_rdma_reconnect_or_remove(ctrl);
  }
  
  static void nvme_rdma_error_recovery_work(struct work_struct *work)
@@ -798,11 +823,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
         blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
                                 nvme_cancel_request, &ctrl->ctrl);
  
-       dev_info(ctrl->ctrl.device, "reconnecting in %d seconds\n",
-               ctrl->reconnect_delay);
-
-       queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work,
-                               ctrl->reconnect_delay * HZ);
+       nvme_rdma_reconnect_or_remove(ctrl);
  }
  
  static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
@@ -1157,8 +1178,7 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
             wc->ex.invalidate_rkey == req->mr->rkey)
                 req->mr->need_inval = false;
  
-       req->req.result = cqe->result;
-       blk_mq_complete_request(rq, le16_to_cpu(cqe->status) >> 1);
+       nvme_end_request(rq, cqe->status, cqe->result);
         return ret;
  }
  
@@ -1395,7 +1415,7 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
         nvme_rdma_error_recovery(req->queue->ctrl);
  
         /* fail with DNR on cmd timeout */
-       rq->errors = NVME_SC_ABORT_REQ | NVME_SC_DNR;
+       nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
  
         return BLK_EH_HANDLED;
  }
@@ -1497,27 +1517,12 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
  static void nvme_rdma_complete_rq(struct request *rq)
  {
         struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
-       struct nvme_rdma_queue *queue = req->queue;
-       int error = 0;
-
-       nvme_rdma_unmap_data(queue, rq);
-
-       if (unlikely(rq->errors)) {
-               if (nvme_req_needs_retry(rq, rq->errors)) {
-                       nvme_requeue_req(rq);
-                       return;
-               }
  
-               if (blk_rq_is_passthrough(rq))
-                       error = rq->errors;
-               else
-                       error = nvme_error_status(rq->errors);
-       }
-
-       blk_mq_end_request(rq, error);
+       nvme_rdma_unmap_data(req->queue, rq);
+       nvme_complete_rq(rq);
  }
  
-static struct blk_mq_ops nvme_rdma_mq_ops = {
+static const struct blk_mq_ops nvme_rdma_mq_ops = {
         .queue_rq       = nvme_rdma_queue_rq,
         .complete       = nvme_rdma_complete_rq,
         .init_request   = nvme_rdma_init_request,
@@ -1528,7 +1533,7 @@ static struct blk_mq_ops nvme_rdma_mq_ops = {
         .timeout        = nvme_rdma_timeout,
  };
  
-static struct blk_mq_ops nvme_rdma_admin_mq_ops = {
+static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
         .queue_rq       = nvme_rdma_queue_rq,
         .complete       = nvme_rdma_complete_rq,
         .init_request   = nvme_rdma_init_admin_request,
@@ -1594,7 +1599,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
         }
  
         ctrl->ctrl.sqsize =
-               min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize);
+               min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize);
  
         error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
         if (error)
@@ -1791,20 +1796,8 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
  
  static int nvme_rdma_create_io_queues(struct nvme_rdma_ctrl *ctrl)
  {
-       struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
         int ret;
  
-       ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
-       if (ret)
-               return ret;
-
-       ctrl->queue_count = opts->nr_io_queues + 1;
-       if (ctrl->queue_count < 2)
-               return 0;
-
-       dev_info(ctrl->ctrl.device,
-               "creating %d I/O queues.\n", opts->nr_io_queues);
-
         ret = nvme_rdma_init_io_queues(ctrl);
         if (ret)
                 return ret;
@@ -1857,27 +1850,13 @@ out_free_io_queues:
         return ret;
  }
  
-static int nvme_rdma_parse_ipaddr(struct sockaddr_in *in_addr, char *p)
-{
-       u8 *addr = (u8 *)&in_addr->sin_addr.s_addr;
-       size_t buflen = strlen(p);
-
-       /* XXX: handle IPv6 addresses */
-
-       if (buflen > INET_ADDRSTRLEN)
-               return -EINVAL;
-       if (in4_pton(p, buflen, addr, '\0', NULL) == 0)
-               return -EINVAL;
-       in_addr->sin_family = AF_INET;
-       return 0;
-}
-
  static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
                 struct nvmf_ctrl_options *opts)
  {
         struct nvme_rdma_ctrl *ctrl;
         int ret;
         bool changed;
+       char *port;
  
         ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
         if (!ctrl)
@@ -1885,40 +1864,33 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
         ctrl->ctrl.opts = opts;
         INIT_LIST_HEAD(&ctrl->list);
  
-       ret = nvme_rdma_parse_ipaddr(&ctrl->addr_in, opts->traddr);
+       if (opts->mask & NVMF_OPT_TRSVCID)
+               port = opts->trsvcid;
+       else
+               port = __stringify(NVME_RDMA_IP_PORT);
+
+       ret = inet_pton_with_scope(&init_net, AF_UNSPEC,
+                       opts->traddr, port, &ctrl->addr);
         if (ret) {
-               pr_err("malformed IP address passed: %s\n", opts->traddr);
+               pr_err("malformed address passed: %s:%s\n", opts->traddr, port);
                 goto out_free_ctrl;
         }
  
         if (opts->mask & NVMF_OPT_HOST_TRADDR) {
-               ret = nvme_rdma_parse_ipaddr(&ctrl->src_addr_in,
-                               opts->host_traddr);
+               ret = inet_pton_with_scope(&init_net, AF_UNSPEC,
+                       opts->host_traddr, NULL, &ctrl->src_addr);
                 if (ret) {
-                       pr_err("malformed src IP address passed: %s\n",
+                       pr_err("malformed src address passed: %s\n",
                                opts->host_traddr);
                         goto out_free_ctrl;
                 }
         }
  
-       if (opts->mask & NVMF_OPT_TRSVCID) {
-               u16 port;
-
-               ret = kstrtou16(opts->trsvcid, 0, &port);
-               if (ret)
-                       goto out_free_ctrl;
-
-               ctrl->addr_in.sin_port = cpu_to_be16(port);
-       } else {
-               ctrl->addr_in.sin_port = cpu_to_be16(NVME_RDMA_IP_PORT);
-       }
-
         ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops,
                                 0 /* no quirks, we're perfect! */);
         if (ret)
                 goto out_free_ctrl;
  
-       ctrl->reconnect_delay = opts->reconnect_delay;
         INIT_DELAYED_WORK(&ctrl->reconnect_work,
                         nvme_rdma_reconnect_ctrl_work);
         INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
@@ -1977,7 +1949,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
         changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
         WARN_ON_ONCE(!changed);
  
-       dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n",
+       dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n",
                 ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
  
         kref_get(&ctrl->ctrl.kref);
@@ -2013,7 +1985,7 @@ static struct nvmf_transport_ops nvme_rdma_transport = {
         .name           = "rdma",
         .required_opts  = NVMF_OPT_TRADDR,
         .allowed_opts   = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
-                         NVMF_OPT_HOST_TRADDR,
+                         NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO,
         .create_ctrl    = nvme_rdma_create_ctrl,
  };
  
@@ -2055,12 +2027,20 @@ static int __init nvme_rdma_init_module(void)
                 return -ENOMEM;
  
         ret = ib_register_client(&nvme_rdma_ib_client);
-       if (ret) {
-               destroy_workqueue(nvme_rdma_wq);
-               return ret;
-       }
+       if (ret)
+               goto err_destroy_wq;
+
+       ret = nvmf_register_transport(&nvme_rdma_transport);
+       if (ret)
+               goto err_unreg_client;
+
+       return 0;
  
-       return nvmf_register_transport(&nvme_rdma_transport);
+err_unreg_client:
+       ib_unregister_client(&nvme_rdma_ib_client);
+err_destroy_wq:
+       destroy_workqueue(nvme_rdma_wq);
+       return ret;
  }
  
  static void __exit nvme_rdma_cleanup_module(void)
diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c

index f49ae2758bb70d367edf057309b2a5f784fb1c20..1f7671e631dd0304eb6727710740b45e0dac7971 100644 (file)
--- a/drivers/nvme/host/scsi.c
+++ b/drivers/nvme/host/scsi.c
@@ -1609,7 +1609,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
         struct nvme_command c;
         u8 opcode = (is_write ? nvme_cmd_write : nvme_cmd_read);
         u16 control;
-       u32 max_blocks = queue_max_hw_sectors(ns->queue);
+       u32 max_blocks = queue_max_hw_sectors(ns->queue) >> (ns->lba_shift - 9);
  
         num_cmds = nvme_trans_io_get_num_cmds(hdr, cdb_info, max_blocks);
  
@@ -2138,15 +2138,6 @@ static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
         return res;
  }
  
-static int nvme_trans_security_protocol(struct nvme_ns *ns,
-                                       struct sg_io_hdr *hdr,
-                                       u8 *cmd)
-{
-       return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-                               ILLEGAL_REQUEST, SCSI_ASC_ILLEGAL_COMMAND,
-                               SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-}
-
  static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
                                         struct sg_io_hdr *hdr)
  {
@@ -2414,10 +2405,6 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr)
         case REQUEST_SENSE:
                 retcode = nvme_trans_request_sense(ns, hdr, cmd);
                 break;
-       case SECURITY_PROTOCOL_IN:
-       case SECURITY_PROTOCOL_OUT:
-               retcode = nvme_trans_security_protocol(ns, hdr, cmd);
-               break;
         case SYNCHRONIZE_CACHE:
                 retcode = nvme_trans_synchronize_cache(ns, hdr);
                 break;
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c

index a7bcff45f4376d3b0c375bc1df6af3d2ca5c8c2e..ff1f97006322bbd7166fb1ba74c60e527dcf3cfb 100644 (file)
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -100,7 +100,7 @@ static u16 nvmet_get_smart_log(struct nvmet_req *req,
         u16 status;
  
         WARN_ON(req == NULL || slog == NULL);
-       if (req->cmd->get_log_page.nsid == 0xFFFFFFFF)
+       if (req->cmd->get_log_page.nsid == cpu_to_le32(0xFFFFFFFF))
                 status = nvmet_get_smart_log_all(req, slog);
         else
                 status = nvmet_get_smart_log_nsid(req, slog);
@@ -121,7 +121,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
         }
  
         switch (req->cmd->get_log_page.lid) {
-       case 0x01:
+       case NVME_LOG_ERROR:
                 /*
                  * We currently never set the More bit in the status field,
                  * so all error log entries are invalid and can be zeroed out.
@@ -129,7 +129,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
                  * mandatory log page.
                  */
                 break;
-       case 0x02:
+       case NVME_LOG_SMART:
                 /*
                  * XXX: fill out actual smart log
                  *
@@ -149,7 +149,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
                         goto err;
                 }
                 break;
-       case 0x03:
+       case NVME_LOG_FW_SLOT:
                 /*
                  * We only support a single firmware slot which always is
                  * active, so we can zero out the whole firmware slot log and
@@ -480,31 +480,25 @@ static void nvmet_execute_keep_alive(struct nvmet_req *req)
         nvmet_req_complete(req, 0);
  }
  
-int nvmet_parse_admin_cmd(struct nvmet_req *req)
+u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
  {
         struct nvme_command *cmd = req->cmd;
+       u16 ret;
  
         req->ns = NULL;
  
-       if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
-               pr_err("nvmet: got admin cmd %d while CC.EN == 0\n",
-                               cmd->common.opcode);
-               return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
-       }
-       if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
-               pr_err("nvmet: got admin cmd %d while CSTS.RDY == 0\n",
-                               cmd->common.opcode);
-               return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
-       }
+       ret = nvmet_check_ctrl_status(req, cmd);
+       if (unlikely(ret))
+               return ret;
  
         switch (cmd->common.opcode) {
         case nvme_admin_get_log_page:
                 req->data_len = nvmet_get_log_page_len(cmd);
  
                 switch (cmd->get_log_page.lid) {
-               case 0x01:
-               case 0x02:
-               case 0x03:
+               case NVME_LOG_ERROR:
+               case NVME_LOG_SMART:
+               case NVME_LOG_FW_SLOT:
                         req->execute = nvmet_execute_get_log_page;
                         return 0;
                 }
@@ -545,6 +539,7 @@ int nvmet_parse_admin_cmd(struct nvmet_req *req)
                 return 0;
         }
  
-       pr_err("nvmet: unhandled cmd %d\n", cmd->common.opcode);
+       pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
+              req->sq->qid);
         return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
  }
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c

index 11b0a0a5f661b502d15a22868fe78369ef428627..cf90713043da01ea7180d227feb2895da3653f49 100644 (file)
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -273,8 +273,8 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
         ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE,
                         NULL);
         if (IS_ERR(ns->bdev)) {
-               pr_err("nvmet: failed to open block device %s: (%ld)\n",
-                       ns->device_path, PTR_ERR(ns->bdev));
+               pr_err("failed to open block device %s: (%ld)\n",
+                      ns->device_path, PTR_ERR(ns->bdev));
                 ret = PTR_ERR(ns->bdev);
                 ns->bdev = NULL;
                 goto out_unlock;
@@ -425,6 +425,13 @@ void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
         ctrl->sqs[qid] = sq;
  }
  
+static void nvmet_confirm_sq(struct percpu_ref *ref)
+{
+       struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);
+
+       complete(&sq->confirm_done);
+}
+
  void nvmet_sq_destroy(struct nvmet_sq *sq)
  {
         /*
@@ -433,7 +440,8 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
          */
         if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
                 nvmet_async_events_free(sq->ctrl);
-       percpu_ref_kill(&sq->ref);
+       percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
+       wait_for_completion(&sq->confirm_done);
         wait_for_completion(&sq->free_done);
         percpu_ref_exit(&sq->ref);
  
@@ -461,6 +469,7 @@ int nvmet_sq_init(struct nvmet_sq *sq)
                 return ret;
         }
         init_completion(&sq->free_done);
+       init_completion(&sq->confirm_done);
  
         return 0;
  }
@@ -652,6 +661,32 @@ out:
         return status;
  }
  
+/*
+ * Reject a command when the controller is not enabled (CC.EN clear) or
+ * not ready (CSTS.RDY clear).
+ *
+ * Returns 0 if the controller may process the command, otherwise
+ * NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR.  nvmet_parse_admin_cmd() calls this
+ * helper too, so the messages must not claim the command is an I/O command.
+ */
+u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
+{
+       if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
+               pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
+                      cmd->common.opcode, req->sq->qid);
+               return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
+       }
+
+       if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
+               pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
+                      cmd->common.opcode, req->sq->qid);
+               /* NOTE(review): ns is cleared only here, not above - confirm */
+               req->ns = NULL;
+               return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
+       }
+       return 0;
+}
+
  static bool __nvmet_host_allowed(struct nvmet_subsys *subsys,
                 const char *hostnqn)
  {
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c

index af8aabf0533504971bef38fbdd02c3943b805a5f..1aaf597e81fc763f96f5f61f4e2d47b9dd81578e 100644 (file)
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -159,15 +159,15 @@ out:
         nvmet_req_complete(req, status);
  }
  
-int nvmet_parse_discovery_cmd(struct nvmet_req *req)
+u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
  {
         struct nvme_command *cmd = req->cmd;
  
         req->ns = NULL;
  
         if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
-               pr_err("nvmet: got cmd %d while not ready\n",
-                               cmd->common.opcode);
+               pr_err("got cmd %d while not ready\n",
+                      cmd->common.opcode);
                 return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
         }
  
@@ -180,8 +180,8 @@ int nvmet_parse_discovery_cmd(struct nvmet_req *req)
                         req->execute = nvmet_execute_get_disc_log_page;
                         return 0;
                 default:
-                       pr_err("nvmet: unsupported get_log_page lid %d\n",
-                               cmd->get_log_page.lid);
+                       pr_err("unsupported get_log_page lid %d\n",
+                              cmd->get_log_page.lid);
                 return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
                 }
         case nvme_admin_identify:
@@ -192,17 +192,16 @@ int nvmet_parse_discovery_cmd(struct nvmet_req *req)
                                 nvmet_execute_identify_disc_ctrl;
                         return 0;
                 default:
-                       pr_err("nvmet: unsupported identify cns %d\n",
-                               cmd->identify.cns);
+                       pr_err("unsupported identify cns %d\n",
+                              cmd->identify.cns);
                         return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
                 }
         default:
-               pr_err("nvmet: unsupported cmd %d\n",
-                               cmd->common.opcode);
+               pr_err("unsupported cmd %d\n", cmd->common.opcode);
                 return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
         }
  
-       pr_err("nvmet: unhandled cmd %d\n", cmd->common.opcode);
+       pr_err("unhandled cmd %d\n", cmd->common.opcode);
         return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
  }
  
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c

index 8bd022af3df6741ed1b08f10bb7b6dce40fe6925..3cc17269504bff54bfe93a8ede5995d94f7ab699 100644 (file)
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -73,7 +73,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req)
         nvmet_req_complete(req, status);
  }
  
-int nvmet_parse_fabrics_cmd(struct nvmet_req *req)
+u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req)
  {
         struct nvme_command *cmd = req->cmd;
  
@@ -122,7 +122,15 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
         struct nvmet_ctrl *ctrl = NULL;
         u16 status = 0;
  
-       d = kmap(sg_page(req->sg)) + req->sg->offset;
+       d = kmalloc(sizeof(*d), GFP_KERNEL);
+       if (!d) {
+               status = NVME_SC_INTERNAL;
+               goto complete;
+       }
+
+       status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d));
+       if (status)
+               goto out;
  
         /* zero out initial completion result, assign values as needed */
         req->rsp->result.u32 = 0;
@@ -143,7 +151,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
         }
  
         status = nvmet_alloc_ctrl(d->subsysnqn, d->hostnqn, req,
-                       le32_to_cpu(c->kato), &ctrl);
+                                 le32_to_cpu(c->kato), &ctrl);
         if (status)
                 goto out;
  
@@ -158,7 +166,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
         req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid);
  
  out:
-       kunmap(sg_page(req->sg));
+       kfree(d);
+complete:
         nvmet_req_complete(req, status);
  }
  
@@ -170,7 +179,15 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
         u16 qid = le16_to_cpu(c->qid);
         u16 status = 0;
  
-       d = kmap(sg_page(req->sg)) + req->sg->offset;
+       d = kmalloc(sizeof(*d), GFP_KERNEL);
+       if (!d) {
+               status = NVME_SC_INTERNAL;
+               goto complete;
+       }
+
+       status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d));
+       if (status)
+               goto out;
  
         /* zero out initial completion result, assign values as needed */
         req->rsp->result.u32 = 0;
@@ -183,8 +200,8 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
         }
  
         status = nvmet_ctrl_find_get(d->subsysnqn, d->hostnqn,
-                       le16_to_cpu(d->cntlid),
-                       req, &ctrl);
+                                    le16_to_cpu(d->cntlid),
+                                    req, &ctrl);
         if (status)
                 goto out;
  
@@ -205,7 +222,8 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
         pr_info("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid);
  
  out:
-       kunmap(sg_page(req->sg));
+       kfree(d);
+complete:
         nvmet_req_complete(req, status);
         return;
  
@@ -214,7 +232,7 @@ out_ctrl_put:
         goto out;
  }
  
-int nvmet_parse_connect_cmd(struct nvmet_req *req)
+u16 nvmet_parse_connect_cmd(struct nvmet_req *req)
  {
         struct nvme_command *cmd = req->cmd;
  
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c

index 8f483ee7868c56bdc55e174f226f368609b32159..62eba29c85fb9b5101d4a974a9392dbb2e71c72d 100644 (file)
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -82,10 +82,13 @@ struct nvmet_fc_fcp_iod {
         enum nvmet_fcp_datadir          io_dir;
         bool                            active;
         bool                            abort;
+       bool                            aborted;
+       bool                            writedataactive;
         spinlock_t                      flock;
  
         struct nvmet_req                req;
         struct work_struct              work;
+       struct work_struct              done_work;
  
         struct nvmet_fc_tgtport         *tgtport;
         struct nvmet_fc_tgt_queue       *queue;
@@ -116,7 +119,7 @@ struct nvmet_fc_tgt_queue {
         u16                             qid;
         u16                             sqsize;
         u16                             ersp_ratio;
-       u16                             sqhd;
+       __le16                          sqhd;
         int                             cpu;
         atomic_t                        connected;
         atomic_t                        sqtail;
@@ -213,6 +216,7 @@ static DEFINE_IDA(nvmet_fc_tgtport_cnt);
  
  static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work);
  static void nvmet_fc_handle_fcp_rqst_work(struct work_struct *work);
+static void nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work);
  static void nvmet_fc_tgt_a_put(struct nvmet_fc_tgt_assoc *assoc);
  static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc);
  static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue);
@@ -414,9 +418,13 @@ nvmet_fc_prep_fcp_iodlist(struct nvmet_fc_tgtport *tgtport,
  
         for (i = 0; i < queue->sqsize; fod++, i++) {
                 INIT_WORK(&fod->work, nvmet_fc_handle_fcp_rqst_work);
+               INIT_WORK(&fod->done_work, nvmet_fc_fcp_rqst_op_done_work);
                 fod->tgtport = tgtport;
                 fod->queue = queue;
                 fod->active = false;
+               fod->abort = false;
+               fod->aborted = false;
+               fod->fcpreq = NULL;
                 list_add_tail(&fod->fcp_list, &queue->fod_list);
                 spin_lock_init(&fod->flock);
  
@@ -463,7 +471,6 @@ nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue)
         if (fod) {
                 list_del(&fod->fcp_list);
                 fod->active = true;
-               fod->abort = false;
                 /*
                  * no queue reference is taken, as it was taken by the
                  * queue lookup just prior to the allocation. The iod
@@ -479,17 +486,30 @@ static void
  nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue,
                         struct nvmet_fc_fcp_iod *fod)
  {
+       struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq;
+       struct nvmet_fc_tgtport *tgtport = fod->tgtport;
         unsigned long flags;
  
+       fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma,
+                               sizeof(fod->rspiubuf), DMA_TO_DEVICE);
+
+       fcpreq->nvmet_fc_private = NULL;
+
         spin_lock_irqsave(&queue->qlock, flags);
         list_add_tail(&fod->fcp_list, &fod->queue->fod_list);
         fod->active = false;
+       fod->abort = false;
+       fod->aborted = false;
+       fod->writedataactive = false;
+       fod->fcpreq = NULL;
         spin_unlock_irqrestore(&queue->qlock, flags);
  
         /*
          * release the reference taken at queue lookup and fod allocation
          */
         nvmet_fc_tgt_q_put(queue);
+
+       tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq);
  }
  
  static int
@@ -615,33 +635,13 @@ nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue)
  }
  
  
-static void
-nvmet_fc_abort_op(struct nvmet_fc_tgtport *tgtport,
-                               struct nvmefc_tgt_fcp_req *fcpreq)
-{
-       int ret;
-
-       fcpreq->op = NVMET_FCOP_ABORT;
-       fcpreq->offset = 0;
-       fcpreq->timeout = 0;
-       fcpreq->transfer_length = 0;
-       fcpreq->transferred_length = 0;
-       fcpreq->fcp_error = 0;
-       fcpreq->sg_cnt = 0;
-
-       ret = tgtport->ops->fcp_op(&tgtport->fc_target_port, fcpreq);
-       if (ret)
-               /* should never reach here !! */
-               WARN_ON(1);
-}
-
-
  static void
  nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
  {
+       struct nvmet_fc_tgtport *tgtport = queue->assoc->tgtport;
         struct nvmet_fc_fcp_iod *fod = queue->fod;
         unsigned long flags;
-       int i;
+       int i, writedataactive;
         bool disconnect;
  
         disconnect = atomic_xchg(&queue->connected, 0);
@@ -652,7 +652,20 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
                 if (fod->active) {
                         spin_lock(&fod->flock);
                         fod->abort = true;
+                       writedataactive = fod->writedataactive;
                         spin_unlock(&fod->flock);
+                       /*
+                        * only call lldd abort routine if waiting for
+                        * writedata. other outstanding ops should finish
+                        * on their own.
+                        */
+                       if (writedataactive) {
+                               spin_lock(&fod->flock);
+                               fod->aborted = true;
+                               spin_unlock(&fod->flock);
+                               tgtport->ops->fcp_abort(
+                                       &tgtport->fc_target_port, fod->fcpreq);
+                       }
                 }
         }
         spin_unlock_irqrestore(&queue->qlock, flags);
@@ -846,7 +859,8 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo,
         int ret, idx;
  
         if (!template->xmt_ls_rsp || !template->fcp_op ||
-           !template->targetport_delete ||
+           !template->fcp_abort ||
+           !template->fcp_req_release || !template->targetport_delete ||
             !template->max_hw_queues || !template->max_sgl_segments ||
             !template->max_dif_sgl_segments || !template->dma_boundary) {
                 ret = -EINVAL;
@@ -1044,7 +1058,7 @@ EXPORT_SYMBOL_GPL(nvmet_fc_unregister_targetport);
  
  
  static void
-nvmet_fc_format_rsp_hdr(void *buf, u8 ls_cmd, u32 desc_len, u8 rqst_ls_cmd)
+nvmet_fc_format_rsp_hdr(void *buf, u8 ls_cmd, __be32 desc_len, u8 rqst_ls_cmd)
  {
         struct fcnvme_ls_acc_hdr *acc = buf;
  
@@ -1189,8 +1203,8 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport,
                         validation_errors[ret]);
                 iod->lsreq->rsplen = nvmet_fc_format_rjt(acc,
                                 NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd,
-                               ELS_RJT_LOGIC,
-                               ELS_EXPL_NONE, 0);
+                               FCNVME_RJT_RC_LOGIC,
+                               FCNVME_RJT_EXP_NONE, 0);
                 return;
         }
  
@@ -1281,8 +1295,9 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport,
                 iod->lsreq->rsplen = nvmet_fc_format_rjt(acc,
                                 NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd,
                                 (ret == VERR_NO_ASSOC) ?
-                                               ELS_RJT_PROT : ELS_RJT_LOGIC,
-                               ELS_EXPL_NONE, 0);
+                                       FCNVME_RJT_RC_INV_ASSOC :
+                                       FCNVME_RJT_RC_LOGIC,
+                               FCNVME_RJT_EXP_NONE, 0);
                 return;
         }
  
@@ -1369,8 +1384,12 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
                         validation_errors[ret]);
                 iod->lsreq->rsplen = nvmet_fc_format_rjt(acc,
                                 NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd,
-                               (ret == 8) ? ELS_RJT_PROT : ELS_RJT_LOGIC,
-                               ELS_EXPL_NONE, 0);
+                               (ret == VERR_NO_ASSOC) ?
+                                       FCNVME_RJT_RC_INV_ASSOC :
+                                       (ret == VERR_NO_CONN) ?
+                                               FCNVME_RJT_RC_INV_CONN :
+                                               FCNVME_RJT_RC_LOGIC,
+                               FCNVME_RJT_EXP_NONE, 0);
                 return;
         }
  
@@ -1479,7 +1498,7 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport,
         default:
                 iod->lsreq->rsplen = nvmet_fc_format_rjt(iod->rspbuf,
                                 NVME_FC_MAX_LS_BUFFER_SIZE, w0->ls_cmd,
-                               ELS_RJT_INVAL, ELS_EXPL_NONE, 0);
+                               FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0);
         }
  
         nvmet_fc_xmt_ls_rsp(tgtport, iod);
@@ -1619,6 +1638,8 @@ nvmet_fc_free_tgt_pgs(struct nvmet_fc_fcp_iod *fod)
         for_each_sg(fod->data_sg, sg, fod->data_sg_cnt, count)
                 __free_page(sg_page(sg));
         kfree(fod->data_sg);
+       fod->data_sg = NULL;
+       fod->data_sg_cnt = 0;
  }
  
  
@@ -1679,7 +1700,7 @@ nvmet_fc_prep_fcp_rsp(struct nvmet_fc_tgtport *tgtport,
             xfr_length != fod->total_length ||
             (le16_to_cpu(cqe->status) & 0xFFFE) || cqewd[0] || cqewd[1] ||
             (sqe->flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND)) ||
-           queue_90percent_full(fod->queue, cqe->sq_head))
+           queue_90percent_full(fod->queue, le16_to_cpu(cqe->sq_head)))
                 send_ersp = true;
  
         /* re-set the fields */
@@ -1703,6 +1724,26 @@ nvmet_fc_prep_fcp_rsp(struct nvmet_fc_tgtport *tgtport,
  
  static void nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq);
  
+static void
+nvmet_fc_abort_op(struct nvmet_fc_tgtport *tgtport,
+                               struct nvmet_fc_fcp_iod *fod)
+{
+       struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq;
+
+       /* data no longer needed */
+       nvmet_fc_free_tgt_pgs(fod);
+
+       /*
+        * if an ABTS was received or we issued the fcp_abort early
+        * don't call abort routine again.
+        */
+       /* no need to take lock - lock was taken earlier to get here */
+       if (!fod->aborted)
+               tgtport->ops->fcp_abort(&tgtport->fc_target_port, fcpreq);
+
+       nvmet_fc_free_fcp_iod(fod->queue, fod);
+}
+
  static void
  nvmet_fc_xmt_fcp_rsp(struct nvmet_fc_tgtport *tgtport,
                                 struct nvmet_fc_fcp_iod *fod)
@@ -1716,7 +1757,7 @@ nvmet_fc_xmt_fcp_rsp(struct nvmet_fc_tgtport *tgtport,
  
         ret = tgtport->ops->fcp_op(&tgtport->fc_target_port, fod->fcpreq);
         if (ret)
-               nvmet_fc_abort_op(tgtport, fod->fcpreq);
+               nvmet_fc_abort_op(tgtport, fod);
  }
  
  static void
@@ -1725,6 +1766,7 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport,
  {
         struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq;
         struct scatterlist *sg, *datasg;
+       unsigned long flags;
         u32 tlen, sg_off;
         int ret;
  
@@ -1789,10 +1831,13 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport,
                  */
                 fod->abort = true;
  
-               if (op == NVMET_FCOP_WRITEDATA)
+               if (op == NVMET_FCOP_WRITEDATA) {
+                       spin_lock_irqsave(&fod->flock, flags);
+                       fod->writedataactive = false;
+                       spin_unlock_irqrestore(&fod->flock, flags);
                         nvmet_req_complete(&fod->req,
                                         NVME_SC_FC_TRANSPORT_ERROR);
-               else /* NVMET_FCOP_READDATA or NVMET_FCOP_READDATA_RSP */ {
+               } else /* NVMET_FCOP_READDATA or NVMET_FCOP_READDATA_RSP */ {
                         fcpreq->fcp_error = ret;
                         fcpreq->transferred_length = 0;
                         nvmet_fc_xmt_fcp_op_done(fod->fcpreq);
@@ -1800,32 +1845,54 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport,
         }
  }
  
+static inline bool
+__nvmet_fc_fod_op_abort(struct nvmet_fc_fcp_iod *fod, bool abort)
+{
+       struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq;
+       struct nvmet_fc_tgtport *tgtport = fod->tgtport;
+
+       /* if in the middle of an io and we need to tear down */
+       if (abort) {
+               if (fcpreq->op == NVMET_FCOP_WRITEDATA) {
+                       nvmet_req_complete(&fod->req,
+                                       NVME_SC_FC_TRANSPORT_ERROR);
+                       return true;
+               }
+
+               nvmet_fc_abort_op(tgtport, fod);
+               return true;
+       }
+
+       return false;
+}
+
+/*
+ * actual done handler for FCP operations when completed by the lldd
+ */
  static void
-nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq)
+nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod)
  {
-       struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private;
+       struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq;
         struct nvmet_fc_tgtport *tgtport = fod->tgtport;
         unsigned long flags;
         bool abort;
  
         spin_lock_irqsave(&fod->flock, flags);
         abort = fod->abort;
+       fod->writedataactive = false;
         spin_unlock_irqrestore(&fod->flock, flags);
  
-       /* if in the middle of an io and we need to tear down */
-       if (abort && fcpreq->op != NVMET_FCOP_ABORT) {
-               /* data no longer needed */
-               nvmet_fc_free_tgt_pgs(fod);
-
-               nvmet_req_complete(&fod->req, fcpreq->fcp_error);
-               return;
-       }
-
         switch (fcpreq->op) {
  
         case NVMET_FCOP_WRITEDATA:
+               if (__nvmet_fc_fod_op_abort(fod, abort))
+                       return;
                 if (fcpreq->fcp_error ||
                     fcpreq->transferred_length != fcpreq->transfer_length) {
+                       spin_lock(&fod->flock);
+                       fod->abort = true;
+                       spin_unlock(&fod->flock);
+
                         nvmet_req_complete(&fod->req,
                                         NVME_SC_FC_TRANSPORT_ERROR);
                         return;
@@ -1833,6 +1900,10 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq)
  
                 fod->offset += fcpreq->transferred_length;
                 if (fod->offset != fod->total_length) {
+                       spin_lock_irqsave(&fod->flock, flags);
+                       fod->writedataactive = true;
+                       spin_unlock_irqrestore(&fod->flock, flags);
+
                         /* transfer the next chunk */
                         nvmet_fc_transfer_fcp_data(tgtport, fod,
                                                 NVMET_FCOP_WRITEDATA);
@@ -1847,12 +1918,11 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq)
  
         case NVMET_FCOP_READDATA:
         case NVMET_FCOP_READDATA_RSP:
+               if (__nvmet_fc_fod_op_abort(fod, abort))
+                       return;
                 if (fcpreq->fcp_error ||
                     fcpreq->transferred_length != fcpreq->transfer_length) {
-                       /* data no longer needed */
-                       nvmet_fc_free_tgt_pgs(fod);
-
-                       nvmet_fc_abort_op(tgtport, fod->fcpreq);
+                       nvmet_fc_abort_op(tgtport, fod);
                         return;
                 }
  
@@ -1861,8 +1931,6 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq)
                 if (fcpreq->op == NVMET_FCOP_READDATA_RSP) {
                         /* data no longer needed */
                         nvmet_fc_free_tgt_pgs(fod);
-                       fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma,
-                                       sizeof(fod->rspiubuf), DMA_TO_DEVICE);
                         nvmet_fc_free_fcp_iod(fod->queue, fod);
                         return;
                 }
@@ -1885,19 +1953,38 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq)
                 break;
  
         case NVMET_FCOP_RSP:
-       case NVMET_FCOP_ABORT:
-               fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma,
-                               sizeof(fod->rspiubuf), DMA_TO_DEVICE);
+               if (__nvmet_fc_fod_op_abort(fod, abort))
+                       return;
                 nvmet_fc_free_fcp_iod(fod->queue, fod);
                 break;
  
         default:
-               nvmet_fc_free_tgt_pgs(fod);
-               nvmet_fc_abort_op(tgtport, fod->fcpreq);
                 break;
         }
  }
  
+static void
+nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work)
+{
+       struct nvmet_fc_fcp_iod *fod =
+               container_of(work, struct nvmet_fc_fcp_iod, done_work);
+
+       nvmet_fc_fod_op_done(fod);
+}
+
+static void
+nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq)
+{
+       struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private;
+       struct nvmet_fc_tgt_queue *queue = fod->queue;
+
+       if (fod->tgtport->ops->target_features & NVMET_FCTGTFEAT_OPDONE_IN_ISR)
+               /* context switch so completion is not in ISR context */
+               queue_work_on(queue->cpu, queue->work_q, &fod->done_work);
+       else
+               nvmet_fc_fod_op_done(fod);
+}
+
  /*
   * actual completion handler after execution by the nvmet layer
   */
@@ -1919,10 +2006,7 @@ __nvmet_fc_fcp_nvme_cmd_done(struct nvmet_fc_tgtport *tgtport,
                 fod->queue->sqhd = cqe->sq_head;
  
         if (abort) {
-               /* data no longer needed */
-               nvmet_fc_free_tgt_pgs(fod);
-
-               nvmet_fc_abort_op(tgtport, fod->fcpreq);
+               nvmet_fc_abort_op(tgtport, fod);
                 return;
         }
  
@@ -1971,7 +2055,7 @@ nvmet_fc_fcp_nvme_cmd_done(struct nvmet_req *nvme_req)
  /*
   * Actual processing routine for received FC-NVME LS Requests from the LLD
   */
-void
+static void
  nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
                         struct nvmet_fc_fcp_iod *fod)
  {
@@ -2018,8 +2102,8 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
                                 &fod->queue->nvme_cq,
                                 &fod->queue->nvme_sq,
                                 &nvmet_fc_tgt_fcp_ops);
-       if (!ret) {     /* bad SQE content */
-               nvmet_fc_abort_op(tgtport, fod->fcpreq);
+       if (!ret) {     /* bad SQE content or invalid ctrl state */
+               nvmet_fc_abort_op(tgtport, fod);
                 return;
         }
  
@@ -2059,7 +2143,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
         return;
  
  transport_error:
-       nvmet_fc_abort_op(tgtport, fod->fcpreq);
+       nvmet_fc_abort_op(tgtport, fod);
  }
  
  /*
@@ -2089,7 +2173,7 @@ nvmet_fc_handle_fcp_rqst_work(struct work_struct *work)
   * If this routine returns error, the lldd should abort the exchange.
   *
   * @target_port: pointer to the (registered) target port the FCP CMD IU
- *              was receive on.
+ *              was received on.
   * @fcpreq:     pointer to a fcpreq request structure to be used to reference
   *              the exchange corresponding to the FCP Exchange.
   * @cmdiubuf:   pointer to the buffer containing the FCP CMD IU
@@ -2112,7 +2196,6 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port,
                         (be16_to_cpu(cmdiu->iu_len) != (sizeof(*cmdiu)/4)))
                 return -EIO;
  
-
         queue = nvmet_fc_find_target_queue(tgtport,
                                 be64_to_cpu(cmdiu->connection_id));
         if (!queue)
@@ -2142,12 +2225,68 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port,
                         ((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0;
         memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len);
  
-       queue_work_on(queue->cpu, queue->work_q, &fod->work);
+       if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR)
+               queue_work_on(queue->cpu, queue->work_q, &fod->work);
+       else
+               nvmet_fc_handle_fcp_rqst(tgtport, fod);
  
         return 0;
  }
  EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_req);
  
+/**
+ * nvmet_fc_rcv_fcp_abort - transport entry point called by an LLDD
+ *                       upon the reception of an ABTS for a FCP command
+ *
+ * Notify the transport that an ABTS has been received for a FCP command
+ * that had been given to the transport via nvmet_fc_rcv_fcp_req(). The
+ * LLDD believes the command is still being worked on
+ * (template_ops->fcp_req_release() has not been called).
+ *
+ * The transport will wait for any outstanding work (an op to the LLDD,
+ * which the lldd should complete with error due to the ABTS; or the
+ * completion from the nvmet layer of the nvme command), then will
+ * stop processing and call the template_ops->fcp_req_release() callback to
+ * return the i/o context to the LLDD.  The LLDD may send the BA_ACC
+ * to the ABTS either after return from this function (assuming any
+ * outstanding op work has been terminated) or upon the callback being
+ * called.
+ *
+ * @target_port: pointer to the (registered) target port the FCP CMD IU
+ *              was received on.
+ * @fcpreq:     pointer to the fcpreq request structure that corresponds
+ *              to the exchange that received the ABTS.
+ */
+void
+nvmet_fc_rcv_fcp_abort(struct nvmet_fc_target_port *target_port,
+                       struct nvmefc_tgt_fcp_req *fcpreq)
+{
+       struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private;
+       struct nvmet_fc_tgt_queue *queue;
+       unsigned long flags;
+
+       if (!fod || fod->fcpreq != fcpreq)
+               /* job appears to have already completed, ignore abort */
+               return;
+
+       queue = fod->queue;
+
+       spin_lock_irqsave(&queue->qlock, flags);
+       if (fod->active) {
+               /*
+                * mark as abort. The abort handler, invoked upon completion
+                * of any work, will detect the aborted status and do the
+                * callback.
+                */
+               spin_lock(&fod->flock);
+               fod->abort = true;
+               fod->aborted = true;
+               spin_unlock(&fod->flock);
+       }
+       spin_unlock_irqrestore(&queue->qlock, flags);
+}
+EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_abort);
+
  enum {
         FCT_TRADDR_ERR          = 0,
         FCT_TRADDR_WWNN         = 1 << 0,
@@ -2177,7 +2316,7 @@ nvmet_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf)
         if (!options)
                 return -ENOMEM;
  
-       while ((p = strsep(&o, ",\n")) != NULL) {
+       while ((p = strsep(&o, ":\n")) != NULL) {
                 if (!*p)
                         continue;
  
@@ -2238,6 +2377,7 @@ nvmet_fc_add_port(struct nvmet_port *port)
                         if (!tgtport->port) {
                                 tgtport->port = port;
                                 port->priv = tgtport;
+                               nvmet_fc_tgtport_get(tgtport);
                                 ret = 0;
                         } else
                                 ret = -EALREADY;
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c

index 4e8e6a22bce162a61eec428e9c435acb26b74046..15551ef79c8c4568a6f611b6e9ba9d736127825b 100644 (file)
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -246,11 +246,19 @@ struct fcloop_lsreq {
  struct fcloop_fcpreq {
         struct fcloop_tport             *tport;
         struct nvmefc_fcp_req           *fcpreq;
+       spinlock_t                      reqlock;
         u16                             status;
+       bool                            active;
+       bool                            aborted;
         struct work_struct              work;
         struct nvmefc_tgt_fcp_req       tgt_fcp_req;
  };
  
+struct fcloop_ini_fcpreq {
+       struct nvmefc_fcp_req           *fcpreq;
+       struct fcloop_fcpreq            *tfcp_req;
+       struct work_struct              iniwork;
+};
  
  static inline struct fcloop_lsreq *
  tgt_ls_req_to_lsreq(struct nvmefc_tgt_ls_req *tgt_lsreq)
@@ -341,7 +349,21 @@ fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *tport,
  }
  
  /*
- * FCP IO operation done. call back up initiator "done" flows.
+ * FCP IO operation done by initiator abort.
+ * call back up initiator "done" flows.
+ */
+static void
+fcloop_tgt_fcprqst_ini_done_work(struct work_struct *work)
+{
+       struct fcloop_ini_fcpreq *inireq =
+               container_of(work, struct fcloop_ini_fcpreq, iniwork);
+
+       inireq->fcpreq->done(inireq->fcpreq);
+}
+
+/*
+ * FCP IO operation done by target completion.
+ * call back up initiator "done" flows.
   */
  static void
  fcloop_tgt_fcprqst_done_work(struct work_struct *work)
@@ -349,12 +371,18 @@ fcloop_tgt_fcprqst_done_work(struct work_struct *work)
         struct fcloop_fcpreq *tfcp_req =
                 container_of(work, struct fcloop_fcpreq, work);
         struct fcloop_tport *tport = tfcp_req->tport;
-       struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq;
+       struct nvmefc_fcp_req *fcpreq;
  
-       if (tport->remoteport) {
+       spin_lock(&tfcp_req->reqlock);
+       fcpreq = tfcp_req->fcpreq;
+       spin_unlock(&tfcp_req->reqlock);
+
+       if (tport->remoteport && fcpreq) {
                 fcpreq->status = tfcp_req->status;
                 fcpreq->done(fcpreq);
         }
+
+       kfree(tfcp_req);
  }
  
  
@@ -364,20 +392,25 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport,
                         void *hw_queue_handle,
                         struct nvmefc_fcp_req *fcpreq)
  {
-       struct fcloop_fcpreq *tfcp_req = fcpreq->private;
         struct fcloop_rport *rport = remoteport->private;
+       struct fcloop_ini_fcpreq *inireq = fcpreq->private;
+       struct fcloop_fcpreq *tfcp_req;
         int ret = 0;
  
-       INIT_WORK(&tfcp_req->work, fcloop_tgt_fcprqst_done_work);
+       if (!rport->targetport)
+               return -ECONNREFUSED;
  
-       if (!rport->targetport) {
-               tfcp_req->status = NVME_SC_FC_TRANSPORT_ERROR;
-               schedule_work(&tfcp_req->work);
-               return ret;
-       }
+       tfcp_req = kzalloc(sizeof(*tfcp_req), GFP_KERNEL);
+       if (!tfcp_req)
+               return -ENOMEM;
  
+       inireq->fcpreq = fcpreq;
+       inireq->tfcp_req = tfcp_req;
+       INIT_WORK(&inireq->iniwork, fcloop_tgt_fcprqst_ini_done_work);
         tfcp_req->fcpreq = fcpreq;
         tfcp_req->tport = rport->targetport->private;
+       spin_lock_init(&tfcp_req->reqlock);
+       INIT_WORK(&tfcp_req->work, fcloop_tgt_fcprqst_done_work);
  
         ret = nvmet_fc_rcv_fcp_req(rport->targetport, &tfcp_req->tgt_fcp_req,
                                  fcpreq->cmdaddr, fcpreq->cmdlen);
@@ -444,62 +477,128 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport,
                         struct nvmefc_tgt_fcp_req *tgt_fcpreq)
  {
         struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq);
-       struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq;
+       struct nvmefc_fcp_req *fcpreq;
         u32 rsplen = 0, xfrlen = 0;
-       int fcp_err = 0;
+       int fcp_err = 0, active, aborted;
         u8 op = tgt_fcpreq->op;
  
+       spin_lock(&tfcp_req->reqlock);
+       fcpreq = tfcp_req->fcpreq;
+       active = tfcp_req->active;
+       aborted = tfcp_req->aborted;
+       tfcp_req->active = true;
+       spin_unlock(&tfcp_req->reqlock);
+
+       if (unlikely(active))
+               /* illegal - call while i/o active */
+               return -EALREADY;
+
+       if (unlikely(aborted)) {
+               /* target transport has aborted i/o prior */
+               spin_lock(&tfcp_req->reqlock);
+               tfcp_req->active = false;
+               spin_unlock(&tfcp_req->reqlock);
+               tgt_fcpreq->transferred_length = 0;
+               tgt_fcpreq->fcp_error = -ECANCELED;
+               tgt_fcpreq->done(tgt_fcpreq);
+               return 0;
+       }
+
+       /*
+        * if fcpreq is NULL, the I/O has been aborted (from
+        * initiator side). For the target side, act as if all is well
+        * but don't actually move data.
+        */
+
         switch (op) {
         case NVMET_FCOP_WRITEDATA:
                 xfrlen = tgt_fcpreq->transfer_length;
-               fcloop_fcp_copy_data(op, tgt_fcpreq->sg, fcpreq->first_sgl,
-                                       tgt_fcpreq->offset, xfrlen);
-               fcpreq->transferred_length += xfrlen;
+               if (fcpreq) {
+                       fcloop_fcp_copy_data(op, tgt_fcpreq->sg,
+                                       fcpreq->first_sgl, tgt_fcpreq->offset,
+                                       xfrlen);
+                       fcpreq->transferred_length += xfrlen;
+               }
                 break;
  
         case NVMET_FCOP_READDATA:
         case NVMET_FCOP_READDATA_RSP:
                 xfrlen = tgt_fcpreq->transfer_length;
-               fcloop_fcp_copy_data(op, tgt_fcpreq->sg, fcpreq->first_sgl,
-                                       tgt_fcpreq->offset, xfrlen);
-               fcpreq->transferred_length += xfrlen;
+               if (fcpreq) {
+                       fcloop_fcp_copy_data(op, tgt_fcpreq->sg,
+                                       fcpreq->first_sgl, tgt_fcpreq->offset,
+                                       xfrlen);
+                       fcpreq->transferred_length += xfrlen;
+               }
                 if (op == NVMET_FCOP_READDATA)
                         break;
  
                 /* Fall-Thru to RSP handling */
  
         case NVMET_FCOP_RSP:
-               rsplen = ((fcpreq->rsplen < tgt_fcpreq->rsplen) ?
-                               fcpreq->rsplen : tgt_fcpreq->rsplen);
-               memcpy(fcpreq->rspaddr, tgt_fcpreq->rspaddr, rsplen);
-               if (rsplen < tgt_fcpreq->rsplen)
-                       fcp_err = -E2BIG;
-               fcpreq->rcv_rsplen = rsplen;
-               fcpreq->status = 0;
+               if (fcpreq) {
+                       rsplen = ((fcpreq->rsplen < tgt_fcpreq->rsplen) ?
+                                       fcpreq->rsplen : tgt_fcpreq->rsplen);
+                       memcpy(fcpreq->rspaddr, tgt_fcpreq->rspaddr, rsplen);
+                       if (rsplen < tgt_fcpreq->rsplen)
+                               fcp_err = -E2BIG;
+                       fcpreq->rcv_rsplen = rsplen;
+                       fcpreq->status = 0;
+               }
                 tfcp_req->status = 0;
                 break;
  
-       case NVMET_FCOP_ABORT:
-               tfcp_req->status = NVME_SC_FC_TRANSPORT_ABORTED;
-               break;
-
         default:
                 fcp_err = -EINVAL;
                 break;
         }
  
+       spin_lock(&tfcp_req->reqlock);
+       tfcp_req->active = false;
+       spin_unlock(&tfcp_req->reqlock);
+
         tgt_fcpreq->transferred_length = xfrlen;
         tgt_fcpreq->fcp_error = fcp_err;
         tgt_fcpreq->done(tgt_fcpreq);
  
-       if ((!fcp_err) && (op == NVMET_FCOP_RSP ||
-                       op == NVMET_FCOP_READDATA_RSP ||
-                       op == NVMET_FCOP_ABORT))
-               schedule_work(&tfcp_req->work);
-
         return 0;
  }
  
+static void
+fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
+                       struct nvmefc_tgt_fcp_req *tgt_fcpreq)
+{
+       struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq);
+       int active;
+
+       /*
+        * mark aborted only in case there were 2 threads in transport
+        * (one doing io, other doing abort) and only kills ops posted
+        * after the abort request
+        */
+       spin_lock(&tfcp_req->reqlock);
+       active = tfcp_req->active;
+       tfcp_req->aborted = true;
+       spin_unlock(&tfcp_req->reqlock);
+
+       tfcp_req->status = NVME_SC_FC_TRANSPORT_ABORTED;
+
+       /*
+        * nothing more to do. If io wasn't active, the transport should
+        * immediately call the req_release. If it was active, the op
+        * will complete, and the lldd should call req_release.
+        */
+}
+
+static void
+fcloop_fcp_req_release(struct nvmet_fc_target_port *tgtport,
+                       struct nvmefc_tgt_fcp_req *tgt_fcpreq)
+{
+       struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq);
+
+       schedule_work(&tfcp_req->work);
+}
+
  static void
  fcloop_ls_abort(struct nvme_fc_local_port *localport,
                         struct nvme_fc_remote_port *remoteport,
@@ -513,6 +612,27 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport,
                         void *hw_queue_handle,
                         struct nvmefc_fcp_req *fcpreq)
  {
+       struct fcloop_rport *rport = remoteport->private;
+       struct fcloop_ini_fcpreq *inireq = fcpreq->private;
+       struct fcloop_fcpreq *tfcp_req = inireq->tfcp_req;
+
+       if (!tfcp_req)
+               /* abort has already been called */
+               return;
+
+       if (rport->targetport)
+               nvmet_fc_rcv_fcp_abort(rport->targetport,
+                                       &tfcp_req->tgt_fcp_req);
+
+       /* break initiator/target relationship for io */
+       spin_lock(&tfcp_req->reqlock);
+       inireq->tfcp_req = NULL;
+       tfcp_req->fcpreq = NULL;
+       spin_unlock(&tfcp_req->reqlock);
+
+       /* post the aborted io completion */
+       fcpreq->status = -ECANCELED;
+       schedule_work(&inireq->iniwork);
  }
  
  static void
@@ -546,7 +666,7 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport)
  #define        FCLOOP_SGL_SEGS                 256
  #define FCLOOP_DMABOUND_4G             0xFFFFFFFF
  
-struct nvme_fc_port_template fctemplate = {
+static struct nvme_fc_port_template fctemplate = {
         .localport_delete       = fcloop_localport_delete,
         .remoteport_delete      = fcloop_remoteport_delete,
         .create_queue           = fcloop_create_queue,
@@ -563,20 +683,23 @@ struct nvme_fc_port_template fctemplate = {
         .local_priv_sz          = sizeof(struct fcloop_lport),
         .remote_priv_sz         = sizeof(struct fcloop_rport),
         .lsrqst_priv_sz         = sizeof(struct fcloop_lsreq),
-       .fcprqst_priv_sz        = sizeof(struct fcloop_fcpreq),
+       .fcprqst_priv_sz        = sizeof(struct fcloop_ini_fcpreq),
  };
  
-struct nvmet_fc_target_template tgttemplate = {
+static struct nvmet_fc_target_template tgttemplate = {
         .targetport_delete      = fcloop_targetport_delete,
         .xmt_ls_rsp             = fcloop_xmt_ls_rsp,
         .fcp_op                 = fcloop_fcp_op,
+       .fcp_abort              = fcloop_tgt_fcp_abort,
+       .fcp_req_release        = fcloop_fcp_req_release,
         .max_hw_queues          = FCLOOP_HW_QUEUES,
         .max_sgl_segments       = FCLOOP_SGL_SEGS,
         .max_dif_sgl_segments   = FCLOOP_SGL_SEGS,
         .dma_boundary           = FCLOOP_DMABOUND_4G,
         /* optional features */
-       .target_features        = NVMET_FCTGTFEAT_READDATA_RSP |
-                                 NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED,
+       .target_features        = NVMET_FCTGTFEAT_CMD_IN_ISR |
+                                 NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED |
+                                 NVMET_FCTGTFEAT_OPDONE_IN_ISR,
         /* sizes of additional private data for data structures */
         .target_priv_sz         = sizeof(struct fcloop_tport),
  };
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c

index 4195115c7e5493be02acc7b4daddbfd2427c831e..c77940d80fc8e7386e3e968efc4be058d1abfab0 100644 (file)
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -180,11 +180,11 @@ static void nvmet_execute_write_zeroes(struct nvmet_req *req)
  
         sector = le64_to_cpu(write_zeroes->slba) <<
                 (req->ns->blksize_shift - 9);
-       nr_sector = (((sector_t)le32_to_cpu(write_zeroes->length)) <<
+       nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length)) <<
                 (req->ns->blksize_shift - 9)) + 1;
  
         if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
-                               GFP_KERNEL, &bio, true))
+                               GFP_KERNEL, &bio, 0))
                 status = NVME_SC_INTERNAL | NVME_SC_DNR;
  
         if (bio) {
@@ -196,26 +196,19 @@ static void nvmet_execute_write_zeroes(struct nvmet_req *req)
         }
  }
  
-int nvmet_parse_io_cmd(struct nvmet_req *req)
+u16 nvmet_parse_io_cmd(struct nvmet_req *req)
  {
         struct nvme_command *cmd = req->cmd;
+       u16 ret;
  
-       if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
-               pr_err("nvmet: got io cmd %d while CC.EN == 0\n",
-                               cmd->common.opcode);
+       ret = nvmet_check_ctrl_status(req, cmd);
+       if (unlikely(ret)) {
                 req->ns = NULL;
-               return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
-       }
-
-       if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
-               pr_err("nvmet: got io cmd %d while CSTS.RDY == 0\n",
-                               cmd->common.opcode);
-               req->ns = NULL;
-               return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
+               return ret;
         }
  
         req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
-       if (!req->ns)
+       if (unlikely(!req->ns))
                 return NVME_SC_INVALID_NS | NVME_SC_DNR;
  
         switch (cmd->common.opcode) {
@@ -230,14 +223,15 @@ int nvmet_parse_io_cmd(struct nvmet_req *req)
                 return 0;
         case nvme_cmd_dsm:
                 req->execute = nvmet_execute_dsm;
-               req->data_len = le32_to_cpu(cmd->dsm.nr + 1) *
+               req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
                         sizeof(struct nvme_dsm_range);
                 return 0;
         case nvme_cmd_write_zeroes:
                 req->execute = nvmet_execute_write_zeroes;
                 return 0;
         default:
-               pr_err("nvmet: unhandled cmd %d\n", cmd->common.opcode);
+               pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
+                      req->sq->qid);
                 return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
         }
  }
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c

index d1f06e7768ff1d7ff6ee787ff6d94eb01576252f..304f1c87c160cb0feccfaad3b28148db33bb2c82 100644 (file)
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -13,12 +13,10 @@
   */
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  #include <linux/scatterlist.h>
-#include <linux/delay.h>
  #include <linux/blk-mq.h>
  #include <linux/nvme.h>
  #include <linux/module.h>
  #include <linux/parser.h>
-#include <linux/t10-pi.h>
  #include "nvmet.h"
  #include "../host/nvme.h"
  #include "../host/fabrics.h"
@@ -93,31 +91,26 @@ static inline int nvme_loop_queue_idx(struct nvme_loop_queue *queue)
  static void nvme_loop_complete_rq(struct request *req)
  {
         struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
-       int error = 0;
  
         nvme_cleanup_cmd(req);
         sg_free_table_chained(&iod->sg_table, true);
+       nvme_complete_rq(req);
+}
  
-       if (unlikely(req->errors)) {
-               if (nvme_req_needs_retry(req, req->errors)) {
-                       nvme_requeue_req(req);
-                       return;
-               }
-
-               if (blk_rq_is_passthrough(req))
-                       error = req->errors;
-               else
-                       error = nvme_error_status(req->errors);
-       }
+static struct blk_mq_tags *nvme_loop_tagset(struct nvme_loop_queue *queue)
+{
+       u32 queue_idx = nvme_loop_queue_idx(queue);
  
-       blk_mq_end_request(req, error);
+       if (queue_idx == 0)
+               return queue->ctrl->admin_tag_set.tags[queue_idx];
+       return queue->ctrl->tag_set.tags[queue_idx - 1];
  }
  
  static void nvme_loop_queue_response(struct nvmet_req *req)
  {
-       struct nvme_loop_iod *iod =
-               container_of(req, struct nvme_loop_iod, req);
-       struct nvme_completion *cqe = &iod->rsp;
+       struct nvme_loop_queue *queue =
+               container_of(req->sq, struct nvme_loop_queue, nvme_sq);
+       struct nvme_completion *cqe = req->rsp;
  
         /*
          * AEN requests are special as they don't time out and can
@@ -125,15 +118,22 @@ static void nvme_loop_queue_response(struct nvmet_req *req)
          * aborts.  We don't even bother to allocate a struct request
          * for them but rather special case them here.
          */
-       if (unlikely(nvme_loop_queue_idx(iod->queue) == 0 &&
+       if (unlikely(nvme_loop_queue_idx(queue) == 0 &&
                         cqe->command_id >= NVME_LOOP_AQ_BLKMQ_DEPTH)) {
-               nvme_complete_async_event(&iod->queue->ctrl->ctrl, cqe->status,
+               nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
                                 &cqe->result);
         } else {
-               struct request *rq = blk_mq_rq_from_pdu(iod);
+               struct request *rq;
+
+               rq = blk_mq_tag_to_rq(nvme_loop_tagset(queue), cqe->command_id);
+               if (!rq) {
+                       dev_err(queue->ctrl->ctrl.device,
+                               "tag 0x%x on queue %d not found\n",
+                               cqe->command_id, nvme_loop_queue_idx(queue));
+                       return;
+               }
  
-               iod->nvme_req.result = cqe->result;
-               blk_mq_complete_request(rq, le16_to_cpu(cqe->status) >> 1);
+               nvme_end_request(rq, cqe->status, cqe->result);
         }
  }
  
@@ -154,7 +154,7 @@ nvme_loop_timeout(struct request *rq, bool reserved)
         schedule_work(&iod->queue->ctrl->reset_work);
  
         /* fail with DNR on admin cmd timeout */
-       rq->errors = NVME_SC_ABORT_REQ | NVME_SC_DNR;
+       nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
  
         return BLK_EH_HANDLED;
  }
@@ -223,8 +223,6 @@ static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
  static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl,
                 struct nvme_loop_iod *iod, unsigned int queue_idx)
  {
-       BUG_ON(queue_idx >= ctrl->queue_count);
-
         iod->req.cmd = &iod->cmd;
         iod->req.rsp = &iod->rsp;
         iod->queue = &ctrl->queues[queue_idx];
@@ -270,7 +268,7 @@ static int nvme_loop_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
         return 0;
  }
  
-static struct blk_mq_ops nvme_loop_mq_ops = {
+static const struct blk_mq_ops nvme_loop_mq_ops = {
         .queue_rq       = nvme_loop_queue_rq,
         .complete       = nvme_loop_complete_rq,
         .init_request   = nvme_loop_init_request,
@@ -278,7 +276,7 @@ static struct blk_mq_ops nvme_loop_mq_ops = {
         .timeout        = nvme_loop_timeout,
  };
  
-static struct blk_mq_ops nvme_loop_admin_mq_ops = {
+static const struct blk_mq_ops nvme_loop_admin_mq_ops = {
         .queue_rq       = nvme_loop_queue_rq,
         .complete       = nvme_loop_complete_rq,
         .init_request   = nvme_loop_init_admin_request,
@@ -288,9 +286,9 @@ static struct blk_mq_ops nvme_loop_admin_mq_ops = {
  
  static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl)
  {
+       nvmet_sq_destroy(&ctrl->queues[0].nvme_sq);
         blk_cleanup_queue(ctrl->ctrl.admin_q);
         blk_mq_free_tag_set(&ctrl->admin_tag_set);
-       nvmet_sq_destroy(&ctrl->queues[0].nvme_sq);
  }
  
  static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl)
@@ -314,6 +312,56 @@ free_ctrl:
         kfree(ctrl);
  }
  
+static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl)
+{
+       int i;
+
+       for (i = 1; i < ctrl->queue_count; i++)
+               nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
+}
+
+static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl)
+{
+       struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+       unsigned int nr_io_queues;
+       int ret, i;
+
+       nr_io_queues = min(opts->nr_io_queues, num_online_cpus());
+       ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
+       if (ret || !nr_io_queues)
+               return ret;
+
+       dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", nr_io_queues);
+
+       for (i = 1; i <= nr_io_queues; i++) {
+               ctrl->queues[i].ctrl = ctrl;
+               ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq);
+               if (ret)
+                       goto out_destroy_queues;
+
+               ctrl->queue_count++;
+       }
+
+       return 0;
+
+out_destroy_queues:
+       nvme_loop_destroy_io_queues(ctrl);
+       return ret;
+}
+
+static int nvme_loop_connect_io_queues(struct nvme_loop_ctrl *ctrl)
+{
+       int i, ret;
+
+       for (i = 1; i < ctrl->queue_count; i++) {
+               ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
  static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
  {
         int error;
@@ -357,7 +405,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
         }
  
         ctrl->ctrl.sqsize =
-               min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize);
+               min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize);
  
         error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
         if (error)
@@ -385,17 +433,13 @@ out_free_sq:
  
  static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
  {
-       int i;
-
         nvme_stop_keep_alive(&ctrl->ctrl);
  
         if (ctrl->queue_count > 1) {
                 nvme_stop_queues(&ctrl->ctrl);
                 blk_mq_tagset_busy_iter(&ctrl->tag_set,
                                         nvme_cancel_request, &ctrl->ctrl);
-
-               for (i = 1; i < ctrl->queue_count; i++)
-                       nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
+               nvme_loop_destroy_io_queues(ctrl);
         }
  
         if (ctrl->ctrl.state == NVME_CTRL_LIVE)
@@ -459,7 +503,7 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
         struct nvme_loop_ctrl *ctrl = container_of(work,
                                         struct nvme_loop_ctrl, reset_work);
         bool changed;
-       int i, ret;
+       int ret;
  
         nvme_loop_shutdown_ctrl(ctrl);
  
@@ -467,20 +511,13 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
         if (ret)
                 goto out_disable;
  
-       for (i = 1; i <= ctrl->ctrl.opts->nr_io_queues; i++) {
-               ctrl->queues[i].ctrl = ctrl;
-               ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq);
-               if (ret)
-                       goto out_free_queues;
-
-               ctrl->queue_count++;
-       }
+       ret = nvme_loop_init_io_queues(ctrl);
+       if (ret)
+               goto out_destroy_admin;
  
-       for (i = 1; i <= ctrl->ctrl.opts->nr_io_queues; i++) {
-               ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
-               if (ret)
-                       goto out_free_queues;
-       }
+       ret = nvme_loop_connect_io_queues(ctrl);
+       if (ret)
+               goto out_destroy_io;
  
         changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
         WARN_ON_ONCE(!changed);
@@ -492,9 +529,9 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
  
         return;
  
-out_free_queues:
-       for (i = 1; i < ctrl->queue_count; i++)
-               nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
+out_destroy_io:
+       nvme_loop_destroy_io_queues(ctrl);
+out_destroy_admin:
         nvme_loop_destroy_admin_queue(ctrl);
  out_disable:
         dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
@@ -533,25 +570,12 @@ static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
  
  static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
  {
-       struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
-       int ret, i;
+       int ret;
  
-       ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
-       if (ret || !opts->nr_io_queues)
+       ret = nvme_loop_init_io_queues(ctrl);
+       if (ret)
                 return ret;
  
-       dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n",
-               opts->nr_io_queues);
-
-       for (i = 1; i <= opts->nr_io_queues; i++) {
-               ctrl->queues[i].ctrl = ctrl;
-               ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq);
-               if (ret)
-                       goto out_destroy_queues;
-
-               ctrl->queue_count++;
-       }
-
         memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
         ctrl->tag_set.ops = &nvme_loop_mq_ops;
         ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
@@ -575,11 +599,9 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
                 goto out_free_tagset;
         }
  
-       for (i = 1; i <= opts->nr_io_queues; i++) {
-               ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
-               if (ret)
-                       goto out_cleanup_connect_q;
-       }
+       ret = nvme_loop_connect_io_queues(ctrl);
+       if (ret)
+               goto out_cleanup_connect_q;
  
         return 0;
  
@@ -588,8 +610,7 @@ out_cleanup_connect_q:
  out_free_tagset:
         blk_mq_free_tag_set(&ctrl->tag_set);
  out_destroy_queues:
-       for (i = 1; i < ctrl->queue_count; i++)
-               nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
+       nvme_loop_destroy_io_queues(ctrl);
         return ret;
  }
  
@@ -724,7 +745,12 @@ static int __init nvme_loop_init_module(void)
         ret = nvmet_register_transport(&nvme_loop_ops);
         if (ret)
                 return ret;
-       return nvmf_register_transport(&nvme_loop_transport);
+
+       ret = nvmf_register_transport(&nvme_loop_transport);
+       if (ret)
+               nvmet_unregister_transport(&nvme_loop_ops);
+
+       return ret;
  }
  
  static void __exit nvme_loop_cleanup_module(void)
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h

index 1370eee0a3c0f6295722d22e0c103a2f6cece47b..7cb77ba5993b98e0974a66a7712961c434677a7f 100644 (file)
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -73,6 +73,7 @@ struct nvmet_sq {
         u16                     qid;
         u16                     size;
         struct completion       free_done;
+       struct completion       confirm_done;
  };
  
  /**
@@ -252,11 +253,11 @@ struct nvmet_async_event {
         u8                      log_page;
  };
  
-int nvmet_parse_connect_cmd(struct nvmet_req *req);
-int nvmet_parse_io_cmd(struct nvmet_req *req);
-int nvmet_parse_admin_cmd(struct nvmet_req *req);
-int nvmet_parse_discovery_cmd(struct nvmet_req *req);
-int nvmet_parse_fabrics_cmd(struct nvmet_req *req);
+u16 nvmet_parse_connect_cmd(struct nvmet_req *req);
+u16 nvmet_parse_io_cmd(struct nvmet_req *req);
+u16 nvmet_parse_admin_cmd(struct nvmet_req *req);
+u16 nvmet_parse_discovery_cmd(struct nvmet_req *req);
+u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
  
  bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
                 struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops);
@@ -277,6 +278,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
  u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
                 struct nvmet_req *req, struct nvmet_ctrl **ret);
  void nvmet_ctrl_put(struct nvmet_ctrl *ctrl);
+u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd);
  
  struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
                 enum nvme_subsys_type type);
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c

index 9aa1da3778b3ac1d2262bfe9b845b65b9cd942d9..99c69018a35f4419b3dc3a14576c373c044e661a 100644 (file)
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -703,11 +703,6 @@ static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue,
  {
         u16 status;
  
-       cmd->queue = queue;
-       cmd->n_rdma = 0;
-       cmd->req.port = queue->port;
-
-
         ib_dma_sync_single_for_cpu(queue->dev->device,
                 cmd->cmd->sge[0].addr, cmd->cmd->sge[0].length,
                 DMA_FROM_DEVICE);
@@ -760,9 +755,12 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
  
         cmd->queue = queue;
         rsp = nvmet_rdma_get_rsp(queue);
+       rsp->queue = queue;
         rsp->cmd = cmd;
         rsp->flags = 0;
         rsp->req.cmd = cmd->nvme_cmd;
+       rsp->req.port = queue->port;
+       rsp->n_rdma = 0;
  
         if (unlikely(queue->state != NVMET_RDMA_Q_LIVE)) {
                 unsigned long flags;
@@ -1201,6 +1199,11 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
         }
         queue->port = cm_id->context;
  
+       if (queue->host_qid == 0) {
+               /* Let inflight controller teardown complete */
+               flush_scheduled_work();
+       }
+
         ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
         if (ret)
                 goto release_queue;
@@ -1429,12 +1432,16 @@ restart:
  static int nvmet_rdma_add_port(struct nvmet_port *port)
  {
         struct rdma_cm_id *cm_id;
-       struct sockaddr_in addr_in;
-       u16 port_in;
+       struct sockaddr_storage addr = { };
+       __kernel_sa_family_t af;
         int ret;
  
         switch (port->disc_addr.adrfam) {
         case NVMF_ADDR_FAMILY_IP4:
+               af = AF_INET;
+               break;
+       case NVMF_ADDR_FAMILY_IP6:
+               af = AF_INET6;
                 break;
         default:
                 pr_err("address family %d not supported\n",
@@ -1442,13 +1449,13 @@ static int nvmet_rdma_add_port(struct nvmet_port *port)
                 return -EINVAL;
         }
  
-       ret = kstrtou16(port->disc_addr.trsvcid, 0, &port_in);
-       if (ret)
+       ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr,
+                       port->disc_addr.trsvcid, &addr);
+       if (ret) {
+               pr_err("malformed ip/port passed: %s:%s\n",
+                       port->disc_addr.traddr, port->disc_addr.trsvcid);
                 return ret;
-
-       addr_in.sin_family = AF_INET;
-       addr_in.sin_addr.s_addr = in_aton(port->disc_addr.traddr);
-       addr_in.sin_port = htons(port_in);
+       }
  
         cm_id = rdma_create_id(&init_net, nvmet_rdma_cm_handler, port,
                         RDMA_PS_TCP, IB_QPT_RC);
@@ -1457,20 +1464,32 @@ static int nvmet_rdma_add_port(struct nvmet_port *port)
                 return PTR_ERR(cm_id);
         }
  
-       ret = rdma_bind_addr(cm_id, (struct sockaddr *)&addr_in);
+       /*
+        * Allow both IPv4 and IPv6 sockets to bind a single port
+        * at the same time.
+        */
+       ret = rdma_set_afonly(cm_id, 1);
+       if (ret) {
+               pr_err("rdma_set_afonly failed (%d)\n", ret);
+               goto out_destroy_id;
+       }
+
+       ret = rdma_bind_addr(cm_id, (struct sockaddr *)&addr);
         if (ret) {
-               pr_err("binding CM ID to %pISpc failed (%d)\n", &addr_in, ret);
+               pr_err("binding CM ID to %pISpcs failed (%d)\n",
+                       (struct sockaddr *)&addr, ret);
                 goto out_destroy_id;
         }
  
         ret = rdma_listen(cm_id, 128);
         if (ret) {
-               pr_err("listening to %pISpc failed (%d)\n", &addr_in, ret);
+               pr_err("listening to %pISpcs failed (%d)\n",
+                       (struct sockaddr *)&addr, ret);
                 goto out_destroy_id;
         }
  
-       pr_info("enabling port %d (%pISpc)\n",
-               le16_to_cpu(port->disc_addr.portid), &addr_in);
+       pr_info("enabling port %d (%pISpcs)\n",
+               le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr);
         port->priv = cm_id;
         return 0;
  
diff --git a/drivers/parport/share.c b/drivers/parport/share.c

index bc090daa850a4b8fdb8a29c8592582eda8aee668..5dc53d420ca8ca805c0c036c23e3c1a3fc42ac00 100644 (file)
--- a/drivers/parport/share.c
+++ b/drivers/parport/share.c
@@ -939,8 +939,10 @@ parport_register_dev_model(struct parport *port, const char *name,
          * pardevice fields. -arca
          */
         port->ops->init_state(par_dev, par_dev->state);
-       port->proc_device = par_dev;
-       parport_device_proc_register(par_dev);
+       if (!test_and_set_bit(PARPORT_DEVPROC_REGISTERED, &port->devflags)) {
+               port->proc_device = par_dev;
+               parport_device_proc_register(par_dev);
+       }
  
         return par_dev;
  
diff --git a/drivers/pci/dwc/Kconfig b/drivers/pci/dwc/Kconfig

index dfb8a69afc28f5b26a86bb024356c3ea03fdaf31..d2d2ba5b8a68e4eabcf9afe5c28f3fe1b29f3d5b 100644 (file)
--- a/drivers/pci/dwc/Kconfig
+++ b/drivers/pci/dwc/Kconfig
@@ -89,6 +89,7 @@ config PCI_HISI
         depends on PCI_MSI_IRQ_DOMAIN
         select PCIEPORTBUS
         select PCIE_DW_HOST
+       select PCI_HOST_COMMON
         help
           Say Y here if you want PCIe controller support on HiSilicon
           Hip05 and Hip06 SoCs
diff --git a/drivers/pci/dwc/pci-exynos.c b/drivers/pci/dwc/pci-exynos.c

index 993b650ef2759cbffc56c0bc086a8d2172ef4fcf..44f774c12fb25e7ab6f98df5edf8a09638971eca 100644 (file)
--- a/drivers/pci/dwc/pci-exynos.c
+++ b/drivers/pci/dwc/pci-exynos.c
@@ -132,10 +132,6 @@ static int exynos5440_pcie_get_mem_resources(struct platform_device *pdev,
         struct device *dev = pci->dev;
         struct resource *res;
  
-       /* If using the PHY framework, doesn't need to get other resource */
-       if (ep->using_phy)
-               return 0;
-
         ep->mem_res = devm_kzalloc(dev, sizeof(*ep->mem_res), GFP_KERNEL);
         if (!ep->mem_res)
                 return -ENOMEM;
@@ -145,6 +141,10 @@ static int exynos5440_pcie_get_mem_resources(struct platform_device *pdev,
         if (IS_ERR(ep->mem_res->elbi_base))
                 return PTR_ERR(ep->mem_res->elbi_base);
  
+       /* If using the PHY framework, doesn't need to get other resource */
+       if (ep->using_phy)
+               return 0;
+
         res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
         ep->mem_res->phy_base = devm_ioremap_resource(dev, res);
         if (IS_ERR(ep->mem_res->phy_base))
diff --git a/drivers/pci/dwc/pcie-artpec6.c b/drivers/pci/dwc/pcie-artpec6.c

index fcd3ef845883555648e0b2eeda907427d7811144..6d23683c0892f59646b4142242fa80bf0e85be98 100644 (file)
--- a/drivers/pci/dwc/pcie-artpec6.c
+++ b/drivers/pci/dwc/pcie-artpec6.c
@@ -234,6 +234,9 @@ static int artpec6_add_pcie_port(struct artpec6_pcie *artpec6_pcie,
         return 0;
  }
  
+static const struct dw_pcie_ops dw_pcie_ops = {
+};
+
  static int artpec6_pcie_probe(struct platform_device *pdev)
  {
         struct device *dev = &pdev->dev;
@@ -252,6 +255,7 @@ static int artpec6_pcie_probe(struct platform_device *pdev)
                 return -ENOMEM;
  
         pci->dev = dev;
+       pci->ops = &dw_pcie_ops;
  
         artpec6_pcie->pci = pci;
  
diff --git a/drivers/pci/dwc/pcie-designware-plat.c b/drivers/pci/dwc/pcie-designware-plat.c

index b6c832ba39dd6905a4640e770b69aff47e51b738..f20d494922ab890badd99dd96af9bcad866f2ca2 100644 (file)
--- a/drivers/pci/dwc/pcie-designware-plat.c
+++ b/drivers/pci/dwc/pcie-designware-plat.c
@@ -86,6 +86,9 @@ static int dw_plat_add_pcie_port(struct pcie_port *pp,
         return 0;
  }
  
+static const struct dw_pcie_ops dw_pcie_ops = {
+};
+
  static int dw_plat_pcie_probe(struct platform_device *pdev)
  {
         struct device *dev = &pdev->dev;
@@ -103,6 +106,7 @@ static int dw_plat_pcie_probe(struct platform_device *pdev)
                 return -ENOMEM;
  
         pci->dev = dev;
+       pci->ops = &dw_pcie_ops;
  
         dw_plat_pcie->pci = pci;
  
diff --git a/drivers/pci/dwc/pcie-hisi.c b/drivers/pci/dwc/pcie-hisi.c

index fd66a3199db77d41d08e0658d89611370d46446f..cf9d6a9d9fd4fc17afd19d0e659f5e7cff0a3ca8 100644 (file)
--- a/drivers/pci/dwc/pcie-hisi.c
+++ b/drivers/pci/dwc/pcie-hisi.c
@@ -380,9 +380,13 @@ struct pci_ecam_ops hisi_pcie_platform_ops = {
  
  static const struct of_device_id hisi_pcie_almost_ecam_of_match[] = {
         {
-               .compatible = "hisilicon,pcie-almost-ecam",
+               .compatible =  "hisilicon,hip06-pcie-ecam",
                 .data       = (void *) &hisi_pcie_platform_ops,
         },
+       {
+               .compatible =  "hisilicon,hip07-pcie-ecam",
+               .data       = (void *) &hisi_pcie_platform_ops,
+       },
         {},
  };
  
diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c

index 52b5bdccf5f0c2ab462cc695e332a8b3e3234970..6e031b522529daec0240022cf5221e92be3ea29b 100644 (file)
--- a/drivers/pci/host/pci-thunder-pem.c
+++ b/drivers/pci/host/pci-thunder-pem.c
@@ -14,6 +14,7 @@
   * Copyright (C) 2015 - 2016 Cavium, Inc.
   */
  
+#include <linux/bitfield.h>
  #include <linux/kernel.h>
  #include <linux/init.h>
  #include <linux/of_address.h>
@@ -334,6 +335,49 @@ static int thunder_pem_init(struct device *dev, struct pci_config_window *cfg,
  
  #if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
  
+#define PEM_RES_BASE           0x87e0c0000000UL
+#define PEM_NODE_MASK          GENMASK(45, 44)
+#define PEM_INDX_MASK          GENMASK(26, 24)
+#define PEM_MIN_DOM_IN_NODE    4
+#define PEM_MAX_DOM_IN_NODE    10
+
+static void thunder_pem_reserve_range(struct device *dev, int seg,
+                                     struct resource *r)
+{
+       resource_size_t start = r->start, end = r->end;
+       struct resource *res;
+       const char *regionid;
+
+       regionid = kasprintf(GFP_KERNEL, "PEM RC:%d", seg);
+       if (!regionid)
+               return;
+
+       res = request_mem_region(start, end - start + 1, regionid);
+       if (res)
+               res->flags &= ~IORESOURCE_BUSY;
+       else
+               kfree(regionid);
+
+       dev_info(dev, "%pR %s reserved\n", r,
+                res ? "has been" : "could not be");
+}
+
+static void thunder_pem_legacy_fw(struct acpi_pci_root *root,
+                                struct resource *res_pem)
+{
+       int node = acpi_get_node(root->device->handle);
+       int index;
+
+       if (node == NUMA_NO_NODE)
+               node = 0;
+
+       index = root->segment - PEM_MIN_DOM_IN_NODE;
+       index -= node * PEM_MAX_DOM_IN_NODE;
+       res_pem->start = PEM_RES_BASE | FIELD_PREP(PEM_NODE_MASK, node) |
+                                       FIELD_PREP(PEM_INDX_MASK, index);
+       res_pem->flags = IORESOURCE_MEM;
+}
+
  static int thunder_pem_acpi_init(struct pci_config_window *cfg)
  {
         struct device *dev = cfg->parent;
@@ -346,10 +390,24 @@ static int thunder_pem_acpi_init(struct pci_config_window *cfg)
         if (!res_pem)
                 return -ENOMEM;
  
-       ret = acpi_get_rc_resources(dev, "THRX0002", root->segment, res_pem);
+       ret = acpi_get_rc_resources(dev, "CAVA02B", root->segment, res_pem);
+
+       /*
+        * If we fail to gather resources it means that we run with old
+        * FW where we need to calculate PEM-specific resources manually.
+        */
         if (ret) {
-               dev_err(dev, "can't get rc base address\n");
-               return ret;
+               thunder_pem_legacy_fw(root, res_pem);
+               /*
+                * Reserve 64K size PEM specific resources. The full 16M range
+                * size is required for thunder_pem_init() call.
+                */
+               res_pem->end = res_pem->start + SZ_64K - 1;
+               thunder_pem_reserve_range(dev, root->segment, res_pem);
+               res_pem->end = res_pem->start + SZ_16M - 1;
+
+               /* Reserve PCI configuration space as well. */
+               thunder_pem_reserve_range(dev, root->segment, &cfg->res);
         }
  
         return thunder_pem_init(dev, cfg, res_pem);
diff --git a/drivers/pci/host/pcie-iproc-bcma.c b/drivers/pci/host/pcie-iproc-bcma.c

index bd4c9ec25edc22531ae450b2bb08f1d2aecd7b62..384c27e664fec8aa777246dce0bca499728ae42a 100644 (file)
--- a/drivers/pci/host/pcie-iproc-bcma.c
+++ b/drivers/pci/host/pcie-iproc-bcma.c
@@ -44,8 +44,7 @@ static int iproc_pcie_bcma_probe(struct bcma_device *bdev)
  {
         struct device *dev = &bdev->dev;
         struct iproc_pcie *pcie;
-       LIST_HEAD(res);
-       struct resource res_mem;
+       LIST_HEAD(resources);
         int ret;
  
         pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
@@ -63,22 +62,23 @@ static int iproc_pcie_bcma_probe(struct bcma_device *bdev)
  
         pcie->base_addr = bdev->addr;
  
-       res_mem.start = bdev->addr_s[0];
-       res_mem.end = bdev->addr_s[0] + SZ_128M - 1;
-       res_mem.name = "PCIe MEM space";
-       res_mem.flags = IORESOURCE_MEM;
-       pci_add_resource(&res, &res_mem);
+       pcie->mem.start = bdev->addr_s[0];
+       pcie->mem.end = bdev->addr_s[0] + SZ_128M - 1;
+       pcie->mem.name = "PCIe MEM space";
+       pcie->mem.flags = IORESOURCE_MEM;
+       pci_add_resource(&resources, &pcie->mem);
  
         pcie->map_irq = iproc_pcie_bcma_map_irq;
  
-       ret = iproc_pcie_setup(pcie, &res);
-       if (ret)
+       ret = iproc_pcie_setup(pcie, &resources);
+       if (ret) {
                 dev_err(dev, "PCIe controller setup failed\n");
-
-       pci_free_resource_list(&res);
+               pci_free_resource_list(&resources);
+               return ret;
+       }
  
         bcma_set_drvdata(bdev, pcie);
-       return ret;
+       return 0;
  }
  
  static void iproc_pcie_bcma_remove(struct bcma_device *bdev)
diff --git a/drivers/pci/host/pcie-iproc-platform.c b/drivers/pci/host/pcie-iproc-platform.c

index f4909bb0b2ad1505c3e084820f36104387add8f5..8c6a327ca6cdf883f32ea9a6fa0e7e1e28ab0e98 100644 (file)
--- a/drivers/pci/host/pcie-iproc-platform.c
+++ b/drivers/pci/host/pcie-iproc-platform.c
@@ -51,7 +51,7 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev)
         struct device_node *np = dev->of_node;
         struct resource reg;
         resource_size_t iobase = 0;
-       LIST_HEAD(res);
+       LIST_HEAD(resources);
         int ret;
  
         pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
@@ -96,10 +96,10 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev)
                 pcie->phy = NULL;
         }
  
-       ret = of_pci_get_host_bridge_resources(np, 0, 0xff, &res, &iobase);
+       ret = of_pci_get_host_bridge_resources(np, 0, 0xff, &resources,
+                                              &iobase);
         if (ret) {
-               dev_err(dev,
-                       "unable to get PCI host bridge resources\n");
+               dev_err(dev, "unable to get PCI host bridge resources\n");
                 return ret;
         }
  
@@ -112,14 +112,15 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev)
                 pcie->map_irq = of_irq_parse_and_map_pci;
         }
  
-       ret = iproc_pcie_setup(pcie, &res);
-       if (ret)
+       ret = iproc_pcie_setup(pcie, &resources);
+       if (ret) {
                 dev_err(dev, "PCIe controller setup failed\n");
-
-       pci_free_resource_list(&res);
+               pci_free_resource_list(&resources);
+               return ret;
+       }
  
         platform_set_drvdata(pdev, pcie);
-       return ret;
+       return 0;
  }
  
  static int iproc_pcie_pltfm_remove(struct platform_device *pdev)
diff --git a/drivers/pci/host/pcie-iproc.h b/drivers/pci/host/pcie-iproc.h

index 04fed8e907f12b602b5fb1f11ff59971ebd0b87f..0bbe2ea44f3e1559dda22adc85ea70a8862684bd 100644 (file)
--- a/drivers/pci/host/pcie-iproc.h
+++ b/drivers/pci/host/pcie-iproc.h
@@ -90,6 +90,7 @@ struct iproc_pcie {
  #ifdef CONFIG_ARM
         struct pci_sys_data sysdata;
  #endif
+       struct resource mem;
         struct pci_bus *root_bus;
         struct phy *phy;
         int (*map_irq)(const struct pci_dev *, u8, u8);
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c

index 973472c23d89045000cf1119a09867c921f2fdf8..1dfa10cc566bebed005c2fe11a72c85a37036c32 100644 (file)
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -478,7 +478,7 @@ static void aspm_calc_l1ss_info(struct pcie_link_state *link,
  
  static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
  {
-       struct pci_dev *child, *parent = link->pdev;
+       struct pci_dev *child = link->downstream, *parent = link->pdev;
         struct pci_bus *linkbus = parent->subordinate;
         struct aspm_register_info upreg, dwreg;
  
@@ -491,9 +491,7 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
  
         /* Get upstream/downstream components' register state */
         pcie_get_aspm_reg(parent, &upreg);
-       child = pci_function_0(linkbus);
         pcie_get_aspm_reg(child, &dwreg);
-       link->downstream = child;
  
         /*
          * If ASPM not supported, don't mess with the clocks and link,
@@ -800,6 +798,7 @@ static struct pcie_link_state *alloc_pcie_link_state(struct pci_dev *pdev)
         INIT_LIST_HEAD(&link->children);
         INIT_LIST_HEAD(&link->link);
         link->pdev = pdev;
+       link->downstream = pci_function_0(pdev->subordinate);
  
         /*
          * Root Ports and PCI/PCI-X to PCIe Bridges are roots of PCIe
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c

index f754453fe754e985361cb49cee0bddf54d752443..673683660b5c70567d7c49cd091c5c8ecf088655 100644 (file)
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2174,6 +2174,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005d, quirk_blacklist_vpd);
  DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005f, quirk_blacklist_vpd);
  DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, PCI_ANY_ID,
                 quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_QLOGIC, 0x2261, quirk_blacklist_vpd);
  
  /*
   * For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the
diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig

index dc5277ad1b5a7a5a7b27a7329f0f6382587e6050..005cadb7a3f8e9076700bf24d280383a0d321c85 100644 (file)
--- a/drivers/phy/Kconfig
+++ b/drivers/phy/Kconfig
@@ -449,6 +449,7 @@ config PHY_QCOM_UFS
  config PHY_QCOM_USB_HS
         tristate "Qualcomm USB HS PHY module"
         depends on USB_ULPI_BUS
+       depends on EXTCON || !EXTCON # if EXTCON=m, this cannot be built-in
         select GENERIC_PHY
         help
           Support for the USB high-speed ULPI compliant phy on Qualcomm
@@ -510,12 +511,4 @@ config PHY_MESON8B_USB2
           and GXBB SoCs.
           If unsure, say N.
  
-config PHY_NSP_USB3
-       tristate "Broadcom NorthStar plus USB3 PHY driver"
-       depends on OF && (ARCH_BCM_NSP || COMPILE_TEST)
-       select GENERIC_PHY
-       default ARCH_BCM_NSP
-       help
-         Enable this to support the Broadcom Northstar plus USB3 PHY.
-         If unsure, say N.
  endmenu
diff --git a/drivers/phy/Makefile b/drivers/phy/Makefile

index e7b0feb1e125a58c25fb96e784deae62bfb891e7..dd8f3b5d2918cd91bd48592b6771ca539321b086 100644 (file)
--- a/drivers/phy/Makefile
+++ b/drivers/phy/Makefile
@@ -62,4 +62,3 @@ obj-$(CONFIG_PHY_CYGNUS_PCIE)         += phy-bcm-cygnus-pcie.o
  obj-$(CONFIG_ARCH_TEGRA) += tegra/
  obj-$(CONFIG_PHY_NS2_PCIE)             += phy-bcm-ns2-pcie.o
  obj-$(CONFIG_PHY_MESON8B_USB2)         += phy-meson8b-usb2.o
-obj-$(CONFIG_PHY_NSP_USB3)             += phy-bcm-nsp-usb3.o
diff --git a/drivers/phy/phy-bcm-nsp-usb3.c b/drivers/phy/phy-bcm-nsp-usb3.c

deleted file mode 100644 (file)

index 49024ea..0000000
--- a/drivers/phy/phy-bcm-nsp-usb3.c
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Copyright (C) 2016 Broadcom
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/delay.h>
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/mfd/syscon.h>
-#include <linux/mdio.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/phy/phy.h>
-#include <linux/regmap.h>
-
-#define NSP_USB3_RST_CTRL_OFFSET       0x3f8
-
-/* mdio reg access */
-#define NSP_USB3_PHY_BASE_ADDR_REG     0x1f
-
-#define NSP_USB3_PHY_PLL30_BLOCK       0x8000
-#define NSP_USB3_PLL_CONTROL           0x01
-#define NSP_USB3_PLLA_CONTROL0         0x0a
-#define NSP_USB3_PLLA_CONTROL1         0x0b
-
-#define NSP_USB3_PHY_TX_PMD_BLOCK      0x8040
-#define NSP_USB3_TX_PMD_CONTROL1       0x01
-
-#define NSP_USB3_PHY_PIPE_BLOCK                0x8060
-#define NSP_USB3_LFPS_CMP              0x02
-#define NSP_USB3_LFPS_DEGLITCH         0x03
-
-struct nsp_usb3_phy {
-       struct regmap *usb3_ctrl;
-       struct phy *phy;
-       struct mdio_device *mdiodev;
-};
-
-static int nsp_usb3_phy_init(struct phy *phy)
-{
-       struct nsp_usb3_phy *iphy = phy_get_drvdata(phy);
-       struct mii_bus *bus = iphy->mdiodev->bus;
-       int addr = iphy->mdiodev->addr;
-       u32 data;
-       int rc;
-
-       rc = regmap_read(iphy->usb3_ctrl, 0, &data);
-       if (rc)
-               return rc;
-       data |= 1;
-       rc = regmap_write(iphy->usb3_ctrl, 0, data);
-       if (rc)
-               return rc;
-
-       rc = regmap_write(iphy->usb3_ctrl, NSP_USB3_RST_CTRL_OFFSET, 1);
-       if (rc)
-               return rc;
-
-       rc = mdiobus_write(bus, addr, NSP_USB3_PHY_BASE_ADDR_REG,
-                          NSP_USB3_PHY_PLL30_BLOCK);
-       if (rc)
-               return rc;
-
-       rc = mdiobus_write(bus, addr, NSP_USB3_PLL_CONTROL, 0x1000);
-       if (rc)
-               return rc;
-
-       rc = mdiobus_write(bus, addr, NSP_USB3_PLLA_CONTROL0, 0x6400);
-       if (rc)
-               return rc;
-
-       rc = mdiobus_write(bus, addr, NSP_USB3_PLLA_CONTROL1, 0xc000);
-       if (rc)
-               return rc;
-
-       rc = mdiobus_write(bus, addr, NSP_USB3_PLLA_CONTROL1, 0x8000);
-       if (rc)
-               return rc;
-
-       rc = regmap_write(iphy->usb3_ctrl, NSP_USB3_RST_CTRL_OFFSET, 0);
-       if (rc)
-               return rc;
-
-       rc = mdiobus_write(bus, addr, NSP_USB3_PLL_CONTROL, 0x9000);
-       if (rc)
-               return rc;
-
-       rc = mdiobus_write(bus, addr, NSP_USB3_PHY_BASE_ADDR_REG,
-                          NSP_USB3_PHY_PIPE_BLOCK);
-       if (rc)
-               return rc;
-
-       rc = mdiobus_write(bus, addr, NSP_USB3_LFPS_CMP, 0xf30d);
-       if (rc)
-               return rc;
-
-       rc = mdiobus_write(bus, addr, NSP_USB3_LFPS_DEGLITCH, 0x6302);
-       if (rc)
-               return rc;
-
-       rc = mdiobus_write(bus, addr, NSP_USB3_PHY_BASE_ADDR_REG,
-                          NSP_USB3_PHY_TX_PMD_BLOCK);
-       if (rc)
-               return rc;
-
-       rc = mdiobus_write(bus, addr, NSP_USB3_TX_PMD_CONTROL1, 0x1003);
-
-       return rc;
-}
-
-static struct phy_ops nsp_usb3_phy_ops = {
-       .init   = nsp_usb3_phy_init,
-       .owner  = THIS_MODULE,
-};
-
-static int nsp_usb3_phy_probe(struct mdio_device *mdiodev)
-{
-       struct device *dev = &mdiodev->dev;
-       struct phy_provider *provider;
-       struct nsp_usb3_phy *iphy;
-
-       iphy = devm_kzalloc(dev, sizeof(*iphy), GFP_KERNEL);
-       if (!iphy)
-               return -ENOMEM;
-       iphy->mdiodev = mdiodev;
-
-       iphy->usb3_ctrl = syscon_regmap_lookup_by_phandle(dev->of_node,
-                                                "usb3-ctrl-syscon");
-       if (IS_ERR(iphy->usb3_ctrl))
-               return PTR_ERR(iphy->usb3_ctrl);
-
-       iphy->phy = devm_phy_create(dev, dev->of_node, &nsp_usb3_phy_ops);
-       if (IS_ERR(iphy->phy)) {
-               dev_err(dev, "failed to create PHY\n");
-               return PTR_ERR(iphy->phy);
-       }
-
-       phy_set_drvdata(iphy->phy, iphy);
-
-       provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
-       if (IS_ERR(provider)) {
-               dev_err(dev, "could not register PHY provider\n");
-               return PTR_ERR(provider);
-       }
-
-       return 0;
-}
-
-static const struct of_device_id nsp_usb3_phy_of_match[] = {
-       {.compatible = "brcm,nsp-usb3-phy",},
-       { /* sentinel */ }
-};
-
-static struct mdio_driver nsp_usb3_phy_driver = {
-       .mdiodrv = {
-               .driver = {
-                       .name = "nsp-usb3-phy",
-                       .of_match_table = nsp_usb3_phy_of_match,
-               },
-       },
-       .probe = nsp_usb3_phy_probe,
-};
-
-mdio_module_driver(nsp_usb3_phy_driver);
-
-MODULE_DESCRIPTION("Broadcom NSP USB3 PHY driver");
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Yendapally Reddy Dhananjaya Reddy <yendapally.reddy@broadcom.com");
diff --git a/drivers/phy/phy-exynos-pcie.c b/drivers/phy/phy-exynos-pcie.c

index 4f60b83641d5952d55edd37137283114ea9ba836..60baf25d98e25eb3e716979a54223dd3d03e198d 100644 (file)
--- a/drivers/phy/phy-exynos-pcie.c
+++ b/drivers/phy/phy-exynos-pcie.c
@@ -254,8 +254,8 @@ static int exynos_pcie_phy_probe(struct platform_device *pdev)
  
         res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
         exynos_phy->blk_base = devm_ioremap_resource(dev, res);
-       if (IS_ERR(exynos_phy->phy_base))
-               return PTR_ERR(exynos_phy->phy_base);
+       if (IS_ERR(exynos_phy->blk_base))
+               return PTR_ERR(exynos_phy->blk_base);
  
         exynos_phy->drv_data = drv_data;
  
diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c

index d69046537b75f31d0b1c881686677967865001e0..32822b0d9cd0f03f76eb5b96307c8b6f0ea1558a 100644 (file)
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -2010,29 +2010,57 @@ out_err:
         return ERR_PTR(ret);
  }
  
-static int pinctrl_create_and_start(struct pinctrl_dev *pctldev)
+static int pinctrl_claim_hogs(struct pinctrl_dev *pctldev)
  {
         pctldev->p = create_pinctrl(pctldev->dev, pctldev);
-       if (!IS_ERR(pctldev->p)) {
-               kref_get(&pctldev->p->users);
-               pctldev->hog_default =
-                       pinctrl_lookup_state(pctldev->p, PINCTRL_STATE_DEFAULT);
-               if (IS_ERR(pctldev->hog_default)) {
-                       dev_dbg(pctldev->dev,
-                               "failed to lookup the default state\n");
-               } else {
-                       if (pinctrl_select_state(pctldev->p,
-                                               pctldev->hog_default))
-                               dev_err(pctldev->dev,
-                                       "failed to select default state\n");
-               }
+       if (PTR_ERR(pctldev->p) == -ENODEV) {
+               dev_dbg(pctldev->dev, "no hogs found\n");
  
-               pctldev->hog_sleep =
-                       pinctrl_lookup_state(pctldev->p,
-                                                   PINCTRL_STATE_SLEEP);
-               if (IS_ERR(pctldev->hog_sleep))
-                       dev_dbg(pctldev->dev,
-                               "failed to lookup the sleep state\n");
+               return 0;
+       }
+
+       if (IS_ERR(pctldev->p)) {
+               dev_err(pctldev->dev, "error claiming hogs: %li\n",
+                       PTR_ERR(pctldev->p));
+
+               return PTR_ERR(pctldev->p);
+       }
+
+       kref_get(&pctldev->p->users);
+       pctldev->hog_default =
+               pinctrl_lookup_state(pctldev->p, PINCTRL_STATE_DEFAULT);
+       if (IS_ERR(pctldev->hog_default)) {
+               dev_dbg(pctldev->dev,
+                       "failed to lookup the default state\n");
+       } else {
+               if (pinctrl_select_state(pctldev->p,
+                                        pctldev->hog_default))
+                       dev_err(pctldev->dev,
+                               "failed to select default state\n");
+       }
+
+       pctldev->hog_sleep =
+               pinctrl_lookup_state(pctldev->p,
+                                    PINCTRL_STATE_SLEEP);
+       if (IS_ERR(pctldev->hog_sleep))
+               dev_dbg(pctldev->dev,
+                       "failed to lookup the sleep state\n");
+
+       return 0;
+}
+
+int pinctrl_enable(struct pinctrl_dev *pctldev)
+{
+       int error;
+
+       error = pinctrl_claim_hogs(pctldev);
+       if (error) {
+               dev_err(pctldev->dev, "could not claim hogs: %i\n",
+                       error);
+               mutex_destroy(&pctldev->mutex);
+               kfree(pctldev);
+
+               return error;
         }
  
         mutex_lock(&pinctrldev_list_mutex);
@@ -2043,6 +2071,7 @@ static int pinctrl_create_and_start(struct pinctrl_dev *pctldev)
  
         return 0;
  }
+EXPORT_SYMBOL_GPL(pinctrl_enable);
  
  /**
   * pinctrl_register() - register a pin controller device
@@ -2065,25 +2094,30 @@ struct pinctrl_dev *pinctrl_register(struct pinctrl_desc *pctldesc,
         if (IS_ERR(pctldev))
                 return pctldev;
  
-       error = pinctrl_create_and_start(pctldev);
-       if (error) {
-               mutex_destroy(&pctldev->mutex);
-               kfree(pctldev);
-
+       error = pinctrl_enable(pctldev);
+       if (error)
                 return ERR_PTR(error);
-       }
  
         return pctldev;
  
  }
  EXPORT_SYMBOL_GPL(pinctrl_register);
  
+/**
+ * pinctrl_register_and_init() - register and init pin controller device
+ * @pctldesc: descriptor for this pin controller
+ * @dev: parent device for this pin controller
+ * @driver_data: private pin controller data for this pin controller
+ * @pctldev: pin controller device
+ *
+ * Note that pinctrl_enable() still needs to be manually called after
+ * this once the driver is ready.
+ */
  int pinctrl_register_and_init(struct pinctrl_desc *pctldesc,
                               struct device *dev, void *driver_data,
                               struct pinctrl_dev **pctldev)
  {
         struct pinctrl_dev *p;
-       int error;
  
         p = pinctrl_init_controller(pctldesc, dev, driver_data);
         if (IS_ERR(p))
@@ -2097,15 +2131,6 @@ int pinctrl_register_and_init(struct pinctrl_desc *pctldesc,
          */
         *pctldev = p;
  
-       error = pinctrl_create_and_start(p);
-       if (error) {
-               mutex_destroy(&p->mutex);
-               kfree(p);
-               *pctldev = NULL;
-
-               return error;
-       }
-
         return 0;
  }
  EXPORT_SYMBOL_GPL(pinctrl_register_and_init);
diff --git a/drivers/pinctrl/freescale/pinctrl-imx.c b/drivers/pinctrl/freescale/pinctrl-imx.c

index a7ace9e1ad81f24f571d34ca155ba5d03c5f9a36..74bd90dfd7b1650acde5dc19f369b2cfc897362b 100644 (file)
--- a/drivers/pinctrl/freescale/pinctrl-imx.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx.c
@@ -790,7 +790,7 @@ int imx_pinctrl_probe(struct platform_device *pdev,
  
         dev_info(&pdev->dev, "initialized IMX pinctrl driver\n");
  
-       return 0;
+       return pinctrl_enable(ipctl->pctl);
  
  free:
         imx_free_resources(ipctl);
diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c

index f80134e3e0b68aba9f8b94771702cdd3df83a678..9ff790174906e46962ec9b9fa00f4f04de14aa18 100644 (file)
--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
+++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
@@ -13,6 +13,7 @@
   * published by the Free Software Foundation.
   */
  
+#include <linux/dmi.h>
  #include <linux/kernel.h>
  #include <linux/module.h>
  #include <linux/init.h>
@@ -1524,10 +1525,31 @@ static void chv_gpio_irq_handler(struct irq_desc *desc)
         chained_irq_exit(chip, desc);
  }
  
+/*
+ * Certain machines seem to hardcode Linux IRQ numbers in their ACPI
+ * tables. Since we leave GPIOs that are not capable of generating
+ * interrupts out of the irqdomain the numbering will be different and
+ * cause devices using the hardcoded IRQ numbers fail. In order not to
+ * break such machines we will only mask pins from irqdomain if the machine
+ * is not listed below.
+ */
+static const struct dmi_system_id chv_no_valid_mask[] = {
+       {
+               /* See https://bugzilla.kernel.org/show_bug.cgi?id=194945 */
+               .ident = "Acer Chromebook (CYAN)",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Edgar"),
+                       DMI_MATCH(DMI_BIOS_DATE, "05/21/2016"),
+               },
+       }, { /* sentinel: terminates the dmi_check_system() walk */ }
+};
+
  static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
  {
         const struct chv_gpio_pinrange *range;
         struct gpio_chip *chip = &pctrl->chip;
+       bool need_valid_mask = !dmi_check_system(chv_no_valid_mask);
         int ret, i, offset;
  
         *chip = chv_gpio_chip;
@@ -1536,7 +1558,7 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
         chip->label = dev_name(pctrl->dev);
         chip->parent = pctrl->dev;
         chip->base = -1;
-       chip->irq_need_valid_mask = true;
+       chip->irq_need_valid_mask = need_valid_mask;
  
         ret = devm_gpiochip_add_data(pctrl->dev, chip, pctrl);
         if (ret) {
@@ -1567,7 +1589,7 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
                 intsel &= CHV_PADCTRL0_INTSEL_MASK;
                 intsel >>= CHV_PADCTRL0_INTSEL_SHIFT;
  
-               if (intsel >= pctrl->community->nirqs)
+               if (need_valid_mask && intsel >= pctrl->community->nirqs)
                         clear_bit(i, chip->irq_valid_mask);
         }
  
diff --git a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c

index 7671424d46cbe0a5628caee0615cde2d79b8d478..31a3a98d067caa4440a25e901eba69bdf3b0e862 100644 (file)
--- a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c
+++ b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c
@@ -667,11 +667,11 @@ static const char * const uart_ao_b_groups[] = {
  };
  
  static const char * const i2c_ao_groups[] = {
-       "i2c_sdk_ao", "i2c_sda_ao",
+       "i2c_sck_ao", "i2c_sda_ao",
  };
  
  static const char * const i2c_slave_ao_groups[] = {
-       "i2c_slave_sdk_ao", "i2c_slave_sda_ao",
+       "i2c_slave_sck_ao", "i2c_slave_sda_ao",
  };
  
  static const char * const remote_input_ao_groups[] = {
diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c

index 8b2d45e85baea612451812fb7cc82b88b75d8d9c..9c267dcda094a224888c69ed5aae8e143fc2a63a 100644 (file)
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -1781,7 +1781,7 @@ static int pcs_probe(struct platform_device *pdev)
         dev_info(pcs->dev, "%i pins at pa %p size %u\n",
                  pcs->desc.npins, pcs->base, pcs->size);
  
-       return 0;
+       return pinctrl_enable(pcs->pctl);
  
  free:
         pcs_free_resources(pcs);
diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c

index 676efcc032d26178718c601116a1a387622760c5..3ae8066bc1279c1c23dfc7144215fb48879efae8 100644 (file)
--- a/drivers/pinctrl/pinctrl-st.c
+++ b/drivers/pinctrl/pinctrl-st.c
@@ -1285,6 +1285,22 @@ static void st_gpio_irq_unmask(struct irq_data *d)
         writel(BIT(d->hwirq), bank->base + REG_PIO_SET_PMASK);
  }
  
+static int st_gpio_irq_request_resources(struct irq_data *d)
+{
+       struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+
+       st_gpio_direction_input(gc, d->hwirq);
+
+       return gpiochip_lock_as_irq(gc, d->hwirq);
+}
+
+static void st_gpio_irq_release_resources(struct irq_data *d)
+{
+       struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+
+       gpiochip_unlock_as_irq(gc, d->hwirq);
+}
+
  static int st_gpio_irq_set_type(struct irq_data *d, unsigned type)
  {
         struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
@@ -1438,12 +1454,14 @@ static struct gpio_chip st_gpio_template = {
  };
  
  static struct irq_chip st_gpio_irqchip = {
-       .name           = "GPIO",
-       .irq_disable    = st_gpio_irq_mask,
-       .irq_mask       = st_gpio_irq_mask,
-       .irq_unmask     = st_gpio_irq_unmask,
-       .irq_set_type   = st_gpio_irq_set_type,
-       .flags          = IRQCHIP_SKIP_SET_WAKE,
+       .name                   = "GPIO",
+       .irq_request_resources  = st_gpio_irq_request_resources,
+       .irq_release_resources  = st_gpio_irq_release_resources,
+       .irq_disable            = st_gpio_irq_mask,
+       .irq_mask               = st_gpio_irq_mask,
+       .irq_unmask             = st_gpio_irq_unmask,
+       .irq_set_type           = st_gpio_irq_set_type,
+       .flags                  = IRQCHIP_SKIP_SET_WAKE,
  };
  
  static int st_gpiolib_register_bank(struct st_pinctrl *info,
diff --git a/drivers/pinctrl/qcom/pinctrl-ipq4019.c b/drivers/pinctrl/qcom/pinctrl-ipq4019.c

index b68ae424cee247d51541d4ceb36aaa8c8c9132f6..743d1f458205fac1a5c26c683fdc38f12e401095 100644 (file)
--- a/drivers/pinctrl/qcom/pinctrl-ipq4019.c
+++ b/drivers/pinctrl/qcom/pinctrl-ipq4019.c
@@ -405,6 +405,36 @@ static const struct msm_pingroup ipq4019_groups[] = {
         PINGROUP(67, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
         PINGROUP(68, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
         PINGROUP(69, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(70, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(71, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(72, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(73, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(74, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(75, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(76, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(77, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(78, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(79, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(80, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(81, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(82, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(83, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(84, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(85, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(86, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(87, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(88, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(89, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(90, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(91, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(92, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(93, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(94, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(95, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(96, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(97, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(98, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+       PINGROUP(99, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
  };
  
  static const struct msm_pinctrl_soc_data ipq4019_pinctrl = {
diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c

index f8e9e1c2b2f6f45078aa2fe9a3356b450d60fd58..273badd925611aa86e19e3a4aebc6691cf812fcb 100644 (file)
--- a/drivers/pinctrl/qcom/pinctrl-msm.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm.c
@@ -422,6 +422,20 @@ static int msm_gpio_direction_output(struct gpio_chip *chip, unsigned offset, in
         return 0;
  }
  
+static int msm_gpio_get_direction(struct gpio_chip *chip, unsigned int offset)
+{
+       struct msm_pinctrl *pctrl = gpiochip_get_data(chip);
+       const struct msm_pingroup *g;
+       u32 val;
+
+       g = &pctrl->soc->groups[offset];
+
+       val = readl(pctrl->regs + g->ctl_reg);
+
+       /* 0 = output, 1 = input */
+       return val & BIT(g->oe_bit) ? 0 : 1;
+}
+
  static int msm_gpio_get(struct gpio_chip *chip, unsigned offset)
  {
         const struct msm_pingroup *g;
@@ -510,6 +524,7 @@ static void msm_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip)
  static struct gpio_chip msm_gpio_template = {
         .direction_input  = msm_gpio_direction_input,
         .direction_output = msm_gpio_direction_output,
+       .get_direction    = msm_gpio_get_direction,
         .get              = msm_gpio_get,
         .set              = msm_gpio_set,
         .request          = gpiochip_generic_request,
@@ -594,10 +609,6 @@ static void msm_gpio_irq_unmask(struct irq_data *d)
  
         raw_spin_lock_irqsave(&pctrl->lock, flags);
  
-       val = readl(pctrl->regs + g->intr_status_reg);
-       val &= ~BIT(g->intr_status_bit);
-       writel(val, pctrl->regs + g->intr_status_reg);
-
         val = readl(pctrl->regs + g->intr_cfg_reg);
         val |= BIT(g->intr_enable_bit);
         writel(val, pctrl->regs + g->intr_cfg_reg);
diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.c b/drivers/pinctrl/samsung/pinctrl-exynos.c

index f9b49967f512b52cec5447ae0baee9e146745d1b..63e51b56a22a94c528489a8d18aaf38795f0072e 100644 (file)
--- a/drivers/pinctrl/samsung/pinctrl-exynos.c
+++ b/drivers/pinctrl/samsung/pinctrl-exynos.c
@@ -1468,82 +1468,82 @@ const struct samsung_pin_ctrl exynos5420_pin_ctrl[] __initconst = {
  
  /* pin banks of exynos5433 pin-controller - ALIVE */
  static const struct samsung_pin_bank_data exynos5433_pin_banks0[] __initconst = {
-       EXYNOS_PIN_BANK_EINTW(8, 0x000, "gpa0", 0x00),
-       EXYNOS_PIN_BANK_EINTW(8, 0x020, "gpa1", 0x04),
-       EXYNOS_PIN_BANK_EINTW(8, 0x040, "gpa2", 0x08),
-       EXYNOS_PIN_BANK_EINTW(8, 0x060, "gpa3", 0x0c),
-       EXYNOS_PIN_BANK_EINTW_EXT(8, 0x020, "gpf1", 0x1004, 1),
-       EXYNOS_PIN_BANK_EINTW_EXT(4, 0x040, "gpf2", 0x1008, 1),
-       EXYNOS_PIN_BANK_EINTW_EXT(4, 0x060, "gpf3", 0x100c, 1),
-       EXYNOS_PIN_BANK_EINTW_EXT(8, 0x080, "gpf4", 0x1010, 1),
-       EXYNOS_PIN_BANK_EINTW_EXT(8, 0x0a0, "gpf5", 0x1014, 1),
+       EXYNOS5433_PIN_BANK_EINTW(8, 0x000, "gpa0", 0x00),
+       EXYNOS5433_PIN_BANK_EINTW(8, 0x020, "gpa1", 0x04),
+       EXYNOS5433_PIN_BANK_EINTW(8, 0x040, "gpa2", 0x08),
+       EXYNOS5433_PIN_BANK_EINTW(8, 0x060, "gpa3", 0x0c),
+       EXYNOS5433_PIN_BANK_EINTW_EXT(8, 0x020, "gpf1", 0x1004, 1),
+       EXYNOS5433_PIN_BANK_EINTW_EXT(4, 0x040, "gpf2", 0x1008, 1),
+       EXYNOS5433_PIN_BANK_EINTW_EXT(4, 0x060, "gpf3", 0x100c, 1),
+       EXYNOS5433_PIN_BANK_EINTW_EXT(8, 0x080, "gpf4", 0x1010, 1),
+       EXYNOS5433_PIN_BANK_EINTW_EXT(8, 0x0a0, "gpf5", 0x1014, 1),
  };
  
  /* pin banks of exynos5433 pin-controller - AUD */
  static const struct samsung_pin_bank_data exynos5433_pin_banks1[] __initconst = {
-       EXYNOS_PIN_BANK_EINTG(7, 0x000, "gpz0", 0x00),
-       EXYNOS_PIN_BANK_EINTG(4, 0x020, "gpz1", 0x04),
+       EXYNOS5433_PIN_BANK_EINTG(7, 0x000, "gpz0", 0x00),
+       EXYNOS5433_PIN_BANK_EINTG(4, 0x020, "gpz1", 0x04),
  };
  
  /* pin banks of exynos5433 pin-controller - CPIF */
  static const struct samsung_pin_bank_data exynos5433_pin_banks2[] __initconst = {
-       EXYNOS_PIN_BANK_EINTG(2, 0x000, "gpv6", 0x00),
+       EXYNOS5433_PIN_BANK_EINTG(2, 0x000, "gpv6", 0x00),
  };
  
  /* pin banks of exynos5433 pin-controller - eSE */
  static const struct samsung_pin_bank_data exynos5433_pin_banks3[] __initconst = {
-       EXYNOS_PIN_BANK_EINTG(3, 0x000, "gpj2", 0x00),
+       EXYNOS5433_PIN_BANK_EINTG(3, 0x000, "gpj2", 0x00),
  };
  
  /* pin banks of exynos5433 pin-controller - FINGER */
  static const struct samsung_pin_bank_data exynos5433_pin_banks4[] __initconst = {
-       EXYNOS_PIN_BANK_EINTG(4, 0x000, "gpd5", 0x00),
+       EXYNOS5433_PIN_BANK_EINTG(4, 0x000, "gpd5", 0x00),
  };
  
  /* pin banks of exynos5433 pin-controller - FSYS */
  static const struct samsung_pin_bank_data exynos5433_pin_banks5[] __initconst = {
-       EXYNOS_PIN_BANK_EINTG(6, 0x000, "gph1", 0x00),
-       EXYNOS_PIN_BANK_EINTG(7, 0x020, "gpr4", 0x04),
-       EXYNOS_PIN_BANK_EINTG(5, 0x040, "gpr0", 0x08),
-       EXYNOS_PIN_BANK_EINTG(8, 0x060, "gpr1", 0x0c),
-       EXYNOS_PIN_BANK_EINTG(2, 0x080, "gpr2", 0x10),
-       EXYNOS_PIN_BANK_EINTG(8, 0x0a0, "gpr3", 0x14),
+       EXYNOS5433_PIN_BANK_EINTG(6, 0x000, "gph1", 0x00),
+       EXYNOS5433_PIN_BANK_EINTG(7, 0x020, "gpr4", 0x04),
+       EXYNOS5433_PIN_BANK_EINTG(5, 0x040, "gpr0", 0x08),
+       EXYNOS5433_PIN_BANK_EINTG(8, 0x060, "gpr1", 0x0c),
+       EXYNOS5433_PIN_BANK_EINTG(2, 0x080, "gpr2", 0x10),
+       EXYNOS5433_PIN_BANK_EINTG(8, 0x0a0, "gpr3", 0x14),
  };
  
  /* pin banks of exynos5433 pin-controller - IMEM */
  static const struct samsung_pin_bank_data exynos5433_pin_banks6[] __initconst = {
-       EXYNOS_PIN_BANK_EINTG(8, 0x000, "gpf0", 0x00),
+       EXYNOS5433_PIN_BANK_EINTG(8, 0x000, "gpf0", 0x00),
  };
  
  /* pin banks of exynos5433 pin-controller - NFC */
  static const struct samsung_pin_bank_data exynos5433_pin_banks7[] __initconst = {
-       EXYNOS_PIN_BANK_EINTG(3, 0x000, "gpj0", 0x00),
+       EXYNOS5433_PIN_BANK_EINTG(3, 0x000, "gpj0", 0x00),
  };
  
  /* pin banks of exynos5433 pin-controller - PERIC */
  static const struct samsung_pin_bank_data exynos5433_pin_banks8[] __initconst = {
-       EXYNOS_PIN_BANK_EINTG(6, 0x000, "gpv7", 0x00),
-       EXYNOS_PIN_BANK_EINTG(5, 0x020, "gpb0", 0x04),
-       EXYNOS_PIN_BANK_EINTG(8, 0x040, "gpc0", 0x08),
-       EXYNOS_PIN_BANK_EINTG(2, 0x060, "gpc1", 0x0c),
-       EXYNOS_PIN_BANK_EINTG(6, 0x080, "gpc2", 0x10),
-       EXYNOS_PIN_BANK_EINTG(8, 0x0a0, "gpc3", 0x14),
-       EXYNOS_PIN_BANK_EINTG(2, 0x0c0, "gpg0", 0x18),
-       EXYNOS_PIN_BANK_EINTG(4, 0x0e0, "gpd0", 0x1c),
-       EXYNOS_PIN_BANK_EINTG(6, 0x100, "gpd1", 0x20),
-       EXYNOS_PIN_BANK_EINTG(8, 0x120, "gpd2", 0x24),
-       EXYNOS_PIN_BANK_EINTG(5, 0x140, "gpd4", 0x28),
-       EXYNOS_PIN_BANK_EINTG(2, 0x160, "gpd8", 0x2c),
-       EXYNOS_PIN_BANK_EINTG(7, 0x180, "gpd6", 0x30),
-       EXYNOS_PIN_BANK_EINTG(3, 0x1a0, "gpd7", 0x34),
-       EXYNOS_PIN_BANK_EINTG(5, 0x1c0, "gpg1", 0x38),
-       EXYNOS_PIN_BANK_EINTG(2, 0x1e0, "gpg2", 0x3c),
-       EXYNOS_PIN_BANK_EINTG(8, 0x200, "gpg3", 0x40),
+       EXYNOS5433_PIN_BANK_EINTG(6, 0x000, "gpv7", 0x00),
+       EXYNOS5433_PIN_BANK_EINTG(5, 0x020, "gpb0", 0x04),
+       EXYNOS5433_PIN_BANK_EINTG(8, 0x040, "gpc0", 0x08),
+       EXYNOS5433_PIN_BANK_EINTG(2, 0x060, "gpc1", 0x0c),
+       EXYNOS5433_PIN_BANK_EINTG(6, 0x080, "gpc2", 0x10),
+       EXYNOS5433_PIN_BANK_EINTG(8, 0x0a0, "gpc3", 0x14),
+       EXYNOS5433_PIN_BANK_EINTG(2, 0x0c0, "gpg0", 0x18),
+       EXYNOS5433_PIN_BANK_EINTG(4, 0x0e0, "gpd0", 0x1c),
+       EXYNOS5433_PIN_BANK_EINTG(6, 0x100, "gpd1", 0x20),
+       EXYNOS5433_PIN_BANK_EINTG(8, 0x120, "gpd2", 0x24),
+       EXYNOS5433_PIN_BANK_EINTG(5, 0x140, "gpd4", 0x28),
+       EXYNOS5433_PIN_BANK_EINTG(2, 0x160, "gpd8", 0x2c),
+       EXYNOS5433_PIN_BANK_EINTG(7, 0x180, "gpd6", 0x30),
+       EXYNOS5433_PIN_BANK_EINTG(3, 0x1a0, "gpd7", 0x34),
+       EXYNOS5433_PIN_BANK_EINTG(5, 0x1c0, "gpg1", 0x38),
+       EXYNOS5433_PIN_BANK_EINTG(2, 0x1e0, "gpg2", 0x3c),
+       EXYNOS5433_PIN_BANK_EINTG(8, 0x200, "gpg3", 0x40),
  };
  
  /* pin banks of exynos5433 pin-controller - TOUCH */
  static const struct samsung_pin_bank_data exynos5433_pin_banks9[] __initconst = {
-       EXYNOS_PIN_BANK_EINTG(3, 0x000, "gpj1", 0x00),
+       EXYNOS5433_PIN_BANK_EINTG(3, 0x000, "gpj1", 0x00),
  };
  
  /*
diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.h b/drivers/pinctrl/samsung/pinctrl-exynos.h

index a473092fb8d2362f11a1a48beb7f545b7c9f25ce..cd046eb7d705682fae1293abf432b9d04465328f 100644 (file)
--- a/drivers/pinctrl/samsung/pinctrl-exynos.h
+++ b/drivers/pinctrl/samsung/pinctrl-exynos.h
@@ -79,17 +79,6 @@
                 .name           = id                    \
         }
  
-#define EXYNOS_PIN_BANK_EINTW_EXT(pins, reg, id, offs, pctl_idx) \
-       {                                               \
-               .type           = &bank_type_alive,     \
-               .pctl_offset    = reg,                  \
-               .nr_pins        = pins,                 \
-               .eint_type      = EINT_TYPE_WKUP,       \
-               .eint_offset    = offs,                 \
-               .name           = id,                   \
-               .pctl_res_idx   = pctl_idx,             \
-       }                                               \
-
  #define EXYNOS5433_PIN_BANK_EINTG(pins, reg, id, offs)         \
         {                                                       \
                 .type           = &exynos5433_bank_type_off,    \
diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c

index f9ddba7decc18563916d5adc118958fad46f0073..d7aa22cff480ed63d73c9e1e6f8fbf22d0eda290 100644 (file)
--- a/drivers/pinctrl/samsung/pinctrl-samsung.c
+++ b/drivers/pinctrl/samsung/pinctrl-samsung.c
@@ -988,9 +988,16 @@ samsung_pinctrl_get_soc_data(struct samsung_pinctrl_drv_data *d,
  
         for (i = 0; i < ctrl->nr_ext_resources + 1; i++) {
                 res = platform_get_resource(pdev, IORESOURCE_MEM, i);
-               virt_base[i] = devm_ioremap_resource(&pdev->dev, res);
-               if (IS_ERR(virt_base[i]))
-                       return ERR_CAST(virt_base[i]);
+               if (!res) {
+                       dev_err(&pdev->dev, "failed to get mem%d resource\n", i);
+                       return ERR_PTR(-EINVAL);
+               }
+               virt_base[i] = devm_ioremap(&pdev->dev, res->start,
+                                               resource_size(res));
+               if (!virt_base[i]) {
+                       dev_err(&pdev->dev, "failed to ioremap %pR\n", res);
+                       return ERR_PTR(-EIO);
+               }
         }
  
         bank = d->pin_banks;
diff --git a/drivers/pinctrl/sh-pfc/pinctrl.c b/drivers/pinctrl/sh-pfc/pinctrl.c

index 08150a321be6f1e2df11fce59745430b1ead1213..a70157f0acf4ecf7b55fb89a8bb346b537bb1903 100644 (file)
--- a/drivers/pinctrl/sh-pfc/pinctrl.c
+++ b/drivers/pinctrl/sh-pfc/pinctrl.c
@@ -816,6 +816,13 @@ int sh_pfc_register_pinctrl(struct sh_pfc *pfc)
         pmx->pctl_desc.pins = pmx->pins;
         pmx->pctl_desc.npins = pfc->info->nr_pins;
  
-       return devm_pinctrl_register_and_init(pfc->dev, &pmx->pctl_desc, pmx,
-                                             &pmx->pctl);
+       ret = devm_pinctrl_register_and_init(pfc->dev, &pmx->pctl_desc, pmx,
+                                            &pmx->pctl);
+       if (ret) {
+               dev_err(pfc->dev, "could not register: %i\n", ret);
+
+               return ret;
+       }
+
+       return pinctrl_enable(pmx->pctl);
  }
diff --git a/drivers/pinctrl/ti/Kconfig b/drivers/pinctrl/ti/Kconfig

index 815a88673d38193b8c1313265d46b536738b3586..542077069391b9b262192a502809708476072e7e 100644 (file)
--- a/drivers/pinctrl/ti/Kconfig
+++ b/drivers/pinctrl/ti/Kconfig
@@ -1,6 +1,6 @@
  config PINCTRL_TI_IODELAY
         tristate "TI IODelay Module pinconf driver"
-       depends on OF
+       depends on OF && (SOC_DRA7XX || COMPILE_TEST)
         select GENERIC_PINCTRL_GROUPS
         select GENERIC_PINMUX_FUNCTIONS
         select GENERIC_PINCONF
diff --git a/drivers/pinctrl/ti/pinctrl-ti-iodelay.c b/drivers/pinctrl/ti/pinctrl-ti-iodelay.c

index 717e3404900ca414325086268e55d7ee9f5eb42f..362c50918c13a6252e7c363c2474a26a437605c8 100644 (file)
--- a/drivers/pinctrl/ti/pinctrl-ti-iodelay.c
+++ b/drivers/pinctrl/ti/pinctrl-ti-iodelay.c
@@ -893,6 +893,8 @@ static int ti_iodelay_probe(struct platform_device *pdev)
  
         platform_set_drvdata(pdev, iod);
  
+       return pinctrl_enable(iod->pctl);
+
  exit_out:
         of_node_put(np);
         return ret;
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c

index 77a0236ee781dd06949ed288005bb45b99396287..83f8864fa76ac5a26a1947902f691877f0510c56 100644 (file)
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c
@@ -390,22 +390,22 @@ static const struct pinctrl_pin_desc uniphier_ld11_pins[] = {
         UNIPHIER_PINCTRL_PIN(140, "AO1D0", 140,
                              140, UNIPHIER_PIN_DRV_1BIT,
                              140, UNIPHIER_PIN_PULL_DOWN),
-       UNIPHIER_PINCTRL_PIN(141, "TCON0", 141,
+       UNIPHIER_PINCTRL_PIN(141, "AO1D1", 141,
                              141, UNIPHIER_PIN_DRV_1BIT,
                              141, UNIPHIER_PIN_PULL_DOWN),
-       UNIPHIER_PINCTRL_PIN(142, "TCON1", 142,
+       UNIPHIER_PINCTRL_PIN(142, "AO1D2", 142,
                              142, UNIPHIER_PIN_DRV_1BIT,
                              142, UNIPHIER_PIN_PULL_DOWN),
-       UNIPHIER_PINCTRL_PIN(143, "TCON2", 143,
+       UNIPHIER_PINCTRL_PIN(143, "XIRQ9", 143,
                              143, UNIPHIER_PIN_DRV_1BIT,
                              143, UNIPHIER_PIN_PULL_DOWN),
-       UNIPHIER_PINCTRL_PIN(144, "TCON3", 144,
+       UNIPHIER_PINCTRL_PIN(144, "XIRQ10", 144,
                              144, UNIPHIER_PIN_DRV_1BIT,
                              144, UNIPHIER_PIN_PULL_DOWN),
-       UNIPHIER_PINCTRL_PIN(145, "TCON4", 145,
+       UNIPHIER_PINCTRL_PIN(145, "XIRQ11", 145,
                              145, UNIPHIER_PIN_DRV_1BIT,
                              145, UNIPHIER_PIN_PULL_DOWN),
-       UNIPHIER_PINCTRL_PIN(146, "TCON5", 146,
+       UNIPHIER_PINCTRL_PIN(146, "XIRQ13", 146,
                              146, UNIPHIER_PIN_DRV_1BIT,
                              146, UNIPHIER_PIN_PULL_DOWN),
         UNIPHIER_PINCTRL_PIN(147, "PWMA", 147,
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c

index 5be4783e40d4c9e9ca547d89d5faa6c1437f927a..dea98ffb6f606a6079f40607c8f5476c7a738f7d 100644 (file)
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -103,15 +103,6 @@ static struct quirk_entry quirk_asus_x200ca = {
         .wapf = 2,
  };
  
-static struct quirk_entry quirk_no_rfkill = {
-       .no_rfkill = true,
-};
-
-static struct quirk_entry quirk_no_rfkill_wapf4 = {
-       .wapf = 4,
-       .no_rfkill = true,
-};
-
  static struct quirk_entry quirk_asus_ux303ub = {
         .wmi_backlight_native = true,
  };
@@ -194,7 +185,7 @@ static const struct dmi_system_id asus_quirks[] = {
                         DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
                         DMI_MATCH(DMI_PRODUCT_NAME, "X456UA"),
                 },
-               .driver_data = &quirk_no_rfkill_wapf4,
+               .driver_data = &quirk_asus_wapf4,
         },
         {
                 .callback = dmi_matched,
@@ -203,7 +194,7 @@ static const struct dmi_system_id asus_quirks[] = {
                         DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
                         DMI_MATCH(DMI_PRODUCT_NAME, "X456UF"),
                 },
-               .driver_data = &quirk_no_rfkill_wapf4,
+               .driver_data = &quirk_asus_wapf4,
         },
         {
                 .callback = dmi_matched,
@@ -367,42 +358,6 @@ static const struct dmi_system_id asus_quirks[] = {
                 },
                 .driver_data = &quirk_asus_x200ca,
         },
-       {
-               .callback = dmi_matched,
-               .ident = "ASUSTeK COMPUTER INC. X555UB",
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "X555UB"),
-               },
-               .driver_data = &quirk_no_rfkill,
-       },
-       {
-               .callback = dmi_matched,
-               .ident = "ASUSTeK COMPUTER INC. N552VW",
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "N552VW"),
-               },
-               .driver_data = &quirk_no_rfkill,
-       },
-       {
-               .callback = dmi_matched,
-               .ident = "ASUSTeK COMPUTER INC. U303LB",
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "U303LB"),
-               },
-               .driver_data = &quirk_no_rfkill,
-       },
-       {
-               .callback = dmi_matched,
-               .ident = "ASUSTeK COMPUTER INC. Z550MA",
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "Z550MA"),
-               },
-               .driver_data = &quirk_no_rfkill,
-       },
         {
                 .callback = dmi_matched,
                 .ident = "ASUSTeK COMPUTER INC. UX303UB",
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c

index 43cb680adbb42045aaeea332220460440180215a..8fe5890bf539f4f2eb722139f4c56a3065403e92 100644 (file)
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -159,6 +159,8 @@ MODULE_LICENSE("GPL");
  #define USB_INTEL_XUSB2PR              0xD0
  #define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI  0x9c31
  
+static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL };
+
  struct bios_args {
         u32 arg0;
         u32 arg1;
@@ -2051,6 +2053,16 @@ static int asus_wmi_fan_init(struct asus_wmi *asus)
         return 0;
  }
  
+static bool ashs_present(void)
+{
+       int i = 0;
+       while (ashs_ids[i]) {
+               if (acpi_dev_found(ashs_ids[i++]))
+                       return true;
+       }
+       return false;
+}
+
  /*
   * WMI Driver
   */
@@ -2095,7 +2107,11 @@ static int asus_wmi_add(struct platform_device *pdev)
         if (err)
                 goto fail_leds;
  
-       if (!asus->driver->quirks->no_rfkill) {
+       asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WLAN, &result);
+       if (result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT))
+               asus->driver->wlan_ctrl_by_user = 1;
+
+       if (!(asus->driver->wlan_ctrl_by_user && ashs_present())) {
                 err = asus_wmi_rfkill_init(asus);
                 if (err)
                         goto fail_rfkill;
@@ -2134,10 +2150,6 @@ static int asus_wmi_add(struct platform_device *pdev)
         if (err)
                 goto fail_debugfs;
  
-       asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WLAN, &result);
-       if (result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT))
-               asus->driver->wlan_ctrl_by_user = 1;
-
         return 0;
  
  fail_debugfs:
diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h

index fdff626c3b51b039f3b63473a6cf333d04fda819..c9589d9342bbf8f883c49abbcd7cebf9a23c608e 100644 (file)
--- a/drivers/platform/x86/asus-wmi.h
+++ b/drivers/platform/x86/asus-wmi.h
@@ -39,7 +39,6 @@ struct key_entry;
  struct asus_wmi;
  
  struct quirk_entry {
-       bool no_rfkill;
         bool hotplug_wireless;
         bool scalar_panel_brightness;
         bool store_backlight_power;
diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c

index 2b218b1d13e55dc985a2ca27e44b6a6ddf905141..e12cc3504d48799b447e636e21c2c6c440fc5827 100644 (file)
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -78,18 +78,18 @@
  
  #define FUJITSU_LCD_N_LEVELS 8
  
-#define ACPI_FUJITSU_CLASS              "fujitsu"
-#define ACPI_FUJITSU_HID                "FUJ02B1"
-#define ACPI_FUJITSU_DRIVER_NAME       "Fujitsu laptop FUJ02B1 ACPI brightness driver"
-#define ACPI_FUJITSU_DEVICE_NAME        "Fujitsu FUJ02B1"
-#define ACPI_FUJITSU_HOTKEY_HID        "FUJ02E3"
-#define ACPI_FUJITSU_HOTKEY_DRIVER_NAME "Fujitsu laptop FUJ02E3 ACPI hotkeys driver"
-#define ACPI_FUJITSU_HOTKEY_DEVICE_NAME "Fujitsu FUJ02E3"
+#define ACPI_FUJITSU_CLASS             "fujitsu"
+#define ACPI_FUJITSU_BL_HID            "FUJ02B1"
+#define ACPI_FUJITSU_BL_DRIVER_NAME    "Fujitsu laptop FUJ02B1 ACPI brightness driver"
+#define ACPI_FUJITSU_BL_DEVICE_NAME    "Fujitsu FUJ02B1"
+#define ACPI_FUJITSU_LAPTOP_HID                "FUJ02E3"
+#define ACPI_FUJITSU_LAPTOP_DRIVER_NAME        "Fujitsu laptop FUJ02E3 ACPI hotkeys driver"
+#define ACPI_FUJITSU_LAPTOP_DEVICE_NAME        "Fujitsu FUJ02E3"
  
  #define ACPI_FUJITSU_NOTIFY_CODE1     0x80
  
  /* FUNC interface - command values */
-#define FUNC_RFKILL    0x1000
+#define FUNC_FLAGS     0x1000
  #define FUNC_LEDS      0x1001
  #define FUNC_BUTTONS   0x1002
  #define FUNC_BACKLIGHT  0x1004
@@ -97,6 +97,11 @@
  /* FUNC interface - responses */
  #define UNSUPPORTED_CMD 0x80000000
  
+/* FUNC interface - status flags */
+#define FLAG_RFKILL    0x020
+#define FLAG_LID       0x100
+#define FLAG_DOCK      0x200
+
  #if IS_ENABLED(CONFIG_LEDS_CLASS)
  /* FUNC interface - LED control */
  #define FUNC_LED_OFF   0x1
@@ -136,7 +141,7 @@
  #endif
  
  /* Device controlling the backlight and associated keys */
-struct fujitsu_t {
+struct fujitsu_bl {
         acpi_handle acpi_handle;
         struct acpi_device *dev;
         struct input_dev *input;
@@ -150,12 +155,12 @@ struct fujitsu_t {
         unsigned int brightness_level;
  };
  
-static struct fujitsu_t *fujitsu;
+static struct fujitsu_bl *fujitsu_bl;
  static int use_alt_lcd_levels = -1;
  static int disable_brightness_adjust = -1;
  
-/* Device used to access other hotkeys on the laptop */
-struct fujitsu_hotkey_t {
+/* Device used to access hotkeys and other features on the laptop */
+struct fujitsu_laptop {
         acpi_handle acpi_handle;
         struct acpi_device *dev;
         struct input_dev *input;
@@ -163,17 +168,15 @@ struct fujitsu_hotkey_t {
         struct platform_device *pf_device;
         struct kfifo fifo;
         spinlock_t fifo_lock;
-       int rfkill_supported;
-       int rfkill_state;
+       int flags_supported;
+       int flags_state;
         int logolamp_registered;
         int kblamps_registered;
         int radio_led_registered;
         int eco_led_registered;
  };
  
-static struct fujitsu_hotkey_t *fujitsu_hotkey;
-
-static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event);
+static struct fujitsu_laptop *fujitsu_laptop;
  
  #if IS_ENABLED(CONFIG_LEDS_CLASS)
  static enum led_brightness logolamp_get(struct led_classdev *cdev);
@@ -222,8 +225,6 @@ static struct led_classdev eco_led = {
  static u32 dbg_level = 0x03;
  #endif
  
-static void acpi_fujitsu_notify(struct acpi_device *device, u32 event);
-
  /* Fujitsu ACPI interface function */
  
  static int call_fext_func(int cmd, int arg0, int arg1, int arg2)
@@ -239,7 +240,7 @@ static int call_fext_func(int cmd, int arg0, int arg1, int arg2)
         unsigned long long value;
         acpi_handle handle = NULL;
  
-       status = acpi_get_handle(fujitsu_hotkey->acpi_handle, "FUNC", &handle);
+       status = acpi_get_handle(fujitsu_laptop->acpi_handle, "FUNC", &handle);
         if (ACPI_FAILURE(status)) {
                 vdbg_printk(FUJLAPTOP_DBG_ERROR,
                                 "FUNC interface is not present\n");
@@ -300,9 +301,9 @@ static int radio_led_set(struct led_classdev *cdev,
                                 enum led_brightness brightness)
  {
         if (brightness >= LED_FULL)
-               return call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, RADIO_LED_ON);
+               return call_fext_func(FUNC_FLAGS, 0x5, RADIO_LED_ON, RADIO_LED_ON);
         else
-               return call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, 0x0);
+               return call_fext_func(FUNC_FLAGS, 0x5, RADIO_LED_ON, 0x0);
  }
  
  static int eco_led_set(struct led_classdev *cdev,
@@ -346,7 +347,7 @@ static enum led_brightness radio_led_get(struct led_classdev *cdev)
  {
         enum led_brightness brightness = LED_OFF;
  
-       if (call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0) & RADIO_LED_ON)
+       if (call_fext_func(FUNC_FLAGS, 0x4, 0x0, 0x0) & RADIO_LED_ON)
                 brightness = LED_FULL;
  
         return brightness;
@@ -373,10 +374,10 @@ static int set_lcd_level(int level)
         vdbg_printk(FUJLAPTOP_DBG_TRACE, "set lcd level via SBLL [%d]\n",
                     level);
  
-       if (level < 0 || level >= fujitsu->max_brightness)
+       if (level < 0 || level >= fujitsu_bl->max_brightness)
                 return -EINVAL;
  
-       status = acpi_get_handle(fujitsu->acpi_handle, "SBLL", &handle);
+       status = acpi_get_handle(fujitsu_bl->acpi_handle, "SBLL", &handle);
         if (ACPI_FAILURE(status)) {
                 vdbg_printk(FUJLAPTOP_DBG_ERROR, "SBLL not present\n");
                 return -ENODEV;
@@ -398,10 +399,10 @@ static int set_lcd_level_alt(int level)
         vdbg_printk(FUJLAPTOP_DBG_TRACE, "set lcd level via SBL2 [%d]\n",
                     level);
  
-       if (level < 0 || level >= fujitsu->max_brightness)
+       if (level < 0 || level >= fujitsu_bl->max_brightness)
                 return -EINVAL;
  
-       status = acpi_get_handle(fujitsu->acpi_handle, "SBL2", &handle);
+       status = acpi_get_handle(fujitsu_bl->acpi_handle, "SBL2", &handle);
         if (ACPI_FAILURE(status)) {
                 vdbg_printk(FUJLAPTOP_DBG_ERROR, "SBL2 not present\n");
                 return -ENODEV;
@@ -421,19 +422,19 @@ static int get_lcd_level(void)
  
         vdbg_printk(FUJLAPTOP_DBG_TRACE, "get lcd level via GBLL\n");
  
-       status =
-           acpi_evaluate_integer(fujitsu->acpi_handle, "GBLL", NULL, &state);
+       status = acpi_evaluate_integer(fujitsu_bl->acpi_handle, "GBLL", NULL,
+                                      &state);
         if (ACPI_FAILURE(status))
                 return 0;
  
-       fujitsu->brightness_level = state & 0x0fffffff;
+       fujitsu_bl->brightness_level = state & 0x0fffffff;
  
         if (state & 0x80000000)
-               fujitsu->brightness_changed = 1;
+               fujitsu_bl->brightness_changed = 1;
         else
-               fujitsu->brightness_changed = 0;
+               fujitsu_bl->brightness_changed = 0;
  
-       return fujitsu->brightness_level;
+       return fujitsu_bl->brightness_level;
  }
  
  static int get_max_brightness(void)
@@ -443,14 +444,14 @@ static int get_max_brightness(void)
  
         vdbg_printk(FUJLAPTOP_DBG_TRACE, "get max lcd level via RBLL\n");
  
-       status =
-           acpi_evaluate_integer(fujitsu->acpi_handle, "RBLL", NULL, &state);
+       status = acpi_evaluate_integer(fujitsu_bl->acpi_handle, "RBLL", NULL,
+                                      &state);
         if (ACPI_FAILURE(status))
                 return -1;
  
-       fujitsu->max_brightness = state;
+       fujitsu_bl->max_brightness = state;
  
-       return fujitsu->max_brightness;
+       return fujitsu_bl->max_brightness;
  }
  
  /* Backlight device stuff */
@@ -483,7 +484,7 @@ static int bl_update_status(struct backlight_device *b)
         return ret;
  }
  
-static const struct backlight_ops fujitsubl_ops = {
+static const struct backlight_ops fujitsu_bl_ops = {
         .get_brightness = bl_get_brightness,
         .update_status = bl_update_status,
  };
@@ -511,7 +512,7 @@ show_brightness_changed(struct device *dev,
  
         int ret;
  
-       ret = fujitsu->brightness_changed;
+       ret = fujitsu_bl->brightness_changed;
         if (ret < 0)
                 return ret;
  
@@ -539,7 +540,7 @@ static ssize_t store_lcd_level(struct device *dev,
         int level, ret;
  
         if (sscanf(buf, "%i", &level) != 1
-           || (level < 0 || level >= fujitsu->max_brightness))
+           || (level < 0 || level >= fujitsu_bl->max_brightness))
                 return -EINVAL;
  
         if (use_alt_lcd_levels)
@@ -567,9 +568,9 @@ static ssize_t
  show_lid_state(struct device *dev,
                         struct device_attribute *attr, char *buf)
  {
-       if (!(fujitsu_hotkey->rfkill_supported & 0x100))
+       if (!(fujitsu_laptop->flags_supported & FLAG_LID))
                 return sprintf(buf, "unknown\n");
-       if (fujitsu_hotkey->rfkill_state & 0x100)
+       if (fujitsu_laptop->flags_state & FLAG_LID)
                 return sprintf(buf, "open\n");
         else
                 return sprintf(buf, "closed\n");
@@ -579,9 +580,9 @@ static ssize_t
  show_dock_state(struct device *dev,
                         struct device_attribute *attr, char *buf)
  {
-       if (!(fujitsu_hotkey->rfkill_supported & 0x200))
+       if (!(fujitsu_laptop->flags_supported & FLAG_DOCK))
                 return sprintf(buf, "unknown\n");
-       if (fujitsu_hotkey->rfkill_state & 0x200)
+       if (fujitsu_laptop->flags_state & FLAG_DOCK)
                 return sprintf(buf, "docked\n");
         else
                 return sprintf(buf, "undocked\n");
@@ -591,9 +592,9 @@ static ssize_t
  show_radios_state(struct device *dev,
                         struct device_attribute *attr, char *buf)
  {
-       if (!(fujitsu_hotkey->rfkill_supported & 0x20))
+       if (!(fujitsu_laptop->flags_supported & FLAG_RFKILL))
                 return sprintf(buf, "unknown\n");
-       if (fujitsu_hotkey->rfkill_state & 0x20)
+       if (fujitsu_laptop->flags_state & FLAG_RFKILL)
                 return sprintf(buf, "on\n");
         else
                 return sprintf(buf, "killed\n");
@@ -607,7 +608,7 @@ static DEVICE_ATTR(lid, 0444, show_lid_state, ignore_store);
  static DEVICE_ATTR(dock, 0444, show_dock_state, ignore_store);
  static DEVICE_ATTR(radios, 0444, show_radios_state, ignore_store);
  
-static struct attribute *fujitsupf_attributes[] = {
+static struct attribute *fujitsu_pf_attributes[] = {
         &dev_attr_brightness_changed.attr,
         &dev_attr_max_brightness.attr,
         &dev_attr_lcd_level.attr,
@@ -617,11 +618,11 @@ static struct attribute *fujitsupf_attributes[] = {
         NULL
  };
  
-static struct attribute_group fujitsupf_attribute_group = {
-       .attrs = fujitsupf_attributes
+static struct attribute_group fujitsu_pf_attribute_group = {
+       .attrs = fujitsu_pf_attributes
  };
  
-static struct platform_driver fujitsupf_driver = {
+static struct platform_driver fujitsu_pf_driver = {
         .driver = {
                    .name = "fujitsu-laptop",
                    }
@@ -630,39 +631,30 @@ static struct platform_driver fujitsupf_driver = {
  static void __init dmi_check_cb_common(const struct dmi_system_id *id)
  {
         pr_info("Identified laptop model '%s'\n", id->ident);
-       if (use_alt_lcd_levels == -1) {
-               if (acpi_has_method(NULL,
-                               "\\_SB.PCI0.LPCB.FJEX.SBL2"))
-                       use_alt_lcd_levels = 1;
-               else
-                       use_alt_lcd_levels = 0;
-               vdbg_printk(FUJLAPTOP_DBG_TRACE, "auto-detected usealt as "
-                       "%i\n", use_alt_lcd_levels);
-       }
  }
  
  static int __init dmi_check_cb_s6410(const struct dmi_system_id *id)
  {
         dmi_check_cb_common(id);
-       fujitsu->keycode1 = KEY_SCREENLOCK;     /* "Lock" */
-       fujitsu->keycode2 = KEY_HELP;   /* "Mobility Center" */
+       fujitsu_bl->keycode1 = KEY_SCREENLOCK;  /* "Lock" */
+       fujitsu_bl->keycode2 = KEY_HELP;        /* "Mobility Center" */
         return 1;
  }
  
  static int __init dmi_check_cb_s6420(const struct dmi_system_id *id)
  {
         dmi_check_cb_common(id);
-       fujitsu->keycode1 = KEY_SCREENLOCK;     /* "Lock" */
-       fujitsu->keycode2 = KEY_HELP;   /* "Mobility Center" */
+       fujitsu_bl->keycode1 = KEY_SCREENLOCK;  /* "Lock" */
+       fujitsu_bl->keycode2 = KEY_HELP;        /* "Mobility Center" */
         return 1;
  }
  
  static int __init dmi_check_cb_p8010(const struct dmi_system_id *id)
  {
         dmi_check_cb_common(id);
-       fujitsu->keycode1 = KEY_HELP;   /* "Support" */
-       fujitsu->keycode3 = KEY_SWITCHVIDEOMODE;        /* "Presentation" */
-       fujitsu->keycode4 = KEY_WWW;    /* "Internet" */
+       fujitsu_bl->keycode1 = KEY_HELP;                /* "Support" */
+       fujitsu_bl->keycode3 = KEY_SWITCHVIDEOMODE;     /* "Presentation" */
+       fujitsu_bl->keycode4 = KEY_WWW;                 /* "Internet" */
         return 1;
  }
  
@@ -693,7 +685,7 @@ static const struct dmi_system_id fujitsu_dmi_table[] __initconst = {
  
  /* ACPI device for LCD brightness control */
  
-static int acpi_fujitsu_add(struct acpi_device *device)
+static int acpi_fujitsu_bl_add(struct acpi_device *device)
  {
         int state = 0;
         struct input_dev *input;
@@ -702,22 +694,22 @@ static int acpi_fujitsu_add(struct acpi_device *device)
         if (!device)
                 return -EINVAL;
  
-       fujitsu->acpi_handle = device->handle;
-       sprintf(acpi_device_name(device), "%s", ACPI_FUJITSU_DEVICE_NAME);
+       fujitsu_bl->acpi_handle = device->handle;
+       sprintf(acpi_device_name(device), "%s", ACPI_FUJITSU_BL_DEVICE_NAME);
         sprintf(acpi_device_class(device), "%s", ACPI_FUJITSU_CLASS);
-       device->driver_data = fujitsu;
+       device->driver_data = fujitsu_bl;
  
-       fujitsu->input = input = input_allocate_device();
+       fujitsu_bl->input = input = input_allocate_device();
         if (!input) {
                 error = -ENOMEM;
                 goto err_stop;
         }
  
-       snprintf(fujitsu->phys, sizeof(fujitsu->phys),
+       snprintf(fujitsu_bl->phys, sizeof(fujitsu_bl->phys),
                  "%s/video/input0", acpi_device_hid(device));
  
         input->name = acpi_device_name(device);
-       input->phys = fujitsu->phys;
+       input->phys = fujitsu_bl->phys;
         input->id.bustype = BUS_HOST;
         input->id.product = 0x06;
         input->dev.parent = &device->dev;
@@ -730,7 +722,7 @@ static int acpi_fujitsu_add(struct acpi_device *device)
         if (error)
                 goto err_free_input_dev;
  
-       error = acpi_bus_update_power(fujitsu->acpi_handle, &state);
+       error = acpi_bus_update_power(fujitsu_bl->acpi_handle, &state);
         if (error) {
                 pr_err("Error reading power state\n");
                 goto err_unregister_input_dev;
@@ -740,7 +732,7 @@ static int acpi_fujitsu_add(struct acpi_device *device)
                acpi_device_name(device), acpi_device_bid(device),
                !device->power.state ? "on" : "off");
  
-       fujitsu->dev = device;
+       fujitsu_bl->dev = device;
  
         if (acpi_has_method(device->handle, METHOD_NAME__INI)) {
                 vdbg_printk(FUJLAPTOP_DBG_INFO, "Invoking _INI\n");
@@ -750,6 +742,15 @@ static int acpi_fujitsu_add(struct acpi_device *device)
                         pr_err("_INI Method failed\n");
         }
  
+       if (use_alt_lcd_levels == -1) {
+               if (acpi_has_method(NULL, "\\_SB.PCI0.LPCB.FJEX.SBL2"))
+                       use_alt_lcd_levels = 1;
+               else
+                       use_alt_lcd_levels = 0;
+               vdbg_printk(FUJLAPTOP_DBG_TRACE, "auto-detected usealt as %i\n",
+                           use_alt_lcd_levels);
+       }
+
         /* do config (detect defaults) */
         use_alt_lcd_levels = use_alt_lcd_levels == 1 ? 1 : 0;
         disable_brightness_adjust = disable_brightness_adjust == 1 ? 1 : 0;
@@ -758,7 +759,7 @@ static int acpi_fujitsu_add(struct acpi_device *device)
                     use_alt_lcd_levels, disable_brightness_adjust);
  
         if (get_max_brightness() <= 0)
-               fujitsu->max_brightness = FUJITSU_LCD_N_LEVELS;
+               fujitsu_bl->max_brightness = FUJITSU_LCD_N_LEVELS;
         get_lcd_level();
  
         return 0;
@@ -772,38 +773,38 @@ err_stop:
         return error;
  }
  
-static int acpi_fujitsu_remove(struct acpi_device *device)
+static int acpi_fujitsu_bl_remove(struct acpi_device *device)
  {
-       struct fujitsu_t *fujitsu = acpi_driver_data(device);
-       struct input_dev *input = fujitsu->input;
+       struct fujitsu_bl *fujitsu_bl = acpi_driver_data(device);
+       struct input_dev *input = fujitsu_bl->input;
  
         input_unregister_device(input);
  
-       fujitsu->acpi_handle = NULL;
+       fujitsu_bl->acpi_handle = NULL;
  
         return 0;
  }
  
  /* Brightness notify */
  
-static void acpi_fujitsu_notify(struct acpi_device *device, u32 event)
+static void acpi_fujitsu_bl_notify(struct acpi_device *device, u32 event)
  {
         struct input_dev *input;
         int keycode;
         int oldb, newb;
  
-       input = fujitsu->input;
+       input = fujitsu_bl->input;
  
         switch (event) {
         case ACPI_FUJITSU_NOTIFY_CODE1:
                 keycode = 0;
-               oldb = fujitsu->brightness_level;
+               oldb = fujitsu_bl->brightness_level;
                 get_lcd_level();
-               newb = fujitsu->brightness_level;
+               newb = fujitsu_bl->brightness_level;
  
                 vdbg_printk(FUJLAPTOP_DBG_TRACE,
                             "brightness button event [%i -> %i (%i)]\n",
-                           oldb, newb, fujitsu->brightness_changed);
+                           oldb, newb, fujitsu_bl->brightness_changed);
  
                 if (oldb < newb) {
                         if (disable_brightness_adjust != 1) {
@@ -840,7 +841,7 @@ static void acpi_fujitsu_notify(struct acpi_device *device, u32 event)
  
  /* ACPI device for hotkey handling */
  
-static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
+static int acpi_fujitsu_laptop_add(struct acpi_device *device)
  {
         int result = 0;
         int state = 0;
@@ -851,42 +852,42 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
         if (!device)
                 return -EINVAL;
  
-       fujitsu_hotkey->acpi_handle = device->handle;
+       fujitsu_laptop->acpi_handle = device->handle;
         sprintf(acpi_device_name(device), "%s",
-               ACPI_FUJITSU_HOTKEY_DEVICE_NAME);
+               ACPI_FUJITSU_LAPTOP_DEVICE_NAME);
         sprintf(acpi_device_class(device), "%s", ACPI_FUJITSU_CLASS);
-       device->driver_data = fujitsu_hotkey;
+       device->driver_data = fujitsu_laptop;
  
         /* kfifo */
-       spin_lock_init(&fujitsu_hotkey->fifo_lock);
-       error = kfifo_alloc(&fujitsu_hotkey->fifo, RINGBUFFERSIZE * sizeof(int),
+       spin_lock_init(&fujitsu_laptop->fifo_lock);
+       error = kfifo_alloc(&fujitsu_laptop->fifo, RINGBUFFERSIZE * sizeof(int),
                         GFP_KERNEL);
         if (error) {
                 pr_err("kfifo_alloc failed\n");
                 goto err_stop;
         }
  
-       fujitsu_hotkey->input = input = input_allocate_device();
+       fujitsu_laptop->input = input = input_allocate_device();
         if (!input) {
                 error = -ENOMEM;
                 goto err_free_fifo;
         }
  
-       snprintf(fujitsu_hotkey->phys, sizeof(fujitsu_hotkey->phys),
+       snprintf(fujitsu_laptop->phys, sizeof(fujitsu_laptop->phys),
                  "%s/video/input0", acpi_device_hid(device));
  
         input->name = acpi_device_name(device);
-       input->phys = fujitsu_hotkey->phys;
+       input->phys = fujitsu_laptop->phys;
         input->id.bustype = BUS_HOST;
         input->id.product = 0x06;
         input->dev.parent = &device->dev;
  
         set_bit(EV_KEY, input->evbit);
-       set_bit(fujitsu->keycode1, input->keybit);
-       set_bit(fujitsu->keycode2, input->keybit);
-       set_bit(fujitsu->keycode3, input->keybit);
-       set_bit(fujitsu->keycode4, input->keybit);
-       set_bit(fujitsu->keycode5, input->keybit);
+       set_bit(fujitsu_bl->keycode1, input->keybit);
+       set_bit(fujitsu_bl->keycode2, input->keybit);
+       set_bit(fujitsu_bl->keycode3, input->keybit);
+       set_bit(fujitsu_bl->keycode4, input->keybit);
+       set_bit(fujitsu_bl->keycode5, input->keybit);
         set_bit(KEY_TOUCHPAD_TOGGLE, input->keybit);
         set_bit(KEY_UNKNOWN, input->keybit);
  
@@ -894,7 +895,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
         if (error)
                 goto err_free_input_dev;
  
-       error = acpi_bus_update_power(fujitsu_hotkey->acpi_handle, &state);
+       error = acpi_bus_update_power(fujitsu_laptop->acpi_handle, &state);
         if (error) {
                 pr_err("Error reading power state\n");
                 goto err_unregister_input_dev;
@@ -904,7 +905,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
                 acpi_device_name(device), acpi_device_bid(device),
                 !device->power.state ? "on" : "off");
  
-       fujitsu_hotkey->dev = device;
+       fujitsu_laptop->dev = device;
  
         if (acpi_has_method(device->handle, METHOD_NAME__INI)) {
                 vdbg_printk(FUJLAPTOP_DBG_INFO, "Invoking _INI\n");
@@ -920,27 +921,27 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
                 ; /* No action, result is discarded */
         vdbg_printk(FUJLAPTOP_DBG_INFO, "Discarded %i ringbuffer entries\n", i);
  
-       fujitsu_hotkey->rfkill_supported =
-               call_fext_func(FUNC_RFKILL, 0x0, 0x0, 0x0);
+       fujitsu_laptop->flags_supported =
+               call_fext_func(FUNC_FLAGS, 0x0, 0x0, 0x0);
  
         /* Make sure our bitmask of supported functions is cleared if the
            RFKILL function block is not implemented, like on the S7020. */
-       if (fujitsu_hotkey->rfkill_supported == UNSUPPORTED_CMD)
-               fujitsu_hotkey->rfkill_supported = 0;
+       if (fujitsu_laptop->flags_supported == UNSUPPORTED_CMD)
+               fujitsu_laptop->flags_supported = 0;
  
-       if (fujitsu_hotkey->rfkill_supported)
-               fujitsu_hotkey->rfkill_state =
-                       call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0);
+       if (fujitsu_laptop->flags_supported)
+               fujitsu_laptop->flags_state =
+                       call_fext_func(FUNC_FLAGS, 0x4, 0x0, 0x0);
  
         /* Suspect this is a keymap of the application panel, print it */
         pr_info("BTNI: [0x%x]\n", call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0));
  
  #if IS_ENABLED(CONFIG_LEDS_CLASS)
         if (call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & LOGOLAMP_POWERON) {
-               result = led_classdev_register(&fujitsu->pf_device->dev,
+               result = led_classdev_register(&fujitsu_bl->pf_device->dev,
                                                 &logolamp_led);
                 if (result == 0) {
-                       fujitsu_hotkey->logolamp_registered = 1;
+                       fujitsu_laptop->logolamp_registered = 1;
                 } else {
                         pr_err("Could not register LED handler for logo lamp, error %i\n",
                                result);
@@ -949,10 +950,10 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
  
         if ((call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & KEYBOARD_LAMPS) &&
            (call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0) == 0x0)) {
-               result = led_classdev_register(&fujitsu->pf_device->dev,
+               result = led_classdev_register(&fujitsu_bl->pf_device->dev,
                                                 &kblamps_led);
                 if (result == 0) {
-                       fujitsu_hotkey->kblamps_registered = 1;
+                       fujitsu_laptop->kblamps_registered = 1;
                 } else {
                         pr_err("Could not register LED handler for keyboard lamps, error %i\n",
                                result);
@@ -966,10 +967,10 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
          * that an RF LED is present.
          */
         if (call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0) & BIT(24)) {
-               result = led_classdev_register(&fujitsu->pf_device->dev,
+               result = led_classdev_register(&fujitsu_bl->pf_device->dev,
                                                 &radio_led);
                 if (result == 0) {
-                       fujitsu_hotkey->radio_led_registered = 1;
+                       fujitsu_laptop->radio_led_registered = 1;
                 } else {
                         pr_err("Could not register LED handler for radio LED, error %i\n",
                                result);
@@ -983,10 +984,10 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
         */
         if ((call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & BIT(14)) &&
            (call_fext_func(FUNC_LEDS, 0x2, ECO_LED, 0x0) != UNSUPPORTED_CMD)) {
-               result = led_classdev_register(&fujitsu->pf_device->dev,
+               result = led_classdev_register(&fujitsu_bl->pf_device->dev,
                                                 &eco_led);
                 if (result == 0) {
-                       fujitsu_hotkey->eco_led_registered = 1;
+                       fujitsu_laptop->eco_led_registered = 1;
                 } else {
                         pr_err("Could not register LED handler for eco LED, error %i\n",
                                result);
@@ -1002,47 +1003,47 @@ err_unregister_input_dev:
  err_free_input_dev:
         input_free_device(input);
  err_free_fifo:
-       kfifo_free(&fujitsu_hotkey->fifo);
+       kfifo_free(&fujitsu_laptop->fifo);
  err_stop:
         return error;
  }
  
-static int acpi_fujitsu_hotkey_remove(struct acpi_device *device)
+static int acpi_fujitsu_laptop_remove(struct acpi_device *device)
  {
-       struct fujitsu_hotkey_t *fujitsu_hotkey = acpi_driver_data(device);
-       struct input_dev *input = fujitsu_hotkey->input;
+       struct fujitsu_laptop *fujitsu_laptop = acpi_driver_data(device);
+       struct input_dev *input = fujitsu_laptop->input;
  
  #if IS_ENABLED(CONFIG_LEDS_CLASS)
-       if (fujitsu_hotkey->logolamp_registered)
+       if (fujitsu_laptop->logolamp_registered)
                 led_classdev_unregister(&logolamp_led);
  
-       if (fujitsu_hotkey->kblamps_registered)
+       if (fujitsu_laptop->kblamps_registered)
                 led_classdev_unregister(&kblamps_led);
  
-       if (fujitsu_hotkey->radio_led_registered)
+       if (fujitsu_laptop->radio_led_registered)
                 led_classdev_unregister(&radio_led);
  
-       if (fujitsu_hotkey->eco_led_registered)
+       if (fujitsu_laptop->eco_led_registered)
                 led_classdev_unregister(&eco_led);
  #endif
  
         input_unregister_device(input);
  
-       kfifo_free(&fujitsu_hotkey->fifo);
+       kfifo_free(&fujitsu_laptop->fifo);
  
-       fujitsu_hotkey->acpi_handle = NULL;
+       fujitsu_laptop->acpi_handle = NULL;
  
         return 0;
  }
  
-static void acpi_fujitsu_hotkey_press(int keycode)
+static void acpi_fujitsu_laptop_press(int keycode)
  {
-       struct input_dev *input = fujitsu_hotkey->input;
+       struct input_dev *input = fujitsu_laptop->input;
         int status;
  
-       status = kfifo_in_locked(&fujitsu_hotkey->fifo,
+       status = kfifo_in_locked(&fujitsu_laptop->fifo,
                                  (unsigned char *)&keycode, sizeof(keycode),
-                                &fujitsu_hotkey->fifo_lock);
+                                &fujitsu_laptop->fifo_lock);
         if (status != sizeof(keycode)) {
                 vdbg_printk(FUJLAPTOP_DBG_WARN,
                             "Could not push keycode [0x%x]\n", keycode);
@@ -1054,16 +1055,16 @@ static void acpi_fujitsu_hotkey_press(int keycode)
                     "Push keycode into ringbuffer [%d]\n", keycode);
  }
  
-static void acpi_fujitsu_hotkey_release(void)
+static void acpi_fujitsu_laptop_release(void)
  {
-       struct input_dev *input = fujitsu_hotkey->input;
+       struct input_dev *input = fujitsu_laptop->input;
         int keycode, status;
  
         while (true) {
-               status = kfifo_out_locked(&fujitsu_hotkey->fifo,
+               status = kfifo_out_locked(&fujitsu_laptop->fifo,
                                           (unsigned char *)&keycode,
                                           sizeof(keycode),
-                                         &fujitsu_hotkey->fifo_lock);
+                                         &fujitsu_laptop->fifo_lock);
                 if (status != sizeof(keycode))
                         return;
                 input_report_key(input, keycode, 0);
@@ -1073,14 +1074,14 @@ static void acpi_fujitsu_hotkey_release(void)
         }
  }
  
-static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
+static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event)
  {
         struct input_dev *input;
         int keycode;
         unsigned int irb = 1;
         int i;
  
-       input = fujitsu_hotkey->input;
+       input = fujitsu_laptop->input;
  
         if (event != ACPI_FUJITSU_NOTIFY_CODE1) {
                 keycode = KEY_UNKNOWN;
@@ -1093,9 +1094,9 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
                 return;
         }
  
-       if (fujitsu_hotkey->rfkill_supported)
-               fujitsu_hotkey->rfkill_state =
-                       call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0);
+       if (fujitsu_laptop->flags_supported)
+               fujitsu_laptop->flags_state =
+                       call_fext_func(FUNC_FLAGS, 0x4, 0x0, 0x0);
  
         i = 0;
         while ((irb =
@@ -1103,19 +1104,19 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
                         && (i++) < MAX_HOTKEY_RINGBUFFER_SIZE) {
                 switch (irb & 0x4ff) {
                 case KEY1_CODE:
-                       keycode = fujitsu->keycode1;
+                       keycode = fujitsu_bl->keycode1;
                         break;
                 case KEY2_CODE:
-                       keycode = fujitsu->keycode2;
+                       keycode = fujitsu_bl->keycode2;
                         break;
                 case KEY3_CODE:
-                       keycode = fujitsu->keycode3;
+                       keycode = fujitsu_bl->keycode3;
                         break;
                 case KEY4_CODE:
-                       keycode = fujitsu->keycode4;
+                       keycode = fujitsu_bl->keycode4;
                         break;
                 case KEY5_CODE:
-                       keycode = fujitsu->keycode5;
+                       keycode = fujitsu_bl->keycode5;
                         break;
                 case 0:
                         keycode = 0;
@@ -1128,17 +1129,17 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
                 }
  
                 if (keycode > 0)
-                       acpi_fujitsu_hotkey_press(keycode);
+                       acpi_fujitsu_laptop_press(keycode);
                 else if (keycode == 0)
-                       acpi_fujitsu_hotkey_release();
+                       acpi_fujitsu_laptop_release();
         }
  
         /* On some models (first seen on the Skylake-based Lifebook
          * E736/E746/E756), the touchpad toggle hotkey (Fn+F4) is
-        * handled in software; its state is queried using FUNC_RFKILL
+        * handled in software; its state is queried using FUNC_FLAGS
          */
-       if ((fujitsu_hotkey->rfkill_supported & BIT(26)) &&
-           (call_fext_func(FUNC_RFKILL, 0x1, 0x0, 0x0) & BIT(26))) {
+       if ((fujitsu_laptop->flags_supported & BIT(26)) &&
+           (call_fext_func(FUNC_FLAGS, 0x1, 0x0, 0x0) & BIT(26))) {
                 keycode = KEY_TOUCHPAD_TOGGLE;
                 input_report_key(input, keycode, 1);
                 input_sync(input);
@@ -1150,83 +1151,81 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
  
  /* Initialization */
  
-static const struct acpi_device_id fujitsu_device_ids[] = {
-       {ACPI_FUJITSU_HID, 0},
+static const struct acpi_device_id fujitsu_bl_device_ids[] = {
+       {ACPI_FUJITSU_BL_HID, 0},
         {"", 0},
  };
  
-static struct acpi_driver acpi_fujitsu_driver = {
-       .name = ACPI_FUJITSU_DRIVER_NAME,
+static struct acpi_driver acpi_fujitsu_bl_driver = {
+       .name = ACPI_FUJITSU_BL_DRIVER_NAME,
         .class = ACPI_FUJITSU_CLASS,
-       .ids = fujitsu_device_ids,
+       .ids = fujitsu_bl_device_ids,
         .ops = {
-               .add = acpi_fujitsu_add,
-               .remove = acpi_fujitsu_remove,
-               .notify = acpi_fujitsu_notify,
+               .add = acpi_fujitsu_bl_add,
+               .remove = acpi_fujitsu_bl_remove,
+               .notify = acpi_fujitsu_bl_notify,
                 },
  };
  
-static const struct acpi_device_id fujitsu_hotkey_device_ids[] = {
-       {ACPI_FUJITSU_HOTKEY_HID, 0},
+static const struct acpi_device_id fujitsu_laptop_device_ids[] = {
+       {ACPI_FUJITSU_LAPTOP_HID, 0},
         {"", 0},
  };
  
-static struct acpi_driver acpi_fujitsu_hotkey_driver = {
-       .name = ACPI_FUJITSU_HOTKEY_DRIVER_NAME,
+static struct acpi_driver acpi_fujitsu_laptop_driver = {
+       .name = ACPI_FUJITSU_LAPTOP_DRIVER_NAME,
         .class = ACPI_FUJITSU_CLASS,
-       .ids = fujitsu_hotkey_device_ids,
+       .ids = fujitsu_laptop_device_ids,
         .ops = {
-               .add = acpi_fujitsu_hotkey_add,
-               .remove = acpi_fujitsu_hotkey_remove,
-               .notify = acpi_fujitsu_hotkey_notify,
+               .add = acpi_fujitsu_laptop_add,
+               .remove = acpi_fujitsu_laptop_remove,
+               .notify = acpi_fujitsu_laptop_notify,
                 },
  };
  
  static const struct acpi_device_id fujitsu_ids[] __used = {
-       {ACPI_FUJITSU_HID, 0},
-       {ACPI_FUJITSU_HOTKEY_HID, 0},
+       {ACPI_FUJITSU_BL_HID, 0},
+       {ACPI_FUJITSU_LAPTOP_HID, 0},
         {"", 0}
  };
  MODULE_DEVICE_TABLE(acpi, fujitsu_ids);
  
  static int __init fujitsu_init(void)
  {
-       int ret, result, max_brightness;
+       int ret, max_brightness;
  
         if (acpi_disabled)
                 return -ENODEV;
  
-       fujitsu = kzalloc(sizeof(struct fujitsu_t), GFP_KERNEL);
-       if (!fujitsu)
+       fujitsu_bl = kzalloc(sizeof(struct fujitsu_bl), GFP_KERNEL);
+       if (!fujitsu_bl)
                 return -ENOMEM;
-       fujitsu->keycode1 = KEY_PROG1;
-       fujitsu->keycode2 = KEY_PROG2;
-       fujitsu->keycode3 = KEY_PROG3;
-       fujitsu->keycode4 = KEY_PROG4;
-       fujitsu->keycode5 = KEY_RFKILL;
+       fujitsu_bl->keycode1 = KEY_PROG1;
+       fujitsu_bl->keycode2 = KEY_PROG2;
+       fujitsu_bl->keycode3 = KEY_PROG3;
+       fujitsu_bl->keycode4 = KEY_PROG4;
+       fujitsu_bl->keycode5 = KEY_RFKILL;
         dmi_check_system(fujitsu_dmi_table);
  
-       result = acpi_bus_register_driver(&acpi_fujitsu_driver);
-       if (result < 0) {
-               ret = -ENODEV;
+       ret = acpi_bus_register_driver(&acpi_fujitsu_bl_driver);
+       if (ret)
                 goto fail_acpi;
-       }
  
         /* Register platform stuff */
  
-       fujitsu->pf_device = platform_device_alloc("fujitsu-laptop", -1);
-       if (!fujitsu->pf_device) {
+       fujitsu_bl->pf_device = platform_device_alloc("fujitsu-laptop", -1);
+       if (!fujitsu_bl->pf_device) {
                 ret = -ENOMEM;
                 goto fail_platform_driver;
         }
  
-       ret = platform_device_add(fujitsu->pf_device);
+       ret = platform_device_add(fujitsu_bl->pf_device);
         if (ret)
                 goto fail_platform_device1;
  
         ret =
-           sysfs_create_group(&fujitsu->pf_device->dev.kobj,
-                              &fujitsupf_attribute_group);
+           sysfs_create_group(&fujitsu_bl->pf_device->dev.kobj,
+                              &fujitsu_pf_attribute_group);
         if (ret)
                 goto fail_platform_device2;
  
@@ -1236,90 +1235,88 @@ static int __init fujitsu_init(void)
                 struct backlight_properties props;
  
                 memset(&props, 0, sizeof(struct backlight_properties));
-               max_brightness = fujitsu->max_brightness;
+               max_brightness = fujitsu_bl->max_brightness;
                 props.type = BACKLIGHT_PLATFORM;
                 props.max_brightness = max_brightness - 1;
-               fujitsu->bl_device = backlight_device_register("fujitsu-laptop",
-                                                              NULL, NULL,
-                                                              &fujitsubl_ops,
-                                                              &props);
-               if (IS_ERR(fujitsu->bl_device)) {
-                       ret = PTR_ERR(fujitsu->bl_device);
-                       fujitsu->bl_device = NULL;
+               fujitsu_bl->bl_device = backlight_device_register("fujitsu-laptop",
+                                                                 NULL, NULL,
+                                                                 &fujitsu_bl_ops,
+                                                                 &props);
+               if (IS_ERR(fujitsu_bl->bl_device)) {
+                       ret = PTR_ERR(fujitsu_bl->bl_device);
+                       fujitsu_bl->bl_device = NULL;
                         goto fail_sysfs_group;
                 }
-               fujitsu->bl_device->props.brightness = fujitsu->brightness_level;
+               fujitsu_bl->bl_device->props.brightness = fujitsu_bl->brightness_level;
         }
  
-       ret = platform_driver_register(&fujitsupf_driver);
+       ret = platform_driver_register(&fujitsu_pf_driver);
         if (ret)
                 goto fail_backlight;
  
-       /* Register hotkey driver */
+       /* Register laptop driver */
  
-       fujitsu_hotkey = kzalloc(sizeof(struct fujitsu_hotkey_t), GFP_KERNEL);
-       if (!fujitsu_hotkey) {
+       fujitsu_laptop = kzalloc(sizeof(struct fujitsu_laptop), GFP_KERNEL);
+       if (!fujitsu_laptop) {
                 ret = -ENOMEM;
-               goto fail_hotkey;
+               goto fail_laptop;
         }
  
-       result = acpi_bus_register_driver(&acpi_fujitsu_hotkey_driver);
-       if (result < 0) {
-               ret = -ENODEV;
-               goto fail_hotkey1;
-       }
+       ret = acpi_bus_register_driver(&acpi_fujitsu_laptop_driver);
+       if (ret)
+               goto fail_laptop1;
  
         /* Sync backlight power status (needs FUJ02E3 device, hence deferred) */
         if (acpi_video_get_backlight_type() == acpi_backlight_vendor) {
                 if (call_fext_func(FUNC_BACKLIGHT, 0x2, 0x4, 0x0) == 3)
-                       fujitsu->bl_device->props.power = FB_BLANK_POWERDOWN;
+                       fujitsu_bl->bl_device->props.power = FB_BLANK_POWERDOWN;
                 else
-                       fujitsu->bl_device->props.power = FB_BLANK_UNBLANK;
+                       fujitsu_bl->bl_device->props.power = FB_BLANK_UNBLANK;
         }
  
         pr_info("driver " FUJITSU_DRIVER_VERSION " successfully loaded\n");
  
         return 0;
  
-fail_hotkey1:
-       kfree(fujitsu_hotkey);
-fail_hotkey:
-       platform_driver_unregister(&fujitsupf_driver);
+fail_laptop1:
+       kfree(fujitsu_laptop);
+fail_laptop:
+       platform_driver_unregister(&fujitsu_pf_driver);
  fail_backlight:
-       backlight_device_unregister(fujitsu->bl_device);
+       backlight_device_unregister(fujitsu_bl->bl_device);
  fail_sysfs_group:
-       sysfs_remove_group(&fujitsu->pf_device->dev.kobj,
-                          &fujitsupf_attribute_group);
+       sysfs_remove_group(&fujitsu_bl->pf_device->dev.kobj,
+                          &fujitsu_pf_attribute_group);
  fail_platform_device2:
-       platform_device_del(fujitsu->pf_device);
+       platform_device_del(fujitsu_bl->pf_device);
  fail_platform_device1:
-       platform_device_put(fujitsu->pf_device);
+       platform_device_put(fujitsu_bl->pf_device);
  fail_platform_driver:
-       acpi_bus_unregister_driver(&acpi_fujitsu_driver);
+       acpi_bus_unregister_driver(&acpi_fujitsu_bl_driver);
  fail_acpi:
-       kfree(fujitsu);
+       kfree(fujitsu_bl);
  
         return ret;
  }
  
  static void __exit fujitsu_cleanup(void)
  {
-       acpi_bus_unregister_driver(&acpi_fujitsu_hotkey_driver);
+       acpi_bus_unregister_driver(&acpi_fujitsu_laptop_driver);
  
-       kfree(fujitsu_hotkey);
+       kfree(fujitsu_laptop);
  
-       platform_driver_unregister(&fujitsupf_driver);
+       platform_driver_unregister(&fujitsu_pf_driver);
  
-       backlight_device_unregister(fujitsu->bl_device);
+       backlight_device_unregister(fujitsu_bl->bl_device);
  
-       sysfs_remove_group(&fujitsu->pf_device->dev.kobj,
-                          &fujitsupf_attribute_group);
+       sysfs_remove_group(&fujitsu_bl->pf_device->dev.kobj,
+                          &fujitsu_pf_attribute_group);
  
-       platform_device_unregister(fujitsu->pf_device);
+       platform_device_unregister(fujitsu_bl->pf_device);
  
-       acpi_bus_unregister_driver(&acpi_fujitsu_driver);
+       acpi_bus_unregister_driver(&acpi_fujitsu_bl_driver);
  
-       kfree(fujitsu);
+       kfree(fujitsu_bl);
  
         pr_info("driver unloaded\n");
  }
@@ -1341,7 +1338,3 @@ MODULE_AUTHOR("Jonathan Woithe, Peter Gruber, Tony Vroon");
  MODULE_DESCRIPTION("Fujitsu laptop extras support");
  MODULE_VERSION(FUJITSU_DRIVER_VERSION);
  MODULE_LICENSE("GPL");
-
-MODULE_ALIAS("dmi:*:svnFUJITSUSIEMENS:*:pvr:rvnFUJITSU:rnFJNB1D3:*:cvrS6410:*");
-MODULE_ALIAS("dmi:*:svnFUJITSUSIEMENS:*:pvr:rvnFUJITSU:rnFJNB1E6:*:cvrS6420:*");
-MODULE_ALIAS("dmi:*:svnFUJITSU:*:pvr:rvnFUJITSU:rnFJNB19C:*:cvrS7020:*");
diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig

index b8cacccf18c8b8ecee42963c8bbbcd2a5e7a309a..13f1714cf6f79887ce7ccd9411a6c4d5bc443820 100644 (file)
--- a/drivers/power/reset/Kconfig
+++ b/drivers/power/reset/Kconfig
@@ -67,6 +67,15 @@ config POWER_RESET_BRCMSTB
           Say Y here if you have a Broadcom STB board and you wish
           to have restart support.
  
+config POWER_RESET_GEMINI_POWEROFF
+       bool "Cortina Gemini power-off driver"
+       depends on ARCH_GEMINI || COMPILE_TEST
+       depends on OF && HAS_IOMEM
+       default ARCH_GEMINI
+       help
+         This driver supports turning off the Cortina Gemini SoC.
+         Select this if you're building a kernel with Gemini SoC support.
+
  config POWER_RESET_GPIO
         bool "GPIO power-off driver"
         depends on OF_GPIO
diff --git a/drivers/power/reset/Makefile b/drivers/power/reset/Makefile

index 11dae3b56ff941b32f500495ad8b9b768056350a..58cf5b30559f7413cd3c0e99e817bc49896e2aa3 100644 (file)
--- a/drivers/power/reset/Makefile
+++ b/drivers/power/reset/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_POWER_RESET_AT91_SAMA5D2_SHDWC) += at91-sama5d2_shdwc.o
  obj-$(CONFIG_POWER_RESET_AXXIA) += axxia-reset.o
  obj-$(CONFIG_POWER_RESET_BRCMKONA) += brcm-kona-reset.o
  obj-$(CONFIG_POWER_RESET_BRCMSTB) += brcmstb-reboot.o
+obj-$(CONFIG_POWER_RESET_GEMINI_POWEROFF) += gemini-poweroff.o
  obj-$(CONFIG_POWER_RESET_GPIO) += gpio-poweroff.o
  obj-$(CONFIG_POWER_RESET_GPIO_RESTART) += gpio-restart.o
  obj-$(CONFIG_POWER_RESET_HISI) += hisi-reboot.o
diff --git a/drivers/power/reset/gemini-poweroff.c b/drivers/power/reset/gemini-poweroff.c

new file mode 100644 (file)

index 0000000..de878fd
--- /dev/null
+++ b/drivers/power/reset/gemini-poweroff.c
@@ -0,0 +1,160 @@
+/*
+ * Gemini power management controller
+ * Copyright (C) 2017 Linus Walleij <linus.walleij@linaro.org>
+ *
+ * Inspired by code from the SL3516 board support by Jason Lee
+ * Inspired by code from Janos Laube <janos.dev@gmail.com>
+ */
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/reboot.h>
+
+#define GEMINI_PWC_ID          0x00010500
+#define        GEMINI_PWC_IDREG        0x00
+#define        GEMINI_PWC_CTRLREG      0x04
+#define        GEMINI_PWC_STATREG      0x08
+
+#define GEMINI_CTRL_SHUTDOWN   BIT(0)
+#define GEMINI_CTRL_ENABLE     BIT(1)
+#define GEMINI_CTRL_IRQ_CLR    BIT(2)
+
+#define GEMINI_STAT_CIR                BIT(4)
+#define        GEMINI_STAT_RTC         BIT(5)
+#define        GEMINI_STAT_POWERBUTTON BIT(6)
+
+/*
+ * Per-device state of the Gemini power controller. One instance is
+ * allocated and filled in by gemini_poweroff_probe().
+ */
+struct gemini_powercon {
+        struct device           *dev;	/* owning device, used for log messages */
+        void __iomem            *base;	/* mapped register window; GEMINI_PWC_* offsets apply */
+};
+
+/*
+ * Interrupt handler for the power controller IRQ.
+ *
+ * @irq:  interrupt number (not used by the handler)
+ * @data: the struct gemini_powercon that was passed to devm_request_irq()
+ *
+ * Acknowledges the interrupt in the control register, then decodes the
+ * event source from the status register. An infrared, RTC or power-button
+ * event each logs a message and requests an orderly poweroff; any other
+ * status value is only logged.
+ *
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t gemini_powerbutton_interrupt(int irq, void *data)
+{
+       struct gemini_powercon *gpw = data;
+       u32 val;
+
+       /* ACK the IRQ: read-modify-write so the other control bits are kept */
+       val = readl(gpw->base + GEMINI_PWC_CTRLREG);
+       val |= GEMINI_CTRL_IRQ_CLR;
+       writel(val, gpw->base + GEMINI_PWC_CTRLREG);
+
+       val = readl(gpw->base + GEMINI_PWC_STATREG);
+       /* Keep only bits 4-6, i.e. the CIR, RTC and power-button flags */
+       val &= 0x70U;
+       /*
+        * Each case matches exactly one flag being set; if several flags
+        * are set at once the value matches no case and ends up in default.
+        */
+       switch (val) {
+       case GEMINI_STAT_CIR:
+               dev_info(gpw->dev, "infrared poweroff\n");
+               orderly_poweroff(true);
+               break;
+       case GEMINI_STAT_RTC:
+               dev_info(gpw->dev, "RTC poweroff\n");
+               orderly_poweroff(true);
+               break;
+       case GEMINI_STAT_POWERBUTTON:
+               dev_info(gpw->dev, "poweroff button pressed\n");
+               orderly_poweroff(true);
+               break;
+       default:
+               dev_info(gpw->dev, "other power management IRQ\n");
+               break;
+       }
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * The pm_power_off callback takes no argument, so the controller state is
+ * handed to gemini_poweroff() through this file-scope pointer, which is
+ * set by gemini_poweroff_probe().
+ */
+static struct gemini_powercon *gpw_poweroff;
+
+/*
+ * Power the system off; installed as pm_power_off by
+ * gemini_poweroff_probe().
+ *
+ * Issues two ordered writes to the control register: the first sets
+ * ENABLE together with IRQ_CLR, the second clears ENABLE and sets SHUTDOWN.
+ * The second write reuses the value of the first, so IRQ_CLR is still set
+ * in it.
+ */
+static void gemini_poweroff(void)
+{
+       struct gemini_powercon *gpw = gpw_poweroff;
+       u32 val;
+
+       dev_crit(gpw->dev, "Gemini power off\n");
+       /* Step 1: enable the controller and clear any pending interrupt */
+       val = readl(gpw->base + GEMINI_PWC_CTRLREG);
+       val |= GEMINI_CTRL_ENABLE | GEMINI_CTRL_IRQ_CLR;
+       writel(val, gpw->base + GEMINI_PWC_CTRLREG);
+
+       /* Step 2: drop ENABLE and raise SHUTDOWN in a single write */
+       val &= ~GEMINI_CTRL_ENABLE;
+       val |= GEMINI_CTRL_SHUTDOWN;
+       writel(val, gpw->base + GEMINI_PWC_CTRLREG);
+}
+
+/*
+ * Probe the Gemini power controller.
+ *
+ * @pdev: platform device providing memory resource 0 (the register
+ *        window) and interrupt 0
+ *
+ * Maps the registers, checks the ID register against GEMINI_PWC_ID (the
+ * low byte is masked off before comparing), clears any pending power
+ * management interrupt, installs the interrupt handler and the
+ * pm_power_off hook, and finally sets the ENABLE bit so that power events
+ * are delivered to the kernel.
+ *
+ * All resources are device-managed, so the error paths need no unwinding.
+ *
+ * Returns 0 on success or a negative errno: -ENOMEM if the state cannot be
+ * allocated, the devm_ioremap_resource() error if mapping fails, the
+ * platform_get_irq() error (-EINVAL if it returned 0), -ENODEV on an ID
+ * mismatch, or the devm_request_irq() error.
+ */
+static int gemini_poweroff_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct resource *res;
+       struct gemini_powercon *gpw;
+       u32 val;
+       int irq;
+       int ret;
+
+       gpw = devm_kzalloc(dev, sizeof(*gpw), GFP_KERNEL);
+       if (!gpw)
+               return -ENOMEM;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       gpw->base = devm_ioremap_resource(dev, res);
+       if (IS_ERR(gpw->base))
+               return PTR_ERR(gpw->base);
+
+       /*
+        * platform_get_irq() reports failure as a negative errno, which a
+        * plain "!irq" test lets through to devm_request_irq(). Propagate
+        * the real error (this also keeps -EPROBE_DEFER working) and keep
+        * rejecting 0 with -EINVAL as before.
+        */
+       irq = platform_get_irq(pdev, 0);
+       if (irq <= 0)
+               return irq ? irq : -EINVAL;
+
+       gpw->dev = dev;
+
+       /* Compare the ID with the low byte masked off */
+       val = readl(gpw->base + GEMINI_PWC_IDREG);
+       val &= 0xFFFFFF00U;
+       if (val != GEMINI_PWC_ID) {
+               dev_err(dev, "wrong power controller ID: %08x\n",
+                       val);
+               return -ENODEV;
+       }
+
+       /* Clear the power management IRQ */
+       val = readl(gpw->base + GEMINI_PWC_CTRLREG);
+       val |= GEMINI_CTRL_IRQ_CLR;
+       writel(val, gpw->base + GEMINI_PWC_CTRLREG);
+
+       ret = devm_request_irq(dev, irq, gemini_powerbutton_interrupt, 0,
+                              "poweroff", gpw);
+       if (ret)
+               return ret;
+
+       /*
+        * Publish the state pointer before installing the hook, so that
+        * gemini_poweroff() can never run with gpw_poweroff still NULL.
+        */
+       gpw_poweroff = gpw;
+       pm_power_off = gemini_poweroff;
+
+       /*
+        * Enable the power controller. This is crucial on Gemini
+        * systems: if this is not done, pressing the power button
+        * will result in unconditional poweroff without any warning.
+        * This makes the kernel handle the poweroff.
+        */
+       val = readl(gpw->base + GEMINI_PWC_CTRLREG);
+       val |= GEMINI_CTRL_ENABLE;
+       writel(val, gpw->base + GEMINI_PWC_CTRLREG);
+
+       dev_info(dev, "Gemini poweroff driver registered\n");
+
+       return 0;
+}
+
+/*
+ * Device tree match table, referenced from gemini_poweroff_driver via
+ * .of_match_table. The empty last entry terminates the list.
+ */
+static const struct of_device_id gemini_poweroff_of_match[] = {
+       {
+               .compatible = "cortina,gemini-power-controller",
+       },
+       {}
+};
+
+/*
+ * Platform driver definition. Only a probe callback is provided; there is
+ * no remove callback, and the driver is registered with
+ * builtin_platform_driver() (its Kconfig symbol is a bool, so it is never
+ * built as a module).
+ */
+static struct platform_driver gemini_poweroff_driver = {
+       .probe = gemini_poweroff_probe,
+       .driver = {
+               .name = "gemini-poweroff",
+               .of_match_table = gemini_poweroff_of_match,
+       },
+};
+builtin_platform_driver(gemini_poweroff_driver);
diff --git a/drivers/power/reset/syscon-poweroff.c b/drivers/power/reset/syscon-poweroff.c

index b68338399e5e1d1de2386179461d50834de94c59..f9f1cb54fbf9f2663b1f94d85427ede2a2bad5d6 100644 (file)
--- a/drivers/power/reset/syscon-poweroff.c
+++ b/drivers/power/reset/syscon-poweroff.c
@@ -28,12 +28,13 @@
  
  static struct regmap *map;
  static u32 offset;
+static u32 value;
  static u32 mask;
  
  static void syscon_poweroff(void)
  {
         /* Issue the poweroff */
-       regmap_write(map, offset, mask);
+       regmap_update_bits(map, offset, mask, value);
  
         mdelay(1000);
  
@@ -43,6 +44,7 @@ static void syscon_poweroff(void)
  static int syscon_poweroff_probe(struct platform_device *pdev)
  {
         char symname[KSYM_NAME_LEN];
+       int mask_err, value_err;
  
         map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "regmap");
         if (IS_ERR(map)) {
@@ -55,11 +57,22 @@ static int syscon_poweroff_probe(struct platform_device *pdev)
                 return -EINVAL;
         }
  
-       if (of_property_read_u32(pdev->dev.of_node, "mask", &mask)) {
-               dev_err(&pdev->dev, "unable to read 'mask'");
+       value_err = of_property_read_u32(pdev->dev.of_node, "value", &value);
+       mask_err = of_property_read_u32(pdev->dev.of_node, "mask", &mask);
+       if (value_err && mask_err) {
+               dev_err(&pdev->dev, "unable to read 'value' and 'mask'");
                 return -EINVAL;
         }
  
+       if (value_err) {
+               /* support old binding */
+               value = mask;
+               mask = 0xFFFFFFFF;
+       } else if (mask_err) {
+               /* support value without mask */
+               mask = 0xFFFFFFFF;
+       }
+
         if (pm_power_off) {
                 lookup_symbol_name((ulong)pm_power_off, symname);
                 dev_err(&pdev->dev,
diff --git a/drivers/power/supply/Kconfig b/drivers/power/supply/Kconfig

index da54ac88f068977965bec59fce129ecd0de7fc1b..da922756149fd7e2490f3e786e27f02f6d68dde4 100644 (file)
--- a/drivers/power/supply/Kconfig
+++ b/drivers/power/supply/Kconfig
@@ -117,6 +117,12 @@ config BATTERY_DS2782
           Say Y here to enable support for the DS2782/DS2786 standalone battery
           gas-gauge.
  
+config BATTERY_LEGO_EV3
+       tristate "LEGO MINDSTORMS EV3 battery"
+       depends on OF && IIO && GPIOLIB
+       help
+         Say Y here to enable support for the LEGO MINDSTORMS EV3 battery.
+
  config BATTERY_PMU
         tristate "Apple PMU battery"
         depends on PPC32 && ADB_PMU
@@ -317,6 +323,14 @@ config BATTERY_RX51
           Say Y here to enable support for battery information on Nokia
           RX-51, also known as N900 tablet.
  
+config CHARGER_CPCAP
+       tristate "CPCAP PMIC Charger Driver"
+       depends on MFD_CPCAP && IIO
+       default MFD_CPCAP
+       help
+         Say Y to enable support for CPCAP PMIC charger driver for Motorola
+         mobile devices such as Droid 4.
+
  config CHARGER_ISP1704
         tristate "ISP1704 USB Charger Detection"
         depends on USB_PHY
@@ -438,6 +452,7 @@ config CHARGER_BQ2415X
  config CHARGER_BQ24190
         tristate "TI BQ24190 battery charger driver"
         depends on I2C
+       depends on EXTCON
         depends on GPIOLIB || COMPILE_TEST
         help
           Say Y to enable support for the TI BQ24190 battery charger.
diff --git a/drivers/power/supply/Makefile b/drivers/power/supply/Makefile

index 3789a2c06fdf92dfc7d0b74bb65fc8221fe1ca71..39fc733e6cc4dc2dc7fde75f3525ed3c2aebeacd 100644 (file)
--- a/drivers/power/supply/Makefile
+++ b/drivers/power/supply/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_BATTERY_DS2781)  += ds2781_battery.o
  obj-$(CONFIG_BATTERY_DS2782)   += ds2782_battery.o
  obj-$(CONFIG_BATTERY_GAUGE_LTC2941)    += ltc2941-battery-gauge.o
  obj-$(CONFIG_BATTERY_GOLDFISH) += goldfish_battery.o
+obj-$(CONFIG_BATTERY_LEGO_EV3) += lego_ev3_battery.o
  obj-$(CONFIG_BATTERY_PMU)      += pmu_battery.o
  obj-$(CONFIG_BATTERY_OLPC)     += olpc_battery.o
  obj-$(CONFIG_BATTERY_TOSA)     += tosa_battery.o
@@ -51,6 +52,7 @@ obj-$(CONFIG_CHARGER_PCF50633)        += pcf50633-charger.o
  obj-$(CONFIG_BATTERY_JZ4740)   += jz4740-battery.o
  obj-$(CONFIG_BATTERY_RX51)     += rx51_battery.o
  obj-$(CONFIG_AB8500_BM)                += ab8500_bmdata.o ab8500_charger.o ab8500_fg.o ab8500_btemp.o abx500_chargalg.o pm2301_charger.o
+obj-$(CONFIG_CHARGER_CPCAP)    += cpcap-charger.o
  obj-$(CONFIG_CHARGER_ISP1704)  += isp1704_charger.o
  obj-$(CONFIG_CHARGER_MAX8903)  += max8903_charger.o
  obj-$(CONFIG_CHARGER_TWL4030)  += twl4030_charger.o
diff --git a/drivers/power/supply/ab8500_bmdata.c b/drivers/power/supply/ab8500_bmdata.c

index d2986453309348ce8a5fa832c694b27406e89456..8c49586015d01af18edee56c2f810c2fb64d42d9 100644 (file)
--- a/drivers/power/supply/ab8500_bmdata.c
+++ b/drivers/power/supply/ab8500_bmdata.c
@@ -430,10 +430,10 @@ static const struct abx500_maxim_parameters ab8500_maxi_params = {
  };
  
  static const struct abx500_maxim_parameters abx540_maxi_params = {
-        .ena_maxi = true,
-        .chg_curr = 3000,
-        .wait_cycles = 10,
-        .charger_curr_step = 200,
+       .ena_maxi = true,
+       .chg_curr = 3000,
+       .wait_cycles = 10,
+       .charger_curr_step = 200,
  };
  
  static const struct abx500_bm_charger_parameters chg = {
diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c

index a4f08492abebfb1769847db73b1b4f4c546bf6b5..bd9e5c3d8cc2880678f366fa0ec10c130f5c4e43 100644 (file)
--- a/drivers/power/supply/bq24190_charger.c
+++ b/drivers/power/supply/bq24190_charger.c
@@ -11,16 +11,15 @@
  #include <linux/module.h>
  #include <linux/interrupt.h>
  #include <linux/delay.h>
+#include <linux/extcon.h>
  #include <linux/of_irq.h>
  #include <linux/of_device.h>
  #include <linux/pm_runtime.h>
  #include <linux/power_supply.h>
+#include <linux/workqueue.h>
  #include <linux/gpio.h>
  #include <linux/i2c.h>
  
-#include <linux/power/bq24190_charger.h>
-
-
  #define        BQ24190_MANUFACTURER    "Texas Instruments"
  
  #define BQ24190_REG_ISC                0x00 /* Input Source Control */
@@ -39,6 +38,9 @@
  #define BQ24190_REG_POC_WDT_RESET_SHIFT                6
  #define BQ24190_REG_POC_CHG_CONFIG_MASK                (BIT(5) | BIT(4))
  #define BQ24190_REG_POC_CHG_CONFIG_SHIFT       4
+#define BQ24190_REG_POC_CHG_CONFIG_DISABLE             0x0
+#define BQ24190_REG_POC_CHG_CONFIG_CHARGE              0x1
+#define BQ24190_REG_POC_CHG_CONFIG_OTG                 0x2
  #define BQ24190_REG_POC_SYS_MIN_MASK           (BIT(3) | BIT(2) | BIT(1))
  #define BQ24190_REG_POC_SYS_MIN_SHIFT          1
  #define BQ24190_REG_POC_BOOST_LIM_MASK         BIT(0)
@@ -151,10 +153,12 @@ struct bq24190_dev_info {
         struct device                   *dev;
         struct power_supply             *charger;
         struct power_supply             *battery;
+       struct extcon_dev               *extcon;
+       struct notifier_block           extcon_nb;
+       struct delayed_work             extcon_work;
         char                            model_name[I2C_NAME_SIZE];
-       kernel_ulong_t                  model;
-       unsigned int                    gpio_int;
-       unsigned int                    irq;
+       bool                            initialized;
+       bool                            irq_event;
         struct mutex                    f_reg_lock;
         u8                              f_reg;
         u8                              ss_reg;
@@ -168,6 +172,12 @@ struct bq24190_dev_info {
   * number at that index in the array is the real-world value that it
   * represents.
   */
+
+/* REG00[2:0] (IINLIM) in uA */
+static const int bq24190_isc_iinlim_values[] = {
+        100000,  150000,  500000,  900000, 1200000, 1500000, 2000000, 3000000
+};
+
  /* REG02[7:2] (ICHG) in uAh */
  static const int bq24190_ccc_ichg_values[] = {
          512000,  576000,  640000,  704000,  768000,  832000,  896000,  960000,
@@ -418,6 +428,7 @@ static ssize_t bq24190_sysfs_show(struct device *dev,
         struct power_supply *psy = dev_get_drvdata(dev);
         struct bq24190_dev_info *bdi = power_supply_get_drvdata(psy);
         struct bq24190_sysfs_field_info *info;
+       ssize_t count;
         int ret;
         u8 v;
  
@@ -425,11 +436,20 @@ static ssize_t bq24190_sysfs_show(struct device *dev,
         if (!info)
                 return -EINVAL;
  
+       ret = pm_runtime_get_sync(bdi->dev);
+       if (ret < 0)
+               return ret;
+
         ret = bq24190_read_mask(bdi, info->reg, info->mask, info->shift, &v);
         if (ret)
-               return ret;
+               count = ret;
+       else
+               count = scnprintf(buf, PAGE_SIZE, "%hhx\n", v);
+
+       pm_runtime_mark_last_busy(bdi->dev);
+       pm_runtime_put_autosuspend(bdi->dev);
  
-       return scnprintf(buf, PAGE_SIZE, "%hhx\n", v);
+       return count;
  }
  
  static ssize_t bq24190_sysfs_store(struct device *dev,
@@ -449,9 +469,16 @@ static ssize_t bq24190_sysfs_store(struct device *dev,
         if (ret < 0)
                 return ret;
  
+       ret = pm_runtime_get_sync(bdi->dev);
+       if (ret < 0)
+               return ret;
+
         ret = bq24190_write_mask(bdi, info->reg, info->mask, info->shift, v);
         if (ret)
-               return ret;
+               count = ret;
+
+       pm_runtime_mark_last_busy(bdi->dev);
+       pm_runtime_put_autosuspend(bdi->dev);
  
         return count;
  }
@@ -523,16 +550,13 @@ static int bq24190_register_reset(struct bq24190_dev_info *bdi)
                 if (ret < 0)
                         return ret;
  
-               if (!v)
-                       break;
+               if (v == 0)
+                       return 0;
  
-               udelay(10);
+               usleep_range(100, 200);
         } while (--limit);
  
-       if (!limit)
-               return -EIO;
-
-       return 0;
+       return -EIO;
  }
  
  /* Charger power supply property routines */
@@ -793,7 +817,9 @@ static int bq24190_charger_get_property(struct power_supply *psy,
  
         dev_dbg(bdi->dev, "prop: %d\n", psp);
  
-       pm_runtime_get_sync(bdi->dev);
+       ret = pm_runtime_get_sync(bdi->dev);
+       if (ret < 0)
+               return ret;
  
         switch (psp) {
         case POWER_SUPPLY_PROP_CHARGE_TYPE:
@@ -833,7 +859,9 @@ static int bq24190_charger_get_property(struct power_supply *psy,
                 ret = -ENODATA;
         }
  
-       pm_runtime_put_sync(bdi->dev);
+       pm_runtime_mark_last_busy(bdi->dev);
+       pm_runtime_put_autosuspend(bdi->dev);
+
         return ret;
  }
  
@@ -846,7 +874,9 @@ static int bq24190_charger_set_property(struct power_supply *psy,
  
         dev_dbg(bdi->dev, "prop: %d\n", psp);
  
-       pm_runtime_get_sync(bdi->dev);
+       ret = pm_runtime_get_sync(bdi->dev);
+       if (ret < 0)
+               return ret;
  
         switch (psp) {
         case POWER_SUPPLY_PROP_CHARGE_TYPE:
@@ -862,7 +892,9 @@ static int bq24190_charger_set_property(struct power_supply *psy,
                 ret = -EINVAL;
         }
  
-       pm_runtime_put_sync(bdi->dev);
+       pm_runtime_mark_last_busy(bdi->dev);
+       pm_runtime_put_autosuspend(bdi->dev);
+
         return ret;
  }
  
@@ -1063,7 +1095,9 @@ static int bq24190_battery_get_property(struct power_supply *psy,
  
         dev_dbg(bdi->dev, "prop: %d\n", psp);
  
-       pm_runtime_get_sync(bdi->dev);
+       ret = pm_runtime_get_sync(bdi->dev);
+       if (ret < 0)
+               return ret;
  
         switch (psp) {
         case POWER_SUPPLY_PROP_STATUS:
@@ -1091,7 +1125,9 @@ static int bq24190_battery_get_property(struct power_supply *psy,
                 ret = -ENODATA;
         }
  
-       pm_runtime_put_sync(bdi->dev);
+       pm_runtime_mark_last_busy(bdi->dev);
+       pm_runtime_put_autosuspend(bdi->dev);
+
         return ret;
  }
  
@@ -1104,7 +1140,9 @@ static int bq24190_battery_set_property(struct power_supply *psy,
  
         dev_dbg(bdi->dev, "prop: %d\n", psp);
  
-       pm_runtime_get_sync(bdi->dev);
+       ret = pm_runtime_get_sync(bdi->dev);
+       if (ret < 0)
+               return ret;
  
         switch (psp) {
         case POWER_SUPPLY_PROP_ONLINE:
@@ -1117,7 +1155,9 @@ static int bq24190_battery_set_property(struct power_supply *psy,
                 ret = -EINVAL;
         }
  
-       pm_runtime_put_sync(bdi->dev);
+       pm_runtime_mark_last_busy(bdi->dev);
+       pm_runtime_put_autosuspend(bdi->dev);
+
         return ret;
  }
  
@@ -1157,9 +1197,8 @@ static const struct power_supply_desc bq24190_battery_desc = {
         .property_is_writeable  = bq24190_battery_property_is_writeable,
  };
  
-static irqreturn_t bq24190_irq_handler_thread(int irq, void *data)
+static void bq24190_check_status(struct bq24190_dev_info *bdi)
  {
-       struct bq24190_dev_info *bdi = data;
         const u8 battery_mask_ss = BQ24190_REG_SS_CHRG_STAT_MASK;
         const u8 battery_mask_f = BQ24190_REG_F_BAT_FAULT_MASK
                                 | BQ24190_REG_F_NTC_FAULT_MASK;
@@ -1167,12 +1206,10 @@ static irqreturn_t bq24190_irq_handler_thread(int irq, void *data)
         u8 ss_reg = 0, f_reg = 0;
         int i, ret;
  
-       pm_runtime_get_sync(bdi->dev);
-
         ret = bq24190_read(bdi, BQ24190_REG_SS, &ss_reg);
         if (ret < 0) {
                 dev_err(bdi->dev, "Can't read SS reg: %d\n", ret);
-               goto out;
+               return;
         }
  
         i = 0;
@@ -1180,12 +1217,17 @@ static irqreturn_t bq24190_irq_handler_thread(int irq, void *data)
                 ret = bq24190_read(bdi, BQ24190_REG_F, &f_reg);
                 if (ret < 0) {
                         dev_err(bdi->dev, "Can't read F reg: %d\n", ret);
-                       goto out;
+                       return;
                 }
         } while (f_reg && ++i < 2);
  
+       /* ignore over/under voltage fault after disconnect */
+       if (f_reg == (1 << BQ24190_REG_F_CHRG_FAULT_SHIFT) &&
+           !(ss_reg & BQ24190_REG_SS_PG_STAT_MASK))
+               f_reg = 0;
+
         if (f_reg != bdi->f_reg) {
-               dev_info(bdi->dev,
+               dev_warn(bdi->dev,
                         "Fault: boost %d, charge %d, battery %d, ntc %d\n",
                         !!(f_reg & BQ24190_REG_F_BOOST_FAULT_MASK),
                         !!(f_reg & BQ24190_REG_F_CHRG_FAULT_MASK),
@@ -1229,90 +1271,126 @@ static irqreturn_t bq24190_irq_handler_thread(int irq, void *data)
         if (alert_battery)
                 power_supply_changed(bdi->battery);
  
-out:
-       pm_runtime_put_sync(bdi->dev);
-
         dev_dbg(bdi->dev, "ss_reg: 0x%02x, f_reg: 0x%02x\n", ss_reg, f_reg);
+}
+
+static irqreturn_t bq24190_irq_handler_thread(int irq, void *data)
+{
+       struct bq24190_dev_info *bdi = data;
+       int error;
+
+       bdi->irq_event = true;
+       error = pm_runtime_get_sync(bdi->dev);
+       if (error < 0) {
+               dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error);
+               pm_runtime_put_noidle(bdi->dev);
+               return IRQ_NONE;
+       }
+       bq24190_check_status(bdi);
+       pm_runtime_mark_last_busy(bdi->dev);
+       pm_runtime_put_autosuspend(bdi->dev);
+       bdi->irq_event = false;
  
         return IRQ_HANDLED;
  }
  
-static int bq24190_hw_init(struct bq24190_dev_info *bdi)
+static void bq24190_extcon_work(struct work_struct *work)
  {
+       struct bq24190_dev_info *bdi =
+               container_of(work, struct bq24190_dev_info, extcon_work.work);
+       int error, iinlim = 0;
         u8 v;
-       int ret;
-
-       pm_runtime_get_sync(bdi->dev);
  
-       /* First check that the device really is what its supposed to be */
-       ret = bq24190_read_mask(bdi, BQ24190_REG_VPRS,
-                       BQ24190_REG_VPRS_PN_MASK,
-                       BQ24190_REG_VPRS_PN_SHIFT,
-                       &v);
-       if (ret < 0)
-               goto out;
+       error = pm_runtime_get_sync(bdi->dev);
+       if (error < 0) {
+               dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error);
+               pm_runtime_put_noidle(bdi->dev);
+               return;
+       }
  
-       if (v != bdi->model) {
-               ret = -ENODEV;
-               goto out;
+       if      (extcon_get_state(bdi->extcon, EXTCON_CHG_USB_SDP) == 1)
+               iinlim =  500000;
+       else if (extcon_get_state(bdi->extcon, EXTCON_CHG_USB_CDP) == 1 ||
+                extcon_get_state(bdi->extcon, EXTCON_CHG_USB_ACA) == 1)
+               iinlim = 1500000;
+       else if (extcon_get_state(bdi->extcon, EXTCON_CHG_USB_DCP) == 1)
+               iinlim = 2000000;
+
+       if (iinlim) {
+               error = bq24190_set_field_val(bdi, BQ24190_REG_ISC,
+                                             BQ24190_REG_ISC_IINLIM_MASK,
+                                             BQ24190_REG_ISC_IINLIM_SHIFT,
+                                             bq24190_isc_iinlim_values,
+                                             ARRAY_SIZE(bq24190_isc_iinlim_values),
+                                             iinlim);
+               if (error < 0)
+                       dev_err(bdi->dev, "Can't set IINLIM: %d\n", error);
         }
  
-       ret = bq24190_register_reset(bdi);
-       if (ret < 0)
-               goto out;
+       /* if no charger found and in USB host mode, set OTG 5V boost, else normal */
+       if (!iinlim && extcon_get_state(bdi->extcon, EXTCON_USB_HOST) == 1)
+               v = BQ24190_REG_POC_CHG_CONFIG_OTG;
+       else
+               v = BQ24190_REG_POC_CHG_CONFIG_CHARGE;
  
-       ret = bq24190_set_mode_host(bdi);
-       if (ret < 0)
-               goto out;
+       error = bq24190_write_mask(bdi, BQ24190_REG_POC,
+                                  BQ24190_REG_POC_CHG_CONFIG_MASK,
+                                  BQ24190_REG_POC_CHG_CONFIG_SHIFT,
+                                  v);
+       if (error < 0)
+               dev_err(bdi->dev, "Can't set CHG_CONFIG: %d\n", error);
  
-       ret = bq24190_read(bdi, BQ24190_REG_SS, &bdi->ss_reg);
-out:
-       pm_runtime_put_sync(bdi->dev);
-       return ret;
+       pm_runtime_mark_last_busy(bdi->dev);
+       pm_runtime_put_autosuspend(bdi->dev);
  }
  
-#ifdef CONFIG_OF
-static int bq24190_setup_dt(struct bq24190_dev_info *bdi)
+static int bq24190_extcon_event(struct notifier_block *nb, unsigned long event,
+                               void *param)
  {
-       bdi->irq = irq_of_parse_and_map(bdi->dev->of_node, 0);
-       if (bdi->irq <= 0)
-               return -1;
+       struct bq24190_dev_info *bdi =
+               container_of(nb, struct bq24190_dev_info, extcon_nb);
  
-       return 0;
-}
-#else
-static int bq24190_setup_dt(struct bq24190_dev_info *bdi)
-{
-       return -1;
+       /*
+        * The Power-Good detection may take up to 220ms, sometimes
+        * the external charger detection is quicker, and the bq24190 will
+        * reset to iinlim based on its own charger detection (which is not
+        * hooked up when using external charger detection) resulting in
+        * a too low default 500mA iinlim. Delay applying the extcon value
+        * for 300ms to avoid this.
+        */
+       queue_delayed_work(system_wq, &bdi->extcon_work, msecs_to_jiffies(300));
+
+       return NOTIFY_OK;
  }
-#endif
  
-static int bq24190_setup_pdata(struct bq24190_dev_info *bdi,
-               struct bq24190_platform_data *pdata)
+static int bq24190_hw_init(struct bq24190_dev_info *bdi)
  {
+       u8 v;
         int ret;
  
-       if (!gpio_is_valid(pdata->gpio_int))
-               return -1;
-
-       ret = gpio_request(pdata->gpio_int, dev_name(bdi->dev));
+       /* First check that the device really is what its supposed to be */
+       ret = bq24190_read_mask(bdi, BQ24190_REG_VPRS,
+                       BQ24190_REG_VPRS_PN_MASK,
+                       BQ24190_REG_VPRS_PN_SHIFT,
+                       &v);
         if (ret < 0)
-               return -1;
+               return ret;
  
-       ret = gpio_direction_input(pdata->gpio_int);
-       if (ret < 0)
-               goto out;
+       if (v != BQ24190_REG_VPRS_PN_24190 &&
+           v != BQ24190_REG_VPRS_PN_24192I) {
+               dev_err(bdi->dev, "Error unknown model: 0x%02x\n", v);
+               return -ENODEV;
+       }
  
-       bdi->irq = gpio_to_irq(pdata->gpio_int);
-       if (!bdi->irq)
-               goto out;
+       ret = bq24190_register_reset(bdi);
+       if (ret < 0)
+               return ret;
  
-       bdi->gpio_int = pdata->gpio_int;
-       return 0;
+       ret = bq24190_set_mode_host(bdi);
+       if (ret < 0)
+               return ret;
  
-out:
-       gpio_free(pdata->gpio_int);
-       return -1;
+       return bq24190_read(bdi, BQ24190_REG_SS, &bdi->ss_reg);
  }
  
  static int bq24190_probe(struct i2c_client *client,
@@ -1320,9 +1398,9 @@ static int bq24190_probe(struct i2c_client *client,
  {
         struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
         struct device *dev = &client->dev;
-       struct bq24190_platform_data *pdata = client->dev.platform_data;
         struct power_supply_config charger_cfg = {}, battery_cfg = {};
         struct bq24190_dev_info *bdi;
+       const char *name;
         int ret;
  
         if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) {
@@ -1338,7 +1416,6 @@ static int bq24190_probe(struct i2c_client *client,
  
         bdi->client = client;
         bdi->dev = dev;
-       bdi->model = id->driver_data;
         strncpy(bdi->model_name, id->name, I2C_NAME_SIZE);
         mutex_init(&bdi->f_reg_lock);
         bdi->f_reg = 0;
@@ -1346,23 +1423,43 @@ static int bq24190_probe(struct i2c_client *client,
  
         i2c_set_clientdata(client, bdi);
  
-       if (dev->of_node)
-               ret = bq24190_setup_dt(bdi);
-       else
-               ret = bq24190_setup_pdata(bdi, pdata);
-
-       if (ret) {
+       if (!client->irq) {
                 dev_err(dev, "Can't get irq info\n");
                 return -EINVAL;
         }
  
+       /*
+        * Devicetree platforms should get extcon via phandle (not yet supported).
+        * On ACPI platforms, extcon clients may invoke us with:
+        * struct property_entry pe[] =
+        *   { PROPERTY_ENTRY_STRING("extcon-name", client_name), ... };
+        * struct i2c_board_info bi =
+        *   { .type = "bq24190", .addr = 0x6b, .properties = pe, .irq = irq };
+        * struct i2c_adapter ad = { ... };
+        * i2c_add_adapter(&ad);
+        * i2c_new_device(&ad, &bi);
+        */
+       if (device_property_read_string(dev, "extcon-name", &name) == 0) {
+               bdi->extcon = extcon_get_extcon_dev(name);
+               if (!bdi->extcon)
+                       return -EPROBE_DEFER;
+
+               dev_info(bdi->dev, "using extcon device %s\n", name);
+       }
+
         pm_runtime_enable(dev);
-       pm_runtime_resume(dev);
+       pm_runtime_use_autosuspend(dev);
+       pm_runtime_set_autosuspend_delay(dev, 600);
+       ret = pm_runtime_get_sync(dev);
+       if (ret < 0) {
+               dev_err(dev, "pm_runtime_get failed: %i\n", ret);
+               goto out_pmrt;
+       }
  
         ret = bq24190_hw_init(bdi);
         if (ret < 0) {
                 dev_err(dev, "Hardware init failed\n");
-               goto out1;
+               goto out_pmrt;
         }
  
         charger_cfg.drv_data = bdi;
@@ -1373,7 +1470,7 @@ static int bq24190_probe(struct i2c_client *client,
         if (IS_ERR(bdi->charger)) {
                 dev_err(dev, "Can't register charger\n");
                 ret = PTR_ERR(bdi->charger);
-               goto out1;
+               goto out_pmrt;
         }
  
         battery_cfg.drv_data = bdi;
@@ -1382,87 +1479,160 @@ static int bq24190_probe(struct i2c_client *client,
         if (IS_ERR(bdi->battery)) {
                 dev_err(dev, "Can't register battery\n");
                 ret = PTR_ERR(bdi->battery);
-               goto out2;
+               goto out_charger;
         }
  
         ret = bq24190_sysfs_create_group(bdi);
         if (ret) {
                 dev_err(dev, "Can't create sysfs entries\n");
-               goto out3;
+               goto out_battery;
         }
  
-       ret = devm_request_threaded_irq(dev, bdi->irq, NULL,
+       bdi->initialized = true;
+
+       ret = devm_request_threaded_irq(dev, client->irq, NULL,
                         bq24190_irq_handler_thread,
                         IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
                         "bq24190-charger", bdi);
         if (ret < 0) {
                 dev_err(dev, "Can't set up irq handler\n");
-               goto out4;
+               goto out_sysfs;
+       }
+
+       if (bdi->extcon) {
+               INIT_DELAYED_WORK(&bdi->extcon_work, bq24190_extcon_work);
+               bdi->extcon_nb.notifier_call = bq24190_extcon_event;
+               ret = devm_extcon_register_notifier_all(dev, bdi->extcon,
+                                                       &bdi->extcon_nb);
+               if (ret) {
+                       dev_err(dev, "Can't register extcon\n");
+                       goto out_sysfs;
+               }
+
+               /* Sync initial cable state */
+               queue_delayed_work(system_wq, &bdi->extcon_work, 0);
         }
  
+       enable_irq_wake(client->irq);
+
+       pm_runtime_mark_last_busy(dev);
+       pm_runtime_put_autosuspend(dev);
+
         return 0;
  
-out4:
+out_sysfs:
         bq24190_sysfs_remove_group(bdi);
  
-out3:
+out_battery:
         power_supply_unregister(bdi->battery);
  
-out2:
+out_charger:
         power_supply_unregister(bdi->charger);
  
-out1:
+out_pmrt:
+       pm_runtime_put_sync(dev);
+       pm_runtime_dont_use_autosuspend(dev);
         pm_runtime_disable(dev);
-       if (bdi->gpio_int)
-               gpio_free(bdi->gpio_int);
         return ret;
  }
  
  static int bq24190_remove(struct i2c_client *client)
  {
         struct bq24190_dev_info *bdi = i2c_get_clientdata(client);
+       int error;
  
-       pm_runtime_get_sync(bdi->dev);
-       bq24190_register_reset(bdi);
-       pm_runtime_put_sync(bdi->dev);
+       error = pm_runtime_get_sync(bdi->dev);
+       if (error < 0) {
+               dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error);
+               pm_runtime_put_noidle(bdi->dev);
+       }
  
+       bq24190_register_reset(bdi);
         bq24190_sysfs_remove_group(bdi);
         power_supply_unregister(bdi->battery);
         power_supply_unregister(bdi->charger);
+       if (error >= 0)
+               pm_runtime_put_sync(bdi->dev);
+       pm_runtime_dont_use_autosuspend(bdi->dev);
         pm_runtime_disable(bdi->dev);
  
-       if (bdi->gpio_int)
-               gpio_free(bdi->gpio_int);
+       return 0;
+}
+
+static __maybe_unused int bq24190_runtime_suspend(struct device *dev)
+{
+       struct i2c_client *client = to_i2c_client(dev);
+       struct bq24190_dev_info *bdi = i2c_get_clientdata(client);
+
+       if (!bdi->initialized)
+               return 0;
+
+       dev_dbg(bdi->dev, "%s\n", __func__);
+
+       return 0;
+}
+
+static __maybe_unused int bq24190_runtime_resume(struct device *dev)
+{
+       struct i2c_client *client = to_i2c_client(dev);
+       struct bq24190_dev_info *bdi = i2c_get_clientdata(client);
+
+       if (!bdi->initialized)
+               return 0;
+
+       if (!bdi->irq_event) {
+               dev_dbg(bdi->dev, "checking events on possible wakeirq\n");
+               bq24190_check_status(bdi);
+       }
  
         return 0;
  }
  
-#ifdef CONFIG_PM_SLEEP
-static int bq24190_pm_suspend(struct device *dev)
+static __maybe_unused int bq24190_pm_suspend(struct device *dev)
  {
         struct i2c_client *client = to_i2c_client(dev);
         struct bq24190_dev_info *bdi = i2c_get_clientdata(client);
+       int error;
+
+       error = pm_runtime_get_sync(bdi->dev);
+       if (error < 0) {
+               dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error);
+               pm_runtime_put_noidle(bdi->dev);
+       }
  
-       pm_runtime_get_sync(bdi->dev);
         bq24190_register_reset(bdi);
-       pm_runtime_put_sync(bdi->dev);
+
+       if (error >= 0) {
+               pm_runtime_mark_last_busy(bdi->dev);
+               pm_runtime_put_autosuspend(bdi->dev);
+       }
  
         return 0;
  }
  
-static int bq24190_pm_resume(struct device *dev)
+static __maybe_unused int bq24190_pm_resume(struct device *dev)
  {
         struct i2c_client *client = to_i2c_client(dev);
         struct bq24190_dev_info *bdi = i2c_get_clientdata(client);
+       int error;
  
         bdi->f_reg = 0;
         bdi->ss_reg = BQ24190_REG_SS_VBUS_STAT_MASK; /* impossible state */
  
-       pm_runtime_get_sync(bdi->dev);
+       error = pm_runtime_get_sync(bdi->dev);
+       if (error < 0) {
+               dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error);
+               pm_runtime_put_noidle(bdi->dev);
+       }
+
         bq24190_register_reset(bdi);
         bq24190_set_mode_host(bdi);
         bq24190_read(bdi, BQ24190_REG_SS, &bdi->ss_reg);
-       pm_runtime_put_sync(bdi->dev);
+
+       if (error >= 0) {
+               pm_runtime_mark_last_busy(bdi->dev);
+               pm_runtime_put_autosuspend(bdi->dev);
+       }
  
         /* Things may have changed while suspended so alert upper layer */
         power_supply_changed(bdi->charger);
@@ -1470,17 +1640,16 @@ static int bq24190_pm_resume(struct device *dev)
  
         return 0;
  }
-#endif
  
-static SIMPLE_DEV_PM_OPS(bq24190_pm_ops, bq24190_pm_suspend, bq24190_pm_resume);
+static const struct dev_pm_ops bq24190_pm_ops = {
+       SET_RUNTIME_PM_OPS(bq24190_runtime_suspend, bq24190_runtime_resume,
+                          NULL)
+       SET_SYSTEM_SLEEP_PM_OPS(bq24190_pm_suspend, bq24190_pm_resume)
+};
  
-/*
- * Only support the bq24190 right now.  The bq24192, bq24192i, and bq24193
- * are similar but not identical so the driver needs to be extended to
- * support them.
- */
  static const struct i2c_device_id bq24190_i2c_ids[] = {
-       { "bq24190", BQ24190_REG_VPRS_PN_24190 },
+       { "bq24190" },
+       { "bq24192i" },
         { },
  };
  MODULE_DEVICE_TABLE(i2c, bq24190_i2c_ids);
diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c

index f993a55cde20f34a00d60d46b762e37ecef7af64..8e2c41ded171cebe0946cb3cbca219e992a32ac8 100644 (file)
--- a/drivers/power/supply/bq25890_charger.c
+++ b/drivers/power/supply/bq25890_charger.c
@@ -723,7 +723,7 @@ static int bq25890_irq_probe(struct bq25890_device *bq)
  {
         struct gpio_desc *irq;
  
-       irq = devm_gpiod_get_index(bq->dev, BQ25890_IRQ_PIN, 0, GPIOD_IN);
+       irq = devm_gpiod_get(bq->dev, BQ25890_IRQ_PIN, GPIOD_IN);
         if (IS_ERR(irq)) {
                 dev_err(bq->dev, "Could not probe irq pin.\n");
                 return PTR_ERR(irq);
diff --git a/drivers/power/supply/charger-manager.c b/drivers/power/supply/charger-manager.c

index e664ca7c0afd7c6136e7f8618551e26784fe7f47..adc3761831e1b433b2f68617892595f06107cea9 100644 (file)
--- a/drivers/power/supply/charger-manager.c
+++ b/drivers/power/supply/charger-manager.c
@@ -1198,7 +1198,7 @@ static int charger_extcon_notifier(struct notifier_block *self,
  static int charger_extcon_init(struct charger_manager *cm,
                 struct charger_cable *cable)
  {
-       int ret = 0;
+       int ret;
  
         /*
          * Charger manager use Extcon framework to identify
@@ -1232,7 +1232,7 @@ static int charger_manager_register_extcon(struct charger_manager *cm)
  {
         struct charger_desc *desc = cm->desc;
         struct charger_regulator *charger;
-       int ret = 0;
+       int ret;
         int i;
         int j;
  
@@ -1255,15 +1255,14 @@ static int charger_manager_register_extcon(struct charger_manager *cm)
                         if (ret < 0) {
                                 dev_err(cm->dev, "Cannot initialize charger(%s)\n",
                                         charger->regulator_name);
-                               goto err;
+                               return ret;
                         }
                         cable->charger = charger;
                         cable->cm = cm;
                 }
         }
  
-err:
-       return ret;
+       return 0;
  }
  
  /* help function of sysfs node to control charger(regulator) */
@@ -1372,7 +1371,7 @@ static int charger_manager_register_sysfs(struct charger_manager *cm)
         int chargers_externally_control = 1;
         char buf[11];
         char *str;
-       int ret = 0;
+       int ret;
         int i;
  
         /* Create sysfs entry to control charger(regulator) */
@@ -1382,10 +1381,9 @@ static int charger_manager_register_sysfs(struct charger_manager *cm)
                 snprintf(buf, 10, "charger.%d", i);
                 str = devm_kzalloc(cm->dev,
                                 sizeof(char) * (strlen(buf) + 1), GFP_KERNEL);
-               if (!str) {
-                       ret = -ENOMEM;
-                       goto err;
-               }
+               if (!str)
+                       return -ENOMEM;
+
                 strcpy(str, buf);
  
                 charger->attrs[0] = &charger->attr_name.attr;
@@ -1426,19 +1424,16 @@ static int charger_manager_register_sysfs(struct charger_manager *cm)
                 if (ret < 0) {
                         dev_err(cm->dev, "Cannot create sysfs entry of %s regulator\n",
                                 charger->regulator_name);
-                       ret = -EINVAL;
-                       goto err;
+                       return ret;
                 }
         }
  
         if (chargers_externally_control) {
                 dev_err(cm->dev, "Cannot register regulator because charger-manager must need at least one charger for charging battery\n");
-               ret = -EINVAL;
-               goto err;
+               return -EINVAL;
         }
  
-err:
-       return ret;
+       return 0;
  }
  
  static int cm_init_thermal_data(struct charger_manager *cm,
@@ -1626,7 +1621,7 @@ static int charger_manager_probe(struct platform_device *pdev)
  {
         struct charger_desc *desc = cm_get_drv_data(pdev);
         struct charger_manager *cm;
-       int ret = 0, i = 0;
+       int ret, i = 0;
         int j = 0;
         union power_supply_propval val;
         struct power_supply *fuel_gauge;
@@ -1887,14 +1882,12 @@ MODULE_DEVICE_TABLE(platform, charger_manager_id);
  
  static int cm_suspend_noirq(struct device *dev)
  {
-       int ret = 0;
-
         if (device_may_wakeup(dev)) {
                 device_set_wakeup_capable(dev, false);
-               ret = -EAGAIN;
+               return -EAGAIN;
         }
  
-       return ret;
+       return 0;
  }
  
  static bool cm_need_to_awake(void)
diff --git a/drivers/power/supply/cpcap-charger.c b/drivers/power/supply/cpcap-charger.c

new file mode 100644 (file)

index 0000000..543a1bd
--- /dev/null
+++ b/drivers/power/supply/cpcap-charger.c
@@ -0,0 +1,681 @@
+/*
+ * Motorola CPCAP PMIC battery charger driver
+ *
+ * Copyright (C) 2017 Tony Lindgren <tony@atomide.com>
+ *
+ * Rewritten for Linux power framework with some parts based on
+ * an earlier driver found in the Motorola Linux kernel:
+ *
+ * Copyright (C) 2009-2010 Motorola, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/atomic.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/power_supply.h>
+#include <linux/regmap.h>
+
+#include <linux/gpio/consumer.h>
+#include <linux/usb/phy_companion.h>
+#include <linux/phy/omap_usb.h>
+#include <linux/usb/otg.h>
+#include <linux/iio/consumer.h>
+#include <linux/mfd/motorola-cpcap.h>
+
+/* CPCAP_REG_CRM register bits */
+#define CPCAP_REG_CRM_UNUSED_641_15    BIT(15) /* 641 = register number */
+#define CPCAP_REG_CRM_UNUSED_641_14    BIT(14) /* 641 = register number */
+#define CPCAP_REG_CRM_CHRG_LED_EN      BIT(13)
+#define CPCAP_REG_CRM_RVRSMODE         BIT(12)
+#define CPCAP_REG_CRM_ICHRG_TR1                BIT(11)
+#define CPCAP_REG_CRM_ICHRG_TR0                BIT(10)
+#define CPCAP_REG_CRM_FET_OVRD         BIT(9)
+#define CPCAP_REG_CRM_FET_CTRL         BIT(8)
+#define CPCAP_REG_CRM_VCHRG3           BIT(7)
+#define CPCAP_REG_CRM_VCHRG2           BIT(6)
+#define CPCAP_REG_CRM_VCHRG1           BIT(5)
+#define CPCAP_REG_CRM_VCHRG0           BIT(4)
+#define CPCAP_REG_CRM_ICHRG3           BIT(3)
+#define CPCAP_REG_CRM_ICHRG2           BIT(2)
+#define CPCAP_REG_CRM_ICHRG1           BIT(1)
+#define CPCAP_REG_CRM_ICHRG0           BIT(0)
+
+/* CPCAP_REG_CRM trickle charge currents */
+#define CPCAP_REG_CRM_TR(val)          (((val) & 0x3) << 10)
+#define CPCAP_REG_CRM_TR_0A00          CPCAP_REG_CRM_TR(0x0)
+#define CPCAP_REG_CRM_TR_0A24          CPCAP_REG_CRM_TR(0x1)
+#define CPCAP_REG_CRM_TR_0A48          CPCAP_REG_CRM_TR(0x2)
+#define CPCAP_REG_CRM_TR_0A72          CPCAP_REG_CRM_TR(0x4)
+
+/* CPCAP_REG_CRM charge voltages */
+#define CPCAP_REG_CRM_VCHRG(val)       (((val) & 0xf) << 4)
+#define CPCAP_REG_CRM_VCHRG_3V80       CPCAP_REG_CRM_VCHRG(0x0)
+#define CPCAP_REG_CRM_VCHRG_4V10       CPCAP_REG_CRM_VCHRG(0x1)
+#define CPCAP_REG_CRM_VCHRG_4V15       CPCAP_REG_CRM_VCHRG(0x2)
+#define CPCAP_REG_CRM_VCHRG_4V20       CPCAP_REG_CRM_VCHRG(0x3)
+#define CPCAP_REG_CRM_VCHRG_4V22       CPCAP_REG_CRM_VCHRG(0x4)
+#define CPCAP_REG_CRM_VCHRG_4V24       CPCAP_REG_CRM_VCHRG(0x5)
+#define CPCAP_REG_CRM_VCHRG_4V26       CPCAP_REG_CRM_VCHRG(0x6)
+#define CPCAP_REG_CRM_VCHRG_4V28       CPCAP_REG_CRM_VCHRG(0x7)
+#define CPCAP_REG_CRM_VCHRG_4V30       CPCAP_REG_CRM_VCHRG(0x8)
+#define CPCAP_REG_CRM_VCHRG_4V32       CPCAP_REG_CRM_VCHRG(0x9)
+#define CPCAP_REG_CRM_VCHRG_4V34       CPCAP_REG_CRM_VCHRG(0xa)
+#define CPCAP_REG_CRM_VCHRG_4V36       CPCAP_REG_CRM_VCHRG(0xb)
+#define CPCAP_REG_CRM_VCHRG_4V38       CPCAP_REG_CRM_VCHRG(0xc)
+#define CPCAP_REG_CRM_VCHRG_4V40       CPCAP_REG_CRM_VCHRG(0xd)
+#define CPCAP_REG_CRM_VCHRG_4V42       CPCAP_REG_CRM_VCHRG(0xe)
+#define CPCAP_REG_CRM_VCHRG_4V44       CPCAP_REG_CRM_VCHRG(0xf)
+
+/* CPCAP_REG_CRM charge currents */
+#define CPCAP_REG_CRM_ICHRG(val)       (((val) & 0xf) << 0)
+#define CPCAP_REG_CRM_ICHRG_0A000      CPCAP_REG_CRM_ICHRG(0x0)
+#define CPCAP_REG_CRM_ICHRG_0A070      CPCAP_REG_CRM_ICHRG(0x1)
+#define CPCAP_REG_CRM_ICHRG_0A176      CPCAP_REG_CRM_ICHRG(0x2)
+#define CPCAP_REG_CRM_ICHRG_0A264      CPCAP_REG_CRM_ICHRG(0x3)
+#define CPCAP_REG_CRM_ICHRG_0A352      CPCAP_REG_CRM_ICHRG(0x4)
+#define CPCAP_REG_CRM_ICHRG_0A440      CPCAP_REG_CRM_ICHRG(0x5)
+#define CPCAP_REG_CRM_ICHRG_0A528      CPCAP_REG_CRM_ICHRG(0x6)
+#define CPCAP_REG_CRM_ICHRG_0A616      CPCAP_REG_CRM_ICHRG(0x7)
+#define CPCAP_REG_CRM_ICHRG_0A704      CPCAP_REG_CRM_ICHRG(0x8)
+#define CPCAP_REG_CRM_ICHRG_0A792      CPCAP_REG_CRM_ICHRG(0x9)
+#define CPCAP_REG_CRM_ICHRG_0A880      CPCAP_REG_CRM_ICHRG(0xa)
+#define CPCAP_REG_CRM_ICHRG_0A968      CPCAP_REG_CRM_ICHRG(0xb)
+#define CPCAP_REG_CRM_ICHRG_1A056      CPCAP_REG_CRM_ICHRG(0xc)
+#define CPCAP_REG_CRM_ICHRG_1A144      CPCAP_REG_CRM_ICHRG(0xd)
+#define CPCAP_REG_CRM_ICHRG_1A584      CPCAP_REG_CRM_ICHRG(0xe)
+#define CPCAP_REG_CRM_ICHRG_NO_LIMIT   CPCAP_REG_CRM_ICHRG(0xf)
+
+enum {
+       CPCAP_CHARGER_IIO_BATTDET,
+       CPCAP_CHARGER_IIO_VOLTAGE,
+       CPCAP_CHARGER_IIO_VBUS,
+       CPCAP_CHARGER_IIO_CHRG_CURRENT,
+       CPCAP_CHARGER_IIO_BATT_CURRENT,
+       CPCAP_CHARGER_IIO_NR,
+};
+
+struct cpcap_charger_ddata {
+       struct device *dev;
+       struct regmap *reg;
+       struct list_head irq_list;
+       struct delayed_work detect_work;
+       struct delayed_work vbus_work;
+       struct gpio_desc *gpio[2];              /* gpio_reven0 & 1 */
+
+       struct iio_channel *channels[CPCAP_CHARGER_IIO_NR];
+
+       struct power_supply *usb;
+
+       struct phy_companion comparator;        /* For USB VBUS */
+       bool vbus_enabled;
+       atomic_t active;
+
+       int status;
+};
+
+struct cpcap_interrupt_desc {
+       int irq;
+       struct list_head node;
+       const char *name;
+};
+
+struct cpcap_charger_ints_state {
+       bool chrg_det;
+       bool rvrs_chrg;
+       bool vbusov;
+
+       bool chrg_se1b;
+       bool rvrs_mode;
+       bool chrgcurr1;
+       bool vbusvld;
+
+       bool battdetb;
+};
+
+static enum power_supply_property cpcap_charger_props[] = {
+       POWER_SUPPLY_PROP_STATUS,
+       POWER_SUPPLY_PROP_ONLINE,
+       POWER_SUPPLY_PROP_VOLTAGE_NOW,
+       POWER_SUPPLY_PROP_CURRENT_NOW,
+};
+
+static bool cpcap_charger_battery_found(struct cpcap_charger_ddata *ddata)
+{
+       struct iio_channel *channel;
+       int error, value;
+
+       channel = ddata->channels[CPCAP_CHARGER_IIO_BATTDET];
+       error = iio_read_channel_raw(channel, &value);
+       if (error < 0) {
+               dev_warn(ddata->dev, "%s failed: %i\n", __func__, error);
+
+               return false;
+       }
+
+       return value == 1;
+}
+
+static int cpcap_charger_get_charge_voltage(struct cpcap_charger_ddata *ddata)
+{
+       struct iio_channel *channel;
+       int error, value = 0;
+
+       channel = ddata->channels[CPCAP_CHARGER_IIO_VOLTAGE];
+       error = iio_read_channel_processed(channel, &value);
+       if (error < 0) {
+               dev_warn(ddata->dev, "%s failed: %i\n", __func__, error);
+
+               return 0;
+       }
+
+       return value;
+}
+
+static int cpcap_charger_get_charge_current(struct cpcap_charger_ddata *ddata)
+{
+       struct iio_channel *channel;
+       int error, value = 0;
+
+       channel = ddata->channels[CPCAP_CHARGER_IIO_CHRG_CURRENT];
+       error = iio_read_channel_processed(channel, &value);
+       if (error < 0) {
+               dev_warn(ddata->dev, "%s failed: %i\n", __func__, error);
+
+               return 0;
+       }
+
+       return value;
+}
+
+static int cpcap_charger_get_property(struct power_supply *psy,
+                                     enum power_supply_property psp,
+                                     union power_supply_propval *val)
+{
+       struct cpcap_charger_ddata *ddata = dev_get_drvdata(psy->dev.parent);
+
+       switch (psp) {
+       case POWER_SUPPLY_PROP_STATUS:
+               val->intval = ddata->status;
+               break;
+       case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+               if (ddata->status == POWER_SUPPLY_STATUS_CHARGING)
+                       val->intval = cpcap_charger_get_charge_voltage(ddata) *
+                               1000;
+               else
+                       val->intval = 0;
+               break;
+       case POWER_SUPPLY_PROP_CURRENT_NOW:
+               if (ddata->status == POWER_SUPPLY_STATUS_CHARGING)
+                       val->intval = cpcap_charger_get_charge_current(ddata) *
+                               1000;
+               else
+                       val->intval = 0;
+               break;
+       case POWER_SUPPLY_PROP_ONLINE:
+               val->intval = ddata->status == POWER_SUPPLY_STATUS_CHARGING;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static void cpcap_charger_set_cable_path(struct cpcap_charger_ddata *ddata,
+                                        bool enabled)
+{
+       if (!ddata->gpio[0])
+               return;
+
+       gpiod_set_value(ddata->gpio[0], enabled);
+}
+
+static void cpcap_charger_set_inductive_path(struct cpcap_charger_ddata *ddata,
+                                            bool enabled)
+{
+       if (!ddata->gpio[1])
+               return;
+
+       gpiod_set_value(ddata->gpio[1], enabled);
+}
+
+static int cpcap_charger_set_state(struct cpcap_charger_ddata *ddata,
+                                  int max_voltage, int charge_current,
+                                  int trickle_current)
+{
+       bool enable;
+       int error;
+
+       enable = max_voltage && (charge_current || trickle_current);
+       dev_dbg(ddata->dev, "%s enable: %i\n", __func__, enable);
+
+       if (!enable) {
+               error = regmap_update_bits(ddata->reg, CPCAP_REG_CRM,
+                                          0x3fff,
+                                          CPCAP_REG_CRM_FET_OVRD |
+                                          CPCAP_REG_CRM_FET_CTRL);
+               if (error) {
+                       ddata->status = POWER_SUPPLY_STATUS_UNKNOWN;
+                       goto out_err;
+               }
+
+               ddata->status = POWER_SUPPLY_STATUS_DISCHARGING;
+
+               return 0;
+       }
+
+       error = regmap_update_bits(ddata->reg, CPCAP_REG_CRM, 0x3fff,
+                                  CPCAP_REG_CRM_CHRG_LED_EN |
+                                  trickle_current |
+                                  CPCAP_REG_CRM_FET_OVRD |
+                                  CPCAP_REG_CRM_FET_CTRL |
+                                  max_voltage |
+                                  charge_current);
+       if (error) {
+               ddata->status = POWER_SUPPLY_STATUS_UNKNOWN;
+               goto out_err;
+       }
+
+       ddata->status = POWER_SUPPLY_STATUS_CHARGING;
+
+       return 0;
+
+out_err:
+       dev_err(ddata->dev, "%s failed with %i\n", __func__, error);
+
+       return error;
+}
+
+static bool cpcap_charger_vbus_valid(struct cpcap_charger_ddata *ddata)
+{
+       int error, value = 0;
+       struct iio_channel *channel =
+               ddata->channels[CPCAP_CHARGER_IIO_VBUS];
+
+       error = iio_read_channel_processed(channel, &value);
+       if (error >= 0)
+               return value > 3900 ? true : false;
+
+       dev_err(ddata->dev, "error reading VBUS: %i\n", error);
+
+       return false;
+}
+
+/* VBUS control functions for the USB PHY companion */
+
+static void cpcap_charger_vbus_work(struct work_struct *work)
+{
+       struct cpcap_charger_ddata *ddata;
+       bool vbus = false;
+       int error;
+
+       ddata = container_of(work, struct cpcap_charger_ddata,
+                            vbus_work.work);
+
+       if (ddata->vbus_enabled) {
+               vbus = cpcap_charger_vbus_valid(ddata);
+               if (vbus) {
+                       dev_info(ddata->dev, "VBUS already provided\n");
+
+                       return;
+               }
+
+               cpcap_charger_set_cable_path(ddata, false);
+               cpcap_charger_set_inductive_path(ddata, false);
+
+               error = cpcap_charger_set_state(ddata, 0, 0, 0);
+               if (error)
+                       goto out_err;
+
+               error = regmap_update_bits(ddata->reg, CPCAP_REG_CRM,
+                                          CPCAP_REG_CRM_RVRSMODE,
+                                          CPCAP_REG_CRM_RVRSMODE);
+               if (error)
+                       goto out_err;
+       } else {
+               error = regmap_update_bits(ddata->reg, CPCAP_REG_CRM,
+                                          CPCAP_REG_CRM_RVRSMODE, 0);
+               if (error)
+                       goto out_err;
+
+               cpcap_charger_set_cable_path(ddata, true);
+               cpcap_charger_set_inductive_path(ddata, true);
+       }
+
+       return;
+
+out_err:
+       dev_err(ddata->dev, "%s could not %s vbus: %i\n", __func__,
+               ddata->vbus_enabled ? "enable" : "disable", error);
+}
+
+static int cpcap_charger_set_vbus(struct phy_companion *comparator,
+                                 bool enabled)
+{
+       struct cpcap_charger_ddata *ddata =
+               container_of(comparator, struct cpcap_charger_ddata,
+                            comparator);
+
+       ddata->vbus_enabled = enabled;
+       schedule_delayed_work(&ddata->vbus_work, 0);
+
+       return 0;
+}
+
+/* Charger interrupt handling functions */
+
+static int cpcap_charger_get_ints_state(struct cpcap_charger_ddata *ddata,
+                                       struct cpcap_charger_ints_state *s)
+{
+       int val, error;
+
+       error = regmap_read(ddata->reg, CPCAP_REG_INTS1, &val);
+       if (error)
+               return error;
+
+       s->chrg_det = val & BIT(13);
+       s->rvrs_chrg = val & BIT(12);
+       s->vbusov = val & BIT(11);
+
+       error = regmap_read(ddata->reg, CPCAP_REG_INTS2, &val);
+       if (error)
+               return error;
+
+       s->chrg_se1b = val & BIT(13);
+       s->rvrs_mode = val & BIT(6);
+       s->chrgcurr1 = val & BIT(4);
+       s->vbusvld = val & BIT(3);
+
+       error = regmap_read(ddata->reg, CPCAP_REG_INTS4, &val);
+       if (error)
+               return error;
+
+       s->battdetb = val & BIT(6);
+
+       return 0;
+}
+
+static void cpcap_usb_detect(struct work_struct *work)
+{
+       struct cpcap_charger_ddata *ddata;
+       struct cpcap_charger_ints_state s;
+       int error;
+
+       ddata = container_of(work, struct cpcap_charger_ddata,
+                            detect_work.work);
+
+       error = cpcap_charger_get_ints_state(ddata, &s);
+       if (error)
+               return;
+
+       if (cpcap_charger_vbus_valid(ddata) && s.chrgcurr1) {
+               int max_current;
+
+               if (cpcap_charger_battery_found(ddata))
+                       max_current = CPCAP_REG_CRM_ICHRG_1A584;
+               else
+                       max_current = CPCAP_REG_CRM_ICHRG_0A528;
+
+               error = cpcap_charger_set_state(ddata,
+                                               CPCAP_REG_CRM_VCHRG_4V20,
+                                               max_current,
+                                               CPCAP_REG_CRM_TR_0A72);
+               if (error)
+                       goto out_err;
+       } else {
+               error = cpcap_charger_set_state(ddata, 0, 0, 0);
+               if (error)
+                       goto out_err;
+       }
+
+       return;
+
+out_err:
+       dev_err(ddata->dev, "%s failed with %i\n", __func__, error);
+}
+
+static irqreturn_t cpcap_charger_irq_thread(int irq, void *data)
+{
+       struct cpcap_charger_ddata *ddata = data;
+
+       if (!atomic_read(&ddata->active))
+               return IRQ_NONE;
+
+       schedule_delayed_work(&ddata->detect_work, 0);
+
+       return IRQ_HANDLED;
+}
+
+static int cpcap_usb_init_irq(struct platform_device *pdev,
+                             struct cpcap_charger_ddata *ddata,
+                             const char *name)
+{
+       struct cpcap_interrupt_desc *d;
+       int irq, error;
+
+       irq = platform_get_irq_byname(pdev, name);
+       if (!irq)
+               return -ENODEV;
+
+       error = devm_request_threaded_irq(ddata->dev, irq, NULL,
+                                         cpcap_charger_irq_thread,
+                                         IRQF_SHARED,
+                                         name, ddata);
+       if (error) {
+               dev_err(ddata->dev, "could not get irq %s: %i\n",
+                       name, error);
+
+               return error;
+       }
+
+       d = devm_kzalloc(ddata->dev, sizeof(*d), GFP_KERNEL);
+       if (!d)
+               return -ENOMEM;
+
+       d->name = name;
+       d->irq = irq;
+       list_add(&d->node, &ddata->irq_list);
+
+       return 0;
+}
+
+static const char * const cpcap_charger_irqs[] = {
+       /* REG_INT_0 */
+       "chrg_det", "rvrs_chrg",
+
+       /* REG_INT_1 */
+       "chrg_se1b", "se0conn", "rvrs_mode", "chrgcurr1", "vbusvld",
+
+       /* REG_INT_3 */
+       "battdetb",
+};
+
+static int cpcap_usb_init_interrupts(struct platform_device *pdev,
+                                    struct cpcap_charger_ddata *ddata)
+{
+       int i, error;
+
+       for (i = 0; i < ARRAY_SIZE(cpcap_charger_irqs); i++) {
+               error = cpcap_usb_init_irq(pdev, ddata, cpcap_charger_irqs[i]);
+               if (error)
+                       return error;
+       }
+
+       return 0;
+}
+
+static void cpcap_charger_init_optional_gpios(struct cpcap_charger_ddata *ddata)
+{
+       int i;
+
+       for (i = 0; i < 2; i++) {
+               ddata->gpio[i] = devm_gpiod_get_index(ddata->dev, "mode",
+                                                     i, GPIOD_OUT_HIGH);
+               if (IS_ERR(ddata->gpio[i])) {
+                       dev_info(ddata->dev, "no mode change GPIO%i: %li\n",
+                                i, PTR_ERR(ddata->gpio[i]));
+                                ddata->gpio[i] = NULL;
+               }
+       }
+}
+
+static int cpcap_charger_init_iio(struct cpcap_charger_ddata *ddata)
+{
+       const char * const names[CPCAP_CHARGER_IIO_NR] = {
+               "battdetb", "battp", "vbus", "chg_isense", "batti",
+       };
+       int error, i;
+
+       for (i = 0; i < CPCAP_CHARGER_IIO_NR; i++) {
+               ddata->channels[i] = devm_iio_channel_get(ddata->dev,
+                                                         names[i]);
+               if (IS_ERR(ddata->channels[i])) {
+                       error = PTR_ERR(ddata->channels[i]);
+                       goto out_err;
+               }
+
+               if (!ddata->channels[i]->indio_dev) {
+                       error = -ENXIO;
+                       goto out_err;
+               }
+       }
+
+       return 0;
+
+out_err:
+       dev_err(ddata->dev, "could not initialize VBUS or ID IIO: %i\n",
+               error);
+
+       return error;
+}
+
+static const struct power_supply_desc cpcap_charger_usb_desc = {
+       .name           = "cpcap_usb",
+       .type           = POWER_SUPPLY_TYPE_USB,
+       .properties     = cpcap_charger_props,
+       .num_properties = ARRAY_SIZE(cpcap_charger_props),
+       .get_property   = cpcap_charger_get_property,
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id cpcap_charger_id_table[] = {
+       {
+               .compatible = "motorola,mapphone-cpcap-charger",
+       },
+       {},
+};
+MODULE_DEVICE_TABLE(of, cpcap_charger_id_table);
+#endif
+
+static int cpcap_charger_probe(struct platform_device *pdev)
+{
+       struct cpcap_charger_ddata *ddata;
+       const struct of_device_id *of_id;
+       int error;
+
+       of_id = of_match_device(of_match_ptr(cpcap_charger_id_table),
+                               &pdev->dev);
+       if (!of_id)
+               return -EINVAL;
+
+       ddata = devm_kzalloc(&pdev->dev, sizeof(*ddata), GFP_KERNEL);
+       if (!ddata)
+               return -ENOMEM;
+
+       ddata->dev = &pdev->dev;
+
+       ddata->reg = dev_get_regmap(ddata->dev->parent, NULL);
+       if (!ddata->reg)
+               return -ENODEV;
+
+       INIT_LIST_HEAD(&ddata->irq_list);
+       INIT_DELAYED_WORK(&ddata->detect_work, cpcap_usb_detect);
+       INIT_DELAYED_WORK(&ddata->vbus_work, cpcap_charger_vbus_work);
+       platform_set_drvdata(pdev, ddata);
+
+       error = cpcap_charger_init_iio(ddata);
+       if (error)
+               return error;
+
+       atomic_set(&ddata->active, 1);
+
+       ddata->usb = devm_power_supply_register(ddata->dev,
+                                               &cpcap_charger_usb_desc,
+                                               NULL);
+       if (IS_ERR(ddata->usb)) {
+               error = PTR_ERR(ddata->usb);
+               dev_err(ddata->dev, "failed to register USB charger: %i\n",
+                       error);
+
+               return error;
+       }
+
+       error = cpcap_usb_init_interrupts(pdev, ddata);
+       if (error)
+               return error;
+
+       ddata->comparator.set_vbus = cpcap_charger_set_vbus;
+       error = omap_usb2_set_comparator(&ddata->comparator);
+       if (error == -ENODEV) {
+               dev_info(ddata->dev, "charger needs phy, deferring probe\n");
+               return -EPROBE_DEFER;
+       }
+
+       cpcap_charger_init_optional_gpios(ddata);
+
+       schedule_delayed_work(&ddata->detect_work, 0);
+
+       return 0;
+}
+
+static int cpcap_charger_remove(struct platform_device *pdev)
+{
+       struct cpcap_charger_ddata *ddata = platform_get_drvdata(pdev);
+       int error;
+
+       atomic_set(&ddata->active, 0);
+       error = omap_usb2_set_comparator(NULL);
+       if (error)
+               dev_warn(ddata->dev, "could not clear USB comparator: %i\n",
+                        error);
+
+       error = cpcap_charger_set_state(ddata, 0, 0, 0);
+       if (error)
+               dev_warn(ddata->dev, "could not clear charger: %i\n",
+                        error);
+       cancel_delayed_work_sync(&ddata->vbus_work);
+       cancel_delayed_work_sync(&ddata->detect_work);
+
+       return 0;
+}
+
+static struct platform_driver cpcap_charger_driver = {
+       .probe = cpcap_charger_probe,
+       .driver = {
+               .name   = "cpcap-charger",
+               .of_match_table = of_match_ptr(cpcap_charger_id_table),
+       },
+       .remove = cpcap_charger_remove,
+};
+module_platform_driver(cpcap_charger_driver);
+
+MODULE_AUTHOR("Tony Lindgren <tony@atomide.com>");
+MODULE_DESCRIPTION("CPCAP Battery Charger Interface driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:cpcap-charger");
diff --git a/drivers/power/supply/lego_ev3_battery.c b/drivers/power/supply/lego_ev3_battery.c

new file mode 100644 (file)

index 0000000..7b993d6
--- /dev/null
+++ b/drivers/power/supply/lego_ev3_battery.c
@@ -0,0 +1,228 @@
+/*
+ * Battery driver for LEGO MINDSTORMS EV3
+ *
+ * Copyright (C) 2017 David Lechner <david@lechnology.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/gpio/consumer.h>
+#include <linux/iio/consumer.h>
+#include <linux/iio/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/power_supply.h>
+
+/**
+ * struct lego_ev3_battery - per-device state of the EV3 battery driver
+ * @iio_v: IIO channel looked up as "voltage" in probe
+ * @iio_i: IIO channel looked up as "current" in probe
+ * @rechargeable_gpio: input GPIO looked up as "rechargeable" in probe
+ * @psy: power supply registered for this battery
+ * @technology: value reported for POWER_SUPPLY_PROP_TECHNOLOGY
+ * @v_max: value reported for POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN
+ * @v_min: value reported for POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN
+ */
+struct lego_ev3_battery {
+       struct iio_channel *iio_v;
+       struct iio_channel *iio_i;
+       struct gpio_desc *rechargeable_gpio;
+       struct power_supply *psy;
+       int technology;
+       int v_max;
+       int v_min;
+};
+
+/*
+ * Report a single battery property to the power supply core.
+ *
+ * @psy: power supply whose driver data is the struct lego_ev3_battery
+ * @psp: property being queried
+ * @val: output; only val->intval is written
+ *
+ * Returns 0 on success, -EINVAL for a property this driver does not
+ * provide, or the negative error returned by the IIO read when a channel
+ * cannot be sampled (previously such failures were ignored and an
+ * indeterminate value was reported).
+ */
+static int lego_ev3_battery_get_property(struct power_supply *psy,
+                                        enum power_supply_property psp,
+                                        union power_supply_propval *val)
+{
+       struct lego_ev3_battery *batt = power_supply_get_drvdata(psy);
+       int ret, val2;
+
+       switch (psp) {
+       case POWER_SUPPLY_PROP_TECHNOLOGY:
+               val->intval = batt->technology;
+               break;
+       case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+               /* battery voltage is iio channel * 2 + Vce of transistor */
+               ret = iio_read_channel_processed(batt->iio_v, &val->intval);
+               if (ret < 0)
+                       return ret;
+               val->intval *= 2000;
+               val->intval += 200000;
+               /* plus adjust for shunt resistor drop */
+               ret = iio_read_channel_processed(batt->iio_i, &val2);
+               if (ret < 0)
+                       return ret;
+               val2 *= 1000;
+               val2 /= 15;
+               val->intval += val2;
+               break;
+       case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
+               val->intval = batt->v_max;
+               break;
+       case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
+               val->intval = batt->v_min;
+               break;
+       case POWER_SUPPLY_PROP_CURRENT_NOW:
+               /* battery current is iio channel / 15 / 0.05 ohms */
+               ret = iio_read_channel_processed(batt->iio_i, &val->intval);
+               if (ret < 0)
+                       return ret;
+               val->intval *= 20000;
+               val->intval /= 15;
+               break;
+       case POWER_SUPPLY_PROP_SCOPE:
+               val->intval = POWER_SUPPLY_SCOPE_SYSTEM;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Handle a write to a battery property.
+ *
+ * The only accepted write is TECHNOLOGY = NiMH while the current technology
+ * is Unknown; it also switches the design voltage limits to the NiMH values.
+ * Every other request is rejected with -EINVAL.
+ */
+static int lego_ev3_battery_set_property(struct power_supply *psy,
+                                        enum power_supply_property psp,
+                                        const union power_supply_propval *val)
+{
+       struct lego_ev3_battery *batt = power_supply_get_drvdata(psy);
+
+       if (psp != POWER_SUPPLY_PROP_TECHNOLOGY)
+               return -EINVAL;
+
+       /*
+        * Li-ion packs are detected automatically and must not be
+        * overridden, so only an Unknown technology may be changed.
+        * Rechargeable AA cells cannot be detected and have to be declared
+        * by hand, normally once during system init; there is deliberately
+        * no way back to Unknown.
+        */
+       if (batt->technology != POWER_SUPPLY_TECHNOLOGY_UNKNOWN)
+               return -EINVAL;
+
+       /* NiMH is the only technology that may be selected manually */
+       if (val->intval != POWER_SUPPLY_TECHNOLOGY_NiMH)
+               return -EINVAL;
+
+       batt->technology = POWER_SUPPLY_TECHNOLOGY_NiMH;
+       batt->v_max = 7800000;
+       batt->v_min = 5400000;
+
+       return 0;
+}
+
+/*
+ * Tell the power supply core whether @psp may be written: only TECHNOLOGY,
+ * and only while the stored technology is still Unknown.
+ */
+static int lego_ev3_battery_property_is_writeable(struct power_supply *psy,
+                                                 enum power_supply_property psp)
+{
+       struct lego_ev3_battery *batt = power_supply_get_drvdata(psy);
+
+       if (psp != POWER_SUPPLY_PROP_TECHNOLOGY)
+               return 0;
+
+       return batt->technology == POWER_SUPPLY_TECHNOLOGY_UNKNOWN;
+}
+
+/*
+ * Properties exposed by this driver; each entry has a matching case in
+ * lego_ev3_battery_get_property().
+ */
+static enum power_supply_property lego_ev3_battery_props[] = {
+       POWER_SUPPLY_PROP_TECHNOLOGY,
+       POWER_SUPPLY_PROP_VOLTAGE_NOW,
+       POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN,
+       POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+       POWER_SUPPLY_PROP_CURRENT_NOW,
+       POWER_SUPPLY_PROP_SCOPE,
+};
+
+/* Power supply description: a battery with one conditionally writeable property */
+static const struct power_supply_desc lego_ev3_battery_desc = {
+       .type                   = POWER_SUPPLY_TYPE_BATTERY,
+       .name                   = "lego-ev3-battery",
+       .num_properties         = ARRAY_SIZE(lego_ev3_battery_props),
+       .properties             = lego_ev3_battery_props,
+       .property_is_writeable  = lego_ev3_battery_property_is_writeable,
+       .set_property           = lego_ev3_battery_set_property,
+       .get_property           = lego_ev3_battery_get_property,
+};
+
+/*
+ * Probe: look up the "voltage" and "current" IIO channels and the
+ * "rechargeable" GPIO, pick the technology and design voltage limits from
+ * the GPIO state, and register the power supply.
+ *
+ * Every resource is acquired through a devm_* helper.
+ *
+ * Returns 0 on success or a negative errno. -EPROBE_DEFER from a resource
+ * lookup is passed through without logging an error.
+ */
+static int lego_ev3_battery_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct lego_ev3_battery *batt;
+       struct power_supply_config psy_cfg = {};
+       int err;
+
+       batt = devm_kzalloc(dev, sizeof(*batt), GFP_KERNEL);
+       if (!batt)
+               return -ENOMEM;
+
+       platform_set_drvdata(pdev, batt);
+
+       batt->iio_v = devm_iio_channel_get(dev, "voltage");
+       err = PTR_ERR_OR_ZERO(batt->iio_v);
+       if (err) {
+               if (err != -EPROBE_DEFER)
+                       dev_err(dev, "Failed to get voltage iio channel\n");
+               return err;
+       }
+
+       batt->iio_i = devm_iio_channel_get(dev, "current");
+       err = PTR_ERR_OR_ZERO(batt->iio_i);
+       if (err) {
+               if (err != -EPROBE_DEFER)
+                       dev_err(dev, "Failed to get current iio channel\n");
+               return err;
+       }
+
+       batt->rechargeable_gpio = devm_gpiod_get(dev, "rechargeable", GPIOD_IN);
+       err = PTR_ERR_OR_ZERO(batt->rechargeable_gpio);
+       if (err) {
+               if (err != -EPROBE_DEFER)
+                       dev_err(dev, "Failed to get rechargeable gpio\n");
+               return err;
+       }
+
+       /*
+        * The rechargeable battery indication switch cannot be changed without
+        * removing the battery, so we only need to read it once.
+        *
+        * Design limits are in microvolts, the same unit as the NiMH limits
+        * in lego_ev3_battery_set_property() and as VOLTAGE_NOW. The values
+        * previously used here carried an extra zero (e.g. 84 V for a pack
+        * documented as 7.4 V nominal).
+        */
+       if (gpiod_get_value(batt->rechargeable_gpio)) {
+               /* 2-cell Li-ion, 7.4V nominal: 8.4V max, 6.0V min */
+               batt->technology = POWER_SUPPLY_TECHNOLOGY_LION;
+               batt->v_max = 8400000;
+               batt->v_min = 6000000;
+       } else {
+               /* 6x AA Alkaline, 9V nominal: 9.0V max, 4.8V min */
+               batt->technology = POWER_SUPPLY_TECHNOLOGY_UNKNOWN;
+               batt->v_max = 9000000;
+               batt->v_min = 4800000;
+       }
+
+       psy_cfg.of_node = pdev->dev.of_node;
+       psy_cfg.drv_data = batt;
+
+       batt->psy = devm_power_supply_register(dev, &lego_ev3_battery_desc,
+                                              &psy_cfg);
+       err = PTR_ERR_OR_ZERO(batt->psy);
+       if (err) {
+               dev_err(dev, "failed to register power supply\n");
+               return err;
+       }
+
+       return 0;
+}
+
+/* Device tree compatibles handled by this driver */
+static const struct of_device_id of_lego_ev3_battery_match[] = {
+       { .compatible = "lego,ev3-battery" },
+       { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, of_lego_ev3_battery_match);
+
+/* Platform driver glue; there is no remove callback */
+static struct platform_driver lego_ev3_battery_driver = {
+       .probe  = lego_ev3_battery_probe,
+       .driver = {
+               .of_match_table = of_lego_ev3_battery_match,
+               .name           = "lego-ev3-battery",
+       },
+};
+module_platform_driver(lego_ev3_battery_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("David Lechner <david@lechnology.com>");
+MODULE_DESCRIPTION("LEGO MINDSTORMS EV3 Battery Driver");
diff --git a/drivers/power/supply/lp8788-charger.c b/drivers/power/supply/lp8788-charger.c

index 509e2b341bd64c54aec3ec68b69b1854dc593550..677f7c40b25a16bf535796ca944c4cebd549e98e 100644 (file)
--- a/drivers/power/supply/lp8788-charger.c
+++ b/drivers/power/supply/lp8788-charger.c
@@ -651,7 +651,7 @@ static ssize_t lp8788_show_eoc_time(struct device *dev,
  {
         struct lp8788_charger *pchg = dev_get_drvdata(dev);
         char *stime[] = { "400ms", "5min", "10min", "15min",
-                       "20min", "25min", "30min" "No timeout" };
+                       "20min", "25min", "30min", "No timeout" };
         u8 val;
  
         lp8788_read_byte(pchg->lp, LP8788_CHG_EOC, &val);
diff --git a/drivers/power/supply/ltc2941-battery-gauge.c b/drivers/power/supply/ltc2941-battery-gauge.c

index 4adf2ba021ceb2a2f99510d4ed28bfbae51617c4..7efb908f4451262137d3e77bd4da80d2294f79aa 100644 (file)
--- a/drivers/power/supply/ltc2941-battery-gauge.c
+++ b/drivers/power/supply/ltc2941-battery-gauge.c
@@ -9,6 +9,7 @@
   */
  #include <linux/kernel.h>
  #include <linux/module.h>
+#include <linux/of_device.h>
  #include <linux/types.h>
  #include <linux/errno.h>
  #include <linux/swab.h>
@@ -61,7 +62,7 @@ struct ltc294x_info {
         struct power_supply *supply;    /* Supply pointer */
         struct power_supply_desc supply_desc;   /* Supply description */
         struct delayed_work work;       /* Work scheduler */
-       int num_regs;   /* Number of registers (chip type) */
+       unsigned long num_regs; /* Number of registers (chip type) */
         int charge;     /* Last charge register content */
         int r_sense;    /* mOhm */
         int Qlsb;       /* nAh */
@@ -387,7 +388,7 @@ static int ltc294x_i2c_probe(struct i2c_client *client,
  
         np = of_node_get(client->dev.of_node);
  
-       info->num_regs = id->driver_data;
+       info->num_regs = (unsigned long)of_device_get_match_data(&client->dev);
         info->supply_desc.name = np->name;
  
         /* r_sense can be negative, when sense+ is connected to the battery
@@ -497,9 +498,23 @@ static const struct i2c_device_id ltc294x_i2c_id[] = {
  };
  MODULE_DEVICE_TABLE(i2c, ltc294x_i2c_id);
  
+/* OF match table; .data carries the register count of each chip variant */
+static const struct of_device_id ltc294x_i2c_of_match[] = {
+       { .compatible = "lltc,ltc2941", .data = (void *)LTC2941_NUM_REGS },
+       { .compatible = "lltc,ltc2943", .data = (void *)LTC2943_NUM_REGS },
+       { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, ltc294x_i2c_of_match);
+
  static struct i2c_driver ltc294x_driver = {
         .driver = {
                 .name   = "LTC2941",
+               .of_match_table = ltc294x_i2c_of_match,
                 .pm     = LTC294X_PM_OPS,
         },
         .probe          = ltc294x_i2c_probe,
diff --git a/drivers/power/supply/max17040_battery.c b/drivers/power/supply/max17040_battery.c

index e7c3649b31a080232efc580d1c2d72c5c9357bd1..33c40f79d23d5e016e69ecde3d0f5785ba4048c0 100644 (file)
--- a/drivers/power/supply/max17040_battery.c
+++ b/drivers/power/supply/max17040_battery.c
@@ -277,9 +277,17 @@ static const struct i2c_device_id max17040_id[] = {
  };
  MODULE_DEVICE_TABLE(i2c, max17040_id);
  
+/* Device tree compatibles handled by this driver */
+static const struct of_device_id max17040_of_match[] = {
+       { .compatible = "maxim,max17040", },
+       { .compatible = "maxim,max77836-battery", },
+       { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, max17040_of_match);
+
  static struct i2c_driver max17040_i2c_driver = {
         .driver = {
                 .name   = "max17040",
+               .of_match_table = max17040_of_match,
                 .pm     = MAX17040_PM_OPS,
         },
         .probe          = max17040_probe,
diff --git a/drivers/power/supply/sbs-charger.c b/drivers/power/supply/sbs-charger.c

index 353765a5f44cfd8096c29edb680bc0f3d09c703e..15947dbb511e320dfd80d8e9968f313d4c54550a 100644 (file)
--- a/drivers/power/supply/sbs-charger.c
+++ b/drivers/power/supply/sbs-charger.c
@@ -137,10 +137,7 @@ static enum power_supply_property sbs_properties[] = {
  
  static bool sbs_readable_reg(struct device *dev, unsigned int reg)
  {
-       if (reg < SBS_CHARGER_REG_SPEC_INFO)
-               return false;
-       else
-               return true;
+       return reg >= SBS_CHARGER_REG_SPEC_INFO;
  }
  
  static bool sbs_volatile_reg(struct device *dev, unsigned int reg)
diff --git a/drivers/power/supply/tps65217_charger.c b/drivers/power/supply/tps65217_charger.c

index 29b61e81b38541f749982de0c7d8d1d6eddeb353..1f5234098aaf3f9dc27a7cbdef98d42fcce34324 100644 (file)
--- a/drivers/power/supply/tps65217_charger.c
+++ b/drivers/power/supply/tps65217_charger.c
@@ -58,8 +58,6 @@ static int tps65217_config_charger(struct tps65217_charger *charger)
  {
         int ret;
  
-       dev_dbg(charger->dev, "%s\n", __func__);
-
         /*
          * tps65217 rev. G, p. 31 (see p. 32 for NTC schematic)
          *
@@ -205,8 +203,6 @@ static int tps65217_charger_probe(struct platform_device *pdev)
         int ret;
         int i;
  
-       dev_dbg(&pdev->dev, "%s\n", __func__);
-
         charger = devm_kzalloc(&pdev->dev, sizeof(*charger), GFP_KERNEL);
         if (!charger)
                 return -ENOMEM;
diff --git a/drivers/power/supply/twl4030_charger.c b/drivers/power/supply/twl4030_charger.c

index bcd4dc304f270c50739e2b2ddfc2850e1efe3bf0..990ff3d218bcc7572bd1be15da7731fd36894736 100644 (file)
--- a/drivers/power/supply/twl4030_charger.c
+++ b/drivers/power/supply/twl4030_charger.c
@@ -1117,7 +1117,7 @@ fail:
         return ret;
  }
  
-static int __exit twl4030_bci_remove(struct platform_device *pdev)
+static int twl4030_bci_remove(struct platform_device *pdev)
  {
         struct twl4030_bci *bci = platform_get_drvdata(pdev);
  
@@ -1148,11 +1148,11 @@ MODULE_DEVICE_TABLE(of, twl_bci_of_match);
  
  static struct platform_driver twl4030_bci_driver = {
         .probe = twl4030_bci_probe,
+       .remove = twl4030_bci_remove,
         .driver = {
                 .name   = "twl4030_bci",
                 .of_match_table = of_match_ptr(twl_bci_of_match),
         },
-       .remove = __exit_p(twl4030_bci_remove),
  };
  module_platform_driver(twl4030_bci_driver);
  
diff --git a/drivers/ptp/ptp_kvm.c b/drivers/ptp/ptp_kvm.c

index 09b4df74291e2618e95e842dc4f5d7bdb178ec93..bb865695d7a62d20fa66800c8ed421dcfa8cd8c2 100644 (file)
--- a/drivers/ptp/ptp_kvm.c
+++ b/drivers/ptp/ptp_kvm.c
@@ -193,10 +193,7 @@ static int __init ptp_kvm_init(void)
  
         kvm_ptp_clock.ptp_clock = ptp_clock_register(&kvm_ptp_clock.caps, NULL);
  
-       if (IS_ERR(kvm_ptp_clock.ptp_clock))
-               return PTR_ERR(kvm_ptp_clock.ptp_clock);
-
-       return 0;
+       return PTR_ERR_OR_ZERO(kvm_ptp_clock.ptp_clock);
  }
  
  module_init(ptp_kvm_init);
diff --git a/drivers/pwm/pwm-lpss-pci.c b/drivers/pwm/pwm-lpss-pci.c

index 053088b9b66edb4c50c3b47c5e942b6e2e343c00..c1527cb645be8d0c038742559e270d1914ac8726 100644 (file)
--- a/drivers/pwm/pwm-lpss-pci.c
+++ b/drivers/pwm/pwm-lpss-pci.c
@@ -36,6 +36,14 @@ static const struct pwm_lpss_boardinfo pwm_lpss_bxt_info = {
         .clk_rate = 19200000,
         .npwm = 4,
         .base_unit_bits = 22,
+       .bypass = true,
+};
+
+/* Tangier */
+static const struct pwm_lpss_boardinfo pwm_lpss_tng_info = {
+       .clk_rate = 19200000,
+       .npwm = 4,
+       .base_unit_bits = 22,
  };
  
  static int pwm_lpss_probe_pci(struct pci_dev *pdev,
@@ -97,7 +105,7 @@ static const struct pci_device_id pwm_lpss_pci_ids[] = {
         { PCI_VDEVICE(INTEL, 0x0ac8), (unsigned long)&pwm_lpss_bxt_info},
         { PCI_VDEVICE(INTEL, 0x0f08), (unsigned long)&pwm_lpss_byt_info},
         { PCI_VDEVICE(INTEL, 0x0f09), (unsigned long)&pwm_lpss_byt_info},
-       { PCI_VDEVICE(INTEL, 0x11a5), (unsigned long)&pwm_lpss_bxt_info},
+       { PCI_VDEVICE(INTEL, 0x11a5), (unsigned long)&pwm_lpss_tng_info},
         { PCI_VDEVICE(INTEL, 0x1ac8), (unsigned long)&pwm_lpss_bxt_info},
         { PCI_VDEVICE(INTEL, 0x2288), (unsigned long)&pwm_lpss_bsw_info},
         { PCI_VDEVICE(INTEL, 0x2289), (unsigned long)&pwm_lpss_bsw_info},
diff --git a/drivers/pwm/pwm-lpss-platform.c b/drivers/pwm/pwm-lpss-platform.c

index b22b6fdadb9ae14e0e55f28ecbfcece7e971eb1f..5d6ed1507d29284f2ba28f2cc781f4b797067f01 100644 (file)
--- a/drivers/pwm/pwm-lpss-platform.c
+++ b/drivers/pwm/pwm-lpss-platform.c
@@ -37,6 +37,7 @@ static const struct pwm_lpss_boardinfo pwm_lpss_bxt_info = {
         .clk_rate = 19200000,
         .npwm = 4,
         .base_unit_bits = 22,
+       .bypass = true,
  };
  
  static int pwm_lpss_probe_platform(struct platform_device *pdev)
diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c

index 689d2c1cbead80f5b6540dac13f1f0e56edfecfa..8db0d40ccacde84a61d292936f6bbdeeed7ac358 100644 (file)
--- a/drivers/pwm/pwm-lpss.c
+++ b/drivers/pwm/pwm-lpss.c
@@ -57,7 +57,7 @@ static inline void pwm_lpss_write(const struct pwm_device *pwm, u32 value)
         writel(value, lpwm->regs + pwm->hwpwm * PWM_SIZE + PWM);
  }
  
-static int pwm_lpss_update(struct pwm_device *pwm)
+static int pwm_lpss_wait_for_update(struct pwm_device *pwm)
  {
         struct pwm_lpss_chip *lpwm = to_lpwm(pwm->chip);
         const void __iomem *addr = lpwm->regs + pwm->hwpwm * PWM_SIZE + PWM;
@@ -65,8 +65,6 @@ static int pwm_lpss_update(struct pwm_device *pwm)
         u32 val;
         int err;
  
-       pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE);
-
         /*
          * PWM Configuration register has SW_UPDATE bit that is set when a new
          * configuration is written to the register. The bit is automatically
@@ -122,6 +120,12 @@ static void pwm_lpss_prepare(struct pwm_lpss_chip *lpwm, struct pwm_device *pwm,
         pwm_lpss_write(pwm, ctrl);
  }
  
+/* Set PWM_ENABLE in the PWM's control register, but only when @cond is true */
+static inline void pwm_lpss_cond_enable(struct pwm_device *pwm, bool cond)
+{
+       if (!cond)
+               return;
+
+       pwm_lpss_write(pwm, PWM_ENABLE | pwm_lpss_read(pwm));
+}
+
  static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm,
                           struct pwm_state *state)
  {
@@ -137,18 +141,21 @@ static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm,
                                 return ret;
                         }
                         pwm_lpss_prepare(lpwm, pwm, state->duty_cycle, state->period);
-                       ret = pwm_lpss_update(pwm);
+                       pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE);
+                       pwm_lpss_cond_enable(pwm, lpwm->info->bypass == false);
+                       ret = pwm_lpss_wait_for_update(pwm);
                         if (ret) {
                                 pm_runtime_put(chip->dev);
                                 return ret;
                         }
-                       pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_ENABLE);
+                       pwm_lpss_cond_enable(pwm, lpwm->info->bypass == true);
                 } else {
                         ret = pwm_lpss_is_updating(pwm);
                         if (ret)
                                 return ret;
                         pwm_lpss_prepare(lpwm, pwm, state->duty_cycle, state->period);
-                       return pwm_lpss_update(pwm);
+                       pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE);
+                       return pwm_lpss_wait_for_update(pwm);
                 }
         } else if (pwm_is_enabled(pwm)) {
                 pwm_lpss_write(pwm, pwm_lpss_read(pwm) & ~PWM_ENABLE);
diff --git a/drivers/pwm/pwm-lpss.h b/drivers/pwm/pwm-lpss.h

index c94cd7c2695da72181830f35d52c2ce12c6afc18..98306bb02cfe71c0775eb430e7cf623fdc431889 100644 (file)
--- a/drivers/pwm/pwm-lpss.h
+++ b/drivers/pwm/pwm-lpss.h
@@ -22,6 +22,7 @@ struct pwm_lpss_boardinfo {
         unsigned long clk_rate;
         unsigned int npwm;
         unsigned long base_unit_bits;
+       bool bypass;
  };
  
  struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r,
diff --git a/drivers/pwm/pwm-rockchip.c b/drivers/pwm/pwm-rockchip.c

index ef89df1f7336c77a70c3a4de42b6d71ec96162ba..744d56197286a45eb148de437d31a3c3ca8fbd1a 100644 (file)
--- a/drivers/pwm/pwm-rockchip.c
+++ b/drivers/pwm/pwm-rockchip.c
@@ -191,6 +191,28 @@ static int rockchip_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
         return 0;
  }
  
+/*
+ * Enable or disable a PWM together with its clock.
+ *
+ * When enabling, the clock is turned on before the chip-specific
+ * set_enable() hook runs; when disabling, it is turned off afterwards.
+ *
+ * Returns 0 on success, or the clk_enable() error, in which case
+ * set_enable() is not called.
+ */
+static int rockchip_pwm_enable(struct pwm_chip *chip,
+                        struct pwm_device *pwm,
+                        bool enable,
+                        enum pwm_polarity polarity)
+{
+       struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip);
+
+       if (enable) {
+               int err = clk_enable(pc->clk);
+
+               if (err)
+                       return err;
+       }
+
+       pc->data->set_enable(chip, pwm, enable, polarity);
+
+       if (enable)
+               return 0;
+
+       clk_disable(pc->clk);
+
+       return 0;
+}
+
  static int rockchip_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
                               struct pwm_state *state)
  {
@@ -207,22 +229,26 @@ static int rockchip_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
                 return ret;
  
         if (state->polarity != curstate.polarity && enabled) {
-               pc->data->set_enable(chip, pwm, false, state->polarity);
+               ret = rockchip_pwm_enable(chip, pwm, false, state->polarity);
+               if (ret)
+                       goto out;
                 enabled = false;
         }
  
         ret = rockchip_pwm_config(chip, pwm, state->duty_cycle, state->period);
         if (ret) {
                 if (enabled != curstate.enabled)
-                       pc->data->set_enable(chip, pwm, !enabled,
-                                            state->polarity);
-
+                       rockchip_pwm_enable(chip, pwm, !enabled,
+                                     state->polarity);
                 goto out;
         }
  
-       if (state->enabled != enabled)
-               pc->data->set_enable(chip, pwm, state->enabled,
-                                    state->polarity);
+       if (state->enabled != enabled) {
+               ret = rockchip_pwm_enable(chip, pwm, state->enabled,
+                                   state->polarity);
+               if (ret)
+                       goto out;
+       }
  
         /*
          * Update the state with the real hardware, which can differ a bit
diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c

index 9d19b9a62011b376be541b247336d455952bb42b..315a4be8dc1e64f429fb6bd5bab9700a0d254f29 100644 (file)
--- a/drivers/rapidio/devices/tsi721.c
+++ b/drivers/rapidio/devices/tsi721.c
@@ -37,8 +37,8 @@
  #include "tsi721.h"
  
  #ifdef DEBUG
-u32 dbg_level;
-module_param(dbg_level, uint, S_IWUSR | S_IRUGO);
+u32 tsi_dbg_level;
+module_param_named(dbg_level, tsi_dbg_level, uint, S_IWUSR | S_IRUGO);
  MODULE_PARM_DESC(dbg_level, "Debugging output level (default 0 = none)");
  #endif
  
diff --git a/drivers/rapidio/devices/tsi721.h b/drivers/rapidio/devices/tsi721.h

index 5941437cbdd164c7e0c491f33117401bc6fe54e2..957eadc5815095045f06291dbc2b83bbaea795d7 100644 (file)
--- a/drivers/rapidio/devices/tsi721.h
+++ b/drivers/rapidio/devices/tsi721.h
@@ -40,11 +40,11 @@ enum {
  };
  
  #ifdef DEBUG
-extern u32 dbg_level;
+extern u32 tsi_dbg_level;
  
  #define tsi_debug(level, dev, fmt, arg...)                             \
         do {                                                            \
-               if (DBG_##level & dbg_level)                            \
+               if (DBG_##level & tsi_dbg_level)                                \
                         dev_dbg(dev, "%s: " fmt "\n", __func__, ##arg); \
         } while (0)
  #else
diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig

index 65f86bc24c07c7032726700e09a1d9ef3cdfb3c2..1dc43fc5f65f38d8028388548452bd12218641fc 100644 (file)
--- a/drivers/remoteproc/Kconfig
+++ b/drivers/remoteproc/Kconfig
@@ -76,7 +76,7 @@ config QCOM_ADSP_PIL
         depends on OF && ARCH_QCOM
         depends on REMOTEPROC
         depends on QCOM_SMEM
-       depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n)
+       depends on RPMSG_QCOM_SMD || QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n && RPMSG_QCOM_SMD=n)
         select MFD_SYSCON
         select QCOM_MDT_LOADER
         select QCOM_RPROC_COMMON
@@ -93,7 +93,7 @@ config QCOM_Q6V5_PIL
         depends on OF && ARCH_QCOM
         depends on QCOM_SMEM
         depends on REMOTEPROC
-       depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n)
+       depends on RPMSG_QCOM_SMD || QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n && RPMSG_QCOM_SMD=n)
         select MFD_SYSCON
         select QCOM_RPROC_COMMON
         select QCOM_SCM
@@ -104,7 +104,7 @@ config QCOM_Q6V5_PIL
  config QCOM_WCNSS_PIL
         tristate "Qualcomm WCNSS Peripheral Image Loader"
         depends on OF && ARCH_QCOM
-       depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n)
+       depends on RPMSG_QCOM_SMD || QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n && RPMSG_QCOM_SMD=n)
         depends on QCOM_SMEM
         depends on REMOTEPROC
         select QCOM_MDT_LOADER
diff --git a/drivers/reset/core.c b/drivers/reset/core.c

index f1e5e65388bb525b186f9257794afcf3564d2c3f..cd739d2fa160387c91b08e1b4d4f470394599c57 100644 (file)
--- a/drivers/reset/core.c
+++ b/drivers/reset/core.c
@@ -275,7 +275,7 @@ int reset_control_status(struct reset_control *rstc)
  }
  EXPORT_SYMBOL_GPL(reset_control_status);
  
-static struct reset_control *__reset_control_get(
+static struct reset_control *__reset_control_get_internal(
                                 struct reset_controller_dev *rcdev,
                                 unsigned int index, bool shared)
  {
@@ -308,7 +308,7 @@ static struct reset_control *__reset_control_get(
         return rstc;
  }
  
-static void __reset_control_put(struct reset_control *rstc)
+static void __reset_control_put_internal(struct reset_control *rstc)
  {
         lockdep_assert_held(&reset_list_mutex);
  
@@ -377,7 +377,7 @@ struct reset_control *__of_reset_control_get(struct device_node *node,
         }
  
         /* reset_list_mutex also protects the rcdev's reset_control list */
-       rstc = __reset_control_get(rcdev, rstc_id, shared);
+       rstc = __reset_control_get_internal(rcdev, rstc_id, shared);
  
         mutex_unlock(&reset_list_mutex);
  
@@ -385,6 +385,17 @@ struct reset_control *__of_reset_control_get(struct device_node *node,
  }
  EXPORT_SYMBOL_GPL(__of_reset_control_get);
  
+/*
+ * Look up a reset control for @dev.
+ *
+ * With a device tree node the request is forwarded to
+ * __of_reset_control_get(). Without one, an optional lookup yields NULL and
+ * a mandatory one yields ERR_PTR(-EINVAL).
+ */
+struct reset_control *__reset_control_get(struct device *dev, const char *id,
+                                         int index, bool shared, bool optional)
+{
+       if (!dev->of_node) {
+               if (optional)
+                       return NULL;
+
+               return ERR_PTR(-EINVAL);
+       }
+
+       return __of_reset_control_get(dev->of_node, id, index, shared,
+                                     optional);
+}
+EXPORT_SYMBOL_GPL(__reset_control_get);
+
  /**
   * reset_control_put - free the reset controller
   * @rstc: reset controller
@@ -396,7 +407,7 @@ void reset_control_put(struct reset_control *rstc)
                 return;
  
         mutex_lock(&reset_list_mutex);
-       __reset_control_put(rstc);
+       __reset_control_put_internal(rstc);
         mutex_unlock(&reset_list_mutex);
  }
  EXPORT_SYMBOL_GPL(reset_control_put);
@@ -417,8 +428,7 @@ struct reset_control *__devm_reset_control_get(struct device *dev,
         if (!ptr)
                 return ERR_PTR(-ENOMEM);
  
-       rstc = __of_reset_control_get(dev ? dev->of_node : NULL,
-                                     id, index, shared, optional);
+       rstc = __reset_control_get(dev, id, index, shared, optional);
         if (!IS_ERR(rstc)) {
                 *ptr = rstc;
                 devres_add(dev, ptr);
diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c

index 40f1136f55688981d70767bcf4eb55524a603cc6..058db724b5a28a8390856aeb51da30a639f3cf6b 100644 (file)
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -572,6 +572,12 @@ int pkey_sec2protkey(u16 cardnr, u16 domain,
                 rc = -EIO;
                 goto out;
         }
+       if (prepcblk->ccp_rscode != 0) {
+               DEBUG_WARN(
+                       "pkey_sec2protkey unwrap secure key warning, card response %d/%d\n",
+                       (int) prepcblk->ccp_rtcode,
+                       (int) prepcblk->ccp_rscode);
+       }
  
         /* process response cprb param block */
         prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX);
@@ -761,9 +767,10 @@ out:
  }
  
  /*
- * Fetch just the mkvp value via query_crypto_facility from adapter.
+ * Fetch the current and old mkvp values via
+ * query_crypto_facility from adapter.
   */
-static int fetch_mkvp(u16 cardnr, u16 domain, u64 *mkvp)
+static int fetch_mkvp(u16 cardnr, u16 domain, u64 mkvp[2])
  {
         int rc, found = 0;
         size_t rlen, vlen;
@@ -779,9 +786,10 @@ static int fetch_mkvp(u16 cardnr, u16 domain, u64 *mkvp)
         rc = query_crypto_facility(cardnr, domain, "STATICSA",
                                    rarray, &rlen, varray, &vlen);
         if (rc == 0 && rlen > 8*8 && vlen > 184+8) {
-               if (rarray[64] == '2') {
+               if (rarray[8*8] == '2') {
                         /* current master key state is valid */
-                       *mkvp = *((u64 *)(varray + 184));
+                       mkvp[0] = *((u64 *)(varray + 184));
+                       mkvp[1] = *((u64 *)(varray + 172));
                         found = 1;
                 }
         }
@@ -796,14 +804,14 @@ struct mkvp_info {
         struct list_head list;
         u16 cardnr;
         u16 domain;
-       u64 mkvp;
+       u64 mkvp[2];
  };
  
  /* a list with mkvp_info entries */
  static LIST_HEAD(mkvp_list);
  static DEFINE_SPINLOCK(mkvp_list_lock);
  
-static int mkvp_cache_fetch(u16 cardnr, u16 domain, u64 *mkvp)
+static int mkvp_cache_fetch(u16 cardnr, u16 domain, u64 mkvp[2])
  {
         int rc = -ENOENT;
         struct mkvp_info *ptr;
@@ -812,7 +820,7 @@ static int mkvp_cache_fetch(u16 cardnr, u16 domain, u64 *mkvp)
         list_for_each_entry(ptr, &mkvp_list, list) {
                 if (ptr->cardnr == cardnr &&
                     ptr->domain == domain) {
-                       *mkvp = ptr->mkvp;
+                       memcpy(mkvp, ptr->mkvp, 2 * sizeof(u64));
                         rc = 0;
                         break;
                 }
@@ -822,7 +830,7 @@ static int mkvp_cache_fetch(u16 cardnr, u16 domain, u64 *mkvp)
         return rc;
  }
  
-static void mkvp_cache_update(u16 cardnr, u16 domain, u64 mkvp)
+static void mkvp_cache_update(u16 cardnr, u16 domain, u64 mkvp[2])
  {
         int found = 0;
         struct mkvp_info *ptr;
@@ -831,7 +839,7 @@ static void mkvp_cache_update(u16 cardnr, u16 domain, u64 mkvp)
         list_for_each_entry(ptr, &mkvp_list, list) {
                 if (ptr->cardnr == cardnr &&
                     ptr->domain == domain) {
-                       ptr->mkvp = mkvp;
+                       memcpy(ptr->mkvp, mkvp, 2 * sizeof(u64));
                         found = 1;
                         break;
                 }
@@ -844,7 +852,7 @@ static void mkvp_cache_update(u16 cardnr, u16 domain, u64 mkvp)
                 }
                 ptr->cardnr = cardnr;
                 ptr->domain = domain;
-               ptr->mkvp = mkvp;
+               memcpy(ptr->mkvp, mkvp, 2 * sizeof(u64));
                 list_add(&ptr->list, &mkvp_list);
         }
         spin_unlock_bh(&mkvp_list_lock);
@@ -888,8 +896,8 @@ int pkey_findcard(const struct pkey_seckey *seckey,
         struct secaeskeytoken *t = (struct secaeskeytoken *) seckey;
         struct zcrypt_device_matrix *device_matrix;
         u16 card, dom;
-       u64 mkvp;
-       int i, rc;
+       u64 mkvp[2];
+       int i, rc, oi = -1;
  
         /* mkvp must not be zero */
         if (t->mkvp == 0)
@@ -910,14 +918,14 @@ int pkey_findcard(const struct pkey_seckey *seckey,
                     device_matrix->device[i].functions & 0x04) {
                         /* an enabled CCA Coprocessor card */
                         /* try cached mkvp */
-                       if (mkvp_cache_fetch(card, dom, &mkvp) == 0 &&
-                           t->mkvp == mkvp) {
+                       if (mkvp_cache_fetch(card, dom, mkvp) == 0 &&
+                           t->mkvp == mkvp[0]) {
                                 if (!verify)
                                         break;
                                 /* verify: fetch mkvp from adapter */
-                               if (fetch_mkvp(card, dom, &mkvp) == 0) {
+                               if (fetch_mkvp(card, dom, mkvp) == 0) {
                                         mkvp_cache_update(card, dom, mkvp);
-                                       if (t->mkvp == mkvp)
+                                       if (t->mkvp == mkvp[0])
                                                 break;
                                 }
                         }
@@ -936,14 +944,21 @@ int pkey_findcard(const struct pkey_seckey *seckey,
                         card = AP_QID_CARD(device_matrix->device[i].qid);
                         dom = AP_QID_QUEUE(device_matrix->device[i].qid);
                         /* fresh fetch mkvp from adapter */
-                       if (fetch_mkvp(card, dom, &mkvp) == 0) {
+                       if (fetch_mkvp(card, dom, mkvp) == 0) {
                                 mkvp_cache_update(card, dom, mkvp);
-                               if (t->mkvp == mkvp)
+                               if (t->mkvp == mkvp[0])
                                         break;
+                               if (t->mkvp == mkvp[1] && oi < 0)
+                                       oi = i;
                         }
                 }
+               if (i >= MAX_ZDEV_ENTRIES && oi >= 0) {
+                       /* old mkvp matched, use this card then */
+                       card = AP_QID_CARD(device_matrix->device[oi].qid);
+                       dom = AP_QID_QUEUE(device_matrix->device[oi].qid);
+               }
         }
-       if (i < MAX_ZDEV_ENTRIES) {
+       if (i < MAX_ZDEV_ENTRIES || oi >= 0) {
                 if (pcardnr)
                         *pcardnr = card;
                 if (pdomain)
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h

index e7addea8741b799066644052cba4e9a99f3a3335..d9561e39c3b237078a2c1fe888042e1321be7883 100644 (file)
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -961,7 +961,8 @@ int qeth_bridgeport_query_ports(struct qeth_card *card,
  int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role);
  int qeth_bridgeport_an_set(struct qeth_card *card, int enable);
  int qeth_get_priority_queue(struct qeth_card *, struct sk_buff *, int, int);
-int qeth_get_elements_no(struct qeth_card *, struct sk_buff *, int);
+int qeth_get_elements_no(struct qeth_card *card, struct sk_buff *skb,
+                        int extra_elems, int data_offset);
  int qeth_get_elements_for_frags(struct sk_buff *);
  int qeth_do_send_packet_fast(struct qeth_card *, struct qeth_qdio_out_q *,
                         struct sk_buff *, struct qeth_hdr *, int, int, int);
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c

index 315d8a2db7c066a0b8eb3739021edc9fde698c19..9a5f99ccb122bab3918f136f403bdec04b98d2f4 100644 (file)
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -3837,6 +3837,7 @@ EXPORT_SYMBOL_GPL(qeth_get_elements_for_frags);
   * @card:                      qeth card structure, to check max. elems.
   * @skb:                       SKB address
   * @extra_elems:               extra elems needed, to check against max.
+ * @data_offset:               range starts at skb->data + data_offset
   *
   * Returns the number of pages, and thus QDIO buffer elements, needed to cover
   * skb data, including linear part and fragments. Checks if the result plus
@@ -3844,10 +3845,10 @@ EXPORT_SYMBOL_GPL(qeth_get_elements_for_frags);
   * Note: extra_elems is not included in the returned result.
   */
  int qeth_get_elements_no(struct qeth_card *card,
-                    struct sk_buff *skb, int extra_elems)
+                    struct sk_buff *skb, int extra_elems, int data_offset)
  {
         int elements = qeth_get_elements_for_range(
-                               (addr_t)skb->data,
+                               (addr_t)skb->data + data_offset,
                                 (addr_t)skb->data + skb_headlen(skb)) +
                         qeth_get_elements_for_frags(skb);
  
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c

index bea483307618996240cb90cc3382950ab8b38354..af4e6a639fecf20fc5d4c15b5a398ae4b460c4d5 100644 (file)
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -849,7 +849,7 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
          * chaining we can not send long frag lists
          */
         if ((card->info.type != QETH_CARD_TYPE_IQD) &&
-           !qeth_get_elements_no(card, new_skb, 0)) {
+           !qeth_get_elements_no(card, new_skb, 0, 0)) {
                 int lin_rc = skb_linearize(new_skb);
  
                 if (card->options.performance_stats) {
@@ -894,7 +894,8 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
                 }
         }
  
-       elements = qeth_get_elements_no(card, new_skb, elements_needed);
+       elements = qeth_get_elements_no(card, new_skb, elements_needed,
+                                       (data_offset > 0) ? data_offset : 0);
         if (!elements) {
                 if (data_offset >= 0)
                         kmem_cache_free(qeth_core_header_cache, hdr);
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c

index 06d0addcc058dcccd4333a8f3edcdd96fccb2de4..653f0fb76573ab66c0178615ebcd81a5fe04c16e 100644 (file)
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2609,17 +2609,13 @@ static void qeth_l3_fill_af_iucv_hdr(struct qeth_card *card,
         char daddr[16];
         struct af_iucv_trans_hdr *iucv_hdr;
  
-       skb_pull(skb, 14);
-       card->dev->header_ops->create(skb, card->dev, 0,
-                                     card->dev->dev_addr, card->dev->dev_addr,
-                                     card->dev->addr_len);
-       skb_pull(skb, 14);
-       iucv_hdr = (struct af_iucv_trans_hdr *)skb->data;
         memset(hdr, 0, sizeof(struct qeth_hdr));
         hdr->hdr.l3.id = QETH_HEADER_TYPE_LAYER3;
         hdr->hdr.l3.ext_flags = 0;
-       hdr->hdr.l3.length = skb->len;
+       hdr->hdr.l3.length = skb->len - ETH_HLEN;
         hdr->hdr.l3.flags = QETH_HDR_IPV6 | QETH_CAST_UNICAST;
+
+       iucv_hdr = (struct af_iucv_trans_hdr *) (skb->data + ETH_HLEN);
         memset(daddr, 0, sizeof(daddr));
         daddr[0] = 0xfe;
         daddr[1] = 0x80;
@@ -2823,10 +2819,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
         if ((card->info.type == QETH_CARD_TYPE_IQD) &&
             !skb_is_nonlinear(skb)) {
                 new_skb = skb;
-               if (new_skb->protocol == ETH_P_AF_IUCV)
-                       data_offset = 0;
-               else
-                       data_offset = ETH_HLEN;
+               data_offset = ETH_HLEN;
                 hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC);
                 if (!hdr)
                         goto tx_drop;
@@ -2867,7 +2860,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
          */
         if ((card->info.type != QETH_CARD_TYPE_IQD) &&
             ((use_tso && !qeth_l3_get_elements_no_tso(card, new_skb, 1)) ||
-            (!use_tso && !qeth_get_elements_no(card, new_skb, 0)))) {
+            (!use_tso && !qeth_get_elements_no(card, new_skb, 0, 0)))) {
                 int lin_rc = skb_linearize(new_skb);
  
                 if (card->options.performance_stats) {
@@ -2909,7 +2902,8 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
  
         elements = use_tso ?
                    qeth_l3_get_elements_no_tso(card, new_skb, hdr_elements) :
-                  qeth_get_elements_no(card, new_skb, hdr_elements);
+                  qeth_get_elements_no(card, new_skb, hdr_elements,
+                                       (data_offset > 0) ? data_offset : 0);
         if (!elements) {
                 if (data_offset >= 0)
                         kmem_cache_free(qeth_core_header_cache, hdr);
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c

index 6ff61dad5e21570f27ed68474bbe60f9e7095948..62fed9dc893ef41ba6cdaafba34b24027a7c03f1 100644 (file)
--- a/drivers/sbus/char/jsflash.c
+++ b/drivers/sbus/char/jsflash.c
@@ -183,11 +183,33 @@ static void jsfd_read(char *buf, unsigned long p, size_t togo) {
         }
  }
  
-static void jsfd_do_request(struct request_queue *q)
+static int jsfd_queue;
+
+static struct request *jsfd_next_request(void)
+{
+       struct request_queue *q;
+       struct request *rq;
+       int old_pos = jsfd_queue;
+
+       do {
+               q = jsfd_disk[jsfd_queue]->queue;
+               if (++jsfd_queue == JSF_MAX)
+                       jsfd_queue = 0;
+               if (q) {
+                       rq = blk_fetch_request(q);
+                       if (rq)
+                               return rq;
+               }
+       } while (jsfd_queue != old_pos);
+
+       return NULL;
+}
+
+static void jsfd_request(void)
  {
         struct request *req;
  
-       req = blk_fetch_request(q);
+       req = jsfd_next_request();
         while (req) {
                 struct jsfd_part *jdp = req->rq_disk->private_data;
                 unsigned long offset = blk_rq_pos(req) << 9;
@@ -211,10 +233,15 @@ static void jsfd_do_request(struct request_queue *q)
                 err = 0;
         end:
                 if (!__blk_end_request_cur(req, err))
-                       req = blk_fetch_request(q);
+                       req = jsfd_next_request();
         }
  }
  
+static void jsfd_do_request(struct request_queue *q)
+{
+       jsfd_request();
+}
+
  /*
   * The memory devices use the full 32/64 bits of the offset, and so we cannot
   * check against negative addresses: they are ok. The return value is weird,
@@ -544,8 +571,6 @@ static int jsflash_init(void)
         return 0;
  }
  
-static struct request_queue *jsf_queue;
-
  static int jsfd_init(void)
  {
         static DEFINE_SPINLOCK(lock);
@@ -562,6 +587,11 @@ static int jsfd_init(void)
                 struct gendisk *disk = alloc_disk(1);
                 if (!disk)
                         goto out;
+               disk->queue = blk_init_queue(jsfd_do_request, &lock);
+               if (!disk->queue) {
+                       put_disk(disk);
+                       goto out;
+               }
                 jsfd_disk[i] = disk;
         }
  
@@ -570,13 +600,6 @@ static int jsfd_init(void)
                 goto out;
         }
  
-       jsf_queue = blk_init_queue(jsfd_do_request, &lock);
-       if (!jsf_queue) {
-               err = -ENOMEM;
-               unregister_blkdev(JSFD_MAJOR, "jsfd");
-               goto out;
-       }
-
         for (i = 0; i < JSF_MAX; i++) {
                 struct gendisk *disk = jsfd_disk[i];
                 if ((i & JSF_PART_MASK) >= JSF_NPART) continue;
@@ -589,7 +612,6 @@ static int jsfd_init(void)
                 disk->fops = &jsfd_fops;
                 set_capacity(disk, jdp->dsize >> 9);
                 disk->private_data = jdp;
-               disk->queue = jsf_queue;
                 add_disk(disk);
                 set_disk_ro(disk, 1);
         }
@@ -619,6 +641,7 @@ static void __exit jsflash_cleanup_module(void)
         for (i = 0; i < JSF_MAX; i++) {
                 if ((i & JSF_PART_MASK) >= JSF_NPART) continue;
                 del_gendisk(jsfd_disk[i]);
+               blk_cleanup_queue(jsfd_disk[i]->queue);
                 put_disk(jsfd_disk[i]);
         }
         if (jsf0.busy)
@@ -628,7 +651,6 @@ static void __exit jsflash_cleanup_module(void)
  
         misc_deregister(&jsf_dev);
         unregister_blkdev(JSFD_MAJOR, "jsfd");
-       blk_cleanup_queue(jsf_queue);
  }
  
  module_init(jsflash_init_module);
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig

index 230043c1c90ffcfe5781bcdea23cc52dc9bc6732..3c52867dfe28e33b04f85858dfdb9285eef11ba2 100644 (file)
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1241,16 +1241,15 @@ config SCSI_LPFC
         tristate "Emulex LightPulse Fibre Channel Support"
         depends on PCI && SCSI
         depends on SCSI_FC_ATTRS
-       depends on NVME_FC && NVME_TARGET_FC
         select CRC_T10DIF
-       help
+       ---help---
            This lpfc driver supports the Emulex LightPulse
            Family of Fibre Channel PCI host adapters.
  
  config SCSI_LPFC_DEBUG_FS
         bool "Emulex LightPulse Fibre Channel debugfs Support"
         depends on SCSI_LPFC && DEBUG_FS
-       help
+       ---help---
           This makes debugging information from the lpfc driver
           available via the debugfs filesystem.
  
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile

index fc2855565a51fd7b65f318f4f96c4caaf059e001..93dbe58c47c845ddbe7faf25e540401f67d6c0ab 100644 (file)
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -166,6 +166,7 @@ scsi_mod-y                  += scsi_scan.o scsi_sysfs.o scsi_devinfo.o
  scsi_mod-$(CONFIG_SCSI_NETLINK)        += scsi_netlink.o
  scsi_mod-$(CONFIG_SYSCTL)      += scsi_sysctl.o
  scsi_mod-$(CONFIG_SCSI_PROC_FS)        += scsi_proc.o
+scsi_mod-$(CONFIG_BLK_DEBUG_FS)        += scsi_debugfs.o
  scsi_mod-y                     += scsi_trace.o scsi_logging.o
  scsi_mod-$(CONFIG_PM)          += scsi_pm.o
  scsi_mod-$(CONFIG_SCSI_DH)     += scsi_dh.o
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h

index d036a806f31c47917e2a35cac4a2666bfffdb3eb..d281492009fb4457131b5ef87b04b58d8f3d94c3 100644 (file)
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -1690,9 +1690,6 @@ struct aac_dev
  #define aac_adapter_sync_cmd(dev, command, p1, p2, p3, p4, p5, p6, status, r1, r2, r3, r4) \
         (dev)->a_ops.adapter_sync_cmd(dev, command, p1, p2, p3, p4, p5, p6, status, r1, r2, r3, r4)
  
-#define aac_adapter_check_health(dev) \
-       (dev)->a_ops.adapter_check_health(dev)
-
  #define aac_adapter_restart(dev, bled, reset_type) \
         ((dev)->a_ops.adapter_restart(dev, bled, reset_type))
  
@@ -2615,6 +2612,14 @@ static inline unsigned int cap_to_cyls(sector_t capacity, unsigned divisor)
         return capacity;
  }
  
+static inline int aac_adapter_check_health(struct aac_dev *dev)
+{
+       if (unlikely(pci_channel_offline(dev->pdev)))
+               return -1;
+
+       return (dev)->a_ops.adapter_check_health(dev);
+}
+
  /* SCp.phase values */
  #define AAC_OWNER_MIDLEVEL     0x101
  #define AAC_OWNER_LOWLEVEL     0x102
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c

index a3ad042934870d4bd8bd5ded29a00516156d9168..1f4918355fdb00a9abab06da1e154b710fb86b0e 100644 (file)
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -1873,7 +1873,8 @@ int aac_check_health(struct aac_dev * aac)
         spin_unlock_irqrestore(&aac->fib_lock, flagv);
  
         if (BlinkLED < 0) {
-               printk(KERN_ERR "%s: Host adapter dead %d\n", aac->name, BlinkLED);
+               printk(KERN_ERR "%s: Host adapter is dead (or got a PCI error) %d\n",
+                               aac->name, BlinkLED);
                 goto out;
         }
  
@@ -2056,7 +2057,6 @@ static int fillup_pools(struct aac_dev *dev, struct hw_fib **hw_fib_pool,
  {
         struct hw_fib **hw_fib_p;
         struct fib **fib_p;
-       int rcode = 1;
  
         hw_fib_p = hw_fib_pool;
         fib_p = fib_pool;
@@ -2074,11 +2074,11 @@ static int fillup_pools(struct aac_dev *dev, struct hw_fib **hw_fib_pool,
                 }
         }
  
+       /*
+        * Get the actual number of allocated fibs
+        */
         num = hw_fib_p - hw_fib_pool;
-       if (!num)
-               rcode = 0;
-
-       return rcode;
+       return num;
  }
  
  static void wakeup_fibctx_threads(struct aac_dev *dev,
@@ -2186,7 +2186,6 @@ static void aac_process_events(struct aac_dev *dev)
         struct fib *fib;
         unsigned long flags;
         spinlock_t *t_lock;
-       unsigned int rcode;
  
         t_lock = dev->queues->queue[HostNormCmdQueue].lock;
         spin_lock_irqsave(t_lock, flags);
@@ -2269,8 +2268,8 @@ static void aac_process_events(struct aac_dev *dev)
                  * Fill up fib pointer pools with actual fibs
                  * and hw_fibs
                  */
-               rcode = fillup_pools(dev, hw_fib_pool, fib_pool, num);
-               if (!rcode)
+               num = fillup_pools(dev, hw_fib_pool, fib_pool, num);
+               if (!num)
                         goto free_mem;
  
                 /*
diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c

index 2e5338dec621fbff89c8a68acc3ba241173f3239..7b0410e0f569481cc40101fab38a768c7e274c8e 100644 (file)
--- a/drivers/scsi/aacraid/src.c
+++ b/drivers/scsi/aacraid/src.c
@@ -468,7 +468,7 @@ err_out:
         return -1;
  
  err_blink:
-       return (status > 16) & 0xFF;
+       return (status >> 16) & 0xFF;
  }
  
  static inline u32 aac_get_vector(struct aac_dev *dev)
diff --git a/drivers/scsi/aic7xxx/aic79xx_core.c b/drivers/scsi/aic7xxx/aic79xx_core.c

index 109e2c99e6c162e01a4b569292bad7b4e68fd3dc..95d8f25cbccab7056dc4c7967814cd5932fd3507 100644 (file)
--- a/drivers/scsi/aic7xxx/aic79xx_core.c
+++ b/drivers/scsi/aic7xxx/aic79xx_core.c
@@ -6278,7 +6278,7 @@ ahd_reset(struct ahd_softc *ahd, int reinit)
                  * does not disable its parity logic prior to
                  * the start of the reset.  This may cause a
                  * parity error to be detected and thus a
-                * spurious SERR or PERR assertion.  Disble
+                * spurious SERR or PERR assertion.  Disable
                  * PERR and SERR responses during the CHIPRST.
                  */
                 mod_cmd = cmd & ~(PCIM_CMD_PERRESPEN|PCIM_CMD_SERRESPEN);
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c

index 48e200102221c518dacba0a7137c77170e81c53a..c01b47e5b55a899a48b28553a41747b04d2903cc 100644 (file)
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -113,7 +113,7 @@ struct alua_queue_data {
  #define ALUA_POLICY_SWITCH_ALL         1
  
  static void alua_rtpg_work(struct work_struct *work);
-static void alua_rtpg_queue(struct alua_port_group *pg,
+static bool alua_rtpg_queue(struct alua_port_group *pg,
                             struct scsi_device *sdev,
                             struct alua_queue_data *qdata, bool force);
  static void alua_check(struct scsi_device *sdev, bool force);
@@ -862,7 +862,13 @@ static void alua_rtpg_work(struct work_struct *work)
         kref_put(&pg->kref, release_port_group);
  }
  
-static void alua_rtpg_queue(struct alua_port_group *pg,
+/**
+ * alua_rtpg_queue() - cause RTPG to be submitted asynchronously
+ *
+ * Returns true if and only if alua_rtpg_work() will be called asynchronously.
+ * That function is responsible for calling @qdata->fn().
+ */
+static bool alua_rtpg_queue(struct alua_port_group *pg,
                             struct scsi_device *sdev,
                             struct alua_queue_data *qdata, bool force)
  {
@@ -870,8 +876,8 @@ static void alua_rtpg_queue(struct alua_port_group *pg,
         unsigned long flags;
         struct workqueue_struct *alua_wq = kaluad_wq;
  
-       if (!pg)
-               return;
+       if (WARN_ON_ONCE(!pg) || scsi_device_get(sdev))
+               return false;
  
         spin_lock_irqsave(&pg->lock, flags);
         if (qdata) {
@@ -884,14 +890,12 @@ static void alua_rtpg_queue(struct alua_port_group *pg,
                 pg->flags |= ALUA_PG_RUN_RTPG;
                 kref_get(&pg->kref);
                 pg->rtpg_sdev = sdev;
-               scsi_device_get(sdev);
                 start_queue = 1;
         } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) {
                 pg->flags |= ALUA_PG_RUN_RTPG;
                 /* Do not queue if the worker is already running */
                 if (!(pg->flags & ALUA_PG_RUNNING)) {
                         kref_get(&pg->kref);
-                       sdev = NULL;
                         start_queue = 1;
                 }
         }
@@ -900,13 +904,17 @@ static void alua_rtpg_queue(struct alua_port_group *pg,
                 alua_wq = kaluad_sync_wq;
         spin_unlock_irqrestore(&pg->lock, flags);
  
-       if (start_queue &&
-           !queue_delayed_work(alua_wq, &pg->rtpg_work,
-                               msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) {
-               if (sdev)
-                       scsi_device_put(sdev);
-               kref_put(&pg->kref, release_port_group);
+       if (start_queue) {
+               if (queue_delayed_work(alua_wq, &pg->rtpg_work,
+                               msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS)))
+                       sdev = NULL;
+               else
+                       kref_put(&pg->kref, release_port_group);
         }
+       if (sdev)
+               scsi_device_put(sdev);
+
+       return true;
  }
  
  /*
@@ -1007,11 +1015,13 @@ static int alua_activate(struct scsi_device *sdev,
                 mutex_unlock(&h->init_mutex);
                 goto out;
         }
-       fn = NULL;
         rcu_read_unlock();
         mutex_unlock(&h->init_mutex);
  
-       alua_rtpg_queue(pg, sdev, qdata, true);
+       if (alua_rtpg_queue(pg, sdev, qdata, true))
+               fn = NULL;
+       else
+               err = SCSI_DH_DEV_OFFLINED;
         kref_put(&pg->kref, release_port_group);
  out:
         if (fn)
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c

index 524a0c755ed7e74cd790778ec7c04ae452cc853d..9d659aaace15d0e3ec28c69a14688b81a544d8b7 100644 (file)
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -2956,7 +2956,7 @@ static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr,
         /* fill_cmd can't fail here, no data buffer to map. */
         (void) fill_cmd(c, reset_type, h, NULL, 0, 0,
                         scsi3addr, TYPE_MSG);
-       rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, DEFAULT_TIMEOUT);
+       rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, NO_TIMEOUT);
         if (rc) {
                 dev_warn(&h->pdev->dev, "Failed to send reset command\n");
                 goto out;
@@ -3714,7 +3714,7 @@ exit_failed:
   *  # (integer code indicating one of several NOT READY states
   *     describing why a volume is to be kept offline)
   */
-static int hpsa_volume_offline(struct ctlr_info *h,
+static unsigned char hpsa_volume_offline(struct ctlr_info *h,
                                         unsigned char scsi3addr[])
  {
         struct CommandList *c;
@@ -3735,7 +3735,7 @@ static int hpsa_volume_offline(struct ctlr_info *h,
                                         DEFAULT_TIMEOUT);
         if (rc) {
                 cmd_free(h, c);
-               return 0;
+               return HPSA_VPD_LV_STATUS_UNSUPPORTED;
         }
         sense = c->err_info->SenseInfo;
         if (c->err_info->SenseLen > sizeof(c->err_info->SenseInfo))
@@ -3746,19 +3746,13 @@ static int hpsa_volume_offline(struct ctlr_info *h,
         cmd_status = c->err_info->CommandStatus;
         scsi_status = c->err_info->ScsiStatus;
         cmd_free(h, c);
-       /* Is the volume 'not ready'? */
-       if (cmd_status != CMD_TARGET_STATUS ||
-               scsi_status != SAM_STAT_CHECK_CONDITION ||
-               sense_key != NOT_READY ||
-               asc != ASC_LUN_NOT_READY)  {
-               return 0;
-       }
  
         /* Determine the reason for not ready state */
         ldstat = hpsa_get_volume_status(h, scsi3addr);
  
         /* Keep volume offline in certain cases: */
         switch (ldstat) {
+       case HPSA_LV_FAILED:
         case HPSA_LV_UNDERGOING_ERASE:
         case HPSA_LV_NOT_AVAILABLE:
         case HPSA_LV_UNDERGOING_RPI:
@@ -3780,7 +3774,7 @@ static int hpsa_volume_offline(struct ctlr_info *h,
         default:
                 break;
         }
-       return 0;
+       return HPSA_LV_OK;
  }
  
  /*
@@ -3853,10 +3847,10 @@ static int hpsa_update_device_info(struct ctlr_info *h,
         /* Do an inquiry to the device to see what it is. */
         if (hpsa_scsi_do_inquiry(h, scsi3addr, 0, inq_buff,
                 (unsigned char) OBDR_TAPE_INQ_SIZE) != 0) {
-               /* Inquiry failed (msg printed already) */
                 dev_err(&h->pdev->dev,
-                       "hpsa_update_device_info: inquiry failed\n");
-               rc = -EIO;
+                       "%s: inquiry failed, device will be skipped.\n",
+                       __func__);
+               rc = HPSA_INQUIRY_FAILED;
                 goto bail_out;
         }
  
@@ -3885,15 +3879,20 @@ static int hpsa_update_device_info(struct ctlr_info *h,
         if ((this_device->devtype == TYPE_DISK ||
                 this_device->devtype == TYPE_ZBC) &&
                 is_logical_dev_addr_mode(scsi3addr)) {
-               int volume_offline;
+               unsigned char volume_offline;
  
                 hpsa_get_raid_level(h, scsi3addr, &this_device->raid_level);
                 if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC)
                         hpsa_get_ioaccel_status(h, scsi3addr, this_device);
                 volume_offline = hpsa_volume_offline(h, scsi3addr);
-               if (volume_offline < 0 || volume_offline > 0xff)
-                       volume_offline = HPSA_VPD_LV_STATUS_UNSUPPORTED;
-               this_device->volume_offline = volume_offline & 0xff;
+               this_device->volume_offline = volume_offline;
+               if (volume_offline == HPSA_LV_FAILED) {
+                       rc = HPSA_LV_FAILED;
+                       dev_err(&h->pdev->dev,
+                               "%s: LV failed, device will be skipped.\n",
+                               __func__);
+                       goto bail_out;
+               }
         } else {
                 this_device->raid_level = RAID_UNKNOWN;
                 this_device->offload_config = 0;
@@ -4379,8 +4378,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h)
                         goto out;
                 }
                 if (rc) {
-                       dev_warn(&h->pdev->dev,
-                               "Inquiry failed, skipping device.\n");
+                       h->drv_req_rescan = 1;
                         continue;
                 }
  
@@ -5558,7 +5556,7 @@ static void hpsa_scan_complete(struct ctlr_info *h)
  
         spin_lock_irqsave(&h->scan_lock, flags);
         h->scan_finished = 1;
-       wake_up_all(&h->scan_wait_queue);
+       wake_up(&h->scan_wait_queue);
         spin_unlock_irqrestore(&h->scan_lock, flags);
  }
  
@@ -5576,11 +5574,23 @@ static void hpsa_scan_start(struct Scsi_Host *sh)
         if (unlikely(lockup_detected(h)))
                 return hpsa_scan_complete(h);
  
+       /*
+        * If a scan is already waiting to run, no need to add another
+        */
+       spin_lock_irqsave(&h->scan_lock, flags);
+       if (h->scan_waiting) {
+               spin_unlock_irqrestore(&h->scan_lock, flags);
+               return;
+       }
+
+       spin_unlock_irqrestore(&h->scan_lock, flags);
+
         /* wait until any scan already in progress is finished. */
         while (1) {
                 spin_lock_irqsave(&h->scan_lock, flags);
                 if (h->scan_finished)
                         break;
+               h->scan_waiting = 1;
                 spin_unlock_irqrestore(&h->scan_lock, flags);
                 wait_event(h->scan_wait_queue, h->scan_finished);
                 /* Note: We don't need to worry about a race between this
@@ -5590,6 +5600,7 @@ static void hpsa_scan_start(struct Scsi_Host *sh)
                  */
         }
         h->scan_finished = 0; /* mark scan as in progress */
+       h->scan_waiting = 0;
         spin_unlock_irqrestore(&h->scan_lock, flags);
  
         if (unlikely(lockup_detected(h)))
@@ -8792,6 +8803,7 @@ reinit_after_soft_reset:
         init_waitqueue_head(&h->event_sync_wait_queue);
         mutex_init(&h->reset_mutex);
         h->scan_finished = 1; /* no scan currently in progress */
+       h->scan_waiting = 0;
  
         pci_set_drvdata(pdev, h);
         h->ndevices = 0;
diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h

index bf6cdc1066544fa5fe2df6f5396d17ca4b4c8909..6f04f2ad412530a76d615b394250d502221d5457 100644 (file)
--- a/drivers/scsi/hpsa.h
+++ b/drivers/scsi/hpsa.h
@@ -201,6 +201,7 @@ struct ctlr_info {
         dma_addr_t              errinfo_pool_dhandle;
         unsigned long           *cmd_pool_bits;
         int                     scan_finished;
+       u8                      scan_waiting : 1;
         spinlock_t              scan_lock;
         wait_queue_head_t       scan_wait_queue;
  
diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h

index a584cdf0705846ef13a0375ecb2e1579513ecf92..5961705eef767526f66a6dbc1bbb1e7feec70c85 100644 (file)
--- a/drivers/scsi/hpsa_cmd.h
+++ b/drivers/scsi/hpsa_cmd.h
@@ -156,6 +156,7 @@
  #define CFGTBL_BusType_Fibre2G  0x00000200l
  
  /* VPD Inquiry types */
+#define HPSA_INQUIRY_FAILED            0x02
  #define HPSA_VPD_SUPPORTED_PAGES        0x00
  #define HPSA_VPD_LV_DEVICE_ID           0x83
  #define HPSA_VPD_LV_DEVICE_GEOMETRY     0xC1
@@ -166,6 +167,7 @@
  /* Logical volume states */
  #define HPSA_VPD_LV_STATUS_UNSUPPORTED                 0xff
  #define HPSA_LV_OK                                      0x0
+#define HPSA_LV_FAILED                                 0x01
  #define HPSA_LV_NOT_AVAILABLE                          0x0b
  #define HPSA_LV_UNDERGOING_ERASE                       0x0F
  #define HPSA_LV_UNDERGOING_RPI                         0x12
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c

index b29afafc28857e95bffd8946598748907ab77b17..5d5e272fd815a3ed076eb52e2df47d3ff765fd3a 100644 (file)
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -6293,7 +6293,12 @@ static void ipr_erp_start(struct ipr_ioa_cfg *ioa_cfg,
                 break;
         case IPR_IOASC_MED_DO_NOT_REALLOC: /* prevent retries */
         case IPR_IOASA_IR_DUAL_IOA_DISABLED:
-               scsi_cmd->result |= (DID_PASSTHROUGH << 16);
+               /*
+                * exception: do not set DID_PASSTHROUGH on CHECK CONDITION
+                * so SCSI mid-layer and upper layers handle it accordingly.
+                */
+               if (scsi_cmd->result != SAM_STAT_CHECK_CONDITION)
+                       scsi_cmd->result |= (DID_PASSTHROUGH << 16);
                 break;
         case IPR_IOASC_BUS_WAS_RESET:
         case IPR_IOASC_BUS_WAS_RESET_BY_OTHER:
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c

index 07c08ce68d70af2fc09be51a3f4c8ba67c5a1d9d..894b1e3ebd56f4a141a8ed49a298819e3abbe8b2 100644 (file)
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -561,8 +561,12 @@ static void iscsi_complete_task(struct iscsi_task *task, int state)
         WARN_ON_ONCE(task->state == ISCSI_TASK_FREE);
         task->state = state;
  
-       if (!list_empty(&task->running))
+       spin_lock_bh(&conn->taskqueuelock);
+       if (!list_empty(&task->running)) {
+               pr_debug_once("%s while task on list", __func__);
                 list_del_init(&task->running);
+       }
+       spin_unlock_bh(&conn->taskqueuelock);
  
         if (conn->task == task)
                 conn->task = NULL;
@@ -784,7 +788,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
                 if (session->tt->xmit_task(task))
                         goto free_task;
         } else {
+               spin_lock_bh(&conn->taskqueuelock);
                 list_add_tail(&task->running, &conn->mgmtqueue);
+               spin_unlock_bh(&conn->taskqueuelock);
                 iscsi_conn_queue_work(conn);
         }
  
@@ -1475,8 +1481,10 @@ void iscsi_requeue_task(struct iscsi_task *task)
          * this may be on the requeue list already if the xmit_task callout
          * is handling the r2ts while we are adding new ones
          */
+       spin_lock_bh(&conn->taskqueuelock);
         if (list_empty(&task->running))
                 list_add_tail(&task->running, &conn->requeue);
+       spin_unlock_bh(&conn->taskqueuelock);
         iscsi_conn_queue_work(conn);
  }
  EXPORT_SYMBOL_GPL(iscsi_requeue_task);
@@ -1513,22 +1521,26 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
          * only have one nop-out as a ping from us and targets should not
          * overflow us with nop-ins
          */
+       spin_lock_bh(&conn->taskqueuelock);
  check_mgmt:
         while (!list_empty(&conn->mgmtqueue)) {
                 conn->task = list_entry(conn->mgmtqueue.next,
                                          struct iscsi_task, running);
                 list_del_init(&conn->task->running);
+               spin_unlock_bh(&conn->taskqueuelock);
                 if (iscsi_prep_mgmt_task(conn, conn->task)) {
                         /* regular RX path uses back_lock */
                         spin_lock_bh(&conn->session->back_lock);
                         __iscsi_put_task(conn->task);
                         spin_unlock_bh(&conn->session->back_lock);
                         conn->task = NULL;
+                       spin_lock_bh(&conn->taskqueuelock);
                         continue;
                 }
                 rc = iscsi_xmit_task(conn);
                 if (rc)
                         goto done;
+               spin_lock_bh(&conn->taskqueuelock);
         }
  
         /* process pending command queue */
@@ -1536,19 +1548,24 @@ check_mgmt:
                 conn->task = list_entry(conn->cmdqueue.next, struct iscsi_task,
                                         running);
                 list_del_init(&conn->task->running);
+               spin_unlock_bh(&conn->taskqueuelock);
                 if (conn->session->state == ISCSI_STATE_LOGGING_OUT) {
                         fail_scsi_task(conn->task, DID_IMM_RETRY);
+                       spin_lock_bh(&conn->taskqueuelock);
                         continue;
                 }
                 rc = iscsi_prep_scsi_cmd_pdu(conn->task);
                 if (rc) {
                         if (rc == -ENOMEM || rc == -EACCES) {
+                               spin_lock_bh(&conn->taskqueuelock);
                                 list_add_tail(&conn->task->running,
                                               &conn->cmdqueue);
                                 conn->task = NULL;
+                               spin_unlock_bh(&conn->taskqueuelock);
                                 goto done;
                         } else
                                 fail_scsi_task(conn->task, DID_ABORT);
+                       spin_lock_bh(&conn->taskqueuelock);
                         continue;
                 }
                 rc = iscsi_xmit_task(conn);
@@ -1559,6 +1576,7 @@ check_mgmt:
                  * we need to check the mgmt queue for nops that need to
                  * be sent to aviod starvation
                  */
+               spin_lock_bh(&conn->taskqueuelock);
                 if (!list_empty(&conn->mgmtqueue))
                         goto check_mgmt;
         }
@@ -1578,12 +1596,15 @@ check_mgmt:
                 conn->task = task;
                 list_del_init(&conn->task->running);
                 conn->task->state = ISCSI_TASK_RUNNING;
+               spin_unlock_bh(&conn->taskqueuelock);
                 rc = iscsi_xmit_task(conn);
                 if (rc)
                         goto done;
+               spin_lock_bh(&conn->taskqueuelock);
                 if (!list_empty(&conn->mgmtqueue))
                         goto check_mgmt;
         }
+       spin_unlock_bh(&conn->taskqueuelock);
         spin_unlock_bh(&conn->session->frwd_lock);
         return -ENODATA;
  
@@ -1739,7 +1760,9 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc)
                         goto prepd_reject;
                 }
         } else {
+               spin_lock_bh(&conn->taskqueuelock);
                 list_add_tail(&task->running, &conn->cmdqueue);
+               spin_unlock_bh(&conn->taskqueuelock);
                 iscsi_conn_queue_work(conn);
         }
  
@@ -2897,6 +2920,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
         INIT_LIST_HEAD(&conn->mgmtqueue);
         INIT_LIST_HEAD(&conn->cmdqueue);
         INIT_LIST_HEAD(&conn->requeue);
+       spin_lock_init(&conn->taskqueuelock);
         INIT_WORK(&conn->xmitwork, iscsi_xmitworker);
  
         /* allocate login_task used for the login/text sequences */
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c

index 763f012fdeca00e21f086be86efa1ec12d65c5f0..87f5e694dbedd8a7eda74c1a5dccd0e9008e8c93 100644 (file)
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -221,7 +221,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc)
                 task->num_scatter = qc->n_elem;
         } else {
                 for_each_sg(qc->sg, sg, qc->n_elem, si)
-                       xfer += sg->length;
+                       xfer += sg_dma_len(sg);
  
                 task->total_xfer_len = xfer;
                 task->num_scatter = si;
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h

index 0bba2e30b4f09f62ef096ce73df629681a1646b6..6d7840b096e6f0899823e99d98e12153683e4f07 100644 (file)
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -56,7 +56,7 @@ struct lpfc_sli2_slim;
  #define LPFC_MAX_SG_SEG_CNT    4096    /* sg element count per scsi cmnd */
  #define LPFC_MAX_SGL_SEG_CNT   512     /* SGL element count per scsi cmnd */
  #define LPFC_MAX_BPL_SEG_CNT   4096    /* BPL element count per scsi cmnd */
-#define LPFC_MIN_NVME_SEG_CNT  254
+#define LPFC_MAX_NVME_SEG_CNT  128     /* max SGL element cnt per NVME cmnd */
  
  #define LPFC_MAX_SGE_SIZE       0x80000000 /* Maximum data allowed in a SGE */
  #define LPFC_IOCB_LIST_CNT     2250    /* list of IOCBs for fast-path usage. */
@@ -99,12 +99,13 @@ struct lpfc_sli2_slim;
  #define FC_MAX_ADPTMSG         64
  
  #define MAX_HBAEVT     32
+#define MAX_HBAS_NO_RESET 16
  
  /* Number of MSI-X vectors the driver uses */
  #define LPFC_MSIX_VECTORS      2
  
  /* lpfc wait event data ready flag */
-#define LPFC_DATA_READY                (1<<0)
+#define LPFC_DATA_READY                0       /* bit 0 */
  
  /* queue dump line buffer size */
  #define LPFC_LBUF_SZ           128
@@ -473,6 +474,8 @@ struct lpfc_vport {
         unsigned long rcv_buffer_time_stamp;
         uint32_t vport_flag;
  #define STATIC_VPORT   1
+#define FAWWPN_SET     2
+#define FAWWPN_PARAM_CHG       4
  
         uint16_t fdmi_num_disc;
         uint32_t fdmi_hba_mask;
@@ -692,6 +695,7 @@ struct lpfc_hba {
                                          * capability
                                          */
  #define HBA_NVME_IOQ_FLUSH      0x80000 /* NVME IO queues flushed. */
+#define NVME_XRI_ABORT_EVENT   0x100000
  
         uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/
         struct lpfc_dmabuf slim2p;
@@ -779,6 +783,7 @@ struct lpfc_hba {
         uint32_t cfg_nvmet_fb_size;
         uint32_t cfg_total_seg_cnt;
         uint32_t cfg_sg_seg_cnt;
+       uint32_t cfg_nvme_seg_cnt;
         uint32_t cfg_sg_dma_buf_size;
         uint64_t cfg_soft_wwnn;
         uint64_t cfg_soft_wwpn;
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c

index 5c783ef7f260612e881dd199bccd767d9179a99d..513fd07715cdf7ee65df3e588d0ecadb7c1f51d1 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -2292,6 +2292,8 @@ lpfc_soft_wwn_enable_store(struct device *dev, struct device_attribute *attr,
         struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
         struct lpfc_hba   *phba = vport->phba;
         unsigned int cnt = count;
+       uint8_t vvvl = vport->fc_sparam.cmn.valid_vendor_ver_level;
+       u32 *fawwpn_key = (uint32_t *)&vport->fc_sparam.un.vendorVersion[0];
  
         /*
          * We're doing a simple sanity check for soft_wwpn setting.
@@ -2305,6 +2307,12 @@ lpfc_soft_wwn_enable_store(struct device *dev, struct device_attribute *attr,
          * here. The intent is to protect against the random user or
          * application that is just writing attributes.
          */
+       if (vvvl == 1 && cpu_to_be32(*fawwpn_key) == FAPWWN_KEY_VENDOR) {
+               lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                                "0051 "LPFC_DRIVER_NAME" soft wwpn can not"
+                                " be enabled: fawwpn is enabled\n");
+               return -EINVAL;
+       }
  
         /* count may include a LF at end of string */
         if (buf[cnt-1] == '\n')
@@ -3010,6 +3018,12 @@ MODULE_PARM_DESC(lpfc_poll, "FCP ring polling mode control:"
  static DEVICE_ATTR(lpfc_poll, S_IRUGO | S_IWUSR,
                    lpfc_poll_show, lpfc_poll_store);
  
+int lpfc_no_hba_reset_cnt;
+unsigned long lpfc_no_hba_reset[MAX_HBAS_NO_RESET] = {
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+module_param_array(lpfc_no_hba_reset, ulong, &lpfc_no_hba_reset_cnt, 0444);
+MODULE_PARM_DESC(lpfc_no_hba_reset, "WWPN of HBAs that should not be reset");
+
  LPFC_ATTR(sli_mode, 0, 0, 3,
         "SLI mode selector:"
         " 0 - auto (SLI-3 if supported),"
@@ -3309,9 +3323,9 @@ LPFC_ATTR_R(nvmet_mrq_post, LPFC_DEF_MRQ_POST,
   * lpfc_enable_fc4_type: Defines what FC4 types are supported.
   * Supported Values:  1 - register just FCP
   *                    3 - register both FCP and NVME
- * Supported values are [1,3]. Default value is 3
+ * Supported values are [1,3]. Default value is 1
   */
-LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_BOTH,
+LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_FCP,
             LPFC_ENABLE_FCP, LPFC_ENABLE_BOTH,
             "Define fc4 type to register with fabric.");
  
@@ -3329,7 +3343,7 @@ LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_BOTH,
   * percentage will go to NVME.
   */
  LPFC_ATTR_R(xri_split, 50, 10, 90,
-            "Division of XRI resources between SCSI and NVME");
+           "Division of XRI resources between SCSI and NVME");
  
  /*
  # lpfc_log_verbose: Only turn this flag on if you are willing to risk being
@@ -4451,7 +4465,8 @@ lpfc_fcp_imax_store(struct device *dev, struct device_attribute *attr,
                 return -EINVAL;
  
         phba->cfg_fcp_imax = (uint32_t)val;
-       for (i = 0; i < phba->io_channel_irqs; i++)
+
+       for (i = 0; i < phba->io_channel_irqs; i += LPFC_MAX_EQ_DELAY_EQID_CNT)
                 lpfc_modify_hba_eq_delay(phba, i);
  
         return strlen(buf);
diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c

index 18157d2840a3b0d8fb4d51fff52844908fee298b..a1686c2d863c546d1d30342dd0a75200b82ff9b1 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_bsg.c
+++ b/drivers/scsi/lpfc/lpfc_bsg.c
@@ -2486,6 +2486,10 @@ static int lpfcdiag_loop_self_reg(struct lpfc_hba *phba, uint16_t *rpi)
                                 mbox, *rpi);
         else {
                 *rpi = lpfc_sli4_alloc_rpi(phba);
+               if (*rpi == LPFC_RPI_ALLOC_ERROR) {
+                       mempool_free(mbox, phba->mbox_mem_pool);
+                       return -EBUSY;
+               }
                 status = lpfc_reg_rpi(phba, phba->pport->vpi,
                                 phba->pport->fc_myDID,
                                 (uint8_t *)&phba->pport->fc_sparam,
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h

index 843dd73004da0239089442a8591bd2c37e141f16..944b32ca493144f3fc66d625cc2f960ad484373d 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -24,6 +24,7 @@ typedef int (*node_filter)(struct lpfc_nodelist *, void *);
  
  struct fc_rport;
  struct fc_frame_header;
+struct lpfc_nvmet_rcv_ctx;
  void lpfc_down_link(struct lpfc_hba *, LPFC_MBOXQ_t *);
  void lpfc_sli_read_link_ste(struct lpfc_hba *);
  void lpfc_dump_mem(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t, uint16_t);
@@ -99,7 +100,7 @@ void lpfc_issue_reg_vpi(struct lpfc_hba *, struct lpfc_vport *);
  
  int lpfc_check_sli_ndlp(struct lpfc_hba *, struct lpfc_sli_ring *,
                         struct lpfc_iocbq *, struct lpfc_nodelist *);
-void lpfc_nlp_init(struct lpfc_vport *, struct lpfc_nodelist *, uint32_t);
+struct lpfc_nodelist *lpfc_nlp_init(struct lpfc_vport *vport, uint32_t did);
  struct lpfc_nodelist *lpfc_nlp_get(struct lpfc_nodelist *);
  int  lpfc_nlp_put(struct lpfc_nodelist *);
  int  lpfc_nlp_not_used(struct lpfc_nodelist *ndlp);
@@ -245,6 +246,10 @@ struct hbq_dmabuf *lpfc_sli4_rb_alloc(struct lpfc_hba *);
  void lpfc_sli4_rb_free(struct lpfc_hba *, struct hbq_dmabuf *);
  struct rqb_dmabuf *lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba);
  void lpfc_sli4_nvmet_free(struct lpfc_hba *phba, struct rqb_dmabuf *dmab);
+void lpfc_nvmet_rq_post(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp,
+                       struct lpfc_dmabuf *mp);
+int lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
+                              struct fc_frame_header *fc_hdr);
  void lpfc_sli4_build_dflt_fcf_record(struct lpfc_hba *, struct fcf_record *,
                         uint16_t);
  int lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq,
@@ -302,6 +307,8 @@ int lpfc_sli_check_eratt(struct lpfc_hba *);
  void lpfc_sli_handle_slow_ring_event(struct lpfc_hba *,
                                     struct lpfc_sli_ring *, uint32_t);
  void lpfc_sli4_handle_received_buffer(struct lpfc_hba *, struct hbq_dmabuf *);
+void lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport,
+                            struct fc_frame_header *fc_hdr, bool aborted);
  void lpfc_sli_def_mbox_cmpl(struct lpfc_hba *, LPFC_MBOXQ_t *);
  void lpfc_sli4_unreg_rpi_cmpl_clr(struct lpfc_hba *, LPFC_MBOXQ_t *);
  int lpfc_sli_issue_iocb(struct lpfc_hba *, uint32_t,
@@ -384,7 +391,7 @@ void lpfc_free_sysfs_attr(struct lpfc_vport *);
  extern struct device_attribute *lpfc_hba_attrs[];
  extern struct device_attribute *lpfc_vport_attrs[];
  extern struct scsi_host_template lpfc_template;
-extern struct scsi_host_template lpfc_template_s3;
+extern struct scsi_host_template lpfc_template_no_hr;
  extern struct scsi_host_template lpfc_template_nvme;
  extern struct scsi_host_template lpfc_vport_template;
  extern struct fc_function_template lpfc_transport_functions;
@@ -554,3 +561,5 @@ void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba,
                                 struct lpfc_wcqe_complete *abts_cmpl);
  extern int lpfc_enable_nvmet_cnt;
  extern unsigned long long lpfc_enable_nvmet[];
+extern int lpfc_no_hba_reset_cnt;
+extern unsigned long lpfc_no_hba_reset[];
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c

index c22bb3f887e15b767c8cc53334389cba053c50a9..1487406aea778411476b1d95f6c454c790c559bb 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -537,19 +537,53 @@ lpfc_prep_node_fc4type(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type)
         }
  }
  
+static void
+lpfc_ns_rsp_audit_did(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type)
+{
+       struct lpfc_hba *phba = vport->phba;
+       struct lpfc_nodelist *ndlp = NULL;
+       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+
+       /*
+        * Audit one NameServer DID.  To conserve rpi's, skip our own DID and
+        * DIDs of other vports on this HBA unless cfg_peer_port_login is set.
+        */
+       if (Did != vport->fc_myDID &&
+           (!lpfc_find_vport_by_did(phba, Did) ||
+            vport->cfg_peer_port_login)) {
+               if (!phba->nvmet_support) {
+                       /* FCPI/NVMEI path. Process Did */
+                       lpfc_prep_node_fc4type(vport, Did, fc4_type);
+                       return;
+               }
+               /* NVMET path: only unmapped NVMEI nodes; flag those != Did. */
+               list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
+                       if (ndlp->nlp_type != NLP_NVME_INITIATOR ||
+                           ndlp->nlp_state != NLP_STE_UNMAPPED_NODE)
+                               continue;
+                       spin_lock_irq(shost->host_lock);
+                       if (ndlp->nlp_DID == Did)
+                               ndlp->nlp_flag &= ~NLP_NVMET_RECOV;
+                       else
+                               ndlp->nlp_flag |= NLP_NVMET_RECOV;
+                       spin_unlock_irq(shost->host_lock);
+               }
+       }
+}
+
  static int
  lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint8_t fc4_type,
             uint32_t Size)
  {
-       struct lpfc_hba  *phba = vport->phba;
         struct lpfc_sli_ct_request *Response =
                 (struct lpfc_sli_ct_request *) mp->virt;
-       struct lpfc_nodelist *ndlp = NULL;
         struct lpfc_dmabuf *mlast, *next_mp;
         uint32_t *ctptr = (uint32_t *) & Response->un.gid.PortType;
         uint32_t Did, CTentry;
         int Cnt;
         struct list_head head;
+       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+       struct lpfc_nodelist *ndlp = NULL;
  
         lpfc_set_disctmo(vport);
         vport->num_disc_nodes = 0;
@@ -574,19 +608,7 @@ lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint8_t fc4_type,
                         /* Get next DID from NameServer List */
                         CTentry = *ctptr++;
                         Did = ((be32_to_cpu(CTentry)) & Mask_DID);
-
-                       ndlp = NULL;
-
-                       /*
-                        * Check for rscn processing or not
-                        * To conserve rpi's, filter out addresses for other
-                        * vports on the same physical HBAs.
-                        */
-                       if ((Did != vport->fc_myDID) &&
-                           ((lpfc_find_vport_by_did(phba, Did) == NULL) ||
-                            vport->cfg_peer_port_login))
-                               lpfc_prep_node_fc4type(vport, Did, fc4_type);
-
+                       lpfc_ns_rsp_audit_did(vport, Did, fc4_type);
                         if (CTentry & (cpu_to_be32(SLI_CT_LAST_ENTRY)))
                                 goto nsout1;
  
@@ -596,6 +618,22 @@ lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint8_t fc4_type,
  
         }
  
+       /* All GID_FT entries processed.  If the driver is running in
+        * target mode, put impacted nodes into recovery and drop
+        * the RPI to flush outstanding IO.
+        */
+       if (vport->phba->nvmet_support) {
+               list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
+                       if (!(ndlp->nlp_flag & NLP_NVMET_RECOV))
+                               continue;
+                       lpfc_disc_state_machine(vport, ndlp, NULL,
+                                               NLP_EVT_DEVICE_RECOVERY);
+                       spin_lock_irq(shost->host_lock);
+                       ndlp->nlp_flag &= ~NLP_NVMET_RECOV;
+                       spin_unlock_irq(shost->host_lock);
+               }
+       }
+
  nsout1:
         list_del(&head);
         return 0;
@@ -939,8 +977,8 @@ lpfc_cmpl_ct_cmd_gft_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                                          "FC4 x%08x, Data: x%08x x%08x\n",
                                          ndlp, did, ndlp->nlp_fc4_type,
                                          FC_TYPE_FCP, FC_TYPE_NVME);
+                       ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
                 }
-               ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
                 lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE);
                 lpfc_issue_els_prli(vport, ndlp, 0);
         } else
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c

index 9f4798e9d9380dab26d1a506fcf5fe1c43dd7493..fce549a91911c197d8e616bce9e0c13cb17de535 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -745,73 +745,102 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
  {
         struct lpfc_hba   *phba = vport->phba;
         struct lpfc_nvmet_tgtport *tgtp;
+       struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp;
         int len = 0;
+       int cnt;
  
         if (phba->nvmet_support) {
                 if (!phba->targetport)
                         return len;
                 tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
-               len += snprintf(buf+len, size-len,
+               len += snprintf(buf + len, size - len,
                                 "\nNVME Targetport Statistics\n");
  
-               len += snprintf(buf+len, size-len,
+               len += snprintf(buf + len, size - len,
                                 "LS: Rcv %08x Drop %08x Abort %08x\n",
                                 atomic_read(&tgtp->rcv_ls_req_in),
                                 atomic_read(&tgtp->rcv_ls_req_drop),
                                 atomic_read(&tgtp->xmt_ls_abort));
                 if (atomic_read(&tgtp->rcv_ls_req_in) !=
                     atomic_read(&tgtp->rcv_ls_req_out)) {
-                       len += snprintf(buf+len, size-len,
+                       len += snprintf(buf + len, size - len,
                                         "Rcv LS: in %08x != out %08x\n",
                                         atomic_read(&tgtp->rcv_ls_req_in),
                                         atomic_read(&tgtp->rcv_ls_req_out));
                 }
  
-               len += snprintf(buf+len, size-len,
+               len += snprintf(buf + len, size - len,
                                 "LS: Xmt %08x Drop %08x Cmpl %08x Err %08x\n",
                                 atomic_read(&tgtp->xmt_ls_rsp),
                                 atomic_read(&tgtp->xmt_ls_drop),
                                 atomic_read(&tgtp->xmt_ls_rsp_cmpl),
                                 atomic_read(&tgtp->xmt_ls_rsp_error));
  
-               len += snprintf(buf+len, size-len,
+               len += snprintf(buf + len, size - len,
                                 "FCP: Rcv %08x Drop %08x\n",
                                 atomic_read(&tgtp->rcv_fcp_cmd_in),
                                 atomic_read(&tgtp->rcv_fcp_cmd_drop));
  
                 if (atomic_read(&tgtp->rcv_fcp_cmd_in) !=
                     atomic_read(&tgtp->rcv_fcp_cmd_out)) {
-                       len += snprintf(buf+len, size-len,
+                       len += snprintf(buf + len, size - len,
                                         "Rcv FCP: in %08x != out %08x\n",
                                         atomic_read(&tgtp->rcv_fcp_cmd_in),
                                         atomic_read(&tgtp->rcv_fcp_cmd_out));
                 }
  
-               len += snprintf(buf+len, size-len,
-                               "FCP Rsp: read %08x readrsp %08x write %08x rsp %08x\n",
+               len += snprintf(buf + len, size - len,
+                               "FCP Rsp: read %08x readrsp %08x "
+                               "write %08x rsp %08x\n",
                                 atomic_read(&tgtp->xmt_fcp_read),
                                 atomic_read(&tgtp->xmt_fcp_read_rsp),
                                 atomic_read(&tgtp->xmt_fcp_write),
                                 atomic_read(&tgtp->xmt_fcp_rsp));
  
-               len += snprintf(buf+len, size-len,
+               len += snprintf(buf + len, size - len,
                                 "FCP Rsp: abort %08x drop %08x\n",
                                 atomic_read(&tgtp->xmt_fcp_abort),
                                 atomic_read(&tgtp->xmt_fcp_drop));
  
-               len += snprintf(buf+len, size-len,
+               len += snprintf(buf + len, size - len,
                                 "FCP Rsp Cmpl: %08x err %08x drop %08x\n",
                                 atomic_read(&tgtp->xmt_fcp_rsp_cmpl),
                                 atomic_read(&tgtp->xmt_fcp_rsp_error),
                                 atomic_read(&tgtp->xmt_fcp_rsp_drop));
  
-               len += snprintf(buf+len, size-len,
+               len += snprintf(buf + len, size - len,
                                 "ABORT: Xmt %08x Err %08x Cmpl %08x",
                                 atomic_read(&tgtp->xmt_abort_rsp),
                                 atomic_read(&tgtp->xmt_abort_rsp_error),
                                 atomic_read(&tgtp->xmt_abort_cmpl));
  
-               len +=  snprintf(buf+len, size-len, "\n");
+               len +=  snprintf(buf + len, size - len, "\n");
+
+               cnt = 0;
+               spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+               list_for_each_entry_safe(ctxp, next_ctxp,
+                               &phba->sli4_hba.lpfc_abts_nvmet_ctx_list,
+                               list) {
+                       cnt++;
+               }
+               spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+               if (cnt) {
+                       len += snprintf(buf + len, size - len,
+                                       "ABORT: %d ctx entries\n", cnt);
+                       spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+                       list_for_each_entry_safe(ctxp, next_ctxp,
+                                   &phba->sli4_hba.lpfc_abts_nvmet_ctx_list,
+                                   list) {
+                               if (len >= (size - LPFC_DEBUG_OUT_LINE_SZ))
+                                       break;
+                               len += snprintf(buf + len, size - len,
+                                               "Entry: oxid %x state %x "
+                                               "flag %x\n",
+                                               ctxp->oxid, ctxp->state,
+                                               ctxp->flag);
+                       }
+                       spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+               }
         } else {
                 if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
                         return len;
@@ -3128,8 +3157,6 @@ __lpfc_idiag_print_rqpair(struct lpfc_queue *qp, struct lpfc_queue *datqp,
                         datqp->queue_id, datqp->entry_count,
                         datqp->entry_size, datqp->host_index,
                         datqp->hba_index);
-       len +=  snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n");
-
         return len;
  }
  
@@ -3653,17 +3680,6 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf,
                         idiag.ptr_private = phba->sli4_hba.nvmels_cq;
                         goto pass_check;
                 }
-               /* NVME LS complete queue */
-               if (phba->sli4_hba.nvmels_cq &&
-                   phba->sli4_hba.nvmels_cq->queue_id == queid) {
-                       /* Sanity check */
-                       rc = lpfc_idiag_que_param_check(
-                                       phba->sli4_hba.nvmels_cq, index, count);
-                       if (rc)
-                               goto error_out;
-                       idiag.ptr_private = phba->sli4_hba.nvmels_cq;
-                       goto pass_check;
-               }
                 /* FCP complete queue */
                 if (phba->sli4_hba.fcp_cq) {
                         for (qidx = 0; qidx < phba->cfg_fcp_io_channel;
@@ -3738,17 +3754,6 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf,
                         idiag.ptr_private = phba->sli4_hba.nvmels_wq;
                         goto pass_check;
                 }
-               /* NVME LS work queue */
-               if (phba->sli4_hba.nvmels_wq &&
-                   phba->sli4_hba.nvmels_wq->queue_id == queid) {
-                       /* Sanity check */
-                       rc = lpfc_idiag_que_param_check(
-                                       phba->sli4_hba.nvmels_wq, index, count);
-                       if (rc)
-                               goto error_out;
-                       idiag.ptr_private = phba->sli4_hba.nvmels_wq;
-                       goto pass_check;
-               }
                 /* FCP work queue */
                 if (phba->sli4_hba.fcp_wq) {
                         for (qidx = 0; qidx < phba->cfg_fcp_io_channel;
@@ -5722,10 +5727,8 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport)
  #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
         struct lpfc_hba   *phba = vport->phba;
  
-       if (vport->disc_trc) {
-               kfree(vport->disc_trc);
-               vport->disc_trc = NULL;
-       }
+       kfree(vport->disc_trc);
+       vport->disc_trc = NULL;
  
         debugfs_remove(vport->debug_disc_trc); /* discovery_trace */
         vport->debug_disc_trc = NULL;
@@ -5792,10 +5795,8 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport)
                 debugfs_remove(phba->debug_readRef); /* readRef */
                 phba->debug_readRef = NULL;
  
-               if (phba->slow_ring_trc) {
-                       kfree(phba->slow_ring_trc);
-                       phba->slow_ring_trc = NULL;
-               }
+               kfree(phba->slow_ring_trc);
+               phba->slow_ring_trc = NULL;
  
                 /* slow_ring_trace */
                 debugfs_remove(phba->debug_slow_ring_trc);
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h

index c05f56c3023f1edb2ace78b50de25ef99f4b829e..7b7d314af0e0878f4a37eef337ebf29de84a3d1c 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_debugfs.h
+++ b/drivers/scsi/lpfc/lpfc_debugfs.h
@@ -44,14 +44,6 @@
  /* hbqinfo output buffer size */
  #define LPFC_HBQINFO_SIZE 8192
  
-enum {
-       DUMP_FCP,
-       DUMP_NVME,
-       DUMP_MBX,
-       DUMP_ELS,
-       DUMP_NVMELS,
-};
-
  /* nvmestat output buffer size */
  #define LPFC_NVMESTAT_SIZE 8192
  #define LPFC_NVMEKTIME_SIZE 8192
@@ -283,8 +275,22 @@ struct lpfc_idiag {
         struct lpfc_idiag_offset offset;
         void *ptr_private;
  };
+
+#else
+
+#define lpfc_nvmeio_data(phba, fmt, arg...) \
+       no_printk(fmt, ##arg)
+
  #endif
  
+enum {
+       DUMP_FCP,
+       DUMP_NVME,
+       DUMP_MBX,
+       DUMP_ELS,
+       DUMP_NVMELS,
+};
+
  /* Mask for discovery_trace */
  #define LPFC_DISC_TRC_ELS_CMD          0x1     /* Trace ELS commands */
  #define LPFC_DISC_TRC_ELS_RSP          0x2     /* Trace ELS response */
diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h

index f4ff99d95db3433a84851a33d81737ac35452079..9d5a379f4b15734a484c643fc9cc81b0ba8b33e9 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_disc.h
+++ b/drivers/scsi/lpfc/lpfc_disc.h
@@ -157,6 +157,7 @@ struct lpfc_node_rrq {
  #define NLP_LOGO_SND       0x00000100  /* sent LOGO request for this entry */
  #define NLP_RNID_SND       0x00000400  /* sent RNID request for this entry */
  #define NLP_ELS_SND_MASK   0x000007e0  /* sent ELS request for this entry */
+#define NLP_NVMET_RECOV    0x00001000   /* NVMET auditing node for recovery. */
  #define NLP_DEFER_RM       0x00010000  /* Remove this ndlp if no longer used */
  #define NLP_DELAY_TMO      0x00020000  /* delay timeout is running for node */
  #define NLP_NPR_2B_DISC    0x00040000  /* node is included in num_disc_nodes */
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c

index 2d26440e6f2fe6b3d9d2bc457c112397601409cf..67827e397431abe8b55955d9cc6497cbf680c054 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -603,9 +603,11 @@ lpfc_check_clean_addr_bit(struct lpfc_vport *vport,
                 memcmp(&vport->fabric_portname, &sp->portName,
                         sizeof(struct lpfc_name)) ||
                 memcmp(&vport->fabric_nodename, &sp->nodeName,
-                       sizeof(struct lpfc_name)))
+                       sizeof(struct lpfc_name)) ||
+               (vport->vport_flag & FAWWPN_PARAM_CHG)) {
                 fabric_param_changed = 1;
-
+               vport->vport_flag &= ~FAWWPN_PARAM_CHG;
+       }
         /*
          * Word 1 Bit 31 in common service parameter is overloaded.
          * Word 1 Bit 31 in FLOGI request is multiple NPort request
@@ -895,10 +897,9 @@ lpfc_cmpl_els_flogi_nport(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                          * Cannot find existing Fabric ndlp, so allocate a
                          * new one
                          */
-                       ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+                       ndlp = lpfc_nlp_init(vport, PT2PT_RemoteID);
                         if (!ndlp)
                                 goto fail;
-                       lpfc_nlp_init(vport, ndlp, PT2PT_RemoteID);
                 } else if (!NLP_CHK_NODE_ACT(ndlp)) {
                         ndlp = lpfc_enable_node(vport, ndlp,
                                                 NLP_STE_UNUSED_NODE);
@@ -1364,7 +1365,6 @@ lpfc_els_abort_flogi(struct lpfc_hba *phba)
  int
  lpfc_initial_flogi(struct lpfc_vport *vport)
  {
-       struct lpfc_hba *phba = vport->phba;
         struct lpfc_nodelist *ndlp;
  
         vport->port_state = LPFC_FLOGI;
@@ -1374,10 +1374,9 @@ lpfc_initial_flogi(struct lpfc_vport *vport)
         ndlp = lpfc_findnode_did(vport, Fabric_DID);
         if (!ndlp) {
                 /* Cannot find existing Fabric ndlp, so allocate a new one */
-               ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+               ndlp = lpfc_nlp_init(vport, Fabric_DID);
                 if (!ndlp)
                         return 0;
-               lpfc_nlp_init(vport, ndlp, Fabric_DID);
                 /* Set the node type */
                 ndlp->nlp_type |= NLP_FABRIC;
                 /* Put ndlp onto node list */
@@ -1418,17 +1417,15 @@ lpfc_initial_flogi(struct lpfc_vport *vport)
  int
  lpfc_initial_fdisc(struct lpfc_vport *vport)
  {
-       struct lpfc_hba *phba = vport->phba;
         struct lpfc_nodelist *ndlp;
  
         /* First look for the Fabric ndlp */
         ndlp = lpfc_findnode_did(vport, Fabric_DID);
         if (!ndlp) {
                 /* Cannot find existing Fabric ndlp, so allocate a new one */
-               ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+               ndlp = lpfc_nlp_init(vport, Fabric_DID);
                 if (!ndlp)
                         return 0;
-               lpfc_nlp_init(vport, ndlp, Fabric_DID);
                 /* Put ndlp onto node list */
                 lpfc_enqueue_node(vport, ndlp);
         } else if (!NLP_CHK_NODE_ACT(ndlp)) {
@@ -1564,14 +1561,13 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp,
                                              phba->active_rrq_pool);
                         return ndlp;
                 }
-               new_ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_ATOMIC);
+               new_ndlp = lpfc_nlp_init(vport, ndlp->nlp_DID);
                 if (!new_ndlp) {
                         if (active_rrqs_xri_bitmap)
                                 mempool_free(active_rrqs_xri_bitmap,
                                              phba->active_rrq_pool);
                         return ndlp;
                 }
-               lpfc_nlp_init(vport, new_ndlp, ndlp->nlp_DID);
         } else if (!NLP_CHK_NODE_ACT(new_ndlp)) {
                 rc = memcmp(&ndlp->nlp_portname, name,
                             sizeof(struct lpfc_name));
@@ -2845,10 +2841,9 @@ lpfc_issue_els_scr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry)
  
         ndlp = lpfc_findnode_did(vport, nportid);
         if (!ndlp) {
-               ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+               ndlp = lpfc_nlp_init(vport, nportid);
                 if (!ndlp)
                         return 1;
-               lpfc_nlp_init(vport, ndlp, nportid);
                 lpfc_enqueue_node(vport, ndlp);
         } else if (!NLP_CHK_NODE_ACT(ndlp)) {
                 ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE);
@@ -2938,10 +2933,9 @@ lpfc_issue_els_farpr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry)
  
         ndlp = lpfc_findnode_did(vport, nportid);
         if (!ndlp) {
-               ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+               ndlp = lpfc_nlp_init(vport, nportid);
                 if (!ndlp)
                         return 1;
-               lpfc_nlp_init(vport, ndlp, nportid);
                 lpfc_enqueue_node(vport, ndlp);
         } else if (!NLP_CHK_NODE_ACT(ndlp)) {
                 ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE);
@@ -4403,7 +4397,7 @@ lpfc_els_rsp_prli_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb,
         pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
         memset(pcmd, 0, cmdsize);
  
-       *((uint32_t *) (pcmd)) = (ELS_CMD_ACC | (ELS_CMD_PRLI & ~ELS_RSP_MASK));
+       *((uint32_t *)(pcmd)) = elsrspcmd;
         pcmd += sizeof(uint32_t);
  
         /* For PRLI, remainder of payload is PRLI parameter page */
@@ -5177,15 +5171,15 @@ lpfc_rdp_res_speed(struct fc_rdp_port_speed_desc *desc, struct lpfc_hba *phba)
  
  static uint32_t
  lpfc_rdp_res_diag_port_names(struct fc_rdp_port_name_desc *desc,
-               struct lpfc_hba *phba)
+               struct lpfc_vport *vport)
  {
  
         desc->tag = cpu_to_be32(RDP_PORT_NAMES_DESC_TAG);
  
-       memcpy(desc->port_names.wwnn, phba->wwnn,
+       memcpy(desc->port_names.wwnn, &vport->fc_nodename,
                         sizeof(desc->port_names.wwnn));
  
-       memcpy(desc->port_names.wwpn, phba->wwpn,
+       memcpy(desc->port_names.wwpn, &vport->fc_portname,
                         sizeof(desc->port_names.wwpn));
  
         desc->length = cpu_to_be32(sizeof(desc->port_names));
@@ -5279,7 +5273,7 @@ lpfc_els_rdp_cmpl(struct lpfc_hba *phba, struct lpfc_rdp_context *rdp_context,
         len += lpfc_rdp_res_link_error((struct fc_rdp_link_error_status_desc *)
                                        (len + pcmd), &rdp_context->link_stat);
         len += lpfc_rdp_res_diag_port_names((struct fc_rdp_port_name_desc *)
-                                            (len + pcmd), phba);
+                                            (len + pcmd), vport);
         len += lpfc_rdp_res_attach_port_names((struct fc_rdp_port_name_desc *)
                                         (len + pcmd), vport, ndlp);
         len += lpfc_rdp_res_fec_desc((struct fc_fec_rdp_desc *)(len + pcmd),
@@ -5867,8 +5861,11 @@ lpfc_rscn_recovery_check(struct lpfc_vport *vport)
                     (ndlp->nlp_state == NLP_STE_UNUSED_NODE) ||
                     !lpfc_rscn_payload_check(vport, ndlp->nlp_DID))
                         continue;
+
+               /* NVME Target mode does not do RSCN Recovery. */
                 if (vport->phba->nvmet_support)
                         continue;
+
                 lpfc_disc_state_machine(vport, ndlp, NULL,
                                         NLP_EVT_DEVICE_RECOVERY);
                 lpfc_cancel_retry_delay_tmo(vport, ndlp);
@@ -6133,7 +6130,6 @@ int
  lpfc_els_handle_rscn(struct lpfc_vport *vport)
  {
         struct lpfc_nodelist *ndlp;
-       struct lpfc_hba *phba = vport->phba;
  
         /* Ignore RSCN if the port is being torn down. */
         if (vport->load_flag & FC_UNLOADING) {
@@ -6157,22 +6153,16 @@ lpfc_els_handle_rscn(struct lpfc_vport *vport)
         ndlp = lpfc_findnode_did(vport, NameServer_DID);
         if (ndlp && NLP_CHK_NODE_ACT(ndlp)
             && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) {
-               /* Good ndlp, issue CT Request to NameServer */
+               /* Good ndlp, issue CT Request to NameServer.  Need to
+                * know how many gidfts were issued.  If none, then just
+                * flush the RSCN.  Otherwise, the outstanding requests
+                * need to complete.
+                */
                 vport->gidft_inp = 0;
-               if (lpfc_issue_gidft(vport) == 0)
-                       /* Wait for NameServer query cmpl before we can
-                        * continue
-                        */
+               if (lpfc_issue_gidft(vport) > 0)
                         return 1;
         } else {
-               /* If login to NameServer does not exist, issue one */
-               /* Good status, issue PLOGI to NameServer */
-               ndlp = lpfc_findnode_did(vport, NameServer_DID);
-               if (ndlp && NLP_CHK_NODE_ACT(ndlp))
-                       /* Wait for NameServer login cmpl before we can
-                          continue */
-                       return 1;
-
+               /* Nameserver login in question.  Revalidate. */
                 if (ndlp) {
                         ndlp = lpfc_enable_node(vport, ndlp,
                                                 NLP_STE_PLOGI_ISSUE);
@@ -6182,12 +6172,11 @@ lpfc_els_handle_rscn(struct lpfc_vport *vport)
                         }
                         ndlp->nlp_prev_state = NLP_STE_UNUSED_NODE;
                 } else {
-                       ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+                       ndlp = lpfc_nlp_init(vport, NameServer_DID);
                         if (!ndlp) {
                                 lpfc_els_flush_rscn(vport);
                                 return 0;
                         }
-                       lpfc_nlp_init(vport, ndlp, NameServer_DID);
                         ndlp->nlp_prev_state = ndlp->nlp_state;
                         lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE);
                 }
@@ -7746,11 +7735,9 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
         ndlp = lpfc_findnode_did(vport, did);
         if (!ndlp) {
                 /* Cannot find existing Fabric ndlp, so allocate a new one */
-               ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+               ndlp = lpfc_nlp_init(vport, did);
                 if (!ndlp)
                         goto dropit;
-
-               lpfc_nlp_init(vport, ndlp, did);
                 lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
                 newnode = 1;
                 if ((did & Fabric_DID_MASK) == Fabric_DID_MASK)
@@ -7968,7 +7955,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
                         did, vport->port_state, ndlp->nlp_flag);
  
                 phba->fc_stat.elsRcvPRLI++;
-               if (vport->port_state < LPFC_DISC_AUTH) {
+               if ((vport->port_state < LPFC_DISC_AUTH) &&
+                   (vport->fc_flag & FC_FABRIC)) {
                         rjt_err = LSRJT_UNABLE_TPC;
                         rjt_exp = LSEXP_NOTHING_MORE;
                         break;
@@ -8192,7 +8180,6 @@ lpfc_els_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
  static void
  lpfc_start_fdmi(struct lpfc_vport *vport)
  {
-       struct lpfc_hba *phba = vport->phba;
         struct lpfc_nodelist *ndlp;
  
         /* If this is the first time, allocate an ndlp and initialize
@@ -8201,9 +8188,8 @@ lpfc_start_fdmi(struct lpfc_vport *vport)
          */
         ndlp = lpfc_findnode_did(vport, FDMI_DID);
         if (!ndlp) {
-               ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+               ndlp = lpfc_nlp_init(vport, FDMI_DID);
                 if (ndlp) {
-                       lpfc_nlp_init(vport, ndlp, FDMI_DID);
                         ndlp->nlp_type |= NLP_FABRIC;
                 } else {
                         return;
@@ -8256,7 +8242,7 @@ lpfc_do_scr_ns_plogi(struct lpfc_hba *phba, struct lpfc_vport *vport)
  
         ndlp = lpfc_findnode_did(vport, NameServer_DID);
         if (!ndlp) {
-               ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+               ndlp = lpfc_nlp_init(vport, NameServer_DID);
                 if (!ndlp) {
                         if (phba->fc_topology == LPFC_TOPOLOGY_LOOP) {
                                 lpfc_disc_start(vport);
@@ -8267,7 +8253,6 @@ lpfc_do_scr_ns_plogi(struct lpfc_hba *phba, struct lpfc_vport *vport)
                                          "0251 NameServer login: no memory\n");
                         return;
                 }
-               lpfc_nlp_init(vport, ndlp, NameServer_DID);
         } else if (!NLP_CHK_NODE_ACT(ndlp)) {
                 ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE);
                 if (!ndlp) {
@@ -8371,11 +8356,17 @@ lpfc_cmpl_reg_new_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
                         spin_lock_irq(shost->host_lock);
                         vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
                         spin_unlock_irq(shost->host_lock);
-                       if (vport->port_type == LPFC_PHYSICAL_PORT
-                               && !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG))
-                               lpfc_issue_init_vfi(vport);
-                       else
+                       if (mb->mbxStatus == MBX_NOT_FINISHED)
+                               break;
+                       if ((vport->port_type == LPFC_PHYSICAL_PORT) &&
+                           !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG)) {
+                               if (phba->sli_rev == LPFC_SLI_REV4)
+                                       lpfc_issue_init_vfi(vport);
+                               else
+                                       lpfc_initial_flogi(vport);
+                       } else {
                                 lpfc_initial_fdisc(vport);
+                       }
                         break;
                 }
         } else {
@@ -8764,7 +8755,7 @@ lpfc_issue_els_fdisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
         pcmd += sizeof(uint32_t); /* Node Name */
         pcmd += sizeof(uint32_t); /* Node Name */
         memcpy(pcmd, &vport->fc_nodename, 8);
-
+       memset(sp->un.vendorVersion, 0, sizeof(sp->un.vendorVersion));
         lpfc_set_disctmo(vport);
  
         phba->fc_stat.elsXmitFDISC++;
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c

index 194a14d5f8a9821bf7c3ddc562049db294ce3e07..0482c558033104d3a44f75290750e1acdc3ee0d9 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -313,8 +313,7 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
                                  ndlp->nlp_state, ndlp->nlp_rpi);
         }
  
-       if (!(vport->load_flag & FC_UNLOADING) &&
-           !(ndlp->nlp_flag & NLP_DELAY_TMO) &&
+       if (!(ndlp->nlp_flag & NLP_DELAY_TMO) &&
             !(ndlp->nlp_flag & NLP_NPR_2B_DISC) &&
             (ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) &&
             (ndlp->nlp_state != NLP_STE_REG_LOGIN_ISSUE) &&
@@ -641,6 +640,8 @@ lpfc_work_done(struct lpfc_hba *phba)
                         lpfc_handle_rrq_active(phba);
                 if (phba->hba_flag & FCP_XRI_ABORT_EVENT)
                         lpfc_sli4_fcp_xri_abort_event_proc(phba);
+               if (phba->hba_flag & NVME_XRI_ABORT_EVENT)
+                       lpfc_sli4_nvme_xri_abort_event_proc(phba);
                 if (phba->hba_flag & ELS_XRI_ABORT_EVENT)
                         lpfc_sli4_els_xri_abort_event_proc(phba);
                 if (phba->hba_flag & ASYNC_EVENT)
@@ -2173,7 +2174,7 @@ lpfc_mbx_cmpl_fcf_scan_read_fcf_rec(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
         uint32_t boot_flag, addr_mode;
         uint16_t fcf_index, next_fcf_index;
         struct lpfc_fcf_rec *fcf_rec = NULL;
-       uint16_t vlan_id;
+       uint16_t vlan_id = LPFC_FCOE_NULL_VID;
         bool select_new_fcf;
         int rc;
  
@@ -3001,6 +3002,7 @@ lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
         MAILBOX_t *mb = &pmb->u.mb;
         struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) pmb->context1;
         struct lpfc_vport  *vport = pmb->vport;
+       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
         struct serv_parm *sp = &vport->fc_sparam;
         uint32_t ed_tov;
  
@@ -3030,6 +3032,7 @@ lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
         }
  
         lpfc_update_vport_wwn(vport);
+       fc_host_port_name(shost) = wwn_to_u64(vport->fc_portname.u.wwn);
         if (vport->port_type == LPFC_PHYSICAL_PORT) {
                 memcpy(&phba->wwnn, &vport->fc_nodename, sizeof(phba->wwnn));
                 memcpy(&phba->wwpn, &vport->fc_portname, sizeof(phba->wwnn));
@@ -3308,6 +3311,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
         struct lpfc_sli_ring *pring;
         MAILBOX_t *mb = &pmb->u.mb;
         struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
+       uint8_t attn_type;
  
         /* Unblock ELS traffic */
         pring = lpfc_phba_elsring(phba);
@@ -3324,6 +3328,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
         }
  
         la = (struct lpfc_mbx_read_top *) &pmb->u.mb.un.varReadTop;
+       attn_type = bf_get(lpfc_mbx_read_top_att_type, la);
  
         memcpy(&phba->alpa_map[0], mp->virt, 128);
  
@@ -3336,7 +3341,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
  
         if (phba->fc_eventTag <= la->eventTag) {
                 phba->fc_stat.LinkMultiEvent++;
-               if (bf_get(lpfc_mbx_read_top_att_type, la) == LPFC_ATT_LINK_UP)
+               if (attn_type == LPFC_ATT_LINK_UP)
                         if (phba->fc_eventTag != 0)
                                 lpfc_linkdown(phba);
         }
@@ -3352,7 +3357,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
         }
  
         phba->link_events++;
-       if ((bf_get(lpfc_mbx_read_top_att_type, la) == LPFC_ATT_LINK_UP) &&
+       if ((attn_type == LPFC_ATT_LINK_UP) &&
             !(phba->sli.sli_flag & LPFC_MENLO_MAINT)) {
                 phba->fc_stat.LinkUp++;
                 if (phba->link_flag & LS_LOOPBACK_MODE) {
@@ -3378,8 +3383,8 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
                                         phba->wait_4_mlo_maint_flg);
                 }
                 lpfc_mbx_process_link_up(phba, la);
-       } else if (bf_get(lpfc_mbx_read_top_att_type, la) ==
-                  LPFC_ATT_LINK_DOWN) {
+       } else if (attn_type == LPFC_ATT_LINK_DOWN ||
+                  attn_type == LPFC_ATT_UNEXP_WWPN) {
                 phba->fc_stat.LinkDown++;
                 if (phba->link_flag & LS_LOOPBACK_MODE)
                         lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT,
@@ -3388,6 +3393,14 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
                                 "Data: x%x x%x x%x\n",
                                 la->eventTag, phba->fc_eventTag,
                                 phba->pport->port_state, vport->fc_flag);
+               else if (attn_type == LPFC_ATT_UNEXP_WWPN)
+                       lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT,
+                               "1313 Link Down UNEXP WWPN Event x%x received "
+                               "Data: x%x x%x x%x x%x x%x\n",
+                               la->eventTag, phba->fc_eventTag,
+                               phba->pport->port_state, vport->fc_flag,
+                               bf_get(lpfc_mbx_read_top_mm, la),
+                               bf_get(lpfc_mbx_read_top_fa, la));
                 else
                         lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT,
                                 "1305 Link Down Event x%x received "
@@ -3398,8 +3411,8 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
                                 bf_get(lpfc_mbx_read_top_fa, la));
                 lpfc_mbx_issue_link_down(phba);
         }
-       if ((phba->sli.sli_flag & LPFC_MENLO_MAINT) &&
-           ((bf_get(lpfc_mbx_read_top_att_type, la) == LPFC_ATT_LINK_UP))) {
+       if (phba->sli.sli_flag & LPFC_MENLO_MAINT &&
+           attn_type == LPFC_ATT_LINK_UP) {
                 if (phba->link_state != LPFC_LINK_DOWN) {
                         phba->fc_stat.LinkDown++;
                         lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT,
@@ -4020,9 +4033,11 @@ lpfc_register_remote_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                 rdata = rport->dd_data;
                 /* break the link before dropping the ref */
                 ndlp->rport = NULL;
-               if (rdata && rdata->pnode == ndlp)
-                       lpfc_nlp_put(ndlp);
-               rdata->pnode = NULL;
+               if (rdata) {
+                       if (rdata->pnode == ndlp)
+                               lpfc_nlp_put(ndlp);
+                       rdata->pnode = NULL;
+               }
                 /* drop reference for earlier registeration */
                 put_device(&rport->dev);
         }
@@ -4133,7 +4148,6 @@ lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                        int old_state, int new_state)
  {
         struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
-       struct lpfc_hba *phba = vport->phba;
  
         if (new_state == NLP_STE_UNMAPPED_NODE) {
                 ndlp->nlp_flag &= ~NLP_NODEV_REMOVE;
@@ -4152,14 +4166,14 @@ lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                         lpfc_unregister_remote_port(ndlp);
                 }
  
-               /* Notify the NVME transport of this rport's loss */
-               if (((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
-                    (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) &&
-                   (vport->phba->nvmet_support == 0) &&
-                   ((ndlp->nlp_fc4_type & NLP_FC4_NVME) ||
-                   (ndlp->nlp_DID == Fabric_DID))) {
+               /* Notify the NVME transport of this rport's loss on the
+                * Initiator.  For NVME Target, should upcall transport
+                * in the else clause when API available.
+                */
+               if (ndlp->nlp_fc4_type & NLP_FC4_NVME) {
                         vport->phba->nport_event_cnt++;
-                       lpfc_nvme_unregister_port(vport, ndlp);
+                       if (vport->phba->nvmet_support == 0)
+                               lpfc_nvme_unregister_port(vport, ndlp);
                 }
         }
  
@@ -4344,9 +4358,8 @@ lpfc_initialize_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
  {
         INIT_LIST_HEAD(&ndlp->els_retry_evt.evt_listp);
         INIT_LIST_HEAD(&ndlp->dev_loss_evt.evt_listp);
-       init_timer(&ndlp->nlp_delayfunc);
-       ndlp->nlp_delayfunc.function = lpfc_els_retry_delay;
-       ndlp->nlp_delayfunc.data = (unsigned long)ndlp;
+       setup_timer(&ndlp->nlp_delayfunc, lpfc_els_retry_delay,
+                       (unsigned long)ndlp);
         ndlp->nlp_DID = did;
         ndlp->vport = vport;
         ndlp->phba = vport->phba;
@@ -4366,10 +4379,17 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
         uint32_t did;
         unsigned long flags;
         unsigned long *active_rrqs_xri_bitmap = NULL;
+       int rpi = LPFC_RPI_ALLOC_ERROR;
  
         if (!ndlp)
                 return NULL;
  
+       if (phba->sli_rev == LPFC_SLI_REV4) {
+               rpi = lpfc_sli4_alloc_rpi(vport->phba);
+               if (rpi == LPFC_RPI_ALLOC_ERROR)
+                       return NULL;
+       }
+
         spin_lock_irqsave(&phba->ndlp_lock, flags);
         /* The ndlp should not be in memory free mode */
         if (NLP_CHK_FREE_REQ(ndlp)) {
@@ -4379,7 +4399,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                                 "usgmap:x%x refcnt:%d\n",
                                 (void *)ndlp, ndlp->nlp_usg_map,
                                 kref_read(&ndlp->kref));
-               return NULL;
+               goto free_rpi;
         }
         /* The ndlp should not already be in active mode */
         if (NLP_CHK_NODE_ACT(ndlp)) {
@@ -4389,7 +4409,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                                 "usgmap:x%x refcnt:%d\n",
                                 (void *)ndlp, ndlp->nlp_usg_map,
                                 kref_read(&ndlp->kref));
-               return NULL;
+               goto free_rpi;
         }
  
         /* Keep the original DID */
@@ -4407,7 +4427,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
  
         spin_unlock_irqrestore(&phba->ndlp_lock, flags);
         if (vport->phba->sli_rev == LPFC_SLI_REV4) {
-               ndlp->nlp_rpi = lpfc_sli4_alloc_rpi(vport->phba);
+               ndlp->nlp_rpi = rpi;
                 lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE,
                                  "0008 rpi:%x DID:%x flg:%x refcnt:%d "
                                  "map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID,
@@ -4424,6 +4444,11 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                 "node enable:       did:x%x",
                 ndlp->nlp_DID, 0, 0);
         return ndlp;
+
+free_rpi:
+       if (phba->sli_rev == LPFC_SLI_REV4)
+               lpfc_sli4_free_rpi(vport->phba, rpi);
+       return NULL;
  }
  
  void
@@ -4606,9 +4631,9 @@ lpfc_sli4_dequeue_nport_iocbs(struct lpfc_hba *phba,
                 pring = qp->pring;
                 if (!pring)
                         continue;
-               spin_lock_irq(&pring->ring_lock);
+               spin_lock(&pring->ring_lock);
                 __lpfc_dequeue_nport_iocbs(phba, ndlp, pring, dequeue_list);
-               spin_unlock_irq(&pring->ring_lock);
+               spin_unlock(&pring->ring_lock);
         }
         spin_unlock_irq(&phba->hbalock);
  }
@@ -5102,65 +5127,82 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did)
  
         ndlp = lpfc_findnode_did(vport, did);
         if (!ndlp) {
+               if (vport->phba->nvmet_support)
+                       return NULL;
                 if ((vport->fc_flag & FC_RSCN_MODE) != 0 &&
                     lpfc_rscn_payload_check(vport, did) == 0)
                         return NULL;
-               ndlp = (struct lpfc_nodelist *)
-                    mempool_alloc(vport->phba->nlp_mem_pool, GFP_KERNEL);
+               ndlp = lpfc_nlp_init(vport, did);
                 if (!ndlp)
                         return NULL;
-               lpfc_nlp_init(vport, ndlp, did);
                 lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
-               if (vport->phba->nvmet_support)
-                       return ndlp;
                 spin_lock_irq(shost->host_lock);
                 ndlp->nlp_flag |= NLP_NPR_2B_DISC;
                 spin_unlock_irq(shost->host_lock);
                 return ndlp;
         } else if (!NLP_CHK_NODE_ACT(ndlp)) {
+               if (vport->phba->nvmet_support)
+                       return NULL;
                 ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_NPR_NODE);
                 if (!ndlp)
                         return NULL;
-               if (vport->phba->nvmet_support)
-                       return ndlp;
                 spin_lock_irq(shost->host_lock);
                 ndlp->nlp_flag |= NLP_NPR_2B_DISC;
                 spin_unlock_irq(shost->host_lock);
                 return ndlp;
         }
  
+       /* The NVME Target does not want to actively manage an rport.
+        * The goal is to allow the target to reset its state and clear
+        * pending IO in preparation for the initiator to recover.
+        */
         if ((vport->fc_flag & FC_RSCN_MODE) &&
             !(vport->fc_flag & FC_NDISC_ACTIVE)) {
                 if (lpfc_rscn_payload_check(vport, did)) {
-                       /* If we've already received a PLOGI from this NPort
-                        * we don't need to try to discover it again.
-                        */
-                       if (ndlp->nlp_flag & NLP_RCV_PLOGI)
-                               return NULL;
  
                         /* Since this node is marked for discovery,
                          * delay timeout is not needed.
                          */
                         lpfc_cancel_retry_delay_tmo(vport, ndlp);
+
+                       /* NVME Target mode waits until rport is known to be
+                        * impacted by the RSCN before it transitions.  No
+                        * active management - just go to NPR provided the
+                        * node had a valid login.
+                        */
                         if (vport->phba->nvmet_support)
                                 return ndlp;
+
+                       /* If we've already received a PLOGI from this NPort
+                        * we don't need to try to discover it again.
+                        */
+                       if (ndlp->nlp_flag & NLP_RCV_PLOGI)
+                               return NULL;
+
                         spin_lock_irq(shost->host_lock);
                         ndlp->nlp_flag |= NLP_NPR_2B_DISC;
                         spin_unlock_irq(shost->host_lock);
                 } else
                         ndlp = NULL;
         } else {
-               /* If we've already received a PLOGI from this NPort,
-                * or we are already in the process of discovery on it,
-                * we don't need to try to discover it again.
+               /* If the initiator received a PLOGI from this NPort or if the
+                * initiator is already in the process of discovery on it,
+                * there's no need to try to discover it again.
                  */
                 if (ndlp->nlp_state == NLP_STE_ADISC_ISSUE ||
                     ndlp->nlp_state == NLP_STE_PLOGI_ISSUE ||
-                   ndlp->nlp_flag & NLP_RCV_PLOGI)
+                   (!vport->phba->nvmet_support &&
+                    ndlp->nlp_flag & NLP_RCV_PLOGI))
                         return NULL;
-               lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
+
                 if (vport->phba->nvmet_support)
                         return ndlp;
+
+               /* Moving to NPR state clears unsolicited flags and
+                * allows for rediscovery
+                */
+               lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
+
                 spin_lock_irq(shost->host_lock);
                 ndlp->nlp_flag |= NLP_NPR_2B_DISC;
                 spin_unlock_irq(shost->host_lock);
@@ -5885,16 +5927,31 @@ lpfc_find_vport_by_vpid(struct lpfc_hba *phba, uint16_t vpi)
         return NULL;
  }
  
-void
-lpfc_nlp_init(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
-             uint32_t did)
+struct lpfc_nodelist *
+lpfc_nlp_init(struct lpfc_vport *vport, uint32_t did)
  {
+       struct lpfc_nodelist *ndlp;
+       int rpi = LPFC_RPI_ALLOC_ERROR;
+
+       if (vport->phba->sli_rev == LPFC_SLI_REV4) {
+               rpi = lpfc_sli4_alloc_rpi(vport->phba);
+               if (rpi == LPFC_RPI_ALLOC_ERROR)
+                       return NULL;
+       }
+
+       ndlp = mempool_alloc(vport->phba->nlp_mem_pool, GFP_KERNEL);
+       if (!ndlp) {
+               if (vport->phba->sli_rev == LPFC_SLI_REV4)
+                       lpfc_sli4_free_rpi(vport->phba, rpi);
+               return NULL;
+       }
+
         memset(ndlp, 0, sizeof (struct lpfc_nodelist));
  
         lpfc_initialize_node(vport, ndlp, did);
         INIT_LIST_HEAD(&ndlp->nlp_listp);
         if (vport->phba->sli_rev == LPFC_SLI_REV4) {
-               ndlp->nlp_rpi = lpfc_sli4_alloc_rpi(vport->phba);
+               ndlp->nlp_rpi = rpi;
                 lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE,
                                  "0007 rpi:%x DID:%x flg:%x refcnt:%d "
                                  "map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID,
@@ -5916,7 +5973,7 @@ lpfc_nlp_init(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                 "node init:       did:x%x",
                 ndlp->nlp_DID, 0, 0);
  
-       return;
+       return ndlp;
  }
  
  /* This routine releases all resources associated with a specifc NPort's ndlp
diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h

index 15ca2148415055f9aba14e9bf048d92379594458..26a5647e057e6de3baf3f48ec25b32161ae2aeb9 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_hw.h
+++ b/drivers/scsi/lpfc/lpfc_hw.h
@@ -509,6 +509,8 @@ struct class_parms {
         uint8_t word3Reserved2; /* Fc Word 3, bit  0: 7 */
  };
  
+#define FAPWWN_KEY_VENDOR      0x42524344 /*valid vendor version fawwpn key*/
+
  struct serv_parm {     /* Structure is in Big Endian format */
         struct csp cmn;
         struct lpfc_name portName;
@@ -2885,6 +2887,7 @@ struct lpfc_mbx_read_top {
  #define LPFC_ATT_RESERVED    0x00      /* Reserved - attType */
  #define LPFC_ATT_LINK_UP     0x01      /* Link is up */
  #define LPFC_ATT_LINK_DOWN   0x02      /* Link is down */
+#define LPFC_ATT_UNEXP_WWPN  0x06      /* Link is down Unexpected WWPN */
         uint32_t word3;
  #define lpfc_mbx_read_top_alpa_granted_SHIFT   24
  #define lpfc_mbx_read_top_alpa_granted_MASK    0x000000FF
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h

index cfdb068a3bfccb76046fbc8d0cb000e80bc905ff..1d12f2be36bcccd336f8892aa0a20d3109d7150c 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -1001,7 +1001,7 @@ struct eq_delay_info {
         uint32_t phase;
         uint32_t delay_multi;
  };
-#define        LPFC_MAX_EQ_DELAY       8
+#define        LPFC_MAX_EQ_DELAY_EQID_CNT      8
  
  struct sgl_page_pairs {
         uint32_t sgl_pg0_addr_lo;
@@ -1070,7 +1070,7 @@ struct lpfc_mbx_modify_eq_delay {
         union {
                 struct {
                         uint32_t num_eq;
-                       struct eq_delay_info eq[LPFC_MAX_EQ_DELAY];
+                       struct eq_delay_info eq[LPFC_MAX_EQ_DELAY_EQID_CNT];
                 } request;
                 struct {
                         uint32_t word0;
@@ -2720,6 +2720,9 @@ struct lpfc_mbx_request_features {
  #define lpfc_mbx_rq_ftr_rq_ifip_SHIFT          7
  #define lpfc_mbx_rq_ftr_rq_ifip_MASK           0x00000001
  #define lpfc_mbx_rq_ftr_rq_ifip_WORD           word2
+#define lpfc_mbx_rq_ftr_rq_iaar_SHIFT          9
+#define lpfc_mbx_rq_ftr_rq_iaar_MASK           0x00000001
+#define lpfc_mbx_rq_ftr_rq_iaar_WORD           word2
  #define lpfc_mbx_rq_ftr_rq_perfh_SHIFT         11
  #define lpfc_mbx_rq_ftr_rq_perfh_MASK          0x00000001
  #define lpfc_mbx_rq_ftr_rq_perfh_WORD          word2
@@ -3853,6 +3856,7 @@ struct lpfc_acqe_fc_la {
  #define LPFC_FC_LA_TYPE_NO_HARD_ALPA   0x3
  #define LPFC_FC_LA_TYPE_MDS_LINK_DOWN  0x4
  #define LPFC_FC_LA_TYPE_MDS_LOOPBACK   0x5
+#define LPFC_FC_LA_TYPE_UNEXP_WWPN     0x6
  #define lpfc_acqe_fc_la_port_type_SHIFT                6
  #define lpfc_acqe_fc_la_port_type_MASK         0x00000003
  #define lpfc_acqe_fc_la_port_type_WORD         word0
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c

index 0ee429d773f394e826074e6580bbd73c212e26ba..90ae354a9c458c9151b03f6bdff444a248d0da0e 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -42,6 +42,10 @@
  #include <scsi/scsi_device.h>
  #include <scsi/scsi_host.h>
  #include <scsi/scsi_transport_fc.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/fc/fc_fs.h>
+
+#include <linux/nvme-fc-driver.h>
  
  #include "lpfc_hw4.h"
  #include "lpfc_hw.h"
@@ -52,6 +56,7 @@
  #include "lpfc.h"
  #include "lpfc_scsi.h"
  #include "lpfc_nvme.h"
+#include "lpfc_nvmet.h"
  #include "lpfc_logmsg.h"
  #include "lpfc_crtn.h"
  #include "lpfc_vport.h"
@@ -335,6 +340,9 @@ lpfc_dump_wakeup_param_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq)
  void
  lpfc_update_vport_wwn(struct lpfc_vport *vport)
  {
+       uint8_t vvvl = vport->fc_sparam.cmn.valid_vendor_ver_level;
+       u32 *fawwpn_key = (u32 *)&vport->fc_sparam.un.vendorVersion[0];
+
         /* If the soft name exists then update it using the service params */
         if (vport->phba->cfg_soft_wwnn)
                 u64_to_wwn(vport->phba->cfg_soft_wwnn,
@@ -354,9 +362,25 @@ lpfc_update_vport_wwn(struct lpfc_vport *vport)
                 memcpy(&vport->fc_sparam.nodeName, &vport->fc_nodename,
                         sizeof(struct lpfc_name));
  
-       if (vport->fc_portname.u.wwn[0] == 0 || vport->phba->cfg_soft_wwpn)
+       /*
+        * If the port name has changed, then set the Param changes flag
+        * to unreg the login
+        */
+       if (vport->fc_portname.u.wwn[0] != 0 &&
+               memcmp(&vport->fc_portname, &vport->fc_sparam.portName,
+                       sizeof(struct lpfc_name)))
+               vport->vport_flag |= FAWWPN_PARAM_CHG;
+
+       if (vport->fc_portname.u.wwn[0] == 0 ||
+           vport->phba->cfg_soft_wwpn ||
+           (vvvl == 1 && cpu_to_be32(*fawwpn_key) == FAPWWN_KEY_VENDOR) ||
+           vport->vport_flag & FAWWPN_SET) {
                 memcpy(&vport->fc_portname, &vport->fc_sparam.portName,
                         sizeof(struct lpfc_name));
+               vport->vport_flag &= ~FAWWPN_SET;
+               if (vvvl == 1 && cpu_to_be32(*fawwpn_key) == FAPWWN_KEY_VENDOR)
+                       vport->vport_flag |= FAWWPN_SET;
+       }
         else
                 memcpy(&vport->fc_sparam.portName, &vport->fc_portname,
                         sizeof(struct lpfc_name));
@@ -1003,8 +1027,10 @@ static int
  lpfc_hba_down_post_s4(struct lpfc_hba *phba)
  {
         struct lpfc_scsi_buf *psb, *psb_next;
+       struct lpfc_nvmet_rcv_ctx *ctxp, *ctxp_next;
         LIST_HEAD(aborts);
         LIST_HEAD(nvme_aborts);
+       LIST_HEAD(nvmet_aborts);
         unsigned long iflag = 0;
         struct lpfc_sglq *sglq_entry = NULL;
  
@@ -1027,16 +1053,10 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba)
         list_for_each_entry(sglq_entry,
                 &phba->sli4_hba.lpfc_abts_els_sgl_list, list)
                 sglq_entry->state = SGL_FREED;
-       list_for_each_entry(sglq_entry,
-               &phba->sli4_hba.lpfc_abts_nvmet_sgl_list, list)
-               sglq_entry->state = SGL_FREED;
  
         list_splice_init(&phba->sli4_hba.lpfc_abts_els_sgl_list,
                         &phba->sli4_hba.lpfc_els_sgl_list);
  
-       if (phba->sli4_hba.nvme_wq)
-               list_splice_init(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list,
-                                &phba->sli4_hba.lpfc_nvmet_sgl_list);
  
         spin_unlock(&phba->sli4_hba.sgl_list_lock);
         /* abts_scsi_buf_list_lock required because worker thread uses this
@@ -1053,6 +1073,8 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba)
                 spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
                 list_splice_init(&phba->sli4_hba.lpfc_abts_nvme_buf_list,
                                  &nvme_aborts);
+               list_splice_init(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list,
+                                &nvmet_aborts);
                 spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
         }
  
@@ -1066,13 +1088,20 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba)
         list_splice(&aborts, &phba->lpfc_scsi_buf_list_put);
         spin_unlock_irqrestore(&phba->scsi_buf_list_put_lock, iflag);
  
-       list_for_each_entry_safe(psb, psb_next, &nvme_aborts, list) {
-               psb->pCmd = NULL;
-               psb->status = IOSTAT_SUCCESS;
+       if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+               list_for_each_entry_safe(psb, psb_next, &nvme_aborts, list) {
+                       psb->pCmd = NULL;
+                       psb->status = IOSTAT_SUCCESS;
+               }
+               spin_lock_irqsave(&phba->nvme_buf_list_put_lock, iflag);
+               list_splice(&nvme_aborts, &phba->lpfc_nvme_buf_list_put);
+               spin_unlock_irqrestore(&phba->nvme_buf_list_put_lock, iflag);
+
+               list_for_each_entry_safe(ctxp, ctxp_next, &nvmet_aborts, list) {
+                       ctxp->flag &= ~(LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP);
+                       lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
+               }
         }
-       spin_lock_irqsave(&phba->nvme_buf_list_put_lock, iflag);
-       list_splice(&nvme_aborts, &phba->lpfc_nvme_buf_list_put);
-       spin_unlock_irqrestore(&phba->nvme_buf_list_put_lock, iflag);
  
         lpfc_sli4_free_sp_events(phba);
         return 0;
@@ -2874,34 +2903,38 @@ lpfc_sli4_node_prep(struct lpfc_hba *phba)
  {
         struct lpfc_nodelist  *ndlp, *next_ndlp;
         struct lpfc_vport **vports;
-       int i;
+       int i, rpi;
+       unsigned long flags;
  
         if (phba->sli_rev != LPFC_SLI_REV4)
                 return;
  
         vports = lpfc_create_vport_work_array(phba);
-       if (vports != NULL) {
-               for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
-                       if (vports[i]->load_flag & FC_UNLOADING)
-                               continue;
+       if (vports == NULL)
+               return;
  
-                       list_for_each_entry_safe(ndlp, next_ndlp,
-                                                &vports[i]->fc_nodes,
-                                                nlp_listp) {
-                               if (NLP_CHK_NODE_ACT(ndlp)) {
-                                       ndlp->nlp_rpi =
-                                               lpfc_sli4_alloc_rpi(phba);
-                                       lpfc_printf_vlog(ndlp->vport, KERN_INFO,
-                                                        LOG_NODE,
-                                                        "0009 rpi:%x DID:%x "
-                                                        "flg:%x map:%x %p\n",
-                                                        ndlp->nlp_rpi,
-                                                        ndlp->nlp_DID,
-                                                        ndlp->nlp_flag,
-                                                        ndlp->nlp_usg_map,
-                                                        ndlp);
-                               }
+       for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
+               if (vports[i]->load_flag & FC_UNLOADING)
+                       continue;
+
+               list_for_each_entry_safe(ndlp, next_ndlp,
+                                        &vports[i]->fc_nodes,
+                                        nlp_listp) {
+                       if (!NLP_CHK_NODE_ACT(ndlp))
+                               continue;
+                       rpi = lpfc_sli4_alloc_rpi(phba);
+                       if (rpi == LPFC_RPI_ALLOC_ERROR) {
+                               spin_lock_irqsave(&phba->ndlp_lock, flags);
+                               NLP_CLR_NODE_ACT(ndlp);
+                               spin_unlock_irqrestore(&phba->ndlp_lock, flags);
+                               continue;
                         }
+                       ndlp->nlp_rpi = rpi;
+                       lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE,
+                                        "0009 rpi:%x DID:%x "
+                                        "flg:%x map:%x %p\n", ndlp->nlp_rpi,
+                                        ndlp->nlp_DID, ndlp->nlp_flag,
+                                        ndlp->nlp_usg_map, ndlp);
                 }
         }
         lpfc_destroy_vport_work_array(phba, vports);
@@ -3508,6 +3541,12 @@ lpfc_sli4_scsi_sgl_update(struct lpfc_hba *phba)
         spin_unlock(&phba->scsi_buf_list_put_lock);
         spin_unlock_irq(&phba->scsi_buf_list_get_lock);
  
+       lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+                       "6060 Current allocated SCSI xri-sgl count:%d, "
+                       "maximum  SCSI xri count:%d (split:%d)\n",
+                       phba->sli4_hba.scsi_xri_cnt,
+                       phba->sli4_hba.scsi_xri_max, phba->cfg_xri_split);
+
         if (phba->sli4_hba.scsi_xri_cnt > phba->sli4_hba.scsi_xri_max) {
                 /* max scsi xri shrinked below the allocated scsi buffers */
                 scsi_xri_cnt = phba->sli4_hba.scsi_xri_cnt -
@@ -3555,6 +3594,44 @@ out_free_mem:
         return rc;
  }
  
+static uint64_t
+lpfc_get_wwpn(struct lpfc_hba *phba)
+{
+       uint64_t wwn;
+       int rc;
+       LPFC_MBOXQ_t *mboxq;
+       MAILBOX_t *mb;
+
+       /* Result: WWPN of this HBA, or (uint64_t)-1 on alloc/mailbox failure */
+       mboxq = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool,
+                                               GFP_KERNEL);
+       if (!mboxq)
+               return (uint64_t)-1;
+
+       /* First get WWN of HBA instance: issue the mailbox with MBX_POLL */
+       lpfc_read_nv(phba, mboxq);
+       rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+       if (rc != MBX_SUCCESS) {
+               lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+                               "6019 Mailbox failed , mbxCmd x%x "
+                               "READ_NV, mbxStatus x%x\n",
+                               bf_get(lpfc_mqe_command, &mboxq->u.mqe),
+                               bf_get(lpfc_mqe_status, &mboxq->u.mqe));
+               mempool_free(mboxq, phba->mbox_mem_pool);
+               return (uint64_t) -1;
+       }
+       mb = &mboxq->u.mb;
+       memcpy(&wwn, (char *)mb->un.varRDnvp.portname, sizeof(uint64_t));
+       /* wwn is WWPN of HBA instance; mailbox no longer needed, release it */
+       mempool_free(mboxq, phba->mbox_mem_pool);
+       if (phba->sli_rev == LPFC_SLI_REV4)
+               return be64_to_cpu(wwn);
+       else /* non-SLI4: swap the upper and lower 32-bit halves */
+               return (((wwn & 0xffffffff00000000) >> 32) |
+                       ((wwn & 0x00000000ffffffff) << 32));
+
+}
+
  /**
   * lpfc_sli4_nvme_sgl_update - update xri-sgl sizing and mapping
   * @phba: pointer to lpfc hba data structure.
@@ -3676,17 +3753,32 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
         struct lpfc_vport *vport;
         struct Scsi_Host  *shost = NULL;
         int error = 0;
+       int i;
+       uint64_t wwn;
+       bool use_no_reset_hba = false;
+
+       wwn = lpfc_get_wwpn(phba);
+
+       for (i = 0; i < lpfc_no_hba_reset_cnt; i++) {
+               if (wwn == lpfc_no_hba_reset[i]) {
+                       lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+                                       "6020 Setting use_no_reset port=%llx\n",
+                                       wwn);
+                       use_no_reset_hba = true;
+                       break;
+               }
+       }
  
         if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) {
                 if (dev != &phba->pcidev->dev) {
                         shost = scsi_host_alloc(&lpfc_vport_template,
                                                 sizeof(struct lpfc_vport));
                 } else {
-                       if (phba->sli_rev == LPFC_SLI_REV4)
+                       if (!use_no_reset_hba)
                                 shost = scsi_host_alloc(&lpfc_template,
                                                 sizeof(struct lpfc_vport));
                         else
-                               shost = scsi_host_alloc(&lpfc_template_s3,
+                               shost = scsi_host_alloc(&lpfc_template_no_hr,
                                                 sizeof(struct lpfc_vport));
                 }
         } else if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
@@ -3734,17 +3826,14 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
         INIT_LIST_HEAD(&vport->rcv_buffer_list);
         spin_lock_init(&vport->work_port_lock);
  
-       init_timer(&vport->fc_disctmo);
-       vport->fc_disctmo.function = lpfc_disc_timeout;
-       vport->fc_disctmo.data = (unsigned long)vport;
+       setup_timer(&vport->fc_disctmo, lpfc_disc_timeout,
+                       (unsigned long)vport);
  
-       init_timer(&vport->els_tmofunc);
-       vport->els_tmofunc.function = lpfc_els_timeout;
-       vport->els_tmofunc.data = (unsigned long)vport;
+       setup_timer(&vport->els_tmofunc, lpfc_els_timeout,
+                       (unsigned long)vport);
  
-       init_timer(&vport->delayed_disc_tmo);
-       vport->delayed_disc_tmo.function = lpfc_delayed_disc_tmo;
-       vport->delayed_disc_tmo.data = (unsigned long)vport;
+       setup_timer(&vport->delayed_disc_tmo, lpfc_delayed_disc_tmo,
+                       (unsigned long)vport);
  
         error = scsi_add_host_with_dma(shost, dev, &phba->pcidev->dev);
         if (error)
@@ -4458,9 +4547,15 @@ lpfc_sli4_async_fc_evt(struct lpfc_hba *phba, struct lpfc_acqe_fc_la *acqe_fc)
                 /* Parse and translate link attention fields */
                 la = (struct lpfc_mbx_read_top *)&pmb->u.mb.un.varReadTop;
                 la->eventTag = acqe_fc->event_tag;
-               bf_set(lpfc_mbx_read_top_att_type, la,
-                      LPFC_FC_LA_TYPE_LINK_DOWN);
  
+               if (phba->sli4_hba.link_state.status ==
+                   LPFC_FC_LA_TYPE_UNEXP_WWPN) {
+                       bf_set(lpfc_mbx_read_top_att_type, la,
+                              LPFC_FC_LA_TYPE_UNEXP_WWPN);
+               } else {
+                       bf_set(lpfc_mbx_read_top_att_type, la,
+                              LPFC_FC_LA_TYPE_LINK_DOWN);
+               }
                 /* Invoke the mailbox command callback function */
                 lpfc_mbx_cmpl_read_topology(phba, pmb);
  
@@ -4666,10 +4761,9 @@ lpfc_sli4_perform_vport_cvl(struct lpfc_vport *vport)
         ndlp = lpfc_findnode_did(vport, Fabric_DID);
         if (!ndlp) {
                 /* Cannot find existing Fabric ndlp, so allocate a new one */
-               ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+               ndlp = lpfc_nlp_init(vport, Fabric_DID);
                 if (!ndlp)
                         return 0;
-               lpfc_nlp_init(vport, ndlp, Fabric_DID);
                 /* Set the node type */
                 ndlp->nlp_type |= NLP_FABRIC;
                 /* Put ndlp onto node list */
@@ -5406,21 +5500,15 @@ lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba)
         INIT_LIST_HEAD(&phba->luns);
  
         /* MBOX heartbeat timer */
-       init_timer(&psli->mbox_tmo);
-       psli->mbox_tmo.function = lpfc_mbox_timeout;
-       psli->mbox_tmo.data = (unsigned long) phba;
+       setup_timer(&psli->mbox_tmo, lpfc_mbox_timeout, (unsigned long)phba);
         /* Fabric block timer */
-       init_timer(&phba->fabric_block_timer);
-       phba->fabric_block_timer.function = lpfc_fabric_block_timeout;
-       phba->fabric_block_timer.data = (unsigned long) phba;
+       setup_timer(&phba->fabric_block_timer, lpfc_fabric_block_timeout,
+                       (unsigned long)phba);
         /* EA polling mode timer */
-       init_timer(&phba->eratt_poll);
-       phba->eratt_poll.function = lpfc_poll_eratt;
-       phba->eratt_poll.data = (unsigned long) phba;
+       setup_timer(&phba->eratt_poll, lpfc_poll_eratt,
+                       (unsigned long)phba);
         /* Heartbeat timer */
-       init_timer(&phba->hb_tmofunc);
-       phba->hb_tmofunc.function = lpfc_hb_timeout;
-       phba->hb_tmofunc.data = (unsigned long)phba;
+       setup_timer(&phba->hb_tmofunc, lpfc_hb_timeout, (unsigned long)phba);
  
         return 0;
  }
@@ -5446,9 +5534,8 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba)
          */
  
         /* FCP polling mode timer */
-       init_timer(&phba->fcp_poll_timer);
-       phba->fcp_poll_timer.function = lpfc_poll_timeout;
-       phba->fcp_poll_timer.data = (unsigned long) phba;
+       setup_timer(&phba->fcp_poll_timer, lpfc_poll_timeout,
+                       (unsigned long)phba);
  
         /* Host attention work mask setup */
         phba->work_ha_mask = (HA_ERATT | HA_MBATT | HA_LATT);
@@ -5482,7 +5569,8 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba)
  
         /* Initialize the host templates the configured values. */
         lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt;
-       lpfc_template_s3.sg_tablesize = phba->cfg_sg_seg_cnt;
+       lpfc_template_no_hr.sg_tablesize = phba->cfg_sg_seg_cnt;
+       lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt;
  
         /* There are going to be 2 reserved BDEs: 1 FCP cmnd + 1 FCP rsp */
         if (phba->cfg_enable_bg) {
@@ -5617,14 +5705,11 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
          * Initialize timers used by driver
          */
  
-       init_timer(&phba->rrq_tmr);
-       phba->rrq_tmr.function = lpfc_rrq_timeout;
-       phba->rrq_tmr.data = (unsigned long)phba;
+       setup_timer(&phba->rrq_tmr, lpfc_rrq_timeout, (unsigned long)phba);
  
         /* FCF rediscover timer */
-       init_timer(&phba->fcf.redisc_wait);
-       phba->fcf.redisc_wait.function = lpfc_sli4_fcf_redisc_wait_tmo;
-       phba->fcf.redisc_wait.data = (unsigned long)phba;
+       setup_timer(&phba->fcf.redisc_wait, lpfc_sli4_fcf_redisc_wait_tmo,
+                       (unsigned long)phba);
  
         /*
          * Control structure for handling external multi-buffer mailbox
@@ -5706,6 +5791,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
         /* Initialize the host templates with the updated values. */
         lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt;
         lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt;
+       lpfc_template_no_hr.sg_tablesize = phba->cfg_sg_seg_cnt;
  
         if (phba->cfg_sg_dma_buf_size  <= LPFC_MIN_SG_SLI4_BUF_SZ)
                 phba->cfg_sg_dma_buf_size = LPFC_MIN_SG_SLI4_BUF_SZ;
@@ -5736,6 +5822,9 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
                 /* Initialize the Abort nvme buffer list used by driver */
                 spin_lock_init(&phba->sli4_hba.abts_nvme_buf_list_lock);
                 INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvme_buf_list);
+               INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list);
+               /* Fast-path XRI aborted CQ Event work queue list */
+               INIT_LIST_HEAD(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue);
         }
  
         /* This abort list used by worker thread */
@@ -5765,6 +5854,12 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
         INIT_LIST_HEAD(&phba->sli4_hba.lpfc_vfi_blk_list);
         INIT_LIST_HEAD(&phba->lpfc_vpi_blk_list);
  
+       /* Initialize mboxq lists. If the early init routines fail
+        * these lists need to be correctly initialized.
+        */
+       INIT_LIST_HEAD(&phba->sli.mboxq);
+       INIT_LIST_HEAD(&phba->sli.mboxq_cmpl);
+
         /* initialize optic_state to 0xFF */
         phba->sli4_hba.lnk_info.optic_state = 0xff;
  
@@ -5830,6 +5925,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
                                         "READ_NV, mbxStatus x%x\n",
                                         bf_get(lpfc_mqe_command, &mboxq->u.mqe),
                                         bf_get(lpfc_mqe_status, &mboxq->u.mqe));
+                       mempool_free(mboxq, phba->mbox_mem_pool);
                         rc = -EIO;
                         goto out_free_bsmbx;
                 }
@@ -5847,10 +5943,17 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
                 /* Check to see if it matches any module parameter */
                 for (i = 0; i < lpfc_enable_nvmet_cnt; i++) {
                         if (wwn == lpfc_enable_nvmet[i]) {
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
                                 lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                                 "6017 NVME Target %016llx\n",
                                                 wwn);
                                 phba->nvmet_support = 1; /* a match */
+#else
+                               lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                                               "6021 Can't enable NVME Target."
+                                               " NVME_TARGET_FC infrastructure"
+                                               " is not in kernel\n");
+#endif
                         }
                 }
         }
@@ -6347,7 +6450,7 @@ lpfc_init_sgl_list(struct lpfc_hba *phba)
         INIT_LIST_HEAD(&phba->sli4_hba.lpfc_els_sgl_list);
         INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_els_sgl_list);
         INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_sgl_list);
-       INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list);
+       INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list);
  
         /* els xri-sgl book keeping */
         phba->sli4_hba.els_xri_cnt = 0;
@@ -7748,7 +7851,7 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx)
  
         /* Create Fast Path FCP WQs */
         wqesize = (phba->fcp_embed_io) ?
-                               LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize;
+               LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize;
         qdesc = lpfc_sli4_queue_alloc(phba, wqesize, phba->sli4_hba.wq_ecount);
         if (!qdesc) {
                 lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
@@ -7779,7 +7882,7 @@ int
  lpfc_sli4_queue_create(struct lpfc_hba *phba)
  {
         struct lpfc_queue *qdesc;
-       int idx, io_channel, max;
+       int idx, io_channel;
  
         /*
          * Create HBA Record arrays.
@@ -7940,15 +8043,6 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
                 if (lpfc_alloc_nvme_wq_cq(phba, idx))
                         goto out_error;
  
-       /* allocate MRQ CQs */
-       max = phba->cfg_nvme_io_channel;
-       if (max < phba->cfg_nvmet_mrq)
-               max = phba->cfg_nvmet_mrq;
-
-       for (idx = 0; idx < max; idx++)
-               if (lpfc_alloc_nvme_wq_cq(phba, idx))
-                       goto out_error;
-
         if (phba->nvmet_support) {
                 for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) {
                         qdesc = lpfc_sli4_queue_alloc(phba,
@@ -8170,11 +8264,11 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba)
  
         /* Release FCP cqs */
         lpfc_sli4_release_queues(&phba->sli4_hba.fcp_cq,
-                                       phba->cfg_fcp_io_channel);
+                                phba->cfg_fcp_io_channel);
  
         /* Release FCP wqs */
         lpfc_sli4_release_queues(&phba->sli4_hba.fcp_wq,
-                                       phba->cfg_fcp_io_channel);
+                                phba->cfg_fcp_io_channel);
  
         /* Release FCP CQ mapping array */
         lpfc_sli4_release_queue_map(&phba->sli4_hba.fcp_cq_map);
@@ -8520,15 +8614,15 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
                 lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                 "0528 %s not allocated\n",
                                 phba->sli4_hba.mbx_cq ?
-                                               "Mailbox WQ" : "Mailbox CQ");
+                               "Mailbox WQ" : "Mailbox CQ");
                 rc = -ENOMEM;
                 goto out_destroy;
         }
  
         rc = lpfc_create_wq_cq(phba, phba->sli4_hba.hba_eq[0],
-                                       phba->sli4_hba.mbx_cq,
-                                       phba->sli4_hba.mbx_wq,
-                                       NULL, 0, LPFC_MBOX);
+                              phba->sli4_hba.mbx_cq,
+                              phba->sli4_hba.mbx_wq,
+                              NULL, 0, LPFC_MBOX);
         if (rc) {
                 lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                         "0529 Failed setup of mailbox WQ/CQ: rc = 0x%x\n",
@@ -8712,12 +8806,9 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
                 }
         }
  
-       /*
-        * Configure EQ delay multipier for interrupt coalescing using
-        * MODIFY_EQ_DELAY for all EQs created, LPFC_MAX_EQ_DELAY at a time.
-        */
-       for (qidx = 0; qidx < io_channel; qidx += LPFC_MAX_EQ_DELAY)
+       for (qidx = 0; qidx < io_channel; qidx += LPFC_MAX_EQ_DELAY_EQID_CNT)
                 lpfc_modify_hba_eq_delay(phba, qidx);
+
         return 0;
  
  out_destroy:
@@ -8973,6 +9064,11 @@ lpfc_sli4_cq_event_release_all(struct lpfc_hba *phba)
         /* Pending ELS XRI abort events */
         list_splice_init(&phba->sli4_hba.sp_els_xri_aborted_work_queue,
                          &cqelist);
+       if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+               /* Pending NVME XRI abort events */
+               list_splice_init(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue,
+                                &cqelist);
+       }
         /* Pending asynnc events */
         list_splice_init(&phba->sli4_hba.sp_asynce_work_queue,
                          &cqelist);
@@ -9881,17 +9977,19 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba)
  {
         int wait_time = 0;
         int nvme_xri_cmpl = 1;
+       int nvmet_xri_cmpl = 1;
         int fcp_xri_cmpl = 1;
         int els_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_els_sgl_list);
-       int nvmet_xri_cmpl =
-                       list_empty(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list);
  
         if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)
                 fcp_xri_cmpl =
                         list_empty(&phba->sli4_hba.lpfc_abts_scsi_buf_list);
-       if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
+       if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
                 nvme_xri_cmpl =
                         list_empty(&phba->sli4_hba.lpfc_abts_nvme_buf_list);
+               nvmet_xri_cmpl =
+                       list_empty(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list);
+       }
  
         while (!fcp_xri_cmpl || !els_xri_cmpl || !nvme_xri_cmpl ||
                !nvmet_xri_cmpl) {
@@ -9917,9 +10015,12 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba)
                         msleep(LPFC_XRI_EXCH_BUSY_WAIT_T1);
                         wait_time += LPFC_XRI_EXCH_BUSY_WAIT_T1;
                 }
-               if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
+               if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
                         nvme_xri_cmpl = list_empty(
                                 &phba->sli4_hba.lpfc_abts_nvme_buf_list);
+                       nvmet_xri_cmpl = list_empty(
+                               &phba->sli4_hba.lpfc_abts_nvmet_ctx_list);
+               }
  
                 if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)
                         fcp_xri_cmpl = list_empty(
@@ -9928,8 +10029,6 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba)
                 els_xri_cmpl =
                         list_empty(&phba->sli4_hba.lpfc_abts_els_sgl_list);
  
-               nvmet_xri_cmpl =
-                       list_empty(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list);
         }
  }
  
@@ -9995,9 +10094,14 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba)
         /* Stop kthread signal shall trigger work_done one more time */
         kthread_stop(phba->worker_thread);
  
+       /* Unset the queues shared with the hardware then release all
+        * allocated resources.
+        */
+       lpfc_sli4_queue_unset(phba);
+       lpfc_sli4_queue_destroy(phba);
+
         /* Reset SLI4 HBA FCoE function */
         lpfc_pci_function_reset(phba);
-       lpfc_sli4_queue_destroy(phba);
  
         /* Stop the SLI4 device port */
         phba->pport->work_port_events = 0;
@@ -10253,6 +10357,7 @@ lpfc_pci_probe_one_s3(struct pci_dev *pdev, const struct pci_device_id *pid)
         }
  
         /* Initialize and populate the iocb list per host */
+
         error = lpfc_init_iocb_list(phba, LPFC_IOCB_LIST_CNT);
         if (error) {
                 lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
@@ -10400,12 +10505,7 @@ lpfc_pci_remove_one_s3(struct pci_dev *pdev)
         fc_remove_host(shost);
         scsi_remove_host(shost);
  
-       /* Perform ndlp cleanup on the physical port.  The nvme and nvmet
-        * localports are destroyed after to cleanup all transport memory.
-        */
         lpfc_cleanup(vport);
-       lpfc_nvmet_destroy_targetport(phba);
-       lpfc_nvme_destroy_localport(vport);
  
         /*
          * Bring down the SLI Layer. This step disable all interrupts,
@@ -11003,7 +11103,7 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
         struct lpfc_hba   *phba;
         struct lpfc_vport *vport = NULL;
         struct Scsi_Host  *shost = NULL;
-       int error;
+       int error, cnt;
         uint32_t cfg_mode, intr_mode;
  
         /* Allocate memory for HBA structure */
@@ -11037,12 +11137,15 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
                 goto out_unset_pci_mem_s4;
         }
  
-       /* Initialize and populate the iocb list per host */
+       cnt = phba->cfg_iocb_cnt * 1024;
+       if (phba->nvmet_support)
+               cnt += phba->cfg_nvmet_mrq_post * phba->cfg_nvmet_mrq;
  
+       /* Initialize and populate the iocb list per host */
         lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-                       "2821 initialize iocb list %d.\n",
-                       phba->cfg_iocb_cnt*1024);
-       error = lpfc_init_iocb_list(phba, phba->cfg_iocb_cnt*1024);
+                       "2821 initialize iocb list %d total %d\n",
+                       phba->cfg_iocb_cnt, cnt);
+       error = lpfc_init_iocb_list(phba, cnt);
  
         if (error) {
                 lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
@@ -11129,7 +11232,9 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
         if ((phba->nvmet_support == 0) &&
             (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) {
                 /* Create NVME binding with nvme_fc_transport. This
-                * ensures the vport is initialized.
+                * ensures the vport is initialized.  If the localport
+                * create fails, do not unload the driver, so that
+                * field issues can still be supported.
                  */
                 error = lpfc_nvme_create_localport(vport);
                 if (error) {
@@ -11137,7 +11242,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
                                         "6004 NVME registration failed, "
                                         "error x%x\n",
                                         error);
-                       goto out_disable_intr;
                 }
         }
  
@@ -11936,6 +12040,7 @@ int
  lpfc_fof_queue_create(struct lpfc_hba *phba)
  {
         struct lpfc_queue *qdesc;
+       uint32_t wqesize;
  
         /* Create FOF EQ */
         qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.eq_esize,
@@ -11956,8 +12061,11 @@ lpfc_fof_queue_create(struct lpfc_hba *phba)
                 phba->sli4_hba.oas_cq = qdesc;
  
                 /* Create OAS WQ */
-               qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.wq_esize,
+               wqesize = (phba->fcp_embed_io) ?
+                               LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize;
+               qdesc = lpfc_sli4_queue_alloc(phba, wqesize,
                                               phba->sli4_hba.wq_ecount);
+
                 if (!qdesc)
                         goto out_error;
  
@@ -12018,6 +12126,7 @@ static struct pci_driver lpfc_driver = {
         .id_table       = lpfc_id_table,
         .probe          = lpfc_pci_probe_one,
         .remove         = lpfc_pci_remove_one,
+       .shutdown       = lpfc_pci_remove_one,
         .suspend        = lpfc_pci_suspend_one,
         .resume         = lpfc_pci_resume_one,
         .err_handler    = &lpfc_err_handler,
diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c

index a928f5187fa46bc5082e66558ec7a62351c93526..ce25a18367b590b11c1a1521db559580bb06fa52 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_mbox.c
+++ b/drivers/scsi/lpfc/lpfc_mbox.c
@@ -2083,9 +2083,12 @@ lpfc_request_features(struct lpfc_hba *phba, struct lpfcMboxq *mboxq)
         if (phba->max_vpi && phba->cfg_enable_npiv)
                 bf_set(lpfc_mbx_rq_ftr_rq_npiv, &mboxq->u.mqe.un.req_ftrs, 1);
  
-       if (phba->nvmet_support)
+       if (phba->nvmet_support) {
                 bf_set(lpfc_mbx_rq_ftr_rq_mrqp, &mboxq->u.mqe.un.req_ftrs, 1);
-
+               /* iaab/iaar NOT set for now */
+                bf_set(lpfc_mbx_rq_ftr_rq_iaab, &mboxq->u.mqe.un.req_ftrs, 0);
+                bf_set(lpfc_mbx_rq_ftr_rq_iaar, &mboxq->u.mqe.un.req_ftrs, 0);
+       }
         return;
  }
  
diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c

index c61d8d692edeeeca94512d2eeb0c2fcbdff0b382..5986c7957199df6ef97343a3c0402931cbdeb7ad 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_mem.c
+++ b/drivers/scsi/lpfc/lpfc_mem.c
@@ -646,7 +646,6 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba)
         }
  
         dma_buf->iocbq = lpfc_sli_get_iocbq(phba);
-       dma_buf->iocbq->iocb_flag = LPFC_IO_NVMET;
         if (!dma_buf->iocbq) {
                 kfree(dma_buf->context);
                 pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt,
@@ -658,6 +657,7 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba)
                                 "2621 Ran out of nvmet iocb/WQEs\n");
                 return NULL;
         }
+       dma_buf->iocbq->iocb_flag = LPFC_IO_NVMET;
         nvmewqe = dma_buf->iocbq;
         wqe = (union lpfc_wqe128 *)&nvmewqe->wqe;
         /* Initialize WQE */
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c

index 061626bdf701063354653bf8966f619de8a947dd..8777c2d5f50d35ecae18223da67245157811b4be 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -361,8 +361,12 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
         case  NLP_STE_PRLI_ISSUE:
         case  NLP_STE_UNMAPPED_NODE:
         case  NLP_STE_MAPPED_NODE:
-               /* lpfc_plogi_confirm_nport skips fabric did, handle it here */
-               if (!(ndlp->nlp_type & NLP_FABRIC)) {
+               /* For initiators, lpfc_plogi_confirm_nport skips fabric did.
+                * For target mode, execute implicit logo.
+                * Fabric nodes go into NPR.
+                */
+               if (!(ndlp->nlp_type & NLP_FABRIC) &&
+                   !(phba->nvmet_support)) {
                         lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb,
                                          ndlp, NULL);
                         return 1;
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c

index 609a908ea9db5ba291e678c296726d69cae784f9..8008c8205fb6334d0ec35b49bdbe761ac10987d7 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -316,7 +316,7 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp,
         bf_set(wqe_dfctl, &wqe->gen_req.wge_ctl, 0);
         bf_set(wqe_si, &wqe->gen_req.wge_ctl, 1);
         bf_set(wqe_la, &wqe->gen_req.wge_ctl, 1);
-       bf_set(wqe_rctl, &wqe->gen_req.wge_ctl, FC_RCTL_DD_UNSOL_CTL);
+       bf_set(wqe_rctl, &wqe->gen_req.wge_ctl, FC_RCTL_ELS4_REQ);
         bf_set(wqe_type, &wqe->gen_req.wge_ctl, FC_TYPE_NVME);
  
         /* Word 6 */
@@ -401,6 +401,7 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport,
         struct lpfc_nodelist *ndlp;
         struct ulp_bde64 *bpl;
         struct lpfc_dmabuf *bmp;
+       uint16_t ntype, nstate;
  
         /* there are two dma buf in the request, actually there is one and
          * the second one is just the start address + cmd size.
@@ -417,11 +418,26 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport,
         vport = lport->vport;
  
         ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id);
-       if (!ndlp) {
-               lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
-                                "6043 Could not find node for DID %x\n",
+       if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) {
+               lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR,
+                                "6051 DID x%06x not an active rport.\n",
                                  pnvme_rport->port_id);
-               return 1;
+               return -ENODEV;
+       }
+
+       /* The remote node has to be a mapped nvme target or an
+        * unmapped nvme initiator or it's an error.
+        */
+       ntype = ndlp->nlp_type;
+       nstate = ndlp->nlp_state;
+       if ((ntype & NLP_NVME_TARGET && nstate != NLP_STE_MAPPED_NODE) ||
+           (ntype & NLP_NVME_INITIATOR && nstate != NLP_STE_UNMAPPED_NODE)) {
+               lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR,
+                                "6088 DID x%06x not ready for "
+                                "IO. State x%x, Type x%x\n",
+                                pnvme_rport->port_id,
+                                ndlp->nlp_state, ndlp->nlp_type);
+               return -ENODEV;
         }
         bmp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
         if (!bmp) {
@@ -456,7 +472,7 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport,
  
         /* Expand print to include key fields. */
         lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
-                        "6051 ENTER.  lport %p, rport %p lsreq%p rqstlen:%d "
+                        "6149 ENTER.  lport %p, rport %p lsreq%p rqstlen:%d "
                          "rsplen:%d %pad %pad\n",
                          pnvme_lport, pnvme_rport,
                          pnvme_lsreq, pnvme_lsreq->rqstlen,
@@ -620,15 +636,15 @@ lpfc_nvme_adj_fcp_sgls(struct lpfc_vport *vport,
          * Embed the payload in the last half of the WQE
          * WQE words 16-30 get the NVME CMD IU payload
          *
-        * WQE Word 16 is already setup with flags
-        * WQE words 17-19 get payload Words 2-4
+        * WQE words 16-19 get payload Words 1-4
          * WQE words 20-21 get payload Words 6-7
          * WQE words 22-29 get payload Words 16-23
          */
-       wptr = &wqe->words[17];  /* WQE ptr */
+       wptr = &wqe->words[16];  /* WQE ptr */
         dptr = (uint32_t *)nCmd->cmdaddr;  /* payload ptr */
-       dptr += 2;              /* Skip Words 0-1 in payload */
+       dptr++;                 /* Skip Word 0 in payload */
  
+       *wptr++ = *dptr++;      /* Word 1 */
         *wptr++ = *dptr++;      /* Word 2 */
         *wptr++ = *dptr++;      /* Word 3 */
         *wptr++ = *dptr++;      /* Word 4 */
@@ -745,6 +761,7 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
         struct nvme_fc_cmd_iu *cp;
         struct lpfc_nvme_rport *rport;
         struct lpfc_nodelist *ndlp;
+       struct lpfc_nvme_fcpreq_priv *freqpriv;
         unsigned long flags;
         uint32_t code;
         uint16_t cid, sqhd, data;
@@ -772,9 +789,8 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
         ndlp = rport->ndlp;
         if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) {
                 lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR,
-                                "6061 rport %p, ndlp %p, DID x%06x ndlp "
-                                "not ready.\n",
-                                rport, ndlp, rport->remoteport->port_id);
+                                "6061 rport %p,  DID x%06x node not ready.\n",
+                                rport, rport->remoteport->port_id);
  
                 ndlp = lpfc_findnode_did(vport, rport->remoteport->port_id);
                 if (!ndlp) {
@@ -853,15 +869,18 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
                                 break;
                         lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR,
                                          "6081 NVME Completion Protocol Error: "
-                                        "status x%x result x%x placed x%x\n",
+                                        "xri %x status x%x result x%x "
+                                        "placed x%x\n",
+                                        lpfc_ncmd->cur_iocbq.sli4_xritag,
                                          lpfc_ncmd->status, lpfc_ncmd->result,
                                          wcqe->total_data_placed);
                         break;
                 default:
  out_err:
                         lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR,
-                                        "6072 NVME Completion Error: "
+                                        "6072 NVME Completion Error: xri %x "
                                          "status x%x result x%x placed x%x\n",
+                                        lpfc_ncmd->cur_iocbq.sli4_xritag,
                                          lpfc_ncmd->status, lpfc_ncmd->result,
                                          wcqe->total_data_placed);
                         nCmd->transferred_length = 0;
@@ -900,6 +919,8 @@ out_err:
                         phba->cpucheck_cmpl_io[lpfc_ncmd->cpu]++;
         }
  #endif
+       freqpriv = nCmd->private;
+       freqpriv->nvme_buf = NULL;
         nCmd->done(nCmd);
  
         spin_lock_irqsave(&phba->hbalock, flags);
@@ -978,9 +999,6 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
                         bf_set(wqe_cmd_type, &wqe->generic.wqe_com,
                                NVME_WRITE_CMD);
  
-                       /* Word 16 */
-                       wqe->words[16] = LPFC_NVME_EMBED_WRITE;
-
                         phba->fc4NvmeOutputRequests++;
                 } else {
                         /* Word 7 */
@@ -1002,9 +1020,6 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
                         bf_set(wqe_cmd_type, &wqe->generic.wqe_com,
                                NVME_READ_CMD);
  
-                       /* Word 16 */
-                       wqe->words[16] = LPFC_NVME_EMBED_READ;
-
                         phba->fc4NvmeInputRequests++;
                 }
         } else {
@@ -1026,9 +1041,6 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
                 /* Word 11 */
                 bf_set(wqe_cmd_type, &wqe->generic.wqe_com, NVME_READ_CMD);
  
-               /* Word 16 */
-               wqe->words[16] = LPFC_NVME_EMBED_CMD;
-
                 phba->fc4NvmeControlRequests++;
         }
         /*
@@ -1108,12 +1120,12 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
  
                 first_data_sgl = sgl;
                 lpfc_ncmd->seg_cnt = nCmd->sg_cnt;
-               if (lpfc_ncmd->seg_cnt > phba->cfg_sg_seg_cnt) {
+               if (lpfc_ncmd->seg_cnt > phba->cfg_nvme_seg_cnt) {
                         lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
                                         "6058 Too many sg segments from "
                                         "NVME Transport.  Max %d, "
                                         "nvmeIO sg_cnt %d\n",
-                                       phba->cfg_sg_seg_cnt,
+                                       phba->cfg_nvme_seg_cnt,
                                         lpfc_ncmd->seg_cnt);
                         lpfc_ncmd->seg_cnt = 0;
                         return 1;
@@ -1205,6 +1217,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
         struct lpfc_nvme_buf *lpfc_ncmd;
         struct lpfc_nvme_rport *rport;
         struct lpfc_nvme_qhandle *lpfc_queue_info;
+       struct lpfc_nvme_fcpreq_priv *freqpriv = pnvme_fcreq->private;
  #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
         uint64_t start = 0;
  #endif
@@ -1283,9 +1296,10 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
          * Do not let the IO hang out forever.  There is no midlayer issuing
          * an abort so inform the FW of the maximum IO pending time.
          */
-       pnvme_fcreq->private = (void *)lpfc_ncmd;
+       freqpriv->nvme_buf = lpfc_ncmd;
         lpfc_ncmd->nvmeCmd = pnvme_fcreq;
         lpfc_ncmd->nrport = rport;
+       lpfc_ncmd->ndlp = ndlp;
         lpfc_ncmd->start_time = jiffies;
  
         lpfc_nvme_prep_io_cmd(vport, lpfc_ncmd, ndlp);
@@ -1319,7 +1333,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
                                  "sid: x%x did: x%x oxid: x%x\n",
                                  ret, vport->fc_myDID, ndlp->nlp_DID,
                                  lpfc_ncmd->cur_iocbq.sli4_xritag);
-               ret = -EINVAL;
+               ret = -EBUSY;
                 goto out_free_nvme_buf;
         }
  
@@ -1412,6 +1426,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
         struct lpfc_nvme_buf *lpfc_nbuf;
         struct lpfc_iocbq *abts_buf;
         struct lpfc_iocbq *nvmereq_wqe;
+       struct lpfc_nvme_fcpreq_priv *freqpriv = pnvme_fcreq->private;
         union lpfc_wqe *abts_wqe;
         unsigned long flags;
         int ret_val;
@@ -1422,7 +1437,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
         phba = vport->phba;
  
         /* Announce entry to new IO submit field. */
-       lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
+       lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS,
                          "6002 Abort Request to rport DID x%06x "
                          "for nvme_fc_req %p\n",
                          pnvme_rport->port_id,
@@ -1452,7 +1467,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
         /* The remote node has to be ready to send an abort. */
         if ((ndlp->nlp_state != NLP_STE_MAPPED_NODE) &&
             !(ndlp->nlp_type & NLP_NVME_TARGET)) {
-               lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_ABTS,
+               lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
                                  "6048 rport %p, DID x%06x not ready for "
                                  "IO. State x%x, Type x%x\n",
                                  rport, pnvme_rport->port_id,
@@ -1467,27 +1482,28 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
         /* driver queued commands are in process of being flushed */
         if (phba->hba_flag & HBA_NVME_IOQ_FLUSH) {
                 spin_unlock_irqrestore(&phba->hbalock, flags);
-               lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME,
+               lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
                                  "6139 Driver in reset cleanup - flushing "
                                  "NVME Req now.  hba_flag x%x\n",
                                  phba->hba_flag);
                 return;
         }
  
-       lpfc_nbuf = (struct lpfc_nvme_buf *)pnvme_fcreq->private;
+       lpfc_nbuf = freqpriv->nvme_buf;
         if (!lpfc_nbuf) {
                 spin_unlock_irqrestore(&phba->hbalock, flags);
-               lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME,
+               lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
                                  "6140 NVME IO req has no matching lpfc nvme "
                                  "io buffer.  Skipping abort req.\n");
                 return;
         } else if (!lpfc_nbuf->nvmeCmd) {
                 spin_unlock_irqrestore(&phba->hbalock, flags);
-               lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME,
+               lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
                                  "6141 lpfc NVME IO req has no nvme_fcreq "
                                  "io buffer.  Skipping abort req.\n");
                 return;
         }
+       nvmereq_wqe = &lpfc_nbuf->cur_iocbq;
  
         /*
          * The lpfc_nbuf and the mapped nvme_fcreq in the driver's
@@ -1498,23 +1514,22 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
          */
         if (lpfc_nbuf->nvmeCmd != pnvme_fcreq) {
                 spin_unlock_irqrestore(&phba->hbalock, flags);
-               lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME,
+               lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
                                  "6143 NVME req mismatch: "
                                  "lpfc_nbuf %p nvmeCmd %p, "
-                                "pnvme_fcreq %p.  Skipping Abort\n",
+                                "pnvme_fcreq %p.  Skipping Abort xri x%x\n",
                                  lpfc_nbuf, lpfc_nbuf->nvmeCmd,
-                                pnvme_fcreq);
+                                pnvme_fcreq, nvmereq_wqe->sli4_xritag);
                 return;
         }
  
         /* Don't abort IOs no longer on the pending queue. */
-       nvmereq_wqe = &lpfc_nbuf->cur_iocbq;
         if (!(nvmereq_wqe->iocb_flag & LPFC_IO_ON_TXCMPLQ)) {
                 spin_unlock_irqrestore(&phba->hbalock, flags);
-               lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME,
+               lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
                                  "6142 NVME IO req %p not queued - skipping "
-                                "abort req\n",
-                                pnvme_fcreq);
+                                "abort req xri x%x\n",
+                                pnvme_fcreq, nvmereq_wqe->sli4_xritag);
                 return;
         }
  
@@ -1525,21 +1540,22 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
         /* Outstanding abort is in progress */
         if (nvmereq_wqe->iocb_flag & LPFC_DRIVER_ABORTED) {
                 spin_unlock_irqrestore(&phba->hbalock, flags);
-               lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME,
+               lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
                                  "6144 Outstanding NVME I/O Abort Request "
                                  "still pending on nvme_fcreq %p, "
-                                "lpfc_ncmd %p\n",
-                                pnvme_fcreq, lpfc_nbuf);
+                                "lpfc_ncmd %p xri x%x\n",
+                                pnvme_fcreq, lpfc_nbuf,
+                                nvmereq_wqe->sli4_xritag);
                 return;
         }
  
         abts_buf = __lpfc_sli_get_iocbq(phba);
         if (!abts_buf) {
                 spin_unlock_irqrestore(&phba->hbalock, flags);
-               lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME,
+               lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
                                  "6136 No available abort wqes. Skipping "
-                                "Abts req for nvme_fcreq %p.\n",
-                                pnvme_fcreq);
+                                "Abts req for nvme_fcreq %p xri x%x\n",
+                                pnvme_fcreq, nvmereq_wqe->sli4_xritag);
                 return;
         }
  
@@ -1588,7 +1604,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
         ret_val = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_buf);
         spin_unlock_irqrestore(&phba->hbalock, flags);
         if (ret_val == IOCB_ERROR) {
-               lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME,
+               lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
                                  "6137 Failed abts issue_wqe with status x%x "
                                  "for nvme_fcreq %p.\n",
                                  ret_val, pnvme_fcreq);
@@ -1596,8 +1612,8 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
                 return;
         }
  
-       lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME,
-                        "6138 Transport Abort NVME Request Issued for\n"
+       lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS,
+                        "6138 Transport Abort NVME Request Issued for "
                          "ox_id x%x on reqtag x%x\n",
                          nvmereq_wqe->sli4_xritag,
                          abts_buf->iotag);
@@ -1626,7 +1642,7 @@ static struct nvme_fc_port_template lpfc_nvme_template = {
         .local_priv_sz = sizeof(struct lpfc_nvme_lport),
         .remote_priv_sz = sizeof(struct lpfc_nvme_rport),
         .lsrqst_priv_sz = 0,
-       .fcprqst_priv_sz = 0,
+       .fcprqst_priv_sz = sizeof(struct lpfc_nvme_fcpreq_priv),
  };
  
  /**
@@ -1821,10 +1837,10 @@ lpfc_post_nvme_sgl_list(struct lpfc_hba *phba,
                                                 pdma_phys_sgl1, cur_xritag);
                                 if (status) {
                                         /* failure, put on abort nvme list */
-                                       lpfc_ncmd->exch_busy = 1;
+                                       lpfc_ncmd->flags |= LPFC_SBUF_XBUSY;
                                 } else {
                                         /* success, put on NVME buffer list */
-                                       lpfc_ncmd->exch_busy = 0;
+                                       lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;
                                         lpfc_ncmd->status = IOSTAT_SUCCESS;
                                         num_posted++;
                                 }
@@ -1854,10 +1870,10 @@ lpfc_post_nvme_sgl_list(struct lpfc_hba *phba,
                                          struct lpfc_nvme_buf, list);
                         if (status) {
                                 /* failure, put on abort nvme list */
-                               lpfc_ncmd->exch_busy = 1;
+                               lpfc_ncmd->flags |= LPFC_SBUF_XBUSY;
                         } else {
                                 /* success, put on NVME buffer list */
-                               lpfc_ncmd->exch_busy = 0;
+                               lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;
                                 lpfc_ncmd->status = IOSTAT_SUCCESS;
                                 num_posted++;
                         }
@@ -2057,7 +2073,7 @@ lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
                 if (lpfc_test_rrq_active(phba, ndlp,
                                          lpfc_ncmd->cur_iocbq.sli4_lxritag))
                         continue;
-               list_del(&lpfc_ncmd->list);
+               list_del_init(&lpfc_ncmd->list);
                 found = 1;
                 break;
         }
@@ -2072,7 +2088,7 @@ lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
                         if (lpfc_test_rrq_active(
                                 phba, ndlp, lpfc_ncmd->cur_iocbq.sli4_lxritag))
                                 continue;
-                       list_del(&lpfc_ncmd->list);
+                       list_del_init(&lpfc_ncmd->list);
                         found = 1;
                         break;
                 }
@@ -2099,7 +2115,13 @@ lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_nvme_buf *lpfc_ncmd)
         unsigned long iflag = 0;
  
         lpfc_ncmd->nonsg_phys = 0;
-       if (lpfc_ncmd->exch_busy) {
+       if (lpfc_ncmd->flags & LPFC_SBUF_XBUSY) {
+               lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+                               "6310 XB release deferred for "
+                               "ox_id x%x on reqtag x%x\n",
+                               lpfc_ncmd->cur_iocbq.sli4_xritag,
+                               lpfc_ncmd->cur_iocbq.iotag);
+
                 spin_lock_irqsave(&phba->sli4_hba.abts_nvme_buf_list_lock,
                                         iflag);
                 lpfc_ncmd->nvmeCmd = NULL;
@@ -2135,11 +2157,12 @@ lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_nvme_buf *lpfc_ncmd)
  int
  lpfc_nvme_create_localport(struct lpfc_vport *vport)
  {
+       int ret = 0;
         struct lpfc_hba  *phba = vport->phba;
         struct nvme_fc_port_info nfcp_info;
         struct nvme_fc_local_port *localport;
         struct lpfc_nvme_lport *lport;
-       int len, ret = 0;
+       int len;
  
         /* Initialize this localport instance.  The vport wwn usage ensures
          * that NPIV is accounted for.
@@ -2149,15 +2172,29 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
         nfcp_info.node_name = wwn_to_u64(vport->fc_nodename.u.wwn);
         nfcp_info.port_name = wwn_to_u64(vport->fc_portname.u.wwn);
  
-       /* For now need + 1 to get around NVME transport logic */
-       lpfc_nvme_template.max_sgl_segments = phba->cfg_sg_seg_cnt + 1;
+       /* Limit to LPFC_MAX_NVME_SEG_CNT.
+        * For now need + 1 to get around NVME transport logic.
+        */
+       if (phba->cfg_sg_seg_cnt > LPFC_MAX_NVME_SEG_CNT) {
+               lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME | LOG_INIT,
+                                "6300 Reducing sg segment cnt to %d\n",
+                                LPFC_MAX_NVME_SEG_CNT);
+               phba->cfg_nvme_seg_cnt = LPFC_MAX_NVME_SEG_CNT;
+       } else {
+               phba->cfg_nvme_seg_cnt = phba->cfg_sg_seg_cnt;
+       }
+       lpfc_nvme_template.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1;
         lpfc_nvme_template.max_hw_queues = phba->cfg_nvme_io_channel;
  
         /* localport is allocated from the stack, but the registration
          * call allocates heap memory as well as the private area.
          */
+#if (IS_ENABLED(CONFIG_NVME_FC))
         ret = nvme_fc_register_localport(&nfcp_info, &lpfc_nvme_template,
                                          &vport->phba->pcidev->dev, &localport);
+#else
+       ret = -ENOMEM;
+#endif
         if (!ret) {
                 lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME | LOG_NVME_DISC,
                                  "6005 Successfully registered local "
@@ -2173,10 +2210,10 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
                 lport->vport = vport;
                 INIT_LIST_HEAD(&lport->rport_list);
                 vport->nvmei_support = 1;
+               len  = lpfc_new_nvme_buf(vport, phba->sli4_hba.nvme_xri_max);
+               vport->phba->total_nvme_bufs += len;
         }
  
-       len  = lpfc_new_nvme_buf(vport, phba->sli4_hba.nvme_xri_max);
-       vport->phba->total_nvme_bufs += len;
         return ret;
  }
  
@@ -2193,6 +2230,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
  void
  lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
  {
+#if (IS_ENABLED(CONFIG_NVME_FC))
         struct nvme_fc_local_port *localport;
         struct lpfc_nvme_lport *lport;
         struct lpfc_nvme_rport *rport = NULL, *rport_next = NULL;
@@ -2208,7 +2246,6 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
         lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
                          "6011 Destroying NVME localport %p\n",
                          localport);
-
         list_for_each_entry_safe(rport, rport_next, &lport->rport_list, list) {
                 /* The last node ref has to get released now before the rport
                  * private memory area is released by the transport.
@@ -2222,6 +2259,7 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
                                          "6008 rport fail destroy %x\n", ret);
                 wait_for_completion_timeout(&rport->rport_unreg_done, 5);
         }
+
         /* lport's rport list is clear.  Unregister
          * lport and release resources.
          */
@@ -2245,17 +2283,29 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
                                  "Failed, status x%x\n",
                                  ret);
         }
+#endif
  }
  
  void
  lpfc_nvme_update_localport(struct lpfc_vport *vport)
  {
+#if (IS_ENABLED(CONFIG_NVME_FC))
         struct nvme_fc_local_port *localport;
         struct lpfc_nvme_lport *lport;
  
         localport = vport->localport;
+       if (!localport) {
+               lpfc_printf_vlog(vport, KERN_WARNING, LOG_NVME,
+                                "6710 Update NVME fail. No localport\n");
+               return;
+       }
         lport = (struct lpfc_nvme_lport *)localport->private;
-
+       if (!lport) {
+               lpfc_printf_vlog(vport, KERN_WARNING, LOG_NVME,
+                                "6171 Update NVME fail. localP %p, No lport\n",
+                                localport);
+               return;
+       }
         lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
                          "6012 Update NVME lport %p did x%x\n",
                          localport, vport->fc_myDID);
@@ -2269,12 +2319,13 @@ lpfc_nvme_update_localport(struct lpfc_vport *vport)
         lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
                          "6030 bound lport %p to DID x%06x\n",
                          lport, localport->port_id);
-
+#endif
  }
  
  int
  lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
  {
+#if (IS_ENABLED(CONFIG_NVME_FC))
         int ret = 0;
         struct nvme_fc_local_port *localport;
         struct lpfc_nvme_lport *lport;
@@ -2348,7 +2399,6 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                         rpinfo.port_role |= FC_PORT_ROLE_NVME_INITIATOR;
                 rpinfo.port_name = wwn_to_u64(ndlp->nlp_portname.u.wwn);
                 rpinfo.node_name = wwn_to_u64(ndlp->nlp_nodename.u.wwn);
-
                 ret = nvme_fc_register_remoteport(localport, &rpinfo,
                                                   &remote_port);
                 if (!ret) {
@@ -2384,6 +2434,9 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                                  ndlp->nlp_type, ndlp->nlp_DID, ndlp);
         }
         return ret;
+#else
+       return 0;
+#endif
  }
  
  /* lpfc_nvme_unregister_port - unbind the DID and port_role from this rport.
@@ -2401,11 +2454,13 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
  void
  lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
  {
+#if (IS_ENABLED(CONFIG_NVME_FC))
         int ret;
         struct nvme_fc_local_port *localport;
         struct lpfc_nvme_lport *lport;
         struct lpfc_nvme_rport *rport;
         struct nvme_fc_remote_port *remoteport;
+       unsigned long wait_tmo;
  
         localport = vport->localport;
  
@@ -2448,17 +2503,82 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                  * before proceeding.  This guarantees the transport and driver
                  * have completed the unreg process.
                  */
-               ret = wait_for_completion_timeout(&rport->rport_unreg_done, 5);
+               wait_tmo = msecs_to_jiffies(5000);
+               ret = wait_for_completion_timeout(&rport->rport_unreg_done,
+                                                 wait_tmo);
                 if (ret == 0) {
                         lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
-                                        "6169 Unreg nvme wait failed %d\n",
-                                        ret);
+                                        "6169 Unreg nvme wait timeout\n");
                 }
         }
         return;
  
   input_err:
+#endif
         lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
-                        "6168: State error: lport %p, rport%p FCID x%06x\n",
+                        "6168 State error: lport %p, rport%p FCID x%06x\n",
                          vport->localport, ndlp->rport, ndlp->nlp_DID);
  }
+
+/**
+ * lpfc_sli4_nvme_xri_aborted - Fast-path process of NVME xri abort
+ * @phba: pointer to lpfc hba data structure.
+ * @axri: pointer to the fcp xri abort wcqe structure.
+ *
+ * This routine is invoked by the worker thread to process a SLI4 fast-path
+ * FCP aborted xri.
+ *
+ * NOTE(review): the sentence above mentions FCP, but this routine searches
+ * the NVME abort buffer list; the wording may have been carried over from
+ * another routine - confirm.
+ *
+ * The XRI and remote exchange id are read from @axri. If LPFC_ENABLE_NVME is
+ * not set in cfg_enable_fc4_type the routine returns immediately. Otherwise
+ * lpfc_abts_nvme_buf_list is searched, with hbalock and
+ * abts_nvme_buf_list_lock held, for the buffer whose sli4_xritag equals the
+ * XRI.
+ *
+ * On a match the buffer is unlinked, LPFC_SBUF_XBUSY is cleared, its status
+ * is set to IOSTAT_SUCCESS and both locks are dropped (whether
+ * active_rrq_list is empty is sampled before hbalock is released). If the
+ * buffer has an ndlp, lpfc_set_rrq_active() and lpfc_sli4_abts_err_handler()
+ * are called for it. Message 6311 is logged, the buffer is handed to
+ * lpfc_release_nvme_buf(), and the worker is woken when the sampled
+ * active_rrq_list state was "empty".
+ *
+ * If no buffer matches, both locks are dropped and message 6312 is logged.
+ **/
+void
+lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba,
+                          struct sli4_wcqe_xri_aborted *axri)
+{
+       uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri);
+       uint16_t rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri);
+       struct lpfc_nvme_buf *lpfc_ncmd, *next_lpfc_ncmd;
+       struct lpfc_nodelist *ndlp;
+       unsigned long iflag = 0;
+       int rrq_empty = 0;
+
+       if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
+               return;
+       spin_lock_irqsave(&phba->hbalock, iflag);
+       spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+       list_for_each_entry_safe(lpfc_ncmd, next_lpfc_ncmd,
+                                &phba->sli4_hba.lpfc_abts_nvme_buf_list,
+                                list) {
+               if (lpfc_ncmd->cur_iocbq.sli4_xritag == xri) {
+                       list_del_init(&lpfc_ncmd->list);
+                       lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;
+                       lpfc_ncmd->status = IOSTAT_SUCCESS;
+                       spin_unlock(
+                               &phba->sli4_hba.abts_nvme_buf_list_lock);
+
+                       rrq_empty = list_empty(&phba->active_rrq_list);
+                       spin_unlock_irqrestore(&phba->hbalock, iflag);
+                       ndlp = lpfc_ncmd->ndlp;
+                       if (ndlp) {
+                               lpfc_set_rrq_active(
+                                       phba, ndlp,
+                                       lpfc_ncmd->cur_iocbq.sli4_lxritag,
+                                       rxid, 1);
+                               lpfc_sli4_abts_err_handler(phba, ndlp, axri);
+                       }
+
+                       lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+                                       "6311 XRI Aborted xri x%x tag x%x "
+                                       "released\n",
+                                       xri, lpfc_ncmd->cur_iocbq.iotag);
+
+                       lpfc_release_nvme_buf(phba, lpfc_ncmd);
+                       if (rrq_empty)
+                               lpfc_worker_wake_up(phba);
+                       return;
+               }
+       }
+       spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+       spin_unlock_irqrestore(&phba->hbalock, iflag);
+
+       lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+                       "6312 XRI Aborted xri x%x not found\n", xri);
+
+}
diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h

index b2fae5e813f8a82b17b53a5cd5affd8f7e5d6bad..ec32f45daa667dfcb1876a0b41dc62362154e733 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_nvme.h
+++ b/drivers/scsi/lpfc/lpfc_nvme.h
@@ -21,12 +21,7 @@
   * included with this package.                                     *
   ********************************************************************/
  
-#define LPFC_NVME_MIN_SEGS             16
-#define LPFC_NVME_DEFAULT_SEGS         66      /* 256K IOs - 64 + 2 */
-#define LPFC_NVME_MAX_SEGS             510
-#define LPFC_NVMET_MIN_POSTBUF         16
-#define LPFC_NVMET_DEFAULT_POSTBUF     1024
-#define LPFC_NVMET_MAX_POSTBUF         4096
+#define LPFC_NVME_DEFAULT_SEGS         (64 + 1)        /* 256K IOs */
  #define LPFC_NVME_WQSIZE               256
  
  #define LPFC_NVME_ERSP_LEN             0x20
@@ -57,6 +52,7 @@ struct lpfc_nvme_buf {
         struct list_head list;
         struct nvmefc_fcp_req *nvmeCmd;
         struct lpfc_nvme_rport *nrport;
+       struct lpfc_nodelist *ndlp;
  
         uint32_t timeout;
  
@@ -101,3 +97,7 @@ struct lpfc_nvme_buf {
         uint64_t ts_data_nvme;
  #endif
  };
+
+/*
+ * Holds a single pointer to a struct lpfc_nvme_buf.
+ * NOTE(review): the name suggests this is the layout of a per-FCP-request
+ * private area - confirm against the users of this struct.
+ */
+struct lpfc_nvme_fcpreq_priv {
+       struct lpfc_nvme_buf *nvme_buf;
+};
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c

index c421e1738ee989efcca922da9b145995cc164c72..94434e621c335e678ad2aa1c3301b967cb15a210 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -71,6 +71,26 @@ static int lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *,
                                            struct lpfc_nvmet_rcv_ctx *,
                                            uint32_t, uint16_t);
  
+/**
+ * lpfc_nvmet_defer_release - Queue a receive context for deferred release
+ * @phba: pointer to lpfc hba data structure.
+ * @ctxp: receive context whose release is being deferred.
+ *
+ * Logs message 6313 with the context's oxid and flag. Then, with
+ * abts_nvme_buf_list_lock held, returns without further action if
+ * LPFC_NVMET_CTX_RLS is already set in @ctxp->flag; otherwise sets
+ * LPFC_NVMET_CTX_RLS and appends @ctxp to lpfc_abts_nvmet_ctx_list. The flag
+ * check keeps this routine from adding the same context to the list twice.
+ **/
+void
+lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp)
+{
+       unsigned long iflag;
+
+       lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
+                       "6313 NVMET Defer ctx release xri x%x flg x%x\n",
+                       ctxp->oxid, ctxp->flag);
+
+       spin_lock_irqsave(&phba->sli4_hba.abts_nvme_buf_list_lock, iflag);
+       if (ctxp->flag & LPFC_NVMET_CTX_RLS) {
+               spin_unlock_irqrestore(&phba->sli4_hba.abts_nvme_buf_list_lock,
+                                      iflag);
+               return;
+       }
+       ctxp->flag |= LPFC_NVMET_CTX_RLS;
+       list_add_tail(&ctxp->list, &phba->sli4_hba.lpfc_abts_nvmet_ctx_list);
+       spin_unlock_irqrestore(&phba->sli4_hba.abts_nvme_buf_list_lock, iflag);
+}
+
  /**
   * lpfc_nvmet_xmt_ls_rsp_cmp - Completion handler for LS Response
   * @phba: Pointer to HBA context object.
@@ -139,6 +159,11 @@ lpfc_nvmet_rq_post(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp,
                    struct lpfc_dmabuf *mp)
  {
         if (ctxp) {
+               if (ctxp->flag)
+                       lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+                               "6314 rq_post ctx xri x%x flag x%x\n",
+                               ctxp->oxid, ctxp->flag);
+
                 if (ctxp->txrdy) {
                         pci_pool_free(phba->txrdy_payload_pool, ctxp->txrdy,
                                       ctxp->txrdy_phys);
@@ -337,39 +362,55 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
  #endif
  
         ctxp = cmdwqe->context2;
+       ctxp->flag &= ~LPFC_NVMET_IO_INP;
+
         rsp = &ctxp->ctx.fcp_req;
         op = rsp->op;
-       ctxp->flag &= ~LPFC_NVMET_IO_INP;
  
         status = bf_get(lpfc_wcqe_c_status, wcqe);
         result = wcqe->parameter;
  
-       if (!phba->targetport)
-               goto out;
+       if (phba->targetport)
+               tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+       else
+               tgtp = NULL;
  
         lpfc_nvmeio_data(phba, "NVMET FCP CMPL: xri x%x op x%x status x%x\n",
                          ctxp->oxid, op, status);
  
-       tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
         if (status) {
                 rsp->fcp_error = NVME_SC_DATA_XFER_ERROR;
                 rsp->transferred_length = 0;
-               atomic_inc(&tgtp->xmt_fcp_rsp_error);
+               if (tgtp)
+                       atomic_inc(&tgtp->xmt_fcp_rsp_error);
+
+               /* pick up SLI4 exchange busy condition */
+               if (bf_get(lpfc_wcqe_c_xb, wcqe)) {
+                       ctxp->flag |= LPFC_NVMET_XBUSY;
+
+                       lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+                                       "6315 IO Cmpl XBUSY: xri x%x: %x/%x\n",
+                                       ctxp->oxid, status, result);
+               } else {
+                       ctxp->flag &= ~LPFC_NVMET_XBUSY;
+               }
+
         } else {
                 rsp->fcp_error = NVME_SC_SUCCESS;
                 if (op == NVMET_FCOP_RSP)
                         rsp->transferred_length = rsp->rsplen;
                 else
                         rsp->transferred_length = rsp->transfer_length;
-               atomic_inc(&tgtp->xmt_fcp_rsp_cmpl);
+               if (tgtp)
+                       atomic_inc(&tgtp->xmt_fcp_rsp_cmpl);
         }
  
-out:
         if ((op == NVMET_FCOP_READDATA_RSP) ||
             (op == NVMET_FCOP_RSP)) {
                 /* Sanity check */
                 ctxp->state = LPFC_NVMET_STE_DONE;
                 ctxp->entry_cnt++;
+
  #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
                 if (phba->ktime_on) {
                         if (rsp->op == NVMET_FCOP_READDATA_RSP) {
@@ -408,9 +449,7 @@ out:
                 if (phba->ktime_on)
                         lpfc_nvmet_ktime(phba, ctxp);
  #endif
-               /* Let Abort cmpl repost the context */
-               if (!(ctxp->flag & LPFC_NVMET_ABORT_OP))
-                       lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
+               /* lpfc_nvmet_xmt_fcp_release() will recycle the context */
         } else {
                 ctxp->entry_cnt++;
                 start_clean = offsetof(struct lpfc_iocbq, wqe);
@@ -519,8 +558,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
                 container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req);
         struct lpfc_hba *phba = ctxp->phba;
         struct lpfc_iocbq *nvmewqeq;
-       unsigned long iflags;
-       int rc, id;
+       int rc;
  
  #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
         if (phba->ktime_on) {
@@ -530,7 +568,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
                         ctxp->ts_nvme_data = ktime_get_ns();
         }
         if (phba->cpucheck_on & LPFC_CHECK_NVMET_IO) {
-               id = smp_processor_id();
+               int id = smp_processor_id();
                 ctxp->cpu = id;
                 if (id < LPFC_CHECK_CPU_CNT)
                         phba->cpucheck_xmt_io[id]++;
@@ -544,33 +582,14 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
         }
  #endif
  
-       if (rsp->op == NVMET_FCOP_ABORT) {
-               lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
-                               "6103 Abort op: oxri x%x %d cnt %d\n",
-                               ctxp->oxid, ctxp->state, ctxp->entry_cnt);
-
-               lpfc_nvmeio_data(phba, "NVMET FCP ABRT: "
-                                "xri x%x state x%x cnt x%x\n",
-                                ctxp->oxid, ctxp->state, ctxp->entry_cnt);
-
-               atomic_inc(&lpfc_nvmep->xmt_fcp_abort);
-               ctxp->entry_cnt++;
-               ctxp->flag |= LPFC_NVMET_ABORT_OP;
-               if (ctxp->flag & LPFC_NVMET_IO_INP)
-                       lpfc_nvmet_sol_fcp_issue_abort(phba, ctxp, ctxp->sid,
-                                                      ctxp->oxid);
-               else
-                       lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid,
-                                                        ctxp->oxid);
-               return 0;
-       }
-
         /* Sanity check */
-       if (ctxp->state == LPFC_NVMET_STE_ABORT) {
+       if ((ctxp->flag & LPFC_NVMET_ABTS_RCV) ||
+           (ctxp->state == LPFC_NVMET_STE_ABORT)) {
                 atomic_inc(&lpfc_nvmep->xmt_fcp_drop);
                 lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
-                               "6102 Bad state IO x%x aborted\n",
+                               "6102 IO xri x%x aborted\n",
                                 ctxp->oxid);
+               rc = -ENXIO;
                 goto aerr;
         }
  
@@ -580,6 +599,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
                 lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
                                 "6152 FCP Drop IO x%x: Prep\n",
                                 ctxp->oxid);
+               rc = -ENXIO;
                 goto aerr;
         }
  
@@ -592,10 +612,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
         lpfc_nvmeio_data(phba, "NVMET FCP CMND: xri x%x op x%x len x%x\n",
                          ctxp->oxid, rsp->op, rsp->rsplen);
  
-       /* For now we take hbalock */
-       spin_lock_irqsave(&phba->hbalock, iflags);
         rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, nvmewqeq);
-       spin_unlock_irqrestore(&phba->hbalock, iflags);
         if (rc == WQE_SUCCESS) {
                 ctxp->flag |= LPFC_NVMET_IO_INP;
  #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
@@ -618,8 +635,9 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
         ctxp->wqeq->hba_wqidx = 0;
         nvmewqeq->context2 = NULL;
         nvmewqeq->context3 = NULL;
+       rc = -EBUSY;
  aerr:
-       return -ENXIO;
+       return rc;
  }
  
  static void
@@ -631,10 +649,79 @@ lpfc_nvmet_targetport_delete(struct nvmet_fc_target_port *targetport)
         complete(&tport->tport_unreg_done);
  }
  
+/**
+ * lpfc_nvmet_xmt_fcp_abort - fcp_abort entry of lpfc_tgttemplate
+ * @tgtport: target port; its private area is used as struct lpfc_nvmet_tgtport.
+ * @req: FCP request embedded as ctx.fcp_req in an lpfc receive context.
+ *
+ * Logs the abort (message 6103 and an nvmeio trace), increments the
+ * xmt_fcp_abort counter and the context's entry_cnt, then takes ctxlock.
+ * If LPFC_NVMET_XBUSY is set in the context flag the routine returns without
+ * issuing an abort. Otherwise LPFC_NVMET_ABORT_OP is set and
+ * lpfc_nvmet_sol_fcp_issue_abort() is called when LPFC_NVMET_IO_INP is set,
+ * or lpfc_nvmet_unsol_fcp_issue_abort() when it is not; the issue call is
+ * made with ctxlock still held.
+ **/
+static void
+lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport,
+                        struct nvmefc_tgt_fcp_req *req)
+{
+       struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private;
+       struct lpfc_nvmet_rcv_ctx *ctxp =
+               container_of(req, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req);
+       struct lpfc_hba *phba = ctxp->phba;
+       unsigned long flags;
+
+       lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+                       "6103 Abort op: oxri x%x flg x%x cnt %d\n",
+                       ctxp->oxid, ctxp->flag, ctxp->entry_cnt);
+
+       lpfc_nvmeio_data(phba, "NVMET FCP ABRT: "
+                        "xri x%x flg x%x cnt x%x\n",
+                        ctxp->oxid, ctxp->flag, ctxp->entry_cnt);
+
+       atomic_inc(&lpfc_nvmep->xmt_fcp_abort);
+       ctxp->entry_cnt++;
+       spin_lock_irqsave(&ctxp->ctxlock, flags);
+
+       /* Since iaab/iaar are NOT set, we need to check
+        * if the firmware is in process of aborting IO
+        */
+       if (ctxp->flag & LPFC_NVMET_XBUSY) {
+               spin_unlock_irqrestore(&ctxp->ctxlock, flags);
+               return;
+       }
+       ctxp->flag |= LPFC_NVMET_ABORT_OP;
+       if (ctxp->flag & LPFC_NVMET_IO_INP)
+               lpfc_nvmet_sol_fcp_issue_abort(phba, ctxp, ctxp->sid,
+                                              ctxp->oxid);
+       else
+               lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid,
+                                                ctxp->oxid);
+       spin_unlock_irqrestore(&ctxp->ctxlock, flags);
+}
+
+/**
+ * lpfc_nvmet_xmt_fcp_release - fcp_req_release entry of lpfc_tgttemplate
+ * @tgtport: target port (not referenced by this routine).
+ * @rsp: FCP request embedded as ctx.fcp_req in an lpfc receive context.
+ *
+ * With ctxlock held, checks whether LPFC_NVMET_ABORT_OP or LPFC_NVMET_XBUSY
+ * is set in the context flag. If either is set, the context is passed to
+ * lpfc_nvmet_defer_release() and, after tracing, the routine returns without
+ * reposting. If neither is set, the context and its receive buffer are
+ * handed to lpfc_nvmet_rq_post().
+ **/
+static void
+lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport,
+                          struct nvmefc_tgt_fcp_req *rsp)
+{
+       struct lpfc_nvmet_rcv_ctx *ctxp =
+               container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req);
+       struct lpfc_hba *phba = ctxp->phba;
+       unsigned long flags;
+       bool aborting = false;
+
+       spin_lock_irqsave(&ctxp->ctxlock, flags);
+       if ((ctxp->flag & LPFC_NVMET_ABORT_OP) ||
+           (ctxp->flag & LPFC_NVMET_XBUSY)) {
+               aborting = true;
+               /* let the abort path do the real release */
+               lpfc_nvmet_defer_release(phba, ctxp);
+       }
+       spin_unlock_irqrestore(&ctxp->ctxlock, flags);
+
+       lpfc_nvmeio_data(phba, "NVMET FCP FREE: xri x%x ste %d\n", ctxp->oxid,
+                        ctxp->state, 0);
+
+       if (aborting)
+               return;
+
+       lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
+}
+
  static struct nvmet_fc_target_template lpfc_tgttemplate = {
         .targetport_delete = lpfc_nvmet_targetport_delete,
         .xmt_ls_rsp     = lpfc_nvmet_xmt_ls_rsp,
         .fcp_op         = lpfc_nvmet_xmt_fcp_op,
+       .fcp_abort      = lpfc_nvmet_xmt_fcp_abort,
+       .fcp_req_release = lpfc_nvmet_xmt_fcp_release,
  
         .max_hw_queues  = 1,
         .max_sgl_segments = LPFC_NVMET_DEFAULT_SEGS,
@@ -663,14 +750,31 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
         pinfo.port_name = wwn_to_u64(vport->fc_portname.u.wwn);
         pinfo.port_id = vport->fc_myDID;
  
+       /* Limit to LPFC_MAX_NVME_SEG_CNT.
+        * For now need + 1 to get around NVME transport logic.
+        */
+       if (phba->cfg_sg_seg_cnt > LPFC_MAX_NVME_SEG_CNT) {
+               lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_INIT,
+                               "6400 Reducing sg segment cnt to %d\n",
+                               LPFC_MAX_NVME_SEG_CNT);
+               phba->cfg_nvme_seg_cnt = LPFC_MAX_NVME_SEG_CNT;
+       } else {
+               phba->cfg_nvme_seg_cnt = phba->cfg_sg_seg_cnt;
+       }
+       lpfc_tgttemplate.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1;
         lpfc_tgttemplate.max_hw_queues = phba->cfg_nvme_io_channel;
-       lpfc_tgttemplate.max_sgl_segments = phba->cfg_sg_seg_cnt;
         lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP |
-                                          NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED;
+                                          NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED |
+                                          NVMET_FCTGTFEAT_CMD_IN_ISR |
+                                          NVMET_FCTGTFEAT_OPDONE_IN_ISR;
  
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
         error = nvmet_fc_register_targetport(&pinfo, &lpfc_tgttemplate,
                                              &phba->pcidev->dev,
                                              &phba->targetport);
+#else
+       error = -ENOMEM;
+#endif
         if (error) {
                 lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC,
                                 "6025 Cannot register NVME targetport "
@@ -731,9 +835,138 @@ lpfc_nvmet_update_targetport(struct lpfc_hba *phba)
         return 0;
  }
  
+/**
+ * lpfc_sli4_nvmet_xri_aborted - Fast-path process of nvmet xri abort
+ * @phba: pointer to lpfc hba data structure.
+ * @axri: pointer to the nvmet xri abort wcqe structure.
+ *
+ * This routine is invoked by the worker thread to process a SLI4 fast-path
+ * NVMET aborted xri.
+ *
+ * The XRI and remote exchange id are read from @axri and message 6317 is
+ * logged. If LPFC_ENABLE_NVME is not set in cfg_enable_fc4_type the routine
+ * returns. Otherwise lpfc_abts_nvmet_ctx_list is searched, with hbalock and
+ * abts_nvme_buf_list_lock held, for the context whose
+ * rqb_buffer->sglq->sli4_xritag equals the XRI.
+ *
+ * On a match: if LPFC_NVMET_CTX_RLS is set and LPFC_NVMET_ABORT_OP is clear,
+ * the context is unlinked from the list and marked as released.
+ * LPFC_NVMET_XBUSY is cleared in either case and both locks are dropped
+ * (whether active_rrq_list is empty is sampled before hbalock is released).
+ * The node for the context's sid is looked up with lpfc_findnode_did(); if it
+ * is active and in the UNMAPPED or MAPPED state, lpfc_set_rrq_active() and
+ * lpfc_sli4_abts_err_handler() are called. Message 6318 is logged, the
+ * context is reposted with lpfc_nvmet_rq_post() when it was marked released,
+ * and the worker is woken when the sampled active_rrq_list state was "empty".
+ *
+ * If no context matches, both locks are dropped and the routine returns
+ * without logging anything further.
+ **/
+void
+lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
+                           struct sli4_wcqe_xri_aborted *axri)
+{
+       uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri);
+       uint16_t rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri);
+       struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp;
+       struct lpfc_nodelist *ndlp;
+       unsigned long iflag = 0;
+       int rrq_empty = 0;
+       bool released = false;
+
+       lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+                       "6317 XB aborted xri x%x rxid x%x\n", xri, rxid);
+
+       if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
+               return;
+       spin_lock_irqsave(&phba->hbalock, iflag);
+       spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+       list_for_each_entry_safe(ctxp, next_ctxp,
+                                &phba->sli4_hba.lpfc_abts_nvmet_ctx_list,
+                                list) {
+               if (ctxp->rqb_buffer->sglq->sli4_xritag != xri)
+                       continue;
+
+               /* Check if we already received a free context call
+                * and we have completed processing an abort situation.
+                */
+               if (ctxp->flag & LPFC_NVMET_CTX_RLS &&
+                   !(ctxp->flag & LPFC_NVMET_ABORT_OP)) {
+                       list_del(&ctxp->list);
+                       released = true;
+               }
+               ctxp->flag &= ~LPFC_NVMET_XBUSY;
+               spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+
+               rrq_empty = list_empty(&phba->active_rrq_list);
+               spin_unlock_irqrestore(&phba->hbalock, iflag);
+               ndlp = lpfc_findnode_did(phba->pport, ctxp->sid);
+               if (ndlp && NLP_CHK_NODE_ACT(ndlp) &&
+                   (ndlp->nlp_state == NLP_STE_UNMAPPED_NODE ||
+                    ndlp->nlp_state == NLP_STE_MAPPED_NODE)) {
+                       lpfc_set_rrq_active(phba, ndlp,
+                               ctxp->rqb_buffer->sglq->sli4_lxritag,
+                               rxid, 1);
+                       lpfc_sli4_abts_err_handler(phba, ndlp, axri);
+               }
+
+               lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+                               "6318 XB aborted %x flg x%x (%x)\n",
+                               ctxp->oxid, ctxp->flag, released);
+               if (released)
+                       lpfc_nvmet_rq_post(phba, ctxp,
+                                          &ctxp->rqb_buffer->hbuf);
+               if (rrq_empty)
+                       lpfc_worker_wake_up(phba);
+               return;
+       }
+       spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+       spin_unlock_irqrestore(&phba->hbalock, iflag);
+}
+
+/**
+ * lpfc_nvmet_rcv_unsol_abort - Handle a received ABTS for an NVMET exchange
+ * @vport: virtual port the frame arrived on; its phba is used throughout.
+ * @fc_hdr: FC frame header of the received frame.
+ *
+ * When CONFIG_NVME_TARGET_FC is enabled, fh_ox_id is converted from
+ * big-endian and lpfc_abts_nvmet_ctx_list is searched (under hbalock and
+ * abts_nvme_buf_list_lock) for a context whose
+ * rqb_buffer->sglq->sli4_xritag equals that value.
+ *
+ * On a match the list locks are dropped, LPFC_NVMET_ABTS_RCV is set in the
+ * context flag under ctxlock, nvmet_fc_rcv_fcp_abort() is called for the
+ * context's fcp_req, and lpfc_sli4_seq_abort_rsp() is called with a final
+ * argument of 1 (BA_ACC, per the in-line comment).
+ *
+ * With no match, lpfc_sli4_seq_abort_rsp() is called with a final argument of
+ * 0 (BA_RJT, per the in-line comment).
+ *
+ * When CONFIG_NVME_TARGET_FC is not enabled the body is compiled out.
+ *
+ * Return: always 0.
+ **/
+int
+lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
+                          struct fc_frame_header *fc_hdr)
+
+{
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
+       struct lpfc_hba *phba = vport->phba;
+       struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp;
+       struct nvmefc_tgt_fcp_req *rsp;
+       uint16_t xri;
+       unsigned long iflag = 0;
+
+       xri = be16_to_cpu(fc_hdr->fh_ox_id);
+
+       spin_lock_irqsave(&phba->hbalock, iflag);
+       spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+       list_for_each_entry_safe(ctxp, next_ctxp,
+                                &phba->sli4_hba.lpfc_abts_nvmet_ctx_list,
+                                list) {
+               if (ctxp->rqb_buffer->sglq->sli4_xritag != xri)
+                       continue;
+
+               spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+               spin_unlock_irqrestore(&phba->hbalock, iflag);
+
+               spin_lock_irqsave(&ctxp->ctxlock, iflag);
+               ctxp->flag |= LPFC_NVMET_ABTS_RCV;
+               spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
+
+               lpfc_nvmeio_data(phba,
+                       "NVMET ABTS RCV: xri x%x CPU %02x rjt %d\n",
+                       xri, smp_processor_id(), 0);
+
+               lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+                               "6319 NVMET Rcv ABTS:acc xri x%x\n", xri);
+
+               rsp = &ctxp->ctx.fcp_req;
+               nvmet_fc_rcv_fcp_abort(phba->targetport, rsp);
+
+               /* Respond with BA_ACC accordingly */
+               lpfc_sli4_seq_abort_rsp(vport, fc_hdr, 1);
+               return 0;
+       }
+       spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+       spin_unlock_irqrestore(&phba->hbalock, iflag);
+
+       lpfc_nvmeio_data(phba, "NVMET ABTS RCV: xri x%x CPU %02x rjt %d\n",
+                        xri, smp_processor_id(), 1);
+
+       lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+                       "6320 NVMET Rcv ABTS:rjt xri x%x\n", xri);
+
+       /* Respond with BA_RJT accordingly */
+       lpfc_sli4_seq_abort_rsp(vport, fc_hdr, 0);
+#endif
+       return 0;
+}
+
  void
  lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba)
  {
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
         struct lpfc_nvmet_tgtport *tgtp;
  
         if (phba->nvmet_support == 0)
@@ -745,6 +978,7 @@ lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba)
                 wait_for_completion_timeout(&tgtp->tport_unreg_done, 5);
         }
         phba->targetport = NULL;
+#endif
  }
  
  /**
@@ -764,6 +998,7 @@ static void
  lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
                            struct hbq_dmabuf *nvmebuf)
  {
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
         struct lpfc_nvmet_tgtport *tgtp;
         struct fc_frame_header *fc_hdr;
         struct lpfc_nvmet_rcv_ctx *ctxp;
@@ -844,6 +1079,7 @@ dropit:
  
         atomic_inc(&tgtp->xmt_ls_abort);
         lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, sid, oxid);
+#endif
  }
  
  /**
@@ -865,6 +1101,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
                             struct rqb_dmabuf *nvmebuf,
                             uint64_t isr_timestamp)
  {
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
         struct lpfc_nvmet_rcv_ctx *ctxp;
         struct lpfc_nvmet_tgtport *tgtp;
         struct fc_frame_header *fc_hdr;
@@ -913,6 +1150,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
         ctxp->rqb_buffer = nvmebuf;
         ctxp->entry_cnt = 1;
         ctxp->flag = 0;
+       spin_lock_init(&ctxp->ctxlock);
  
  #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
         if (phba->ktime_on) {
@@ -935,8 +1173,8 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
         }
  #endif
  
-       lpfc_nvmeio_data(phba, "NVMET FCP  RCV: xri x%x sz %d from %06x\n",
-                        oxid, size, sid);
+       lpfc_nvmeio_data(phba, "NVMET FCP  RCV: xri x%x sz %d CPU %02x\n",
+                        oxid, size, smp_processor_id());
  
         atomic_inc(&tgtp->rcv_fcp_cmd_in);
         /*
@@ -955,7 +1193,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
  
         atomic_inc(&tgtp->rcv_fcp_cmd_drop);
         lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
-                       "6159 FCP Drop IO x%x: nvmet_fc_rcv_fcp_req x%x\n",
+                       "6159 FCP Drop IO x%x: err x%x\n",
                         ctxp->oxid, rc);
  dropit:
         lpfc_nvmeio_data(phba, "NVMET FCP DROP: xri x%x sz %d from %06x\n",
@@ -970,6 +1208,7 @@ dropit:
                 /* We assume a rcv'ed cmd ALWAYs fits into 1 buffer */
                 lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf);
         }
+#endif
  }
  
  /**
@@ -1114,7 +1353,7 @@ lpfc_nvmet_prep_ls_wqe(struct lpfc_hba *phba,
         bf_set(wqe_dfctl, &wqe->xmit_sequence.wge_ctl, 0);
         bf_set(wqe_ls, &wqe->xmit_sequence.wge_ctl, 1);
         bf_set(wqe_la, &wqe->xmit_sequence.wge_ctl, 0);
-       bf_set(wqe_rctl, &wqe->xmit_sequence.wge_ctl, FC_RCTL_DD_SOL_CTL);
+       bf_set(wqe_rctl, &wqe->xmit_sequence.wge_ctl, FC_RCTL_ELS4_REP);
         bf_set(wqe_type, &wqe->xmit_sequence.wge_ctl, FC_TYPE_NVME);
  
         /* Word 6 */
@@ -1209,11 +1448,11 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
                 return NULL;
         }
  
-       if (rsp->sg_cnt > phba->cfg_sg_seg_cnt) {
+       if (rsp->sg_cnt > phba->cfg_nvme_seg_cnt) {
                 lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
                                 "6109 lpfc_nvmet_prep_fcp_wqe: seg cnt err: "
-                               "NPORT x%x oxid:x%x\n",
-                               ctxp->sid, ctxp->oxid);
+                               "NPORT x%x oxid:x%x cnt %d\n",
+                               ctxp->sid, ctxp->oxid, phba->cfg_nvme_seg_cnt);
                 return NULL;
         }
  
@@ -1445,7 +1684,6 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
  
         case NVMET_FCOP_RSP:
                 /* Words 0 - 2 */
-               sgel = &rsp->sg[0];
                 physaddr = rsp->rspdma;
                 wqe->fcp_trsp.bde.tus.f.bdeFlags = BUFF_TYPE_BDE_64;
                 wqe->fcp_trsp.bde.tus.f.bdeSize = rsp->rsplen;
@@ -1566,6 +1804,8 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
         struct lpfc_nvmet_rcv_ctx *ctxp;
         struct lpfc_nvmet_tgtport *tgtp;
         uint32_t status, result;
+       unsigned long flags;
+       bool released = false;
  
         ctxp = cmdwqe->context2;
         status = bf_get(lpfc_wcqe_c_status, wcqe);
@@ -1574,21 +1814,46 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
         tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
         atomic_inc(&tgtp->xmt_abort_cmpl);
  
+       ctxp->state = LPFC_NVMET_STE_DONE;
+
+       /* Check if we already received a free context call
+        * and we have completed processing an abort situation.
+        */
+       spin_lock_irqsave(&ctxp->ctxlock, flags);
+       if ((ctxp->flag & LPFC_NVMET_CTX_RLS) &&
+           !(ctxp->flag & LPFC_NVMET_XBUSY)) {
+               list_del(&ctxp->list);
+               released = true;
+       }
+       ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
+       spin_unlock_irqrestore(&ctxp->ctxlock, flags);
+
         lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
-                       "6165 Abort cmpl: xri x%x WCQE: %08x %08x %08x %08x\n",
-                       ctxp->oxid, wcqe->word0, wcqe->total_data_placed,
+                       "6165 ABORT cmpl: xri x%x flg x%x (%d) "
+                       "WCQE: %08x %08x %08x %08x\n",
+                       ctxp->oxid, ctxp->flag, released,
+                       wcqe->word0, wcqe->total_data_placed,
                         result, wcqe->word3);
  
-       ctxp->state = LPFC_NVMET_STE_DONE;
-       lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
+       /*
+        * if transport has released ctx, then can reuse it. Otherwise,
+        * will be recycled by transport release call.
+        */
+       if (released)
+               lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
  
         cmdwqe->context2 = NULL;
         cmdwqe->context3 = NULL;
         lpfc_sli_release_iocbq(phba, cmdwqe);
+
+       /* Since iaab/iaar are NOT set, there is no work left.
+        * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted
+        * should have been called already.
+        */
  }
  
  /**
- * lpfc_nvmet_xmt_fcp_abort_cmp - Completion handler for ABTS
+ * lpfc_nvmet_unsol_fcp_abort_cmp - Completion handler for ABTS
   * @phba: Pointer to HBA context object.
   * @cmdwqe: Pointer to driver command WQE object.
   * @wcqe: Pointer to driver response CQE object.
@@ -1598,12 +1863,14 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
   * The function frees memory resources used for the NVME commands.
   **/
  static void
-lpfc_nvmet_xmt_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
-                            struct lpfc_wcqe_complete *wcqe)
+lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
+                              struct lpfc_wcqe_complete *wcqe)
  {
         struct lpfc_nvmet_rcv_ctx *ctxp;
         struct lpfc_nvmet_tgtport *tgtp;
+       unsigned long flags;
         uint32_t status, result;
+       bool released = false;
  
         ctxp = cmdwqe->context2;
         status = bf_get(lpfc_wcqe_c_status, wcqe);
@@ -1612,23 +1879,55 @@ lpfc_nvmet_xmt_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
         tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
         atomic_inc(&tgtp->xmt_abort_cmpl);
  
+       if (!ctxp) {
+               /* if context is clear, related io already complete */
+               lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+                               "6070 ABTS cmpl: WCQE: %08x %08x %08x %08x\n",
+                               wcqe->word0, wcqe->total_data_placed,
+                               result, wcqe->word3);
+               return;
+       }
+
+       /* Sanity check */
+       if (ctxp->state != LPFC_NVMET_STE_ABORT) {
+               lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
+                               "6112 ABTS Wrong state:%d oxid x%x\n",
+                               ctxp->state, ctxp->oxid);
+       }
+
+       /* Check if we already received a free context call
+        * and we have completed processing an abort situation.
+        */
+       ctxp->state = LPFC_NVMET_STE_DONE;
+       spin_lock_irqsave(&ctxp->ctxlock, flags);
+       if ((ctxp->flag & LPFC_NVMET_CTX_RLS) &&
+           !(ctxp->flag & LPFC_NVMET_XBUSY)) {
+               list_del(&ctxp->list);
+               released = true;
+       }
+       ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
+       spin_unlock_irqrestore(&ctxp->ctxlock, flags);
+
         lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
-                       "6070 Abort cmpl: ctx %p WCQE: %08x %08x %08x %08x\n",
-                       ctxp, wcqe->word0, wcqe->total_data_placed,
+                       "6316 ABTS cmpl xri x%x flg x%x (%x) "
+                       "WCQE: %08x %08x %08x %08x\n",
+                       ctxp->oxid, ctxp->flag, released,
+                       wcqe->word0, wcqe->total_data_placed,
                         result, wcqe->word3);
-
-       if (ctxp) {
-               /* Sanity check */
-               if (ctxp->state != LPFC_NVMET_STE_ABORT) {
-                       lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
-                                       "6112 ABORT Wrong state:%d oxid x%x\n",
-                                       ctxp->state, ctxp->oxid);
-               }
-               ctxp->state = LPFC_NVMET_STE_DONE;
+       /*
+        * if transport has released ctx, then can reuse it. Otherwise,
+        * will be recycled by transport release call.
+        */
+       if (released)
                 lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
-               cmdwqe->context2 = NULL;
-               cmdwqe->context3 = NULL;
-       }
+
+       cmdwqe->context2 = NULL;
+       cmdwqe->context3 = NULL;
+
+       /* Since iaab/iaar are NOT set, there is no work left.
+        * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted
+        * should have been called already.
+        */
  }
  
  /**
@@ -1681,10 +1980,14 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba,
         struct lpfc_nodelist *ndlp;
  
         lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
-                       "6067 %s: Entrypoint: sid %x xri %x\n", __func__,
-                       sid, xri);
+                       "6067 ABTS: sid %x xri x%x/x%x\n",
+                       sid, xri, ctxp->wqeq->sli4_xritag);
  
         tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+       if (!ctxp->wqeq) {
+               ctxp->wqeq = ctxp->rqb_buffer->iocbq;
+               ctxp->wqeq->hba_wqidx = 0;
+       }
  
         ndlp = lpfc_findnode_did(phba->pport, sid);
         if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) ||
@@ -1693,7 +1996,7 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba,
                 atomic_inc(&tgtp->xmt_abort_rsp_error);
                 lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS,
                                 "6134 Drop ABTS - wrong NDLP state x%x.\n",
-                               ndlp->nlp_state);
+                               (ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE);
  
                 /* No failure to an ABTS request. */
                 return 0;
@@ -1790,10 +2093,11 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
             (ndlp->nlp_state != NLP_STE_MAPPED_NODE))) {
                 atomic_inc(&tgtp->xmt_abort_rsp_error);
                 lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS,
-                               "6160 Drop ABTS - wrong NDLP state x%x.\n",
-                               ndlp->nlp_state);
+                               "6160 Drop ABORT - wrong NDLP state x%x.\n",
+                               (ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE);
  
                 /* No failure to an ABTS request. */
+               ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
                 return 0;
         }
  
@@ -1801,9 +2105,10 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
         ctxp->abort_wqeq = lpfc_sli_get_iocbq(phba);
         if (!ctxp->abort_wqeq) {
                 lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS,
-                               "6161 Abort failed: No wqeqs: "
+                               "6161 ABORT failed: No wqeqs: "
                                 "xri: x%x\n", ctxp->oxid);
                 /* No failure to an ABTS request. */
+               ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
                 return 0;
         }
         abts_wqeq = ctxp->abort_wqeq;
@@ -1811,8 +2116,8 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
         ctxp->state = LPFC_NVMET_STE_ABORT;
  
         /* Announce entry to new IO submit field. */
-       lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
-                       "6162 Abort Request to rport DID x%06x "
+       lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+                       "6162 ABORT Request to rport DID x%06x "
                         "for xri x%x x%x\n",
                         ctxp->sid, ctxp->oxid, ctxp->wqeq->sli4_xritag);
  
@@ -1828,6 +2133,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
                                 "NVME Req now. hba_flag x%x oxid x%x\n",
                                 phba->hba_flag, ctxp->oxid);
                 lpfc_sli_release_iocbq(phba, abts_wqeq);
+               ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
                 return 0;
         }
  
@@ -1839,6 +2145,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
                                 "still pending on oxid x%x\n",
                                 ctxp->oxid);
                 lpfc_sli_release_iocbq(phba, abts_wqeq);
+               ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
                 return 0;
         }
  
@@ -1886,9 +2193,10 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
         if (rc == WQE_SUCCESS)
                 return 0;
  
+       ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
         lpfc_sli_release_iocbq(phba, abts_wqeq);
-       lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
-                       "6166 Failed abts issue_wqe with status x%x "
+       lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
+                       "6166 Failed ABORT issue_wqe with status x%x "
                         "for oxid x%x.\n",
                         rc, ctxp->oxid);
         return 1;
@@ -1917,8 +2225,8 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba,
  
         spin_lock_irqsave(&phba->hbalock, flags);
         abts_wqeq = ctxp->wqeq;
-       abts_wqeq->wqe_cmpl = lpfc_nvmet_xmt_fcp_abort_cmp;
-       abts_wqeq->iocb_cmpl = 0;
+       abts_wqeq->wqe_cmpl = lpfc_nvmet_unsol_fcp_abort_cmp;
+       abts_wqeq->iocb_cmpl = NULL;
         abts_wqeq->iocb_flag |= LPFC_IO_NVMET;
         rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_wqeq);
         spin_unlock_irqrestore(&phba->hbalock, flags);
@@ -1928,7 +2236,7 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba,
         }
  
  aerr:
-       lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
+       ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
         atomic_inc(&tgtp->xmt_abort_rsp_error);
         lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS,
                         "6135 Failed to Issue ABTS for oxid x%x. Status x%x\n",
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h

index ca96f05c1604f5a5c01c4d201f8867941a0d78c8..128759fe665058dba133febdaa7140f29469f733 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_nvmet.h
+++ b/drivers/scsi/lpfc/lpfc_nvmet.h
@@ -21,9 +21,7 @@
   * included with this package.                                     *
   ********************************************************************/
  
-#define LPFC_NVMET_MIN_SEGS            16
-#define LPFC_NVMET_DEFAULT_SEGS                64      /* 256K IOs */
-#define LPFC_NVMET_MAX_SEGS            510
+#define LPFC_NVMET_DEFAULT_SEGS                (64 + 1)        /* 256K IOs */
  #define LPFC_NVMET_SUCCESS_LEN 12
  
  /* Used for NVME Target */
@@ -77,10 +75,12 @@ struct lpfc_nvmet_rcv_ctx {
                 struct nvmefc_tgt_ls_req ls_req;
                 struct nvmefc_tgt_fcp_req fcp_req;
         } ctx;
+       struct list_head list;
         struct lpfc_hba *phba;
         struct lpfc_iocbq *wqeq;
         struct lpfc_iocbq *abort_wqeq;
         dma_addr_t txrdy_phys;
+       spinlock_t ctxlock; /* protect flag access */
         uint32_t *txrdy;
         uint32_t sid;
         uint32_t offset;
@@ -97,8 +97,11 @@ struct lpfc_nvmet_rcv_ctx {
  #define LPFC_NVMET_STE_RSP             4
  #define LPFC_NVMET_STE_DONE            5
         uint16_t flag;
-#define LPFC_NVMET_IO_INP              1
-#define LPFC_NVMET_ABORT_OP            2
+#define LPFC_NVMET_IO_INP              0x1  /* IO is in progress on exchange */
+#define LPFC_NVMET_ABORT_OP            0x2  /* Abort WQE issued on exchange */
+#define LPFC_NVMET_XBUSY               0x4  /* XB bit set on IO cmpl */
+#define LPFC_NVMET_CTX_RLS             0x8  /* ctx free requested */
+#define LPFC_NVMET_ABTS_RCV            0x10  /* ABTS received on exchange */
         struct rqb_dmabuf *rqb_buffer;
  
  #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c

index 9d6384af9fce7e9b321e8031a421869095fbab55..54fd0c81ceaf69a7ceb475acb87e309df7e10de1 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -5953,12 +5953,13 @@ struct scsi_host_template lpfc_template_nvme = {
         .track_queue_depth      = 0,
  };
  
-struct scsi_host_template lpfc_template_s3 = {
+struct scsi_host_template lpfc_template_no_hr = {
         .module                 = THIS_MODULE,
         .name                   = LPFC_DRIVER_NAME,
         .proc_name              = LPFC_DRIVER_NAME,
         .info                   = lpfc_info,
         .queuecommand           = lpfc_queuecommand,
+       .eh_timed_out           = fc_eh_timed_out,
         .eh_abort_handler       = lpfc_abort_handler,
         .eh_device_reset_handler = lpfc_device_reset_handler,
         .eh_target_reset_handler = lpfc_target_reset_handler,
@@ -6015,7 +6016,6 @@ struct scsi_host_template lpfc_vport_template = {
         .eh_abort_handler       = lpfc_abort_handler,
         .eh_device_reset_handler = lpfc_device_reset_handler,
         .eh_target_reset_handler = lpfc_target_reset_handler,
-       .eh_bus_reset_handler   = lpfc_bus_reset_handler,
         .slave_alloc            = lpfc_slave_alloc,
         .slave_configure        = lpfc_slave_configure,
         .slave_destroy          = lpfc_slave_destroy,
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c

index e43e5e23c24b475f3f8930bcae9cb67e4685153e..cf19f4976f5fb6338509d85a6e9c4624b9b882b5 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -1,3 +1,4 @@
+
  /*******************************************************************
   * This file is part of the Emulex Linux Device Driver for         *
   * Fibre Channel Host Bus Adapters.                                *
@@ -952,7 +953,7 @@ __lpfc_sli_get_els_sglq(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq)
         start_sglq = sglq;
         while (!found) {
                 if (!sglq)
-                       return NULL;
+                       break;
                 if (ndlp && ndlp->active_rrqs_xri_bitmap &&
                     test_bit(sglq->sli4_lxritag,
                     ndlp->active_rrqs_xri_bitmap)) {
@@ -6337,7 +6338,7 @@ lpfc_sli4_get_allocated_extnts(struct lpfc_hba *phba, uint16_t type,
  }
  
  /**
- * lpfc_sli4_repost_sgl_list - Repsot the buffers sgl pages as block
+ * lpfc_sli4_repost_sgl_list - Repost the buffers sgl pages as block
   * @phba: pointer to lpfc hba data structure.
   * @pring: Pointer to driver SLI ring object.
   * @sgl_list: linked link of sgl buffers to post
@@ -12212,6 +12213,41 @@ void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *phba)
         }
  }
  
+/**
+ * lpfc_sli4_nvme_xri_abort_event_proc - Process nvme xri abort event
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked by the worker thread to process all the pending
+ * SLI4 NVME abort XRI events.
+ **/
+void lpfc_sli4_nvme_xri_abort_event_proc(struct lpfc_hba *phba)
+{
+       struct lpfc_cq_event *cq_event;
+
+       /* First, declare the nvme xri abort event has been handled */
+       spin_lock_irq(&phba->hbalock);
+       phba->hba_flag &= ~NVME_XRI_ABORT_EVENT;
+       spin_unlock_irq(&phba->hbalock);
+       /* Now, handle all the queued nvme xri abort events */
+       while (!list_empty(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue)) {
+               /* Get the first event from the head of the event queue */
+               spin_lock_irq(&phba->hbalock);
+               list_remove_head(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue,
+                                cq_event, struct lpfc_cq_event, list);
+               spin_unlock_irq(&phba->hbalock);
+               /* Route the aborted XRI to the nvmet or the nvme handler */
+               if (phba->nvmet_support) {
+                       lpfc_sli4_nvmet_xri_aborted(phba,
+                                                   &cq_event->cqe.wcqe_axri);
+               } else {
+                       lpfc_sli4_nvme_xri_aborted(phba,
+                                                  &cq_event->cqe.wcqe_axri);
+               }
+               /* Free the event processed back to the free pool */
+               lpfc_sli4_cq_event_release(phba, cq_event);
+       }
+}
+
  /**
   * lpfc_sli4_els_xri_abort_event_proc - Process els xri abort event
   * @phba: pointer to lpfc hba data structure.
@@ -12709,10 +12745,22 @@ lpfc_sli4_sp_handle_abort_xri_wcqe(struct lpfc_hba *phba,
                 spin_unlock_irqrestore(&phba->hbalock, iflags);
                 workposted = true;
                 break;
+       case LPFC_NVME:
+               spin_lock_irqsave(&phba->hbalock, iflags);
+               list_add_tail(&cq_event->list,
+                             &phba->sli4_hba.sp_nvme_xri_aborted_work_queue);
+               /* Set the nvme xri abort event flag */
+               phba->hba_flag |= NVME_XRI_ABORT_EVENT;
+               spin_unlock_irqrestore(&phba->hbalock, iflags);
+               workposted = true;
+               break;
         default:
                 lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-                               "0603 Invalid work queue CQE subtype (x%x)\n",
-                               cq->subtype);
+                               "0603 Invalid CQ subtype %d: "
+                               "%08x %08x %08x %08x\n",
+                               cq->subtype, wcqe->word0, wcqe->parameter,
+                               wcqe->word2, wcqe->word3);
+               lpfc_sli4_cq_event_release(phba, cq_event);
                 workposted = false;
                 break;
         }
@@ -13710,7 +13758,10 @@ lpfc_sli4_queue_free(struct lpfc_queue *queue)
                 lpfc_free_rq_buffer(queue->phba, queue);
                 kfree(queue->rqbp);
         }
-       kfree(queue->pring);
+
+       if (!list_empty(&queue->wq_list))
+               list_del(&queue->wq_list);
+
         kfree(queue);
         return;
  }
@@ -13827,6 +13878,8 @@ lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset)
   * @startq: The starting FCP EQ to modify
   *
   * This function sends an MODIFY_EQ_DELAY mailbox command to the HBA.
+ * The command allows up to LPFC_MAX_EQ_DELAY_EQID_CNT EQ ID's to be
+ * updated in one mailbox command.
   *
   * The @phba struct is used to send mailbox command to HBA. The @startq
   * is used to get the starting FCP EQ to change.
@@ -13879,7 +13932,7 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq)
                 eq_delay->u.request.eq[cnt].phase = 0;
                 eq_delay->u.request.eq[cnt].delay_multi = dmult;
                 cnt++;
-               if (cnt >= LPFC_MAX_EQ_DELAY)
+               if (cnt >= LPFC_MAX_EQ_DELAY_EQID_CNT)
                         break;
         }
         eq_delay->u.request.num_eq = cnt;
@@ -14688,6 +14741,9 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,
         case LPFC_Q_CREATE_VERSION_1:
                 bf_set(lpfc_mbx_wq_create_wqe_count, &wq_create->u.request_1,
                        wq->entry_count);
+               bf_set(lpfc_mbox_hdr_version, &shdr->request,
+                      LPFC_Q_CREATE_VERSION_1);
+
                 switch (wq->entry_size) {
                 default:
                 case 64:
@@ -15185,17 +15241,17 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp,
                 drq = drqp[idx];
                 cq  = cqp[idx];
  
-               if (hrq->entry_count != drq->entry_count) {
-                       status = -EINVAL;
-                       goto out;
-               }
-
                 /* sanity check on queue memory */
                 if (!hrq || !drq || !cq) {
                         status = -ENODEV;
                         goto out;
                 }
  
+               if (hrq->entry_count != drq->entry_count) {
+                       status = -EINVAL;
+                       goto out;
+               }
+
                 if (idx == 0) {
                         bf_set(lpfc_mbx_rq_create_num_pages,
                                &rq_create->u.request,
@@ -15511,6 +15567,8 @@ lpfc_wq_destroy(struct lpfc_hba *phba, struct lpfc_queue *wq)
         }
         /* Remove wq from any list */
         list_del_init(&wq->list);
+       kfree(wq->pring);
+       wq->pring = NULL;
         mempool_free(mbox, wq->phba->mbox_mem_pool);
         return status;
  }
@@ -16463,7 +16521,7 @@ lpfc_sli4_xri_inrange(struct lpfc_hba *phba,
   * This function sends a basic response to a previous unsol sequence abort
   * event after aborting the sequence handling.
   **/
-static void
+void
  lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport,
                         struct fc_frame_header *fc_hdr, bool aborted)
  {
@@ -16484,14 +16542,13 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport,
  
         ndlp = lpfc_findnode_did(vport, sid);
         if (!ndlp) {
-               ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+               ndlp = lpfc_nlp_init(vport, sid);
                 if (!ndlp) {
                         lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS,
                                          "1268 Failed to allocate ndlp for "
                                          "oxid:x%x SID:x%x\n", oxid, sid);
                         return;
                 }
-               lpfc_nlp_init(vport, ndlp, sid);
                 /* Put ndlp onto pport node list */
                 lpfc_enqueue_node(vport, ndlp);
         } else if (!NLP_CHK_NODE_ACT(ndlp)) {
@@ -16640,6 +16697,11 @@ lpfc_sli4_handle_unsol_abort(struct lpfc_vport *vport,
         }
         lpfc_in_buf_free(phba, &dmabuf->dbuf);
  
+       if (phba->nvmet_support) {
+               lpfc_nvmet_rcv_unsol_abort(vport, &fc_hdr);
+               return;
+       }
+
         /* Respond with BA_ACC or BA_RJT accordingly */
         lpfc_sli4_seq_abort_rsp(vport, &fc_hdr, aborted);
  }
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h

index 91153c9f6d18259b3978b7f3218ad76198170c34..da46471337c8a7e3b6c38164bc4afe1a4f0087c2 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -620,7 +620,7 @@ struct lpfc_sli4_hba {
         struct list_head lpfc_els_sgl_list;
         struct list_head lpfc_abts_els_sgl_list;
         struct list_head lpfc_nvmet_sgl_list;
-       struct list_head lpfc_abts_nvmet_sgl_list;
+       struct list_head lpfc_abts_nvmet_ctx_list;
         struct list_head lpfc_abts_scsi_buf_list;
         struct list_head lpfc_abts_nvme_buf_list;
         struct lpfc_sglq **lpfc_sglq_active_list;
@@ -642,6 +642,7 @@ struct lpfc_sli4_hba {
         struct list_head sp_asynce_work_queue;
         struct list_head sp_fcp_xri_aborted_work_queue;
         struct list_head sp_els_xri_aborted_work_queue;
+       struct list_head sp_nvme_xri_aborted_work_queue;
         struct list_head sp_unsol_work_queue;
         struct lpfc_sli4_link link_state;
         struct lpfc_sli4_lnk_info lnk_info;
@@ -794,9 +795,14 @@ void lpfc_sli4_fcf_redisc_event_proc(struct lpfc_hba *);
  int lpfc_sli4_resume_rpi(struct lpfc_nodelist *,
                         void (*)(struct lpfc_hba *, LPFC_MBOXQ_t *), void *);
  void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *);
+void lpfc_sli4_nvme_xri_abort_event_proc(struct lpfc_hba *phba);
  void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *);
  void lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *,
                                struct sli4_wcqe_xri_aborted *);
+void lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba,
+                               struct sli4_wcqe_xri_aborted *axri);
+void lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
+                                struct sli4_wcqe_xri_aborted *axri);
  void lpfc_sli4_els_xri_aborted(struct lpfc_hba *,
                                struct sli4_wcqe_xri_aborted *);
  void lpfc_sli4_vport_delete_els_xri_aborted(struct lpfc_vport *);
diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h

index 86c6c9b26b823a04dd5afbdff4688901d577c82d..1c26dc67151b65e050b729e63a0871cb6846564f 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -20,7 +20,7 @@
   * included with this package.                                     *
   *******************************************************************/
  
-#define LPFC_DRIVER_VERSION "11.2.0.7"
+#define LPFC_DRIVER_VERSION "11.2.0.12"
  #define LPFC_DRIVER_NAME               "lpfc"
  
  /* Used for SLI 2/3 */
diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c

index 9a0339dbc024bb02483aaa2a0f383d1134b03143..c714482bf4c5587b90b607c8d5ec4f62cf5b628e 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_vport.c
+++ b/drivers/scsi/lpfc/lpfc_vport.c
@@ -738,10 +738,9 @@ lpfc_vport_delete(struct fc_vport *fc_vport)
                 ndlp = lpfc_findnode_did(vport, Fabric_DID);
                 if (!ndlp) {
                         /* Cannot find existing Fabric ndlp, allocate one */
-                       ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+                       ndlp = lpfc_nlp_init(vport, Fabric_DID);
                         if (!ndlp)
                                 goto skip_logo;
-                       lpfc_nlp_init(vport, ndlp, Fabric_DID);
                         /* Indicate free memory when release */
                         NLP_SET_FREE_REQ(ndlp);
                 } else {
diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h

index e7e5974e1a2c435ef2ee0a79276e981fcb79cc87..2b209bbb4c9165fa7afdeff0f233f649684f8495 100644 (file)
--- a/drivers/scsi/megaraid/megaraid_sas.h
+++ b/drivers/scsi/megaraid/megaraid_sas.h
@@ -35,8 +35,8 @@
  /*
   * MegaRAID SAS Driver meta data
   */
-#define MEGASAS_VERSION                                "07.701.16.00-rc1"
-#define MEGASAS_RELDATE                                "February 2, 2017"
+#define MEGASAS_VERSION                                "07.701.17.00-rc1"
+#define MEGASAS_RELDATE                                "March 2, 2017"
  
  /*
   * Device IDs
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c

index 7ac9a9ee9bd473c3cc0b6178975f46e3d32f3b77..0016f12cc563e7c6e1eb3c2a87685f60c83b9747 100644 (file)
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -1963,6 +1963,9 @@ scan_target:
         if (!mr_device_priv_data)
                 return -ENOMEM;
         sdev->hostdata = mr_device_priv_data;
+
+       atomic_set(&mr_device_priv_data->r1_ldio_hint,
+                  instance->r1_ldio_hint_default);
         return 0;
  }
  
@@ -5034,10 +5037,12 @@ megasas_setup_irqs_msix(struct megasas_instance *instance, u8 is_probe)
                                          &instance->irq_context[j]);
                         /* Retry irq register for IO_APIC*/
                         instance->msix_vectors = 0;
-                       if (is_probe)
+                       if (is_probe) {
+                               pci_free_irq_vectors(instance->pdev);
                                 return megasas_setup_irqs_ioapic(instance);
-                       else
+                       } else {
                                 return -1;
+                       }
                 }
         }
         return 0;
@@ -5277,9 +5282,11 @@ static int megasas_init_fw(struct megasas_instance *instance)
                         MPI2_REPLY_POST_HOST_INDEX_OFFSET);
         }
  
-       i = pci_alloc_irq_vectors(instance->pdev, 1, 1, PCI_IRQ_LEGACY);
-       if (i < 0)
-               goto fail_setup_irqs;
+       if (!instance->msix_vectors) {
+               i = pci_alloc_irq_vectors(instance->pdev, 1, 1, PCI_IRQ_LEGACY);
+               if (i < 0)
+                       goto fail_setup_irqs;
+       }
  
         dev_info(&instance->pdev->dev,
                 "firmware supports msix\t: (%d)", fw_msix_count);
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c

index 29650ba669da58da099cf91e9de0aae504146bb0..f990ab4d45e1bf72b3adf8991b11c01309c7530b 100644 (file)
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -2159,7 +2159,7 @@ megasas_set_raidflag_cpu_affinity(union RAID_CONTEXT_UNION *praid_context,
                                 cpu_sel = MR_RAID_CTX_CPUSEL_1;
  
                         if (is_stream_detected(rctx_g35) &&
-                           (raid->level == 5) &&
+                           ((raid->level == 5) || (raid->level == 6)) &&
                             (raid->writeMode == MR_RL_WRITE_THROUGH_MODE) &&
                             (cpu_sel == MR_RAID_CTX_CPUSEL_FCFS))
                                 cpu_sel = MR_RAID_CTX_CPUSEL_0;
@@ -2338,7 +2338,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance,
                                 fp_possible = false;
                                 atomic_dec(&instance->fw_outstanding);
                         } else if ((scsi_buff_len > MR_LARGE_IO_MIN_SIZE) ||
-                                  atomic_dec_if_positive(&mrdev_priv->r1_ldio_hint)) {
+                                  (atomic_dec_if_positive(&mrdev_priv->r1_ldio_hint) > 0)) {
                                 fp_possible = false;
                                 atomic_dec(&instance->fw_outstanding);
                                 if (scsi_buff_len > MR_LARGE_IO_MIN_SIZE)
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h

index 7fe7e6ed595b79e8831bfbeb55767e5d65ff7e5c..8981806fb13fa7792e2b8d45f4fc6880b362da25 100644 (file)
--- a/drivers/scsi/mpt3sas/mpt3sas_base.h
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.h
@@ -1442,9 +1442,6 @@ void mpt3sas_transport_update_links(struct MPT3SAS_ADAPTER *ioc,
         u64 sas_address, u16 handle, u8 phy_number, u8 link_rate);
  extern struct sas_function_template mpt3sas_transport_functions;
  extern struct scsi_transport_template *mpt3sas_transport_template;
-extern int scsi_internal_device_block(struct scsi_device *sdev);
-extern int scsi_internal_device_unblock(struct scsi_device *sdev,
-                               enum scsi_device_state new_state);
  /* trigger data externs */
  void mpt3sas_send_trigger_data_event(struct MPT3SAS_ADAPTER *ioc,
         struct SL_WH_TRIGGERS_EVENT_DATA_T *event_data);
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c

index 46e866c36c8a884a98588a8d7a81272aed0ce400..919ba2bb15f110f4619a72646d170f87ad985237 100644 (file)
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -2859,7 +2859,7 @@ _scsih_internal_device_block(struct scsi_device *sdev,
             sas_device_priv_data->sas_target->handle);
         sas_device_priv_data->block = 1;
  
-       r = scsi_internal_device_block(sdev);
+       r = scsi_internal_device_block(sdev, false);
         if (r == -EINVAL)
                 sdev_printk(KERN_WARNING, sdev,
                     "device_block failed with return(%d) for handle(0x%04x)\n",
@@ -2895,7 +2895,7 @@ _scsih_internal_device_unblock(struct scsi_device *sdev,
                     "performing a block followed by an unblock\n",
                     r, sas_device_priv_data->sas_target->handle);
                 sas_device_priv_data->block = 1;
-               r = scsi_internal_device_block(sdev);
+               r = scsi_internal_device_block(sdev, false);
                 if (r)
                         sdev_printk(KERN_WARNING, sdev, "retried device_block "
                             "failed with return(%d) for handle(0x%04x)\n",
@@ -4677,7 +4677,6 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply)
         struct MPT3SAS_DEVICE *sas_device_priv_data;
         u32 response_code = 0;
         unsigned long flags;
-       unsigned int sector_sz;
  
         mpi_reply = mpt3sas_base_get_reply_virt_addr(ioc, reply);
  
@@ -4742,20 +4741,6 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply)
         }
  
         xfer_cnt = le32_to_cpu(mpi_reply->TransferCount);
-
-       /* In case of bogus fw or device, we could end up having
-        * unaligned partial completion. We can force alignment here,
-        * then scsi-ml does not need to handle this misbehavior.
-        */
-       sector_sz = scmd->device->sector_size;
-       if (unlikely(!blk_rq_is_passthrough(scmd->request) && sector_sz &&
-                    xfer_cnt % sector_sz)) {
-               sdev_printk(KERN_INFO, scmd->device,
-                   "unaligned partial completion avoided (xfer_cnt=%u, sector_sz=%u)\n",
-                           xfer_cnt, sector_sz);
-               xfer_cnt = round_down(xfer_cnt, sector_sz);
-       }
-
         scsi_set_resid(scmd, scsi_bufflen(scmd) - xfer_cnt);
         if (ioc_status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE)
                 log_info =  le32_to_cpu(mpi_reply->IOCLogInfo);
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c

index 6903f03c88af46bb54f7d3bb1e07b3a16596714d..8a1b948164191c322aa01e97b54a930efadd301a 100644 (file)
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -477,7 +477,7 @@ static void _set_error_resid(struct osd_request *or, struct request *req,
                              int error)
  {
         or->async_error = error;
-       or->req_errors = req->errors ? : error;
+       or->req_errors = scsi_req(req)->result ? : error;
         or->sense_len = scsi_req(req)->sense_len;
         if (or->sense_len)
                 memcpy(or->sense, scsi_req(req)->sense, or->sense_len);
@@ -489,7 +489,10 @@ static void _set_error_resid(struct osd_request *or, struct request *req,
  
  int osd_execute_request(struct osd_request *or)
  {
-       int error = blk_execute_rq(or->request->q, NULL, or->request, 0);
+       int error;
+
+       blk_execute_rq(or->request->q, NULL, or->request, 0);
+       error = scsi_req(or->request)->result ? -EIO : 0;
  
         _set_error_resid(or, or->request, error);
         return error;
@@ -1602,7 +1605,7 @@ static int _init_blk_request(struct osd_request *or,
         req->rq_flags |= RQF_QUIET;
  
         req->timeout = or->timeout;
-       req->retries = or->retries;
+       scsi_req(req)->retries = or->retries;
  
         if (has_out) {
                 or->out.req = req;
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c

index c47f4b349bac44dcaaa88ffefdb4257cc51ccee3..67cbed92f07dd05001f1e3f5a3ad40a00e59a4e6 100644 (file)
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -327,7 +327,7 @@ static void osst_end_async(struct request *req, int update)
         struct osst_tape *STp = SRpnt->stp;
         struct rq_map_data *mdata = &SRpnt->stp->buffer->map_data;
  
-       STp->buffer->cmdstat.midlevel_result = SRpnt->result = req->errors;
+       STp->buffer->cmdstat.midlevel_result = SRpnt->result = rq->result;
  #if DEBUG
         STp->write_pending = 0;
  #endif
@@ -414,7 +414,7 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd,
         memset(rq->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */
         memcpy(rq->cmd, cmd, rq->cmd_len);
         req->timeout = timeout;
-       req->retries = retries;
+       rq->retries = retries;
         req->end_io_data = SRpnt;
  
         blk_execute_rq_nowait(req->q, NULL, req, 1, osst_end_async);
diff --git a/drivers/scsi/qedf/qedf_dbg.h b/drivers/scsi/qedf/qedf_dbg.h

index 23bd70628a2f05b9c5a5c2dbd6184e4ad6d076b2..7d173f48a81e8d240cd5d957b2997a3c0c3f0223 100644 (file)
--- a/drivers/scsi/qedf/qedf_dbg.h
+++ b/drivers/scsi/qedf/qedf_dbg.h
@@ -81,14 +81,17 @@ struct qedf_dbg_ctx {
  #define QEDF_INFO(pdev, level, fmt, ...)       \
                 qedf_dbg_info(pdev, __func__, __LINE__, level, fmt,     \
                               ## __VA_ARGS__)
-
-extern void qedf_dbg_err(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
+__printf(4, 5)
+void qedf_dbg_err(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
                           const char *fmt, ...);
-extern void qedf_dbg_warn(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
+__printf(4, 5)
+void qedf_dbg_warn(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
                            const char *, ...);
-extern void qedf_dbg_notice(struct qedf_dbg_ctx *qedf, const char *func,
+__printf(4, 5)
+void qedf_dbg_notice(struct qedf_dbg_ctx *qedf, const char *func,
                             u32 line, const char *, ...);
-extern void qedf_dbg_info(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
+__printf(5, 6)
+void qedf_dbg_info(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
                           u32 info, const char *fmt, ...);
  
  /* GRC Dump related defines */
diff --git a/drivers/scsi/qedf/qedf_fip.c b/drivers/scsi/qedf/qedf_fip.c

index 868d423380d120ca82c135a8e545fbd98e1ba935..e10b91cc3c62388ccee0db756f1371d7c8037ff9 100644 (file)
--- a/drivers/scsi/qedf/qedf_fip.c
+++ b/drivers/scsi/qedf/qedf_fip.c
@@ -99,7 +99,8 @@ static void qedf_fcoe_process_vlan_resp(struct qedf_ctx *qedf,
                 qedf_set_vlan_id(qedf, vid);
  
                 /* Inform waiter that it's ok to call fcoe_ctlr_link up() */
-               complete(&qedf->fipvlan_compl);
+               if (!completion_done(&qedf->fipvlan_compl))
+                       complete(&qedf->fipvlan_compl);
         }
  }
  
@@ -203,7 +204,7 @@ void qedf_fip_recv(struct qedf_ctx *qedf, struct sk_buff *skb)
                         case FIP_DT_MAC:
                                 mp = (struct fip_mac_desc *)desc;
                                 QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2,
-                                   "fd_mac=%pM.\n", __func__, mp->fd_mac);
+                                   "fd_mac=%pM\n", mp->fd_mac);
                                 ether_addr_copy(cvl_mac, mp->fd_mac);
                                 break;
                         case FIP_DT_NAME:
diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c

index ee0dcf9d3aba7847eaa673ab3542a1c0d6d375c3..46debe5034af102710a574a80254433b94265271 100644 (file)
--- a/drivers/scsi/qedf/qedf_io.c
+++ b/drivers/scsi/qedf/qedf_io.c
@@ -1342,7 +1342,7 @@ void qedf_scsi_completion(struct qedf_ctx *qedf, struct fcoe_cqe *cqe,
                 } else {
                         refcount = kref_read(&io_req->refcount);
                         QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO,
-                           "%d:0:%d:%d xid=0x%0x op=0x%02x "
+                           "%d:0:%d:%lld xid=0x%0x op=0x%02x "
                             "lba=%02x%02x%02x%02x cdb_status=%d "
                             "fcp_resid=0x%x refcount=%d.\n",
                             qedf->lport->host->host_no, sc_cmd->device->id,
@@ -1426,7 +1426,7 @@ void qedf_scsi_done(struct qedf_ctx *qedf, struct qedf_ioreq *io_req,
  
         sc_cmd->result = result << 16;
         refcount = kref_read(&io_req->refcount);
-       QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "%d:0:%d:%d: Completing "
+       QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "%d:0:%d:%lld: Completing "
             "sc_cmd=%p result=0x%08x op=0x%02x lba=0x%02x%02x%02x%02x, "
             "allowed=%d retries=%d refcount=%d.\n",
             qedf->lport->host->host_no, sc_cmd->device->id,
diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c

index d9d7a86b5f8baf13038cfc1bd17885945d1fdf4e..cceddd995a4bf46605ae94143cfcff9df693fb83 100644 (file)
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -2456,8 +2456,8 @@ static int qedf_alloc_bdq(struct qedf_ctx *qedf)
         }
  
         QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC,
-           "BDQ PBL addr=0x%p dma=0x%llx.\n", qedf->bdq_pbl,
-           qedf->bdq_pbl_dma);
+                 "BDQ PBL addr=0x%p dma=%pad\n",
+                 qedf->bdq_pbl, &qedf->bdq_pbl_dma);
  
         /*
          * Populate BDQ PBL with physical and virtual address of individual
@@ -2803,6 +2803,7 @@ static int __qedf_probe(struct pci_dev *pdev, int mode)
                 atomic_set(&qedf->num_offloads, 0);
                 qedf->stop_io_on_error = false;
                 pci_set_drvdata(pdev, qedf);
+               init_completion(&qedf->fipvlan_compl);
  
                 QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_INFO,
                    "QLogic FastLinQ FCoE Module qedf %s, "
diff --git a/drivers/scsi/qedi/qedi_debugfs.c b/drivers/scsi/qedi/qedi_debugfs.c

index 955936274241406e2c8df92a7f3a5fcd530d7f05..59417199bf363ae956bb2832f10fe22307a978d1 100644 (file)
--- a/drivers/scsi/qedi/qedi_debugfs.c
+++ b/drivers/scsi/qedi/qedi_debugfs.c
@@ -14,7 +14,7 @@
  #include <linux/debugfs.h>
  #include <linux/module.h>
  
-int do_not_recover;
+int qedi_do_not_recover;
  static struct dentry *qedi_dbg_root;
  
  void
@@ -74,22 +74,22 @@ qedi_dbg_exit(void)
  static ssize_t
  qedi_dbg_do_not_recover_enable(struct qedi_dbg_ctx *qedi_dbg)
  {
-       if (!do_not_recover)
-               do_not_recover = 1;
+       if (!qedi_do_not_recover)
+               qedi_do_not_recover = 1;
  
         QEDI_INFO(qedi_dbg, QEDI_LOG_DEBUGFS, "do_not_recover=%d\n",
-                 do_not_recover);
+                 qedi_do_not_recover);
         return 0;
  }
  
  static ssize_t
  qedi_dbg_do_not_recover_disable(struct qedi_dbg_ctx *qedi_dbg)
  {
-       if (do_not_recover)
-               do_not_recover = 0;
+       if (qedi_do_not_recover)
+               qedi_do_not_recover = 0;
  
         QEDI_INFO(qedi_dbg, QEDI_LOG_DEBUGFS, "do_not_recover=%d\n",
-                 do_not_recover);
+                 qedi_do_not_recover);
         return 0;
  }
  
@@ -141,7 +141,7 @@ qedi_dbg_do_not_recover_cmd_read(struct file *filp, char __user *buffer,
         if (*ppos)
                 return 0;
  
-       cnt = sprintf(buffer, "do_not_recover=%d\n", do_not_recover);
+       cnt = sprintf(buffer, "do_not_recover=%d\n", qedi_do_not_recover);
         cnt = min_t(int, count, cnt - *ppos);
         *ppos += cnt;
         return cnt;
diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c

index c9f0ef4e11b33ce9ca2a707645b1b2088b6a1f63..2bce3efc66a4b4bda8bae40768ca3e961a6d33b0 100644 (file)
--- a/drivers/scsi/qedi/qedi_fw.c
+++ b/drivers/scsi/qedi/qedi_fw.c
@@ -1461,9 +1461,9 @@ static void qedi_tmf_work(struct work_struct *work)
                   get_itt(tmf_hdr->rtt), get_itt(ctask->itt), cmd->task_id,
                   qedi_conn->iscsi_conn_id);
  
-       if (do_not_recover) {
+       if (qedi_do_not_recover) {
                 QEDI_ERR(&qedi->dbg_ctx, "DONT SEND CLEANUP/ABORT %d\n",
-                        do_not_recover);
+                        qedi_do_not_recover);
                 goto abort_ret;
         }
  
diff --git a/drivers/scsi/qedi/qedi_gbl.h b/drivers/scsi/qedi/qedi_gbl.h

index 8e488de88ece9fb8fc49b991935cf5c24a87b5b4..63d793f460645d44c4ae29638145cbcbcbc9be78 100644 (file)
--- a/drivers/scsi/qedi/qedi_gbl.h
+++ b/drivers/scsi/qedi/qedi_gbl.h
@@ -12,8 +12,14 @@
  
  #include "qedi_iscsi.h"
  
+#ifdef CONFIG_DEBUG_FS
+extern int qedi_do_not_recover;
+#else
+#define qedi_do_not_recover (0)
+#endif
+
  extern uint qedi_io_tracing;
-extern int do_not_recover;
+
  extern struct scsi_host_template qedi_host_template;
  extern struct iscsi_transport qedi_iscsi_transport;
  extern const struct qed_iscsi_ops *qedi_ops;
diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c

index b9f79d36142d5e85182d39d16ba34a7a1a6f6521..4cc474364c50568806b16520ddd66239b9f3ebfd 100644 (file)
--- a/drivers/scsi/qedi/qedi_iscsi.c
+++ b/drivers/scsi/qedi/qedi_iscsi.c
@@ -833,7 +833,7 @@ qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr,
                 return ERR_PTR(ret);
         }
  
-       if (do_not_recover) {
+       if (qedi_do_not_recover) {
                 ret = -ENOMEM;
                 return ERR_PTR(ret);
         }
@@ -957,7 +957,7 @@ static int qedi_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
         struct qedi_endpoint *qedi_ep;
         int ret = 0;
  
-       if (do_not_recover)
+       if (qedi_do_not_recover)
                 return 1;
  
         qedi_ep = ep->dd_data;
@@ -1025,7 +1025,7 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep)
                 }
  
                 if (test_bit(QEDI_IN_RECOVERY, &qedi->flags)) {
-                       if (do_not_recover) {
+                       if (qedi_do_not_recover) {
                                 QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
                                           "Do not recover cid=0x%x\n",
                                           qedi_ep->iscsi_cid);
@@ -1039,7 +1039,7 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep)
                 }
         }
  
-       if (do_not_recover)
+       if (qedi_do_not_recover)
                 goto ep_exit_recover;
  
         switch (qedi_ep->state) {
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c

index 5eda21d903e93dfc96702552d2de9deb7f01c734..92775a8b74b1cdc068b8d8808b7bd9ccd5b212eb 100644 (file)
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -1805,7 +1805,7 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
          */
         qedi_ops->common->update_pf_params(qedi->cdev, &qedi->pf_params);
  
-       qedi_setup_int(qedi);
+       rc = qedi_setup_int(qedi);
         if (rc)
                 goto stop_iscsi_func;
  
@@ -2007,6 +2007,7 @@ static void qedi_remove(struct pci_dev *pdev)
  
  static struct pci_device_id qedi_pci_tbl[] = {
         { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, 0x165E) },
+       { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, 0x8084) },
         { 0 },
  };
  MODULE_DEVICE_TABLE(pci, qedi_pci_tbl);
diff --git a/drivers/scsi/qla2xxx/Kconfig b/drivers/scsi/qla2xxx/Kconfig

index 67c0d5aa32125ca135ccb6cc2bd83af76b0ffd1b..de952935b5d2ca572d618e2a8802a1e035c0fbdb 100644 (file)
--- a/drivers/scsi/qla2xxx/Kconfig
+++ b/drivers/scsi/qla2xxx/Kconfig
@@ -3,6 +3,7 @@ config SCSI_QLA_FC
         depends on PCI && SCSI
         depends on SCSI_FC_ATTRS
         select FW_LOADER
+       select BTREE
         ---help---
         This qla2xxx driver supports all QLogic Fibre Channel
         PCI and PCIe host adapters.
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c

index f610103994afd4c53cbf439db646eb5b44851689..435ff7fd6384a0a4e941efb3d60411e0731d4c1b 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -2154,8 +2154,6 @@ qla24xx_vport_delete(struct fc_vport *fc_vport)
                     "Timer for the VP[%d] has stopped\n", vha->vp_idx);
         }
  
-       BUG_ON(atomic_read(&vha->vref_count));
-
         qla2x00_free_fcports(vha);
  
         mutex_lock(&ha->vport_lock);
@@ -2166,7 +2164,7 @@ qla24xx_vport_delete(struct fc_vport *fc_vport)
         dma_free_coherent(&ha->pdev->dev, vha->gnl.size, vha->gnl.l,
             vha->gnl.ldma);
  
-       if (vha->qpair->vp_idx == vha->vp_idx) {
+       if (vha->qpair && vha->qpair->vp_idx == vha->vp_idx) {
                 if (qla2xxx_delete_qpair(vha, vha->qpair) != QLA_SUCCESS)
                         ql_log(ql_log_warn, vha, 0x7087,
                             "Queue Pair delete failed.\n");
diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c

index 84c9098cc089fc599774dfc82dc029895d229060..b6e40fd4c3c1aba6b00c0e6587fea27cff215393 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_bsg.c
+++ b/drivers/scsi/qla2xxx/qla_bsg.c
@@ -2553,13 +2553,13 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job)
                                                 ql_log(ql_log_warn, vha, 0x7089,
                                                     "mbx abort_command "
                                                     "failed.\n");
-                                               bsg_job->req->errors =
+                                               scsi_req(bsg_job->req)->result =
                                                 bsg_reply->result = -EIO;
                                         } else {
                                                 ql_dbg(ql_dbg_user, vha, 0x708a,
                                                     "mbx abort_command "
                                                     "success.\n");
-                                               bsg_job->req->errors =
+                                               scsi_req(bsg_job->req)->result =
                                                 bsg_reply->result = 0;
                                         }
                                         spin_lock_irqsave(&ha->hardware_lock, flags);
@@ -2570,7 +2570,7 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job)
         }
         spin_unlock_irqrestore(&ha->hardware_lock, flags);
         ql_log(ql_log_info, vha, 0x708b, "SRB not found to abort.\n");
-       bsg_job->req->errors = bsg_reply->result = -ENXIO;
+       scsi_req(bsg_job->req)->result = bsg_reply->result = -ENXIO;
         return 0;
  
  done:
diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c

index 21d9fb7fc88796cbaa09fbfa160b9b20c17e2015..51b4179469d1851be96872ee39b24739fc34135e 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_dbg.c
+++ b/drivers/scsi/qla2xxx/qla_dbg.c
@@ -2707,13 +2707,9 @@ ql_dump_buffer(uint32_t level, scsi_qla_host_t *vha, int32_t id,
             "%-+5d  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F\n", size);
         ql_dbg(level, vha, id,
             "----- -----------------------------------------------\n");
-       for (cnt = 0; cnt < size; cnt++, buf++) {
-               if (cnt % 16 == 0)
-                       ql_dbg(level, vha, id, "%04x:", cnt & ~0xFU);
-               printk(" %02x", *buf);
-               if (cnt % 16 == 15)
-                       printk("\n");
+       for (cnt = 0; cnt < size; cnt += 16) {
+               ql_dbg(level, vha, id, "%04x: ", cnt);
+               print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1,
+                              buf + cnt, min(16U, size - cnt), false);
         }
-       if (cnt % 16 != 0)
-               printk("\n");
  }
diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h

index e1fc4e66966aeab7b64bfd4ca9c75ca4da1a5be5..c6bffe929fe7dc54b83ac8d89087b4b0d7e0efca 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_dbg.h
+++ b/drivers/scsi/qla2xxx/qla_dbg.h
@@ -348,6 +348,7 @@ ql_log_pci(uint32_t, struct pci_dev *pdev, int32_t, const char *fmt, ...);
  #define ql_dbg_tgt     0x00004000 /* Target mode */
  #define ql_dbg_tgt_mgt 0x00002000 /* Target mode management */
  #define ql_dbg_tgt_tmr 0x00001000 /* Target mode task management */
+#define ql_dbg_tgt_dif  0x00000800 /* Target mode dif */
  
  extern int qla27xx_dump_mpi_ram(struct qla_hw_data *, uint32_t, uint32_t *,
         uint32_t, void **);
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h

index 625d438e3cce01e39a57bfdd3d581ac24e6a5c55..ae119018dfaae9fe65c5cfe1869cdc655b27a3ea 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -25,6 +25,7 @@
  #include <linux/firmware.h>
  #include <linux/aer.h>
  #include <linux/mutex.h>
+#include <linux/btree.h>
  
  #include <scsi/scsi.h>
  #include <scsi/scsi_host.h>
@@ -395,11 +396,15 @@ struct srb_iocb {
                         struct completion comp;
                 } abt;
                 struct ct_arg ctarg;
+#define MAX_IOCB_MB_REG 28
+#define SIZEOF_IOCB_MB_REG (MAX_IOCB_MB_REG * sizeof(uint16_t))
                 struct {
-                       __le16 in_mb[28];       /* fr fw */
-                       __le16 out_mb[28];      /* to fw */
+                       __le16 in_mb[MAX_IOCB_MB_REG];  /* from FW */
+                       __le16 out_mb[MAX_IOCB_MB_REG]; /* to FW */
                         void *out, *in;
                         dma_addr_t out_dma, in_dma;
+                       struct completion comp;
+                       int rc;
                 } mbx;
                 struct {
                         struct imm_ntfy_from_isp *ntfy;
@@ -437,7 +442,7 @@ typedef struct srb {
         uint32_t handle;
         uint16_t flags;
         uint16_t type;
-       char *name;
+       const char *name;
         int iocbs;
         struct qla_qpair *qpair;
         u32 gen1;       /* scratch */
@@ -2300,6 +2305,8 @@ typedef struct fc_port {
         struct ct_sns_desc ct_desc;
         enum discovery_state disc_state;
         enum login_state fw_login_state;
+       unsigned long plogi_nack_done_deadline;
+
         u32 login_gen, last_login_gen;
         u32 rscn_gen, last_rscn_gen;
         u32 chip_reset;
@@ -3106,6 +3113,16 @@ struct qla_chip_state_84xx {
         uint32_t gold_fw_version;
  };
  
+struct qla_dif_statistics {
+       uint64_t dif_input_bytes;
+       uint64_t dif_output_bytes;
+       uint64_t dif_input_requests;
+       uint64_t dif_output_requests;
+       uint32_t dif_guard_err;
+       uint32_t dif_ref_tag_err;
+       uint32_t dif_app_tag_err;
+};
+
  struct qla_statistics {
         uint32_t total_isp_aborts;
         uint64_t input_bytes;
@@ -3118,6 +3135,8 @@ struct qla_statistics {
         uint32_t stat_max_pend_cmds;
         uint32_t stat_max_qfull_cmds_alloc;
         uint32_t stat_max_qfull_cmds_dropped;
+
+       struct qla_dif_statistics qla_dif_stats;
  };
  
  struct bidi_statistics {
@@ -3125,6 +3144,16 @@ struct bidi_statistics {
         unsigned long long transfer_bytes;
  };
  
+struct qla_tc_param {
+       struct scsi_qla_host *vha;
+       uint32_t blk_sz;
+       uint32_t bufflen;
+       struct scatterlist *sg;
+       struct scatterlist *prot_sg;
+       struct crc_context *ctx;
+       uint8_t *ctx_dsd_alloced;
+};
+
  /* Multi queue support */
  #define MBC_INITIALIZE_MULTIQ 0x1f
  #define QLA_QUE_PAGE 0X1000
@@ -3272,6 +3301,8 @@ struct qlt_hw_data {
         uint8_t tgt_node_name[WWN_SIZE];
  
         struct dentry *dfs_tgt_sess;
+       struct dentry *dfs_tgt_port_database;
+
         struct list_head q_full_list;
         uint32_t num_pend_cmds;
         uint32_t num_qfull_cmds_alloc;
@@ -3281,6 +3312,7 @@ struct qlt_hw_data {
         spinlock_t sess_lock;
         int rspq_vector_cpuid;
         spinlock_t atio_lock ____cacheline_aligned;
+       struct btree_head32 host_map;
  };
  
  #define MAX_QFULL_CMDS_ALLOC   8192
@@ -3290,6 +3322,10 @@ struct qlt_hw_data {
  
  #define LEAK_EXCHG_THRESH_HOLD_PERCENT 75      /* 75 percent */
  
+#define QLA_EARLY_LINKUP(_ha) \
+       ((_ha->flags.n2n_ae || _ha->flags.lip_ae) && \
+        _ha->flags.fw_started && !_ha->flags.fw_init_done)
+
  /*
   * Qlogic host adapter specific data structure.
  */
@@ -3339,7 +3375,11 @@ struct qla_hw_data {
                 uint32_t        fawwpn_enabled:1;
                 uint32_t        exlogins_enabled:1;
                 uint32_t        exchoffld_enabled:1;
-               /* 35 bits */
+
+               uint32_t        lip_ae:1;
+               uint32_t        n2n_ae:1;
+               uint32_t        fw_started:1;
+               uint32_t        fw_init_done:1;
         } flags;
  
         /* This spinlock is used to protect "io transactions", you must
@@ -3432,7 +3472,6 @@ struct qla_hw_data {
  #define P2P_LOOP  3
         uint8_t         interrupts_on;
         uint32_t        isp_abort_cnt;
-
  #define PCI_DEVICE_ID_QLOGIC_ISP2532    0x2532
  #define PCI_DEVICE_ID_QLOGIC_ISP8432    0x8432
  #define PCI_DEVICE_ID_QLOGIC_ISP8001   0x8001
@@ -3913,6 +3952,7 @@ typedef struct scsi_qla_host {
         struct list_head vp_fcports;    /* list of fcports */
         struct list_head work_list;
         spinlock_t work_lock;
+       struct work_struct iocb_work;
  
         /* Commonly used flags and state information. */
         struct Scsi_Host *host;
@@ -4076,6 +4116,7 @@ typedef struct scsi_qla_host {
         /* Count of active session/fcport */
         int fcport_count;
         wait_queue_head_t fcport_waitQ;
+       wait_queue_head_t vref_waitq;
  } scsi_qla_host_t;
  
  struct qla27xx_image_status {
@@ -4131,14 +4172,17 @@ struct qla2_sgx {
         mb();                                           \
         if (__vha->flags.delete_progress) {             \
                 atomic_dec(&__vha->vref_count);         \
+               wake_up(&__vha->vref_waitq);            \
                 __bail = 1;                             \
         } else {                                        \
                 __bail = 0;                             \
         }                                               \
  } while (0)
  
-#define QLA_VHA_MARK_NOT_BUSY(__vha)                   \
+#define QLA_VHA_MARK_NOT_BUSY(__vha) do {              \
         atomic_dec(&__vha->vref_count);                 \
+       wake_up(&__vha->vref_waitq);                    \
+} while (0)                                            \
  
  #define QLA_QPAIR_MARK_BUSY(__qpair, __bail) do {      \
         atomic_inc(&__qpair->ref_count);                \
diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c

index b48cce696bac77e44f7c7579fd1829a40391da2d..989e17b0758cd51ec029204c48eddf37c55c180a 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_dfs.c
+++ b/drivers/scsi/qla2xxx/qla_dfs.c
@@ -19,11 +19,11 @@ qla2x00_dfs_tgt_sess_show(struct seq_file *s, void *unused)
         struct qla_hw_data *ha = vha->hw;
         unsigned long flags;
         struct fc_port *sess = NULL;
-       struct qla_tgt *tgt= vha->vha_tgt.qla_tgt;
+       struct qla_tgt *tgt = vha->vha_tgt.qla_tgt;
  
-       seq_printf(s, "%s\n",vha->host_str);
+       seq_printf(s, "%s\n", vha->host_str);
         if (tgt) {
-               seq_printf(s, "Port ID   Port Name                Handle\n");
+               seq_puts(s, "Port ID   Port Name                Handle\n");
  
                 spin_lock_irqsave(&ha->tgt.sess_lock, flags);
                 list_for_each_entry(sess, &vha->vp_fcports, list)
@@ -44,7 +44,6 @@ qla2x00_dfs_tgt_sess_open(struct inode *inode, struct file *file)
         return single_open(file, qla2x00_dfs_tgt_sess_show, vha);
  }
  
-
  static const struct file_operations dfs_tgt_sess_ops = {
         .open           = qla2x00_dfs_tgt_sess_open,
         .read           = seq_read,
@@ -52,6 +51,78 @@ static const struct file_operations dfs_tgt_sess_ops = {
         .release        = single_release,
  };
  
+static int
+qla2x00_dfs_tgt_port_database_show(struct seq_file *s, void *unused)
+{
+       scsi_qla_host_t *vha = s->private;
+       struct qla_hw_data *ha = vha->hw;
+       struct gid_list_info *gid_list;
+       dma_addr_t gid_list_dma;
+       fc_port_t fc_port;
+       char *id_iter;
+       int rc, i;
+       uint16_t entries, loop_id;
+       struct qla_tgt *tgt = vha->vha_tgt.qla_tgt;
+
+       seq_printf(s, "%s\n", vha->host_str);
+       if (tgt) {
+               gid_list = dma_alloc_coherent(&ha->pdev->dev,
+                   qla2x00_gid_list_size(ha),
+                   &gid_list_dma, GFP_KERNEL);
+               if (!gid_list) {
+                       ql_dbg(ql_dbg_user, vha, 0x705c,
+                           "DMA allocation failed for %u\n",
+                            qla2x00_gid_list_size(ha));
+                       return 0;
+               }
+
+               rc = qla24xx_gidlist_wait(vha, gid_list, gid_list_dma,
+                   &entries);
+               if (rc != QLA_SUCCESS)
+                       goto out_free_id_list;
+
+               id_iter = (char *)gid_list;
+
+               seq_puts(s, "Port Name  Port ID         Loop ID\n");
+
+               for (i = 0; i < entries; i++) {
+                       struct gid_list_info *gid =
+                           (struct gid_list_info *)id_iter;
+                       loop_id = le16_to_cpu(gid->loop_id);
+                       memset(&fc_port, 0, sizeof(fc_port_t));
+
+                       fc_port.loop_id = loop_id;
+
+                       rc = qla24xx_gpdb_wait(vha, &fc_port, 0);
+                       seq_printf(s, "%8phC  %02x%02x%02x  %d\n",
+                               fc_port.port_name, fc_port.d_id.b.domain,
+                               fc_port.d_id.b.area, fc_port.d_id.b.al_pa,
+                               fc_port.loop_id);
+                       id_iter += ha->gid_list_info_size;
+               }
+out_free_id_list:
+               dma_free_coherent(&ha->pdev->dev, qla2x00_gid_list_size(ha),
+                   gid_list, gid_list_dma);
+       }
+
+       return 0;
+}
+
+static int
+qla2x00_dfs_tgt_port_database_open(struct inode *inode, struct file *file)
+{
+       scsi_qla_host_t *vha = inode->i_private;
+
+       return single_open(file, qla2x00_dfs_tgt_port_database_show, vha);
+}
+
+static const struct file_operations dfs_tgt_port_database_ops = {
+       .open           = qla2x00_dfs_tgt_port_database_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
  static int
  qla_dfs_fw_resource_cnt_show(struct seq_file *s, void *unused)
  {
@@ -114,6 +185,21 @@ qla_dfs_tgt_counters_show(struct seq_file *s, void *unused)
         seq_printf(s, "num Q full sent = %lld\n",
                 vha->tgt_counters.num_q_full_sent);
  
+       /* DIF stats */
+       seq_printf(s, "DIF Inp Bytes = %lld\n",
+               vha->qla_stats.qla_dif_stats.dif_input_bytes);
+       seq_printf(s, "DIF Outp Bytes = %lld\n",
+               vha->qla_stats.qla_dif_stats.dif_output_bytes);
+       seq_printf(s, "DIF Inp Req = %lld\n",
+               vha->qla_stats.qla_dif_stats.dif_input_requests);
+       seq_printf(s, "DIF Outp Req = %lld\n",
+               vha->qla_stats.qla_dif_stats.dif_output_requests);
+       seq_printf(s, "DIF Guard err = %d\n",
+               vha->qla_stats.qla_dif_stats.dif_guard_err);
+       seq_printf(s, "DIF Ref tag err = %d\n",
+               vha->qla_stats.qla_dif_stats.dif_ref_tag_err);
+       seq_printf(s, "DIF App tag err = %d\n",
+               vha->qla_stats.qla_dif_stats.dif_app_tag_err);
         return 0;
  }
  
@@ -281,6 +367,14 @@ create_nodes:
                 goto out;
         }
  
+       ha->tgt.dfs_tgt_port_database = debugfs_create_file("tgt_port_database",
+           S_IRUSR,  ha->dfs_dir, vha, &dfs_tgt_port_database_ops);
+       if (!ha->tgt.dfs_tgt_port_database) {
+               ql_log(ql_log_warn, vha, 0xffff,
+                   "Unable to create debugFS tgt_port_database node.\n");
+               goto out;
+       }
+
         ha->dfs_fce = debugfs_create_file("fce", S_IRUSR, ha->dfs_dir, vha,
             &dfs_fce_ops);
         if (!ha->dfs_fce) {
@@ -311,6 +405,11 @@ qla2x00_dfs_remove(scsi_qla_host_t *vha)
                 ha->tgt.dfs_tgt_sess = NULL;
         }
  
+       if (ha->tgt.dfs_tgt_port_database) {
+               debugfs_remove(ha->tgt.dfs_tgt_port_database);
+               ha->tgt.dfs_tgt_port_database = NULL;
+       }
+
         if (ha->dfs_fw_resource_cnt) {
                 debugfs_remove(ha->dfs_fw_resource_cnt);
                 ha->dfs_fw_resource_cnt = NULL;
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h

index b3d6441d1d90eb27f1908fa27ea1ec28f024b1d9..5b2451745e9f471988e8685d68f3423ec5d5811f 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -193,6 +193,7 @@ extern int qla24xx_post_upd_fcport_work(struct scsi_qla_host *, fc_port_t *);
  void qla2x00_handle_login_done_event(struct scsi_qla_host *, fc_port_t *,
         uint16_t *);
  int qla24xx_post_gnl_work(struct scsi_qla_host *, fc_port_t *);
+int qla24xx_async_abort_cmd(srb_t *);
  
  /*
   * Global Functions in qla_mid.c source file.
@@ -256,11 +257,11 @@ extern unsigned long qla2x00_get_async_timeout(struct scsi_qla_host *);
  extern void *qla2x00_alloc_iocbs(scsi_qla_host_t *, srb_t *);
  extern int qla2x00_issue_marker(scsi_qla_host_t *, int);
  extern int qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *, srb_t *,
-       uint32_t *, uint16_t, struct qla_tgt_cmd *);
+       uint32_t *, uint16_t, struct qla_tc_param *);
  extern int qla24xx_walk_and_build_sglist(struct qla_hw_data *, srb_t *,
-       uint32_t *, uint16_t, struct qla_tgt_cmd *);
+       uint32_t *, uint16_t, struct qla_tc_param *);
  extern int qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *, srb_t *,
-       uint32_t *, uint16_t, struct qla_tgt_cmd *);
+       uint32_t *, uint16_t, struct qla_tc_param *);
  extern int qla24xx_get_one_block_sg(uint32_t, struct qla2_sgx *, uint32_t *);
  extern int qla24xx_configure_prot_mode(srb_t *, uint16_t *);
  extern int qla24xx_build_scsi_crc_2_iocbs(srb_t *,
@@ -368,7 +369,7 @@ qla2x00_get_link_status(scsi_qla_host_t *, uint16_t, struct link_statistics *,
  
  extern int
  qla24xx_get_isp_stats(scsi_qla_host_t *, struct link_statistics *,
-    dma_addr_t, uint);
+    dma_addr_t, uint16_t);
  
  extern int qla24xx_abort_command(srb_t *);
  extern int qla24xx_async_abort_command(srb_t *);
@@ -472,6 +473,13 @@ qla2x00_dump_mctp_data(scsi_qla_host_t *, dma_addr_t, uint32_t, uint32_t);
  extern int
  qla26xx_dport_diagnostics(scsi_qla_host_t *, void *, uint, uint);
  
+int qla24xx_send_mb_cmd(struct scsi_qla_host *, mbx_cmd_t *);
+int qla24xx_gpdb_wait(struct scsi_qla_host *, fc_port_t *, u8);
+int qla24xx_gidlist_wait(struct scsi_qla_host *, void *, dma_addr_t,
+    uint16_t *);
+int __qla24xx_parse_gpdb(struct scsi_qla_host *, fc_port_t *,
+       struct port_database_24xx *);
+
  /*
   * Global Function Prototypes in qla_isr.c source file.
   */
@@ -846,5 +854,7 @@ extern struct fc_port *qlt_find_sess_invalidate_other(scsi_qla_host_t *,
         uint64_t wwn, port_id_t port_id, uint16_t loop_id, struct fc_port **);
  void qla24xx_delete_sess_fn(struct work_struct *);
  void qlt_unknown_atio_work_fn(struct work_struct *);
+void qlt_update_host_map(struct scsi_qla_host *, port_id_t);
+void qlt_remove_target_resources(struct qla_hw_data *);
  
  #endif /* _QLA_GBL_H */
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c

index 32fb9007f13770e4cd43650521b67e991a66d3e9..f9d2fe7b1adedf9349c11b7bfaf389c223a21ba8 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -629,7 +629,6 @@ void qla24xx_async_gpdb_sp_done(void *s, int res)
         struct srb *sp = s;
         struct scsi_qla_host *vha = sp->vha;
         struct qla_hw_data *ha = vha->hw;
-       uint64_t zero = 0;
         struct port_database_24xx *pd;
         fc_port_t *fcport = sp->fcport;
         u16 *mb = sp->u.iocb_cmd.u.mbx.in_mb;
@@ -649,48 +648,7 @@ void qla24xx_async_gpdb_sp_done(void *s, int res)
  
         pd = (struct port_database_24xx *)sp->u.iocb_cmd.u.mbx.in;
  
-       /* Check for logged in state. */
-       if (pd->current_login_state != PDS_PRLI_COMPLETE &&
-           pd->last_login_state != PDS_PRLI_COMPLETE) {
-               ql_dbg(ql_dbg_mbx, vha, 0xffff,
-                   "Unable to verify login-state (%x/%x) for "
-                   "loop_id %x.\n", pd->current_login_state,
-                   pd->last_login_state, fcport->loop_id);
-               rval = QLA_FUNCTION_FAILED;
-               goto gpd_error_out;
-       }
-
-       if (fcport->loop_id == FC_NO_LOOP_ID ||
-           (memcmp(fcport->port_name, (uint8_t *)&zero, 8) &&
-               memcmp(fcport->port_name, pd->port_name, 8))) {
-               /* We lost the device mid way. */
-               rval = QLA_NOT_LOGGED_IN;
-               goto gpd_error_out;
-       }
-
-       /* Names are little-endian. */
-       memcpy(fcport->node_name, pd->node_name, WWN_SIZE);
-
-       /* Get port_id of device. */
-       fcport->d_id.b.domain = pd->port_id[0];
-       fcport->d_id.b.area = pd->port_id[1];
-       fcport->d_id.b.al_pa = pd->port_id[2];
-       fcport->d_id.b.rsvd_1 = 0;
-
-       /* If not target must be initiator or unknown type. */
-       if ((pd->prli_svc_param_word_3[0] & BIT_4) == 0)
-               fcport->port_type = FCT_INITIATOR;
-       else
-               fcport->port_type = FCT_TARGET;
-
-       /* Passback COS information. */
-       fcport->supported_classes = (pd->flags & PDF_CLASS_2) ?
-               FC_COS_CLASS2 : FC_COS_CLASS3;
-
-       if (pd->prli_svc_param_word_3[0] & BIT_7) {
-               fcport->flags |= FCF_CONF_COMP_SUPPORTED;
-               fcport->conf_compl_supported = 1;
-       }
+       rval = __qla24xx_parse_gpdb(vha, fcport, pd);
  
  gpd_error_out:
         memset(&ea, 0, sizeof(ea));
@@ -876,10 +834,14 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport)
         fcport->login_retry--;
  
         if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) ||
-           (fcport->fw_login_state == DSC_LS_PLOGI_COMP) ||
             (fcport->fw_login_state == DSC_LS_PRLI_PEND))
                 return 0;
  
+       if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) {
+               if (time_before_eq(jiffies, fcport->plogi_nack_done_deadline))
+                       return 0;
+       }
+
         /* for pure Target Mode. Login will not be initiated */
         if (vha->host->active_mode == MODE_TARGET)
                 return 0;
@@ -1041,10 +1003,14 @@ void qla24xx_handle_relogin_event(scsi_qla_host_t *vha,
                 fcport->flags);
  
         if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) ||
-           (fcport->fw_login_state == DSC_LS_PLOGI_COMP) ||
             (fcport->fw_login_state == DSC_LS_PRLI_PEND))
                 return;
  
+       if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) {
+               if (time_before_eq(jiffies, fcport->plogi_nack_done_deadline))
+                       return;
+       }
+
         if (fcport->flags & FCF_ASYNC_SENT) {
                 fcport->login_retry++;
                 set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
@@ -1258,7 +1224,7 @@ qla24xx_abort_sp_done(void *ptr, int res)
         complete(&abt->u.abt.comp);
  }
  
-static int
+int
  qla24xx_async_abort_cmd(srb_t *cmd_sp)
  {
         scsi_qla_host_t *vha = cmd_sp->vha;
@@ -3212,6 +3178,7 @@ next_check:
         } else {
                 ql_dbg(ql_dbg_init, vha, 0x00d3,
                     "Init Firmware -- success.\n");
+               ha->flags.fw_started = 1;
         }
  
         return (rval);
@@ -3374,8 +3341,8 @@ qla2x00_configure_hba(scsi_qla_host_t *vha)
         uint8_t       domain;
         char            connect_type[22];
         struct qla_hw_data *ha = vha->hw;
-       unsigned long flags;
         scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev);
+       port_id_t id;
  
         /* Get host addresses. */
         rval = qla2x00_get_adapter_id(vha,
@@ -3453,13 +3420,11 @@ qla2x00_configure_hba(scsi_qla_host_t *vha)
  
         /* Save Host port and loop ID. */
         /* byte order - Big Endian */
-       vha->d_id.b.domain = domain;
-       vha->d_id.b.area = area;
-       vha->d_id.b.al_pa = al_pa;
-
-       spin_lock_irqsave(&ha->vport_slock, flags);
-       qlt_update_vp_map(vha, SET_AL_PA);
-       spin_unlock_irqrestore(&ha->vport_slock, flags);
+       id.b.domain = domain;
+       id.b.area = area;
+       id.b.al_pa = al_pa;
+       id.b.rsvd_1 = 0;
+       qlt_update_host_map(vha, id);
  
         if (!vha->flags.init_done)
                 ql_log(ql_log_info, vha, 0x2010,
@@ -4036,6 +4001,7 @@ qla2x00_configure_loop(scsi_qla_host_t *vha)
                         atomic_set(&vha->loop_state, LOOP_READY);
                         ql_dbg(ql_dbg_disc, vha, 0x2069,
                             "LOOP READY.\n");
+                       ha->flags.fw_init_done = 1;
  
                         /*
                          * Process any ATIO queue entries that came in
@@ -5148,6 +5114,7 @@ qla2x00_update_fcports(scsi_qla_host_t *base_vha)
                         }
                 }
                 atomic_dec(&vha->vref_count);
+               wake_up(&vha->vref_waitq);
         }
         spin_unlock_irqrestore(&ha->vport_slock, flags);
  }
@@ -5526,6 +5493,11 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha)
         if (!(IS_P3P_TYPE(ha)))
                 ha->isp_ops->reset_chip(vha);
  
+       ha->flags.n2n_ae = 0;
+       ha->flags.lip_ae = 0;
+       ha->current_topology = 0;
+       ha->flags.fw_started = 0;
+       ha->flags.fw_init_done = 0;
         ha->chip_reset++;
  
         atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME);
@@ -6802,6 +6774,8 @@ qla2x00_try_to_stop_firmware(scsi_qla_host_t *vha)
                 return;
         if (!ha->fw_major_version)
                 return;
+       if (!ha->flags.fw_started)
+               return;
  
         ret = qla2x00_stop_firmware(vha);
         for (retries = 5; ret != QLA_SUCCESS && ret != QLA_FUNCTION_TIMEOUT &&
@@ -6815,6 +6789,9 @@ qla2x00_try_to_stop_firmware(scsi_qla_host_t *vha)
                     "Attempting retry of stop-firmware command.\n");
                 ret = qla2x00_stop_firmware(vha);
         }
+
+       ha->flags.fw_started = 0;
+       ha->flags.fw_init_done = 0;
  }
  
  int
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c

index 535079280288fbd6554a3ca28e620065b8b9fe98..ea027f6a7fd4e949c1a9a53aad0de00b0a7ee361 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -889,7 +889,7 @@ qla24xx_get_one_block_sg(uint32_t blk_sz, struct qla2_sgx *sgx,
  
  int
  qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *ha, srb_t *sp,
-       uint32_t *dsd, uint16_t tot_dsds, struct qla_tgt_cmd *tc)
+       uint32_t *dsd, uint16_t tot_dsds, struct qla_tc_param *tc)
  {
         void *next_dsd;
         uint8_t avail_dsds = 0;
@@ -898,7 +898,6 @@ qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *ha, srb_t *sp,
         struct scatterlist *sg_prot;
         uint32_t *cur_dsd = dsd;
         uint16_t        used_dsds = tot_dsds;
-
         uint32_t        prot_int; /* protection interval */
         uint32_t        partial;
         struct qla2_sgx sgx;
@@ -966,7 +965,7 @@ alloc_and_fill:
                         } else {
                                 list_add_tail(&dsd_ptr->list,
                                     &(tc->ctx->dsd_list));
-                               tc->ctx_dsd_alloced = 1;
+                               *tc->ctx_dsd_alloced = 1;
                         }
  
  
@@ -1005,7 +1004,7 @@ alloc_and_fill:
  
  int
  qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd,
-       uint16_t tot_dsds, struct qla_tgt_cmd *tc)
+       uint16_t tot_dsds, struct qla_tc_param *tc)
  {
         void *next_dsd;
         uint8_t avail_dsds = 0;
@@ -1066,7 +1065,7 @@ qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd,
                         } else {
                                 list_add_tail(&dsd_ptr->list,
                                     &(tc->ctx->dsd_list));
-                               tc->ctx_dsd_alloced = 1;
+                               *tc->ctx_dsd_alloced = 1;
                         }
  
                         /* add new list to cmd iocb or last list */
@@ -1092,7 +1091,7 @@ qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd,
  
  int
  qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp,
-       uint32_t *dsd, uint16_t tot_dsds, struct qla_tgt_cmd *tc)
+       uint32_t *dsd, uint16_t tot_dsds, struct qla_tc_param *tc)
  {
         void *next_dsd;
         uint8_t avail_dsds = 0;
@@ -1158,7 +1157,7 @@ qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp,
                         } else {
                                 list_add_tail(&dsd_ptr->list,
                                     &(tc->ctx->dsd_list));
-                               tc->ctx_dsd_alloced = 1;
+                               *tc->ctx_dsd_alloced = 1;
                         }
  
                         /* add new list to cmd iocb or last list */
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c

index 3c66ea29de2704fcefc71e965c071aa05c7bca78..3203367a4f423608ab69d75882d5a3141a1465a1 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -708,6 +708,8 @@ skip_rio:
                     "mbx7=%xh.\n", mb[1], mb[2], mb[3], mbx);
  
                 ha->isp_ops->fw_dump(vha, 1);
+               ha->flags.fw_init_done = 0;
+               ha->flags.fw_started = 0;
  
                 if (IS_FWI2_CAPABLE(ha)) {
                         if (mb[1] == 0 && mb[2] == 0) {
@@ -761,6 +763,9 @@ skip_rio:
                 break;
  
         case MBA_LIP_OCCURRED:          /* Loop Initialization Procedure */
+               ha->flags.lip_ae = 1;
+               ha->flags.n2n_ae = 0;
+
                 ql_dbg(ql_dbg_async, vha, 0x5009,
                     "LIP occurred (%x).\n", mb[1]);
  
@@ -797,6 +802,10 @@ skip_rio:
                 break;
  
         case MBA_LOOP_DOWN:             /* Loop Down Event */
+               ha->flags.n2n_ae = 0;
+               ha->flags.lip_ae = 0;
+               ha->current_topology = 0;
+
                 mbx = (IS_QLA81XX(ha) || IS_QLA8031(ha))
                         ? RD_REG_WORD(&reg24->mailbox4) : 0;
                 mbx = (IS_P3P_TYPE(ha)) ? RD_REG_WORD(&reg82->mailbox_out[4])
@@ -866,6 +875,9 @@ skip_rio:
  
         /* case MBA_DCBX_COMPLETE: */
         case MBA_POINT_TO_POINT:        /* Point-to-Point */
+               ha->flags.lip_ae = 0;
+               ha->flags.n2n_ae = 1;
+
                 if (IS_QLA2100(ha))
                         break;
  
@@ -1620,9 +1632,9 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req,
                 QLA_LOGIO_LOGIN_RETRIED : 0;
         if (logio->entry_status) {
                 ql_log(ql_log_warn, fcport->vha, 0x5034,
-                   "Async-%s error entry - hdl=%x"
+                   "Async-%s error entry - %8phC hdl=%x "
                     "portid=%02x%02x%02x entry-status=%x.\n",
-                   type, sp->handle, fcport->d_id.b.domain,
+                   type, fcport->port_name, sp->handle, fcport->d_id.b.domain,
                     fcport->d_id.b.area, fcport->d_id.b.al_pa,
                     logio->entry_status);
                 ql_dump_buffer(ql_dbg_async + ql_dbg_buffer, vha, 0x504d,
@@ -1633,8 +1645,9 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req,
  
         if (le16_to_cpu(logio->comp_status) == CS_COMPLETE) {
                 ql_dbg(ql_dbg_async, fcport->vha, 0x5036,
-                   "Async-%s complete - hdl=%x portid=%02x%02x%02x "
-                   "iop0=%x.\n", type, sp->handle, fcport->d_id.b.domain,
+                   "Async-%s complete - %8phC hdl=%x portid=%02x%02x%02x "
+                   "iop0=%x.\n", type, fcport->port_name, sp->handle,
+                   fcport->d_id.b.domain,
                     fcport->d_id.b.area, fcport->d_id.b.al_pa,
                     le32_to_cpu(logio->io_parameter[0]));
  
@@ -1674,6 +1687,17 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req,
         case LSC_SCODE_NPORT_USED:
                 data[0] = MBS_LOOP_ID_USED;
                 break;
+       case LSC_SCODE_CMD_FAILED:
+               if (iop[1] == 0x0606) {
+                       /*
+                        * PLOGI/PRLI Completed. We must have Recv PLOGI/PRLI,
+                        * Target side acked.
+                        */
+                       data[0] = MBS_COMMAND_COMPLETE;
+                       goto logio_done;
+               }
+               data[0] = MBS_COMMAND_ERROR;
+               break;
         case LSC_SCODE_NOXCB:
                 vha->hw->exch_starvation++;
                 if (vha->hw->exch_starvation > 5) {
@@ -1695,8 +1719,9 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req,
         }
  
         ql_dbg(ql_dbg_async, fcport->vha, 0x5037,
-           "Async-%s failed - hdl=%x portid=%02x%02x%02x comp=%x "
-           "iop0=%x iop1=%x.\n", type, sp->handle, fcport->d_id.b.domain,
+           "Async-%s failed - %8phC hdl=%x portid=%02x%02x%02x comp=%x "
+           "iop0=%x iop1=%x.\n", type, fcport->port_name,
+               sp->handle, fcport->d_id.b.domain,
             fcport->d_id.b.area, fcport->d_id.b.al_pa,
             le16_to_cpu(logio->comp_status),
             le32_to_cpu(logio->io_parameter[0]),
@@ -2679,7 +2704,7 @@ qla24xx_abort_iocb_entry(scsi_qla_host_t *vha, struct req_que *req,
                 return;
  
         abt = &sp->u.iocb_cmd;
-       abt->u.abt.comp_status = le32_to_cpu(pkt->nport_handle);
+       abt->u.abt.comp_status = le16_to_cpu(pkt->nport_handle);
         sp->done(sp, 0);
  }
  
@@ -2693,7 +2718,7 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
         struct sts_entry_24xx *pkt;
         struct qla_hw_data *ha = vha->hw;
  
-       if (!vha->flags.online)
+       if (!ha->flags.fw_started)
                 return;
  
         while (rsp->ring_ptr->signature != RESPONSE_PROCESSED) {
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c

index 35079f4174179967d99568a4491713d82d96c7a3..a113ab3592a7f86eb16ce8f76d82337557cab029 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -10,6 +10,28 @@
  #include <linux/delay.h>
  #include <linux/gfp.h>
  
+static struct mb_cmd_name {
+       uint16_t cmd;
+       const char *str;
+} mb_str[] = {
+       {MBC_GET_PORT_DATABASE,         "GPDB"},
+       {MBC_GET_ID_LIST,               "GIDList"},
+       {MBC_GET_LINK_PRIV_STATS,       "Stats"},
+};
+
+static const char *mb_to_str(uint16_t cmd)
+{
+       int i;
+       struct mb_cmd_name *e;
+
+       for (i = 0; i < ARRAY_SIZE(mb_str); i++) {
+               e = mb_str + i;
+               if (cmd == e->cmd)
+                       return e->str;
+       }
+       return "unknown";
+}
+
  static struct rom_cmd {
         uint16_t cmd;
  } rom_cmds[] = {
@@ -2818,7 +2840,7 @@ qla2x00_get_link_status(scsi_qla_host_t *vha, uint16_t loop_id,
  
  int
  qla24xx_get_isp_stats(scsi_qla_host_t *vha, struct link_statistics *stats,
-    dma_addr_t stats_dma, uint options)
+    dma_addr_t stats_dma, uint16_t options)
  {
         int rval;
         mbx_cmd_t mc;
@@ -2828,19 +2850,17 @@ qla24xx_get_isp_stats(scsi_qla_host_t *vha, struct link_statistics *stats,
         ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1088,
             "Entered %s.\n", __func__);
  
-       mcp->mb[0] = MBC_GET_LINK_PRIV_STATS;
-       mcp->mb[2] = MSW(stats_dma);
-       mcp->mb[3] = LSW(stats_dma);
-       mcp->mb[6] = MSW(MSD(stats_dma));
-       mcp->mb[7] = LSW(MSD(stats_dma));
-       mcp->mb[8] = sizeof(struct link_statistics) / 4;
-       mcp->mb[9] = vha->vp_idx;
-       mcp->mb[10] = options;
-       mcp->out_mb = MBX_10|MBX_9|MBX_8|MBX_7|MBX_6|MBX_3|MBX_2|MBX_0;
-       mcp->in_mb = MBX_2|MBX_1|MBX_0;
-       mcp->tov = MBX_TOV_SECONDS;
-       mcp->flags = IOCTL_CMD;
-       rval = qla2x00_mailbox_command(vha, mcp);
+       memset(&mc, 0, sizeof(mc));
+       mc.mb[0] = MBC_GET_LINK_PRIV_STATS;
+       mc.mb[2] = MSW(stats_dma);
+       mc.mb[3] = LSW(stats_dma);
+       mc.mb[6] = MSW(MSD(stats_dma));
+       mc.mb[7] = LSW(MSD(stats_dma));
+       mc.mb[8] = sizeof(struct link_statistics) / 4;
+       mc.mb[9] = cpu_to_le16(vha->vp_idx);
+       mc.mb[10] = cpu_to_le16(options);
+
+       rval = qla24xx_send_mb_cmd(vha, &mc);
  
         if (rval == QLA_SUCCESS) {
                 if (mcp->mb[0] != MBS_COMMAND_COMPLETE) {
@@ -3603,6 +3623,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
         scsi_qla_host_t *vp = NULL;
         unsigned long   flags;
         int found;
+       port_id_t id;
  
         ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b6,
             "Entered %s.\n", __func__);
@@ -3610,28 +3631,27 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
         if (rptid_entry->entry_status != 0)
                 return;
  
+       id.b.domain = rptid_entry->port_id[2];
+       id.b.area   = rptid_entry->port_id[1];
+       id.b.al_pa  = rptid_entry->port_id[0];
+       id.b.rsvd_1 = 0;
+
         if (rptid_entry->format == 0) {
                 /* loop */
-               ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b7,
+               ql_dbg(ql_dbg_async, vha, 0x10b7,
                     "Format 0 : Number of VPs setup %d, number of "
                     "VPs acquired %d.\n", rptid_entry->vp_setup,
                     rptid_entry->vp_acquired);
-               ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b8,
+               ql_dbg(ql_dbg_async, vha, 0x10b8,
                     "Primary port id %02x%02x%02x.\n",
                     rptid_entry->port_id[2], rptid_entry->port_id[1],
                     rptid_entry->port_id[0]);
  
-               vha->d_id.b.domain = rptid_entry->port_id[2];
-               vha->d_id.b.area = rptid_entry->port_id[1];
-               vha->d_id.b.al_pa = rptid_entry->port_id[0];
-
-               spin_lock_irqsave(&ha->vport_slock, flags);
-               qlt_update_vp_map(vha, SET_AL_PA);
-               spin_unlock_irqrestore(&ha->vport_slock, flags);
+               qlt_update_host_map(vha, id);
  
         } else if (rptid_entry->format == 1) {
                 /* fabric */
-               ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b9,
+               ql_dbg(ql_dbg_async, vha, 0x10b9,
                     "Format 1: VP[%d] enabled - status %d - with "
                     "port id %02x%02x%02x.\n", rptid_entry->vp_idx,
                         rptid_entry->vp_status,
@@ -3653,12 +3673,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
                                             WWN_SIZE);
                                 }
  
-                               vha->d_id.b.domain = rptid_entry->port_id[2];
-                               vha->d_id.b.area = rptid_entry->port_id[1];
-                               vha->d_id.b.al_pa = rptid_entry->port_id[0];
-                               spin_lock_irqsave(&ha->vport_slock, flags);
-                               qlt_update_vp_map(vha, SET_AL_PA);
-                               spin_unlock_irqrestore(&ha->vport_slock, flags);
+                               qlt_update_host_map(vha, id);
                         }
  
                         fc_host_port_name(vha->host) =
@@ -3694,12 +3709,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
                         if (!found)
                                 return;
  
-                       vp->d_id.b.domain = rptid_entry->port_id[2];
-                       vp->d_id.b.area =  rptid_entry->port_id[1];
-                       vp->d_id.b.al_pa = rptid_entry->port_id[0];
-                       spin_lock_irqsave(&ha->vport_slock, flags);
-                       qlt_update_vp_map(vp, SET_AL_PA);
-                       spin_unlock_irqrestore(&ha->vport_slock, flags);
+                       qlt_update_host_map(vp, id);
  
                         /*
                          * Cannot configure here as we are still sitting on the
@@ -5827,3 +5837,225 @@ qla26xx_dport_diagnostics(scsi_qla_host_t *vha,
  
         return rval;
  }
+
+static void qla2x00_async_mb_sp_done(void *s, int res)
+{
+       struct srb *sp = s;
+
+       sp->u.iocb_cmd.u.mbx.rc = res;
+
+       complete(&sp->u.iocb_cmd.u.mbx.comp);
+       /* don't free sp here. Let the caller do the free */
+}
+
+/*
+ * This mailbox uses the iocb interface to send MB command.
+ * This allows non-critical (non chip setup) commands to go
+ * out in parallel.
+ */
+int qla24xx_send_mb_cmd(struct scsi_qla_host *vha, mbx_cmd_t *mcp)
+{
+       int rval = QLA_FUNCTION_FAILED;
+       srb_t *sp;
+       struct srb_iocb *c;
+
+       if (!vha->hw->flags.fw_started)
+               goto done;
+
+       sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL);
+       if (!sp)
+               goto done;
+
+       sp->type = SRB_MB_IOCB;
+       sp->name = mb_to_str(mcp->mb[0]);
+
+       qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
+
+       memcpy(sp->u.iocb_cmd.u.mbx.out_mb, mcp->mb, SIZEOF_IOCB_MB_REG);
+
+       c = &sp->u.iocb_cmd;
+       c->timeout = qla2x00_async_iocb_timeout;
+       init_completion(&c->u.mbx.comp);
+
+       sp->done = qla2x00_async_mb_sp_done;
+
+       rval = qla2x00_start_sp(sp);
+       if (rval != QLA_SUCCESS) {
+               ql_dbg(ql_dbg_mbx, vha, 0xffff,
+                   "%s: %s Failed submission. %x.\n",
+                   __func__, sp->name, rval);
+               goto done_free_sp;
+       }
+
+       ql_dbg(ql_dbg_mbx, vha, 0xffff, "MB:%s hndl %x submitted\n",
+           sp->name, sp->handle);
+
+       wait_for_completion(&c->u.mbx.comp);
+       memcpy(mcp->mb, sp->u.iocb_cmd.u.mbx.in_mb, SIZEOF_IOCB_MB_REG);
+
+       rval = c->u.mbx.rc;
+       switch (rval) {
+       case QLA_FUNCTION_TIMEOUT:
+               ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %s Timeout. %x.\n",
+                   __func__, sp->name, rval);
+               break;
+       case  QLA_SUCCESS:
+               ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %s done.\n",
+                   __func__, sp->name);
+               sp->free(sp);
+               break;
+       default:
+               ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %s Failed. %x.\n",
+                   __func__, sp->name, rval);
+               sp->free(sp);
+               break;
+       }
+
+       return rval;
+
+done_free_sp:
+       sp->free(sp);
+done:
+       return rval;
+}
+
+/*
+ * qla24xx_gpdb_wait
+ * NOTE: Do not call this routine from DPC thread
+ */
+int qla24xx_gpdb_wait(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt)
+{
+       int rval = QLA_FUNCTION_FAILED;
+       dma_addr_t pd_dma;
+       struct port_database_24xx *pd;
+       struct qla_hw_data *ha = vha->hw;
+       mbx_cmd_t mc;
+
+       if (!vha->hw->flags.fw_started)
+               goto done;
+
+       pd = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &pd_dma);
+       if (pd  == NULL) {
+               ql_log(ql_log_warn, vha, 0xffff,
+                       "Failed to allocate port database structure.\n");
+               goto done_free_sp;
+       }
+       memset(pd, 0, max(PORT_DATABASE_SIZE, PORT_DATABASE_24XX_SIZE));
+
+       memset(&mc, 0, sizeof(mc));
+       mc.mb[0] = MBC_GET_PORT_DATABASE;
+       mc.mb[1] = cpu_to_le16(fcport->loop_id);
+       mc.mb[2] = MSW(pd_dma);
+       mc.mb[3] = LSW(pd_dma);
+       mc.mb[6] = MSW(MSD(pd_dma));
+       mc.mb[7] = LSW(MSD(pd_dma));
+       mc.mb[9] = cpu_to_le16(vha->vp_idx);
+       mc.mb[10] = cpu_to_le16((uint16_t)opt);
+
+       rval = qla24xx_send_mb_cmd(vha, &mc);
+       if (rval != QLA_SUCCESS) {
+               ql_dbg(ql_dbg_mbx, vha, 0xffff,
+                   "%s: %8phC fail\n", __func__, fcport->port_name);
+               goto done_free_sp;
+       }
+
+       rval = __qla24xx_parse_gpdb(vha, fcport, pd);
+
+       ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %8phC done\n",
+           __func__, fcport->port_name);
+
+done_free_sp:
+       if (pd)
+               dma_pool_free(ha->s_dma_pool, pd, pd_dma);
+done:
+       return rval;
+}
+
+int __qla24xx_parse_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport,
+    struct port_database_24xx *pd)
+{
+       int rval = QLA_SUCCESS;
+       uint64_t zero = 0;
+
+       /* Check for logged in state. */
+       if (pd->current_login_state != PDS_PRLI_COMPLETE &&
+               pd->last_login_state != PDS_PRLI_COMPLETE) {
+               ql_dbg(ql_dbg_mbx, vha, 0xffff,
+                          "Unable to verify login-state (%x/%x) for "
+                          "loop_id %x.\n", pd->current_login_state,
+                          pd->last_login_state, fcport->loop_id);
+               rval = QLA_FUNCTION_FAILED;
+               goto gpd_error_out;
+       }
+
+       if (fcport->loop_id == FC_NO_LOOP_ID ||
+           (memcmp(fcport->port_name, (uint8_t *)&zero, 8) &&
+            memcmp(fcport->port_name, pd->port_name, 8))) {
+               /* We lost the device mid way. */
+               rval = QLA_NOT_LOGGED_IN;
+               goto gpd_error_out;
+       }
+
+       /* Names are little-endian. */
+       memcpy(fcport->node_name, pd->node_name, WWN_SIZE);
+       memcpy(fcport->port_name, pd->port_name, WWN_SIZE);
+
+       /* Get port_id of device. */
+       fcport->d_id.b.domain = pd->port_id[0];
+       fcport->d_id.b.area = pd->port_id[1];
+       fcport->d_id.b.al_pa = pd->port_id[2];
+       fcport->d_id.b.rsvd_1 = 0;
+
+       /* If not target must be initiator or unknown type. */
+       if ((pd->prli_svc_param_word_3[0] & BIT_4) == 0)
+               fcport->port_type = FCT_INITIATOR;
+       else
+               fcport->port_type = FCT_TARGET;
+
+       /* Passback COS information. */
+       fcport->supported_classes = (pd->flags & PDF_CLASS_2) ?
+               FC_COS_CLASS2 : FC_COS_CLASS3;
+
+       if (pd->prli_svc_param_word_3[0] & BIT_7) {
+               fcport->flags |= FCF_CONF_COMP_SUPPORTED;
+               fcport->conf_compl_supported = 1;
+       }
+
+gpd_error_out:
+       return rval;
+}
+
+/*
+ * qla24xx_gidlist_wait
+ * NOTE: don't call this routine from DPC thread.
+ */
+int qla24xx_gidlist_wait(struct scsi_qla_host *vha,
+       void *id_list, dma_addr_t id_list_dma, uint16_t *entries)
+{
+       int rval = QLA_FUNCTION_FAILED;
+       mbx_cmd_t mc;
+
+       if (!vha->hw->flags.fw_started)
+               goto done;
+
+       memset(&mc, 0, sizeof(mc));
+       mc.mb[0] = MBC_GET_ID_LIST;
+       mc.mb[2] = MSW(id_list_dma);
+       mc.mb[3] = LSW(id_list_dma);
+       mc.mb[6] = MSW(MSD(id_list_dma));
+       mc.mb[7] = LSW(MSD(id_list_dma));
+       mc.mb[8] = 0;
+       mc.mb[9] = cpu_to_le16(vha->vp_idx);
+
+       rval = qla24xx_send_mb_cmd(vha, &mc);
+       if (rval != QLA_SUCCESS) {
+               ql_dbg(ql_dbg_mbx, vha, 0xffff,
+                       "%s:  fail\n", __func__);
+       } else {
+               *entries = mc.mb[1];
+               ql_dbg(ql_dbg_mbx, vha, 0xffff,
+                       "%s:  done\n", __func__);
+       }
+done:
+       return rval;
+}
diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c

index c6d6f0d912ff75ffaf9b9d810f81af735e39549b..09a490c98763a9406a6eafd3082df8f8ed50a149 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_mid.c
+++ b/drivers/scsi/qla2xxx/qla_mid.c
@@ -74,13 +74,14 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha)
          * ensures no active vp_list traversal while the vport is removed
          * from the queue)
          */
-       spin_lock_irqsave(&ha->vport_slock, flags);
-       while (atomic_read(&vha->vref_count)) {
-               spin_unlock_irqrestore(&ha->vport_slock, flags);
-
-               msleep(500);
+       wait_event_timeout(vha->vref_waitq, !atomic_read(&vha->vref_count),
+           10*HZ);
  
-               spin_lock_irqsave(&ha->vport_slock, flags);
+       spin_lock_irqsave(&ha->vport_slock, flags);
+       if (atomic_read(&vha->vref_count)) {
+               ql_dbg(ql_dbg_vport, vha, 0xfffa,
+                   "vha->vref_count=%u timeout\n", vha->vref_count.counter);
+               vha->vref_count = (atomic_t)ATOMIC_INIT(0);
         }
         list_del(&vha->list);
         qlt_update_vp_map(vha, RESET_VP_IDX);
@@ -269,6 +270,7 @@ qla2x00_alert_all_vps(struct rsp_que *rsp, uint16_t *mb)
  
                         spin_lock_irqsave(&ha->vport_slock, flags);
                         atomic_dec(&vha->vref_count);
+                       wake_up(&vha->vref_waitq);
                 }
                 i++;
         }
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c

index 1fed235a1b4a03172a4717a360a90f29ae383a4f..83d61d2142e98d9c48ad3a6dcc5acf4183e293ff 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1160,8 +1160,13 @@ static inline
  uint32_t qla2x00_isp_reg_stat(struct qla_hw_data *ha)
  {
         struct device_reg_24xx __iomem *reg = &ha->iobase->isp24;
+       struct device_reg_82xx __iomem *reg82 = &ha->iobase->isp82;
  
-       return ((RD_REG_DWORD(&reg->host_status)) == ISP_REG_DISCONNECT);
+       if (IS_P3P_TYPE(ha))
+               return ((RD_REG_DWORD(&reg82->host_int)) == ISP_REG_DISCONNECT);
+       else
+               return ((RD_REG_DWORD(&reg->host_status)) ==
+                       ISP_REG_DISCONNECT);
  }
  
  /**************************************************************************
@@ -1651,7 +1656,8 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res)
                                 /* Don't abort commands in adapter during EEH
                                  * recovery as it's not accessible/responding.
                                  */
-                               if (GET_CMD_SP(sp) && !ha->flags.eeh_busy) {
+                               if (GET_CMD_SP(sp) && !ha->flags.eeh_busy &&
+                                   (sp->type == SRB_SCSI_CMD)) {
                                         /* Get a reference to the sp and drop the lock.
                                          * The reference ensures this sp->done() call
                                          * - and not the call in qla2xxx_eh_abort() -
@@ -2560,6 +2566,20 @@ qla2xxx_scan_finished(struct Scsi_Host *shost, unsigned long time)
         return atomic_read(&vha->loop_state) == LOOP_READY;
  }
  
+static void qla2x00_iocb_work_fn(struct work_struct *work)
+{
+       struct scsi_qla_host *vha = container_of(work,
+               struct scsi_qla_host, iocb_work);
+       int passes;
+
+       /* Run queued work until the list is empty, at most 11 passes. */
+       for (passes = 0; passes <= 10; passes++) {
+               if (list_empty(&vha->work_list))
+                       break;
+               qla2x00_do_work(vha);
+       }
+}
+
  /*
   * PCI driver interface
   */
@@ -3078,6 +3098,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
          */
         qla2xxx_wake_dpc(base_vha);
  
+       INIT_WORK(&base_vha->iocb_work, qla2x00_iocb_work_fn);
         INIT_WORK(&ha->board_disable, qla2x00_disable_board_on_pci_error);
  
         if (IS_QLA8031(ha) || IS_MCTP_CAPABLE(ha)) {
@@ -3469,6 +3490,7 @@ qla2x00_remove_one(struct pci_dev *pdev)
         qla2x00_free_sysfs_attr(base_vha, true);
  
         fc_remove_host(base_vha->host);
+       qlt_remove_target_resources(ha);
  
         scsi_remove_host(base_vha->host);
  
@@ -4268,6 +4290,7 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht,
         spin_lock_init(&vha->work_lock);
         spin_lock_init(&vha->cmd_list_lock);
         init_waitqueue_head(&vha->fcport_waitQ);
+       init_waitqueue_head(&vha->vref_waitq);
  
         vha->gnl.size = sizeof(struct get_name_list_extended) *
                         (ha->max_loop_id + 1);
@@ -4319,7 +4342,11 @@ qla2x00_post_work(struct scsi_qla_host *vha, struct qla_work_evt *e)
         spin_lock_irqsave(&vha->work_lock, flags);
         list_add_tail(&e->list, &vha->work_list);
         spin_unlock_irqrestore(&vha->work_lock, flags);
-       qla2xxx_wake_dpc(vha);
+
+       if (QLA_EARLY_LINKUP(vha->hw))
+               schedule_work(&vha->iocb_work);
+       else
+               qla2xxx_wake_dpc(vha);
  
         return QLA_SUCCESS;
  }
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c

index 45f5077684f0a5b39c0645ddee831bf4071667d4..0e03ca2ab3e52358c817cdd2cdc667ba2bfb1ba3 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -130,6 +130,9 @@ static void qlt_send_term_imm_notif(struct scsi_qla_host *vha,
  static struct fc_port *qlt_create_sess(struct scsi_qla_host *vha,
         fc_port_t *fcport, bool local);
  void qlt_unreg_sess(struct fc_port *sess);
+static void qlt_24xx_handle_abts(struct scsi_qla_host *,
+       struct abts_recv_from_24xx *);
+
  /*
   * Global Variables
   */
@@ -140,6 +143,20 @@ static struct workqueue_struct *qla_tgt_wq;
  static DEFINE_MUTEX(qla_tgt_mutex);
  static LIST_HEAD(qla_tgt_glist);
  
+static const char *prot_op_str(u32 prot_op)
+{
+       /* Map a TARGET_PROT_* value to a printable name. */
+       /* Any value not listed below yields "UNKNOWN". */
+       return prot_op == TARGET_PROT_NORMAL      ? "NORMAL" :
+              prot_op == TARGET_PROT_DIN_INSERT  ? "DIN_INSERT" :
+              prot_op == TARGET_PROT_DOUT_INSERT ? "DOUT_INSERT" :
+              prot_op == TARGET_PROT_DIN_STRIP   ? "DIN_STRIP" :
+              prot_op == TARGET_PROT_DOUT_STRIP  ? "DOUT_STRIP" :
+              prot_op == TARGET_PROT_DIN_PASS    ? "DIN_PASS" :
+              prot_op == TARGET_PROT_DOUT_PASS   ? "DOUT_PASS" :
+              "UNKNOWN";
+}
+
  /* This API intentionally takes dest as a parameter, rather than returning
   * int value to avoid caller forgetting to issue wmb() after the store */
  void qlt_do_generation_tick(struct scsi_qla_host *vha, int *dest)
@@ -170,21 +187,23 @@ static inline
  struct scsi_qla_host *qlt_find_host_by_d_id(struct scsi_qla_host *vha,
         uint8_t *d_id)
  {
-       struct qla_hw_data *ha = vha->hw;
-       uint8_t vp_idx;
-
-       if ((vha->d_id.b.area != d_id[1]) || (vha->d_id.b.domain != d_id[0]))
-               return NULL;
+       struct scsi_qla_host *host;
+       uint32_t key = 0;
  
-       if (vha->d_id.b.al_pa == d_id[2])
+       if ((vha->d_id.b.area == d_id[1]) && (vha->d_id.b.domain == d_id[0]) &&
+           (vha->d_id.b.al_pa == d_id[2]))
                 return vha;
  
-       BUG_ON(ha->tgt.tgt_vp_map == NULL);
-       vp_idx = ha->tgt.tgt_vp_map[d_id[2]].idx;
-       if (likely(test_bit(vp_idx, ha->vp_idx_map)))
-               return ha->tgt.tgt_vp_map[vp_idx].vha;
+       key  = (uint32_t)d_id[0] << 16;
+       key |= (uint32_t)d_id[1] <<  8;
+       key |= (uint32_t)d_id[2];
  
-       return NULL;
+       host = btree_lookup32(&vha->hw->tgt.host_map, key);
+       if (!host)
+               ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff,
+                          "Unable to find host %06x\n", key);
+
+       return host;
  }
  
  static inline
@@ -389,6 +408,8 @@ static bool qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha,
                         (struct abts_recv_from_24xx *)atio;
                 struct scsi_qla_host *host = qlt_find_host_by_vp_idx(vha,
                         entry->vp_index);
+               unsigned long flags;
+
                 if (unlikely(!host)) {
                         ql_dbg(ql_dbg_tgt, vha, 0xffff,
                             "qla_target(%d): Response pkt (ABTS_RECV_24XX) "
@@ -396,9 +417,12 @@ static bool qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha,
                             vha->vp_idx, entry->vp_index);
                         break;
                 }
-               qlt_response_pkt(host, (response_t *)atio);
+               if (!ha_locked)
+                       spin_lock_irqsave(&host->hw->hardware_lock, flags);
+               qlt_24xx_handle_abts(host, (struct abts_recv_from_24xx *)atio);
+               if (!ha_locked)
+                       spin_unlock_irqrestore(&host->hw->hardware_lock, flags);
                 break;
-
         }
  
         /* case PUREX_IOCB_TYPE: ql2xmvasynctoatio */
@@ -554,6 +578,7 @@ void qla2x00_async_nack_sp_done(void *s, int res)
                 sp->fcport->login_gen++;
                 sp->fcport->fw_login_state = DSC_LS_PLOGI_COMP;
                 sp->fcport->logout_on_delete = 1;
+               sp->fcport->plogi_nack_done_deadline = jiffies + HZ;
                 break;
  
         case SRB_NACK_PRLI:
@@ -613,6 +638,7 @@ int qla24xx_async_notify_ack(scsi_qla_host_t *vha, fc_port_t *fcport,
                 break;
         case SRB_NACK_PRLI:
                 fcport->fw_login_state = DSC_LS_PRLI_PEND;
+               fcport->deleted = 0;
                 c = "PRLI";
                 break;
         case SRB_NACK_LOGO:
@@ -1215,7 +1241,7 @@ static int qla24xx_get_loop_id(struct scsi_qla_host *vha, const uint8_t *s_id,
         }
  
         /* Get list of logged in devices */
-       rc = qla2x00_get_id_list(vha, gid_list, gid_list_dma, &entries);
+       rc = qla24xx_gidlist_wait(vha, gid_list, gid_list_dma, &entries);
         if (rc != QLA_SUCCESS) {
                 ql_dbg(ql_dbg_tgt_mgt, vha, 0xf045,
                     "qla_target(%d): get_id_list() failed: %x\n",
@@ -1551,6 +1577,9 @@ static void qlt_send_notify_ack(struct scsi_qla_host *vha,
         request_t *pkt;
         struct nack_to_isp *nack;
  
+       if (!ha->flags.fw_started)
+               return;
+
         ql_dbg(ql_dbg_tgt, vha, 0xe004, "Sending NOTIFY_ACK (ha=%p)\n", ha);
  
         /* Send marker if required */
@@ -2013,6 +2042,70 @@ void qlt_free_mcmd(struct qla_tgt_mgmt_cmd *mcmd)
  }
  EXPORT_SYMBOL(qlt_free_mcmd);
  
+/*
+ * Queue a CTIO7 carrying @scsi_status and @sense_key/@asc/@ascq sense.
+ * ha->hardware_lock supposed to be held on entry. Might drop it, then
+ * reacquire
+ */
+void qlt_send_resp_ctio(scsi_qla_host_t *vha, struct qla_tgt_cmd *cmd,
+    uint8_t scsi_status, uint8_t sense_key, uint8_t asc, uint8_t ascq)
+{
+       struct atio_from_isp *atio = &cmd->atio;
+       struct ctio7_to_24xx *ctio;
+       uint8_t *sense;
+       uint16_t temp;
+
+       ql_dbg(ql_dbg_tgt_dif, vha, 0x3066,
+           "Sending response CTIO7 (vha=%p, atio=%p, scsi_status=%02x, "
+           "sense_key=%02x, asc=%02x, ascq=%02x",
+           vha, atio, scsi_status, sense_key, asc, ascq);
+
+       ctio = (struct ctio7_to_24xx *)qla2x00_alloc_iocbs(vha, NULL);
+       if (!ctio) {
+               ql_dbg(ql_dbg_async, vha, 0x3067,
+                   "qla2x00t(%ld): %s failed: unable to allocate request packet",
+                   vha->host_no, __func__);
+               return;
+       }
+
+       ctio->entry_type = CTIO_TYPE7;
+       ctio->entry_count = 1;
+       ctio->handle = QLA_TGT_SKIP_HANDLE;
+       ctio->nport_handle = cpu_to_le16(cmd->sess->loop_id);
+       ctio->timeout = cpu_to_le16(QLA_TGT_TIMEOUT);
+       ctio->vp_index = vha->vp_idx;
+       ctio->initiator_id[0] = atio->u.isp24.fcp_hdr.s_id[2];
+       ctio->initiator_id[1] = atio->u.isp24.fcp_hdr.s_id[1];
+       ctio->initiator_id[2] = atio->u.isp24.fcp_hdr.s_id[0];
+       ctio->exchange_addr = atio->u.isp24.exchange_addr;
+       ctio->u.status1.flags = cpu_to_le16((atio->u.isp24.attr << 9) |
+           CTIO7_FLAGS_STATUS_MODE_1 | CTIO7_FLAGS_SEND_STATUS);
+       temp = be16_to_cpu(atio->u.isp24.fcp_hdr.ox_id);
+       ctio->u.status1.ox_id = cpu_to_le16(temp);
+       ctio->u.status1.scsi_status =
+           cpu_to_le16(SS_RESPONSE_INFO_LEN_VALID | scsi_status);
+       ctio->u.status1.response_len = cpu_to_le16(18);
+       ctio->u.status1.residual = cpu_to_le32(get_datalen_for_atio(atio));
+
+       if (ctio->u.status1.residual != 0)
+               ctio->u.status1.scsi_status |=
+                   cpu_to_le16(SS_RESIDUAL_UNDER);
+
+       /* 32-bit words 0, 1 and 3 of sense_data: byte offsets 0, 4, 12. */
+       sense = (uint8_t *)ctio->u.status1.sense_data;
+       /* Response code and sense key */
+       put_unaligned_le32((0x70 << 24) | (sense_key << 8), sense);
+       /* Additional sense length */
+       put_unaligned_le32(0x0a, sense + 4);
+       /* ASC and ASCQ */
+       put_unaligned_le32(((uint32_t)asc << 24) | (ascq << 16), sense + 12);
+
+       /* Memory Barrier */
+       wmb();
+
+       qla2x00_start_iocbs(vha, vha->req);
+}
+
  /* callback from target fabric module code */
  void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *mcmd)
  {
@@ -2261,7 +2354,7 @@ static int qlt_24xx_build_ctio_pkt(struct qla_tgt_prm *prm,
                  */
                 return -EAGAIN;
         } else
-               ha->tgt.cmds[h-1] = prm->cmd;
+               ha->tgt.cmds[h - 1] = prm->cmd;
  
         pkt->handle = h | CTIO_COMPLETION_HANDLE_MARK;
         pkt->nport_handle = prm->cmd->loop_id;
@@ -2391,6 +2484,50 @@ static inline int qlt_has_data(struct qla_tgt_cmd *cmd)
         return cmd->bufflen > 0;
  }
  
+static void qlt_print_dif_err(struct qla_tgt_prm *prm)
+{
+       struct qla_tgt_cmd *cmd;
+       struct scsi_qla_host *vha;
+
+       /* Only ASC 0x10 (DIF error) is logged; the ASCQ picks the message. */
+       if (!prm->sense_buffer || prm->sense_buffer[12] != 0x10)
+               return;
+
+       cmd = prm->cmd;
+       vha = cmd->vha;
+       switch (prm->sense_buffer[13]) {
+       case 1:
+               ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+                   "BE detected Guard TAG ERR: lba[0x%llx|%lld] len[0x%x] "
+                   "se_cmd=%p tag[%x]",
+                   cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd,
+                   cmd->atio.u.isp24.exchange_addr);
+               break;
+       case 2:
+               ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+                   "BE detected APP TAG ERR: lba[0x%llx|%lld] len[0x%x] "
+                   "se_cmd=%p tag[%x]",
+                   cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd,
+                   cmd->atio.u.isp24.exchange_addr);
+               break;
+       case 3:
+               ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+                   "BE detected REF TAG ERR: lba[0x%llx|%lld] len[0x%x] "
+                   "se_cmd=%p tag[%x]",
+                   cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd,
+                   cmd->atio.u.isp24.exchange_addr);
+               break;
+       default:
+               ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+                   "BE detected Dif ERR: lba[%llx|%lld] len[%x] "
+                   "se_cmd=%p tag[%x]",
+                   cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd,
+                   cmd->atio.u.isp24.exchange_addr);
+               break;
+       }
+       ql_dump_buffer(ql_dbg_tgt_dif, vha, 0xffff, cmd->cdb, 16);
+}
+
  /*
   * Called without ha->hardware_lock held
   */
@@ -2512,18 +2649,9 @@ skip_explict_conf:
                 for (i = 0; i < prm->sense_buffer_len/4; i++)
                         ((uint32_t *)ctio->u.status1.sense_data)[i] =
                                 cpu_to_be32(((uint32_t *)prm->sense_buffer)[i]);
-#if 0
-               if (unlikely((prm->sense_buffer_len % 4) != 0)) {
-                       static int q;
-                       if (q < 10) {
-                               ql_dbg(ql_dbg_tgt, vha, 0xe04f,
-                                   "qla_target(%d): %d bytes of sense "
-                                   "lost", prm->tgt->ha->vp_idx,
-                                   prm->sense_buffer_len % 4);
-                               q++;
-                       }
-               }
-#endif
+
+               qlt_print_dif_err(prm);
+
         } else {
                 ctio->u.status1.flags &=
                     ~cpu_to_le16(CTIO7_FLAGS_STATUS_MODE_0);
@@ -2537,19 +2665,9 @@ skip_explict_conf:
         /* Sense with len > 24, is it possible ??? */
  }
  
-
-
-/* diff  */
  static inline int
  qlt_hba_err_chk_enabled(struct se_cmd *se_cmd)
  {
-       /*
-        * Uncomment when corresponding SCSI changes are done.
-        *
-        if (!sp->cmd->prot_chk)
-        return 0;
-        *
-        */
         switch (se_cmd->prot_op) {
         case TARGET_PROT_DOUT_INSERT:
         case TARGET_PROT_DIN_STRIP:
@@ -2570,16 +2688,38 @@ qlt_hba_err_chk_enabled(struct se_cmd *se_cmd)
         return 0;
  }
  
+static inline int
+qla_tgt_ref_mask_check(struct se_cmd *se_cmd)
+{
+       /* 1 for the INSERT, STRIP and PASS protection ops, otherwise 0. */
+       switch (se_cmd->prot_op) {
+       case TARGET_PROT_DIN_INSERT:
+       case TARGET_PROT_DOUT_INSERT:
+       case TARGET_PROT_DIN_STRIP:
+       case TARGET_PROT_DOUT_STRIP:
+       case TARGET_PROT_DIN_PASS:
+       case TARGET_PROT_DOUT_PASS:
+               return 1;
+       default:
+               return 0;
+       }
+}
+
  /*
- * qla24xx_set_t10dif_tags_from_cmd - Extract Ref and App tags from SCSI command
- *
+ * qla_tgt_set_dif_tags - Extract Ref and App tags from SCSI command
   */
-static inline void
-qlt_set_t10dif_tags(struct se_cmd *se_cmd, struct crc_context *ctx)
+static void
+qla_tgt_set_dif_tags(struct qla_tgt_cmd *cmd, struct crc_context *ctx,
+    uint16_t *pfw_prot_opts)
  {
+       struct se_cmd *se_cmd = &cmd->se_cmd;
         uint32_t lba = 0xffffffff & se_cmd->t_task_lba;
+       scsi_qla_host_t *vha = cmd->tgt->vha;
+       struct qla_hw_data *ha = vha->hw;
+       uint32_t t32 = 0;
  
-       /* wait til Mode Sense/Select cmd, modepage Ah, subpage 2
+       /*
+        * wait till Mode Sense/Select cmd, modepage Ah, subpage 2
          * have been immplemented by TCM, before AppTag is avail.
          * Look for modesense_handlers[]
          */
@@ -2587,65 +2727,73 @@ qlt_set_t10dif_tags(struct se_cmd *se_cmd, struct crc_context *ctx)
         ctx->app_tag_mask[0] = 0x0;
         ctx->app_tag_mask[1] = 0x0;
  
+       if (IS_PI_UNINIT_CAPABLE(ha)) {
+               if ((se_cmd->prot_type == TARGET_DIF_TYPE1_PROT) ||
+                   (se_cmd->prot_type == TARGET_DIF_TYPE2_PROT))
+                       *pfw_prot_opts |= PO_DIS_VALD_APP_ESC;
+               else if (se_cmd->prot_type == TARGET_DIF_TYPE3_PROT)
+                       *pfw_prot_opts |= PO_DIS_VALD_APP_REF_ESC;
+       }
+
+       t32 = ha->tgt.tgt_ops->get_dif_tags(cmd, pfw_prot_opts);
+
         switch (se_cmd->prot_type) {
         case TARGET_DIF_TYPE0_PROT:
                 /*
-                * No check for ql2xenablehba_err_chk, as it would be an
-                * I/O error if hba tag generation is not done.
+                * No check for ql2xenablehba_err_chk, as it
+                * would be an I/O error if hba tag generation
+                * is not done.
                  */
                 ctx->ref_tag = cpu_to_le32(lba);
-
-               if (!qlt_hba_err_chk_enabled(se_cmd))
-                       break;
-
                 /* enable ALL bytes of the ref tag */
                 ctx->ref_tag_mask[0] = 0xff;
                 ctx->ref_tag_mask[1] = 0xff;
                 ctx->ref_tag_mask[2] = 0xff;
                 ctx->ref_tag_mask[3] = 0xff;
                 break;
-       /*
-        * For TYpe 1 protection: 16 bit GUARD tag, 32 bit REF tag, and
-        * 16 bit app tag.
-        */
         case TARGET_DIF_TYPE1_PROT:
-               ctx->ref_tag = cpu_to_le32(lba);
-
-               if (!qlt_hba_err_chk_enabled(se_cmd))
-                       break;
-
-               /* enable ALL bytes of the ref tag */
-               ctx->ref_tag_mask[0] = 0xff;
-               ctx->ref_tag_mask[1] = 0xff;
-               ctx->ref_tag_mask[2] = 0xff;
-               ctx->ref_tag_mask[3] = 0xff;
-               break;
-       /*
-        * For TYPE 2 protection: 16 bit GUARD + 32 bit REF tag has to
-        * match LBA in CDB + N
-        */
+           /*
+            * For TYPE 1 protection: 16 bit GUARD tag, 32 bit
+            * REF tag, and 16 bit app tag.
+            */
+           ctx->ref_tag = cpu_to_le32(lba);
+           if (!qla_tgt_ref_mask_check(se_cmd) ||
+               !(ha->tgt.tgt_ops->chk_dif_tags(t32))) {
+                   *pfw_prot_opts |= PO_DIS_REF_TAG_VALD;
+                   break;
+           }
+           /* enable ALL bytes of the ref tag */
+           ctx->ref_tag_mask[0] = 0xff;
+           ctx->ref_tag_mask[1] = 0xff;
+           ctx->ref_tag_mask[2] = 0xff;
+           ctx->ref_tag_mask[3] = 0xff;
+           break;
         case TARGET_DIF_TYPE2_PROT:
-               ctx->ref_tag = cpu_to_le32(lba);
-
-               if (!qlt_hba_err_chk_enabled(se_cmd))
-                       break;
-
-               /* enable ALL bytes of the ref tag */
-               ctx->ref_tag_mask[0] = 0xff;
-               ctx->ref_tag_mask[1] = 0xff;
-               ctx->ref_tag_mask[2] = 0xff;
-               ctx->ref_tag_mask[3] = 0xff;
-               break;
-
-       /* For Type 3 protection: 16 bit GUARD only */
+           /*
+            * For TYPE 2 protection: 16 bit GUARD + 32 bit REF
+            * tag has to match LBA in CDB + N
+            */
+           ctx->ref_tag = cpu_to_le32(lba);
+           if (!qla_tgt_ref_mask_check(se_cmd) ||
+               !(ha->tgt.tgt_ops->chk_dif_tags(t32))) {
+                   *pfw_prot_opts |= PO_DIS_REF_TAG_VALD;
+                   break;
+           }
+           /* enable ALL bytes of the ref tag */
+           ctx->ref_tag_mask[0] = 0xff;
+           ctx->ref_tag_mask[1] = 0xff;
+           ctx->ref_tag_mask[2] = 0xff;
+           ctx->ref_tag_mask[3] = 0xff;
+           break;
         case TARGET_DIF_TYPE3_PROT:
-               ctx->ref_tag_mask[0] = ctx->ref_tag_mask[1] =
-                       ctx->ref_tag_mask[2] = ctx->ref_tag_mask[3] = 0x00;
-               break;
+           /* For TYPE 3 protection: 16 bit GUARD only */
+           *pfw_prot_opts |= PO_DIS_REF_TAG_VALD;
+           ctx->ref_tag_mask[0] = ctx->ref_tag_mask[1] =
+               ctx->ref_tag_mask[2] = ctx->ref_tag_mask[3] = 0x00;
+           break;
         }
  }
  
-
  static inline int
  qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
  {
@@ -2664,6 +2812,7 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
         struct se_cmd           *se_cmd = &cmd->se_cmd;
         uint32_t h;
         struct atio_from_isp *atio = &prm->cmd->atio;
+       struct qla_tc_param     tc;
         uint16_t t16;
  
         ha = vha->hw;
@@ -2689,16 +2838,15 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
         case TARGET_PROT_DIN_INSERT:
         case TARGET_PROT_DOUT_STRIP:
                 transfer_length = data_bytes;
-               data_bytes += dif_bytes;
+               if (cmd->prot_sg_cnt)
+                       data_bytes += dif_bytes;
                 break;
-
         case TARGET_PROT_DIN_STRIP:
         case TARGET_PROT_DOUT_INSERT:
         case TARGET_PROT_DIN_PASS:
         case TARGET_PROT_DOUT_PASS:
                 transfer_length = data_bytes + dif_bytes;
                 break;
-
         default:
                 BUG();
                 break;
@@ -2734,7 +2882,6 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
                 break;
         }
  
-
         /* ---- PKT ---- */
         /* Update entry type to indicate Command Type CRC_2 IOCB */
         pkt->entry_type  = CTIO_CRC2;
@@ -2752,9 +2899,8 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
         } else
                 ha->tgt.cmds[h-1] = prm->cmd;
  
-
         pkt->handle  = h | CTIO_COMPLETION_HANDLE_MARK;
-       pkt->nport_handle = prm->cmd->loop_id;
+       pkt->nport_handle = cpu_to_le16(prm->cmd->loop_id);
         pkt->timeout = cpu_to_le16(QLA_TGT_TIMEOUT);
         pkt->initiator_id[0] = atio->u.isp24.fcp_hdr.s_id[2];
         pkt->initiator_id[1] = atio->u.isp24.fcp_hdr.s_id[1];
@@ -2775,12 +2921,10 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
         else if (cmd->dma_data_direction == DMA_FROM_DEVICE)
                 pkt->flags = cpu_to_le16(CTIO7_FLAGS_DATA_OUT);
  
-
         pkt->dseg_count = prm->tot_dsds;
         /* Fibre channel byte count */
         pkt->transfer_length = cpu_to_le32(transfer_length);
  
-
         /* ----- CRC context -------- */
  
         /* Allocate CRC context from global pool */
@@ -2800,13 +2944,12 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
         /* Set handle */
         crc_ctx_pkt->handle = pkt->handle;
  
-       qlt_set_t10dif_tags(se_cmd, crc_ctx_pkt);
+       qla_tgt_set_dif_tags(cmd, crc_ctx_pkt, &fw_prot_opts);
  
         pkt->crc_context_address[0] = cpu_to_le32(LSD(crc_ctx_dma));
         pkt->crc_context_address[1] = cpu_to_le32(MSD(crc_ctx_dma));
         pkt->crc_context_len = CRC_CONTEXT_LEN_FW;
  
-
         if (!bundling) {
                 cur_dsd = (uint32_t *) &crc_ctx_pkt->u.nobundling.data_address;
         } else {
@@ -2827,16 +2970,24 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
         crc_ctx_pkt->byte_count = cpu_to_le32(data_bytes);
         crc_ctx_pkt->guard_seed = cpu_to_le16(0);
  
+       memset((uint8_t *)&tc, 0 , sizeof(tc));
+       tc.vha = vha;
+       tc.blk_sz = cmd->blk_sz;
+       tc.bufflen = cmd->bufflen;
+       tc.sg = cmd->sg;
+       tc.prot_sg = cmd->prot_sg;
+       tc.ctx = crc_ctx_pkt;
+       tc.ctx_dsd_alloced = &cmd->ctx_dsd_alloced;
  
         /* Walks data segments */
         pkt->flags |= cpu_to_le16(CTIO7_FLAGS_DSD_PTR);
  
         if (!bundling && prm->prot_seg_cnt) {
                 if (qla24xx_walk_and_build_sglist_no_difb(ha, NULL, cur_dsd,
-                       prm->tot_dsds, cmd))
+                       prm->tot_dsds, &tc))
                         goto crc_queuing_error;
         } else if (qla24xx_walk_and_build_sglist(ha, NULL, cur_dsd,
-               (prm->tot_dsds - prm->prot_seg_cnt), cmd))
+               (prm->tot_dsds - prm->prot_seg_cnt), &tc))
                 goto crc_queuing_error;
  
         if (bundling && prm->prot_seg_cnt) {
@@ -2845,18 +2996,18 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha)
  
                 cur_dsd = (uint32_t *) &crc_ctx_pkt->u.bundling.dif_address;
                 if (qla24xx_walk_and_build_prot_sglist(ha, NULL, cur_dsd,
-                       prm->prot_seg_cnt, cmd))
+                       prm->prot_seg_cnt, &tc))
                         goto crc_queuing_error;
         }
         return QLA_SUCCESS;
  
  crc_queuing_error:
         /* Cleanup will be performed by the caller */
+       vha->hw->tgt.cmds[h - 1] = NULL;
  
         return QLA_FUNCTION_FAILED;
  }
  
-
  /*
   * Callback to setup response of xmit_type of QLA_TGT_XMIT_DATA and *
   * QLA_TGT_XMIT_STATUS for >= 24xx silicon
@@ -2906,7 +3057,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type,
         else
                 vha->tgt_counters.core_qla_que_buf++;
  
-       if (!vha->flags.online || cmd->reset_count != ha->chip_reset) {
+       if (!ha->flags.fw_started || cmd->reset_count != ha->chip_reset) {
                 /*
                  * Either the port is not online or this request was from
                  * previous life, just abort the processing.
@@ -3047,7 +3198,7 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd)
  
         spin_lock_irqsave(&ha->hardware_lock, flags);
  
-       if (!vha->flags.online || (cmd->reset_count != ha->chip_reset) ||
+       if (!ha->flags.fw_started || (cmd->reset_count != ha->chip_reset) ||
             (cmd->sess && cmd->sess->deleted)) {
                 /*
                  * Either the port is not online or this request was from
@@ -3104,139 +3255,113 @@ EXPORT_SYMBOL(qlt_rdy_to_xfer);
  
  
  /*
- * Checks the guard or meta-data for the type of error
- * detected by the HBA.
+ * it is assumed either hardware_lock or qpair lock is held.
   */
-static inline int
+static void
  qlt_handle_dif_error(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd,
-               struct ctio_crc_from_fw *sts)
+       struct ctio_crc_from_fw *sts)
  {
         uint8_t         *ap = &sts->actual_dif[0];
         uint8_t         *ep = &sts->expected_dif[0];
-       uint32_t        e_ref_tag, a_ref_tag;
-       uint16_t        e_app_tag, a_app_tag;
-       uint16_t        e_guard, a_guard;
         uint64_t        lba = cmd->se_cmd.t_task_lba;
+       uint8_t scsi_status, sense_key, asc, ascq;
+       unsigned long flags;
  
-       a_guard   = be16_to_cpu(*(uint16_t *)(ap + 0));
-       a_app_tag = be16_to_cpu(*(uint16_t *)(ap + 2));
-       a_ref_tag = be32_to_cpu(*(uint32_t *)(ap + 4));
-
-       e_guard   = be16_to_cpu(*(uint16_t *)(ep + 0));
-       e_app_tag = be16_to_cpu(*(uint16_t *)(ep + 2));
-       e_ref_tag = be32_to_cpu(*(uint32_t *)(ep + 4));
-
-       ql_dbg(ql_dbg_tgt, vha, 0xe075,
-           "iocb(s) %p Returned STATUS.\n", sts);
-
-       ql_dbg(ql_dbg_tgt, vha, 0xf075,
-           "dif check TGT cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x]\n",
-           cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba,
-           a_ref_tag, e_ref_tag, a_app_tag, e_app_tag, a_guard, e_guard);
-
-       /*
-        * Ignore sector if:
-        * For type     3: ref & app tag is all 'f's
-        * For type 0,1,2: app tag is all 'f's
-        */
-       if ((a_app_tag == 0xffff) &&
-           ((cmd->se_cmd.prot_type != TARGET_DIF_TYPE3_PROT) ||
-            (a_ref_tag == 0xffffffff))) {
-               uint32_t blocks_done;
-
-               /* 2TB boundary case covered automatically with this */
-               blocks_done = e_ref_tag - (uint32_t)lba + 1;
-               cmd->se_cmd.bad_sector = e_ref_tag;
-               cmd->se_cmd.pi_err = 0;
-               ql_dbg(ql_dbg_tgt, vha, 0xf074,
-                       "need to return scsi good\n");
-
-               /* Update protection tag */
-               if (cmd->prot_sg_cnt) {
-                       uint32_t i, k = 0, num_ent;
-                       struct scatterlist *sg, *sgl;
-
-
-                       sgl = cmd->prot_sg;
-
-                       /* Patch the corresponding protection tags */
-                       for_each_sg(sgl, sg, cmd->prot_sg_cnt, i) {
-                               num_ent = sg_dma_len(sg) / 8;
-                               if (k + num_ent < blocks_done) {
-                                       k += num_ent;
-                                       continue;
-                               }
-                               k = blocks_done;
-                               break;
-                       }
+       cmd->trc_flags |= TRC_DIF_ERR;
  
-                       if (k != blocks_done) {
-                               ql_log(ql_log_warn, vha, 0xf076,
-                                   "unexpected tag values tag:lba=%u:%llu)\n",
-                                   e_ref_tag, (unsigned long long)lba);
-                               goto out;
-                       }
+       cmd->a_guard   = be16_to_cpu(*(uint16_t *)(ap + 0));
+       cmd->a_app_tag = be16_to_cpu(*(uint16_t *)(ap + 2));
+       cmd->a_ref_tag = be32_to_cpu(*(uint32_t *)(ap + 4));
  
-#if 0
-                       struct sd_dif_tuple *spt;
-                       /* TODO:
-                        * This section came from initiator. Is it valid here?
-                        * should ulp be override with actual val???
-                        */
-                       spt = page_address(sg_page(sg)) + sg->offset;
-                       spt += j;
+       cmd->e_guard   = be16_to_cpu(*(uint16_t *)(ep + 0));
+       cmd->e_app_tag = be16_to_cpu(*(uint16_t *)(ep + 2));
+       cmd->e_ref_tag = be32_to_cpu(*(uint32_t *)(ep + 4));
  
-                       spt->app_tag = 0xffff;
-                       if (cmd->se_cmd.prot_type == SCSI_PROT_DIF_TYPE3)
-                               spt->ref_tag = 0xffffffff;
-#endif
-               }
+       ql_dbg(ql_dbg_tgt_dif, vha, 0xf075,
+           "%s: aborted %d state %d\n", __func__, cmd->aborted, cmd->state);
  
-               return 0;
-       }
+       scsi_status = sense_key = asc = ascq = 0;
  
-       /* check guard */
-       if (e_guard != a_guard) {
-               cmd->se_cmd.pi_err = TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED;
-               cmd->se_cmd.bad_sector = cmd->se_cmd.t_task_lba;
-
-               ql_log(ql_log_warn, vha, 0xe076,
-                   "Guard ERR: cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x] cmd=%p\n",
-                   cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba,
-                   a_ref_tag, e_ref_tag, a_app_tag, e_app_tag,
-                   a_guard, e_guard, cmd);
-               goto out;
+       /* check appl tag */
+       if (cmd->e_app_tag != cmd->a_app_tag) {
+               ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+                       "App Tag ERR: cdb[%x] lba[%llx %llx] blks[%x] [Actual|Expected] "
+                       "Ref[%x|%x], App[%x|%x], "
+                       "Guard [%x|%x] cmd=%p ox_id[%04x]",
+                       cmd->cdb[0], lba, (lba+cmd->num_blks), cmd->num_blks,
+                       cmd->a_ref_tag, cmd->e_ref_tag,
+                       cmd->a_app_tag, cmd->e_app_tag,
+                       cmd->a_guard, cmd->e_guard,
+                       cmd, cmd->atio.u.isp24.fcp_hdr.ox_id);
+
+               cmd->dif_err_code = DIF_ERR_APP;
+               scsi_status = SAM_STAT_CHECK_CONDITION;
+               sense_key = ABORTED_COMMAND;
+               asc = 0x10;
+               ascq = 0x2;
         }
  
         /* check ref tag */
-       if (e_ref_tag != a_ref_tag) {
-               cmd->se_cmd.pi_err = TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED;
-               cmd->se_cmd.bad_sector = e_ref_tag;
-
-               ql_log(ql_log_warn, vha, 0xe077,
-                       "Ref Tag ERR: cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x] cmd=%p\n",
-                       cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba,
-                       a_ref_tag, e_ref_tag, a_app_tag, e_app_tag,
-                       a_guard, e_guard, cmd);
+       if (cmd->e_ref_tag != cmd->a_ref_tag) {
+               ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+                       "Ref Tag ERR: cdb[%x] lba[%llx %llx] blks[%x] [Actual|Expected] "
+                       "Ref[%x|%x], App[%x|%x], "
+                       "Guard[%x|%x] cmd=%p ox_id[%04x] ",
+                       cmd->cdb[0], lba, (lba+cmd->num_blks), cmd->num_blks,
+                       cmd->a_ref_tag, cmd->e_ref_tag,
+                       cmd->a_app_tag, cmd->e_app_tag,
+                       cmd->a_guard, cmd->e_guard,
+                       cmd, cmd->atio.u.isp24.fcp_hdr.ox_id);
+
+               cmd->dif_err_code = DIF_ERR_REF;
+               scsi_status = SAM_STAT_CHECK_CONDITION;
+               sense_key = ABORTED_COMMAND;
+               asc = 0x10;
+               ascq = 0x3;
                 goto out;
         }
  
-       /* check appl tag */
-       if (e_app_tag != a_app_tag) {
-               cmd->se_cmd.pi_err = TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED;
-               cmd->se_cmd.bad_sector = cmd->se_cmd.t_task_lba;
-
-               ql_log(ql_log_warn, vha, 0xe078,
-                       "App Tag ERR: cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x] cmd=%p\n",
-                       cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba,
-                       a_ref_tag, e_ref_tag, a_app_tag, e_app_tag,
-                       a_guard, e_guard, cmd);
-               goto out;
+       /* check guard */
+       if (cmd->e_guard != cmd->a_guard) {
+               ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+                       "Guard ERR: cdb[%x] lba[%llx %llx] blks[%x] [Actual|Expected] "
+                       "Ref[%x|%x], App[%x|%x], "
+                       "Guard [%x|%x] cmd=%p ox_id[%04x]",
+                       cmd->cdb[0], lba, (lba+cmd->num_blks), cmd->num_blks,
+                       cmd->a_ref_tag, cmd->e_ref_tag,
+                       cmd->a_app_tag, cmd->e_app_tag,
+                       cmd->a_guard, cmd->e_guard,
+                       cmd, cmd->atio.u.isp24.fcp_hdr.ox_id);
+               cmd->dif_err_code = DIF_ERR_GRD;
+               scsi_status = SAM_STAT_CHECK_CONDITION;
+               sense_key = ABORTED_COMMAND;
+               asc = 0x10;
+               ascq = 0x1;
         }
  out:
-       return 1;
-}
+       switch (cmd->state) {
+       case QLA_TGT_STATE_NEED_DATA:
+               /* handle_data will load DIF error code  */
+               cmd->state = QLA_TGT_STATE_DATA_IN;
+               vha->hw->tgt.tgt_ops->handle_data(cmd);
+               break;
+       default:
+               spin_lock_irqsave(&cmd->cmd_lock, flags);
+               if (cmd->aborted) {
+                       spin_unlock_irqrestore(&cmd->cmd_lock, flags);
+                       vha->hw->tgt.tgt_ops->free_cmd(cmd);
+                       break;
+               }
+               spin_unlock_irqrestore(&cmd->cmd_lock, flags);
  
+               qlt_send_resp_ctio(vha, cmd, scsi_status, sense_key, asc, ascq);
+               /* assume scsi status gets out on the wire.
+                * Will not wait for completion.
+                */
+               vha->hw->tgt.tgt_ops->free_cmd(cmd);
+               break;
+       }
+}
  
  /* If hardware_lock held on entry, might drop it, then reaquire */
  /* This function sends the appropriate CTIO to ISP 2xxx or 24xx */
@@ -3251,7 +3376,7 @@ static int __qlt_send_term_imm_notif(struct scsi_qla_host *vha,
         ql_dbg(ql_dbg_tgt_tmr, vha, 0xe01c,
             "Sending TERM ELS CTIO (ha=%p)\n", ha);
  
-       pkt = (request_t *)qla2x00_alloc_iocbs_ready(vha, NULL);
+       pkt = (request_t *)qla2x00_alloc_iocbs(vha, NULL);
         if (pkt == NULL) {
                 ql_dbg(ql_dbg_tgt, vha, 0xe080,
                     "qla_target(%d): %s failed: unable to allocate "
@@ -3543,6 +3668,16 @@ static int qlt_term_ctio_exchange(struct scsi_qla_host *vha, void *ctio,
  {
         int term = 0;
  
+       if (cmd->se_cmd.prot_op)
+               ql_dbg(ql_dbg_tgt_dif, vha, 0xffff,
+                   "Term DIF cmd: lba[0x%llx|%lld] len[0x%x] "
+                   "se_cmd=%p tag[%x] op %#x/%s",
+                    cmd->lba, cmd->lba,
+                    cmd->num_blks, &cmd->se_cmd,
+                    cmd->atio.u.isp24.exchange_addr,
+                    cmd->se_cmd.prot_op,
+                    prot_op_str(cmd->se_cmd.prot_op));
+
         if (ctio != NULL) {
                 struct ctio7_from_24xx *c = (struct ctio7_from_24xx *)ctio;
                 term = !(c->flags &
@@ -3760,32 +3895,15 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle,
                         struct ctio_crc_from_fw *crc =
                                 (struct ctio_crc_from_fw *)ctio;
                         ql_dbg(ql_dbg_tgt_mgt, vha, 0xf073,
-                           "qla_target(%d): CTIO with DIF_ERROR status %x received (state %x, se_cmd %p) actual_dif[0x%llx] expect_dif[0x%llx]\n",
+                           "qla_target(%d): CTIO with DIF_ERROR status %x "
+                           "received (state %x, ulp_cmd %p) actual_dif[0x%llx] "
+                           "expect_dif[0x%llx]\n",
                             vha->vp_idx, status, cmd->state, se_cmd,
                             *((u64 *)&crc->actual_dif[0]),
                             *((u64 *)&crc->expected_dif[0]));
  
-                       if (qlt_handle_dif_error(vha, cmd, ctio)) {
-                               if (cmd->state == QLA_TGT_STATE_NEED_DATA) {
-                                       /* scsi Write/xfer rdy complete */
-                                       goto skip_term;
-                               } else {
-                                       /* scsi read/xmit respond complete
-                                        * call handle dif to send scsi status
-                                        * rather than terminate exchange.
-                                        */
-                                       cmd->state = QLA_TGT_STATE_PROCESSED;
-                                       ha->tgt.tgt_ops->handle_dif_err(cmd);
-                                       return;
-                               }
-                       } else {
-                               /* Need to generate a SCSI good completion.
-                                * because FW did not send scsi status.
-                                */
-                               status = 0;
-                               goto skip_term;
-                       }
-                       break;
+                       qlt_handle_dif_error(vha, cmd, ctio);
+                       return;
                 }
                 default:
                         ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05b,
@@ -3808,7 +3926,6 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle,
                                 return;
                 }
         }
-skip_term:
  
         if (cmd->state == QLA_TGT_STATE_PROCESSED) {
                 cmd->trc_flags |= TRC_CTIO_DONE;
@@ -4584,7 +4701,8 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha,
                 }
  
                 if (sess != NULL) {
-                       if (sess->fw_login_state == DSC_LS_PLOGI_PEND) {
+                       if (sess->fw_login_state != DSC_LS_PLOGI_PEND &&
+                           sess->fw_login_state != DSC_LS_PLOGI_COMP) {
                                 /*
                                  * Impatient initiator sent PRLI before last
                                  * PLOGI could finish. Will force him to re-try,
@@ -4623,15 +4741,23 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha,
  
                 /* Make session global (not used in fabric mode) */
                 if (ha->current_topology != ISP_CFG_F) {
-                       set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags);
-                       set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags);
-                       qla2xxx_wake_dpc(vha);
+                       if (sess) {
+                               ql_dbg(ql_dbg_disc, vha, 0xffff,
+                                   "%s %d %8phC post nack\n",
+                                   __func__, __LINE__, sess->port_name);
+                               qla24xx_post_nack_work(vha, sess, iocb,
+                                       SRB_NACK_PRLI);
+                               res = 0;
+                       } else {
+                               set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags);
+                               set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags);
+                               qla2xxx_wake_dpc(vha);
+                       }
                 } else {
                         if (sess) {
                                 ql_dbg(ql_dbg_disc, vha, 0xffff,
-                                          "%s %d %8phC post nack\n",
-                                          __func__, __LINE__, sess->port_name);
-
+                                   "%s %d %8phC post nack\n",
+                                   __func__, __LINE__, sess->port_name);
                                 qla24xx_post_nack_work(vha, sess, iocb,
                                         SRB_NACK_PRLI);
                                 res = 0;
@@ -4639,7 +4765,6 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha,
                 }
                 break;
  
-
         case ELS_TPRLO:
                 if (le16_to_cpu(iocb->u.isp24.flags) &
                         NOTIFY24XX_FLAGS_GLOBAL_TPRLO) {
@@ -5079,16 +5204,22 @@ qlt_send_busy(struct scsi_qla_host *vha,
  
  static int
  qlt_chk_qfull_thresh_hold(struct scsi_qla_host *vha,
-       struct atio_from_isp *atio)
+       struct atio_from_isp *atio, bool ha_locked)
  {
         struct qla_hw_data *ha = vha->hw;
         uint16_t status;
+       unsigned long flags;
  
         if (ha->tgt.num_pend_cmds < Q_FULL_THRESH_HOLD(ha))
                 return 0;
  
+       if (!ha_locked)
+               spin_lock_irqsave(&ha->hardware_lock, flags);
         status = temp_sam_status;
         qlt_send_busy(vha, atio, status);
+       if (!ha_locked)
+               spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
         return 1;
  }
  
@@ -5103,7 +5234,7 @@ static void qlt_24xx_atio_pkt(struct scsi_qla_host *vha,
         unsigned long flags;
  
         if (unlikely(tgt == NULL)) {
-               ql_dbg(ql_dbg_io, vha, 0x3064,
+               ql_dbg(ql_dbg_tgt, vha, 0x3064,
                     "ATIO pkt, but no tgt (ha %p)", ha);
                 return;
         }
@@ -5133,7 +5264,7 @@ static void qlt_24xx_atio_pkt(struct scsi_qla_host *vha,
  
  
                 if (likely(atio->u.isp24.fcp_cmnd.task_mgmt_flags == 0)) {
-                       rc = qlt_chk_qfull_thresh_hold(vha, atio);
+                       rc = qlt_chk_qfull_thresh_hold(vha, atio, ha_locked);
                         if (rc != 0) {
                                 tgt->atio_irq_cmd_count--;
                                 return;
@@ -5256,7 +5387,7 @@ static void qlt_response_pkt(struct scsi_qla_host *vha, response_t *pkt)
                         break;
                 }
  
-               rc = qlt_chk_qfull_thresh_hold(vha, atio);
+               rc = qlt_chk_qfull_thresh_hold(vha, atio, true);
                 if (rc != 0) {
                         tgt->irq_cmd_count--;
                         return;
@@ -5531,7 +5662,7 @@ static fc_port_t *qlt_get_port_database(struct scsi_qla_host *vha,
  
         fcport->loop_id = loop_id;
  
-       rc = qla2x00_get_port_database(vha, fcport, 0);
+       rc = qla24xx_gpdb_wait(vha, fcport, 0);
         if (rc != QLA_SUCCESS) {
                 ql_dbg(ql_dbg_tgt_mgt, vha, 0xf070,
                     "qla_target(%d): Failed to retrieve fcport "
@@ -5713,30 +5844,23 @@ static void qlt_abort_work(struct qla_tgt *tgt,
                 }
         }
  
-       spin_lock_irqsave(&ha->hardware_lock, flags);
-
-       if (tgt->tgt_stop)
-               goto out_term;
-
         rc = __qlt_24xx_handle_abts(vha, &prm->abts, sess);
+       ha->tgt.tgt_ops->put_sess(sess);
+       spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2);
+
         if (rc != 0)
                 goto out_term;
-       spin_unlock_irqrestore(&ha->hardware_lock, flags);
-       if (sess)
-               ha->tgt.tgt_ops->put_sess(sess);
-       spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2);
         return;
  
  out_term2:
-       spin_lock_irqsave(&ha->hardware_lock, flags);
+       if (sess)
+               ha->tgt.tgt_ops->put_sess(sess);
+       spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2);
  
  out_term:
+       spin_lock_irqsave(&ha->hardware_lock, flags);
         qlt_24xx_send_abts_resp(vha, &prm->abts, FCP_TMF_REJECTED, false);
         spin_unlock_irqrestore(&ha->hardware_lock, flags);
-
-       if (sess)
-               ha->tgt.tgt_ops->put_sess(sess);
-       spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2);
  }
  
  static void qlt_tmr_work(struct qla_tgt *tgt,
@@ -5756,7 +5880,7 @@ static void qlt_tmr_work(struct qla_tgt *tgt,
         spin_lock_irqsave(&ha->tgt.sess_lock, flags);
  
         if (tgt->tgt_stop)
-               goto out_term;
+               goto out_term2;
  
         s_id = prm->tm_iocb2.u.isp24.fcp_hdr.s_id;
         sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha, s_id);
@@ -5768,11 +5892,11 @@ static void qlt_tmr_work(struct qla_tgt *tgt,
  
                 spin_lock_irqsave(&ha->tgt.sess_lock, flags);
                 if (!sess)
-                       goto out_term;
+                       goto out_term2;
         } else {
                 if (sess->deleted) {
                         sess = NULL;
-                       goto out_term;
+                       goto out_term2;
                 }
  
                 if (!kref_get_unless_zero(&sess->sess_kref)) {
@@ -5780,7 +5904,7 @@ static void qlt_tmr_work(struct qla_tgt *tgt,
                             "%s: kref_get fail %8phC\n",
                              __func__, sess->port_name);
                         sess = NULL;
-                       goto out_term;
+                       goto out_term2;
                 }
         }
  
@@ -5790,17 +5914,19 @@ static void qlt_tmr_work(struct qla_tgt *tgt,
         unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun);
  
         rc = qlt_issue_task_mgmt(sess, unpacked_lun, fn, iocb, 0);
-       if (rc != 0)
-               goto out_term;
-
         ha->tgt.tgt_ops->put_sess(sess);
         spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
+
+       if (rc != 0)
+               goto out_term;
         return;
  
+out_term2:
+       if (sess)
+               ha->tgt.tgt_ops->put_sess(sess);
+       spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
  out_term:
         qlt_send_term_exchange(vha, NULL, &prm->tm_iocb2, 1, 0);
-       ha->tgt.tgt_ops->put_sess(sess);
-       spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
  }
  
  static void qlt_sess_work_fn(struct work_struct *work)
@@ -5893,13 +6019,13 @@ int qlt_add_target(struct qla_hw_data *ha, struct scsi_qla_host *base_vha)
         tgt->datasegs_per_cmd = QLA_TGT_DATASEGS_PER_CMD_24XX;
         tgt->datasegs_per_cont = QLA_TGT_DATASEGS_PER_CONT_24XX;
  
-       if (base_vha->fc_vport)
-               return 0;
-
         mutex_lock(&qla_tgt_mutex);
         list_add_tail(&tgt->tgt_list_entry, &qla_tgt_glist);
         mutex_unlock(&qla_tgt_mutex);
  
+       if (ha->tgt.tgt_ops && ha->tgt.tgt_ops->add_target)
+               ha->tgt.tgt_ops->add_target(base_vha);
+
         return 0;
  }
  
@@ -5928,6 +6054,17 @@ int qlt_remove_target(struct qla_hw_data *ha, struct scsi_qla_host *vha)
         return 0;
  }
  
+void qlt_remove_target_resources(struct qla_hw_data *ha)
+{
+       struct scsi_qla_host *node;
+       u32 key = 0;
+
+       btree_for_each_safe32(&ha->tgt.host_map, key, node)
+               btree_remove32(&ha->tgt.host_map, key);
+
+       btree_destroy32(&ha->tgt.host_map);
+}
+
  static void qlt_lport_dump(struct scsi_qla_host *vha, u64 wwpn,
         unsigned char *b)
  {
@@ -6234,7 +6371,7 @@ qlt_24xx_process_atio_queue(struct scsi_qla_host *vha, uint8_t ha_locked)
         struct atio_from_isp *pkt;
         int cnt, i;
  
-       if (!vha->flags.online)
+       if (!ha->flags.fw_started)
                 return;
  
         while ((ha->tgt.atio_ring_ptr->signature != ATIO_PROCESSED) ||
@@ -6581,6 +6718,8 @@ qlt_modify_vp_config(struct scsi_qla_host *vha,
  void
  qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha)
  {
+       int rc;
+
         if (!QLA_TGT_MODE_ENABLED())
                 return;
  
@@ -6600,6 +6739,13 @@ qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha)
             qlt_unknown_atio_work_fn);
  
         qlt_clear_mode(base_vha);
+
+       rc = btree_init32(&ha->tgt.host_map);
+       if (rc)
+               ql_log(ql_log_info, base_vha, 0xffff,
+                   "Unable to initialize ha->host_map btree\n");
+
+       qlt_update_vp_map(base_vha, SET_VP_IDX);
  }
  
  irqreturn_t
@@ -6642,6 +6788,8 @@ qlt_handle_abts_recv_work(struct work_struct *work)
         spin_lock_irqsave(&ha->hardware_lock, flags);
         qlt_response_pkt_all_vps(vha, (response_t *)&op->atio);
         spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
+       kfree(op);
  }
  
  void
@@ -6706,25 +6854,69 @@ qlt_mem_free(struct qla_hw_data *ha)
  void
  qlt_update_vp_map(struct scsi_qla_host *vha, int cmd)
  {
+       void *slot;
+       u32 key;
+       int rc;
+
         if (!QLA_TGT_MODE_ENABLED())
                 return;
  
+       key = vha->d_id.b24;
+
         switch (cmd) {
         case SET_VP_IDX:
                 vha->hw->tgt.tgt_vp_map[vha->vp_idx].vha = vha;
                 break;
         case SET_AL_PA:
-               vha->hw->tgt.tgt_vp_map[vha->d_id.b.al_pa].idx = vha->vp_idx;
+               slot = btree_lookup32(&vha->hw->tgt.host_map, key);
+               if (!slot) {
+                       ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff,
+                           "Save vha in host_map %p %06x\n", vha, key);
+                       rc = btree_insert32(&vha->hw->tgt.host_map,
+                               key, vha, GFP_ATOMIC);
+                       if (rc)
+                               ql_log(ql_log_info, vha, 0xffff,
+                                   "Unable to insert s_id into host_map: %06x\n",
+                                   key);
+                       return;
+               }
+               ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff,
+                       "replace existing vha in host_map %p %06x\n", vha, key);
+               btree_update32(&vha->hw->tgt.host_map, key, vha);
                 break;
         case RESET_VP_IDX:
                 vha->hw->tgt.tgt_vp_map[vha->vp_idx].vha = NULL;
                 break;
         case RESET_AL_PA:
-               vha->hw->tgt.tgt_vp_map[vha->d_id.b.al_pa].idx = 0;
+               ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff,
+                  "clear vha in host_map %p %06x\n", vha, key);
+               slot = btree_lookup32(&vha->hw->tgt.host_map, key);
+               if (slot)
+                       btree_remove32(&vha->hw->tgt.host_map, key);
+               vha->d_id.b24 = 0;
                 break;
         }
  }
  
+void qlt_update_host_map(struct scsi_qla_host *vha, port_id_t id)
+{
+       unsigned long flags;
+       struct qla_hw_data *ha = vha->hw;
+
+       if (!vha->d_id.b24) {
+               spin_lock_irqsave(&ha->vport_slock, flags);
+               vha->d_id = id;
+               qlt_update_vp_map(vha, SET_AL_PA);
+               spin_unlock_irqrestore(&ha->vport_slock, flags);
+       } else if (vha->d_id.b24 != id.b24) {
+               spin_lock_irqsave(&ha->vport_slock, flags);
+               qlt_update_vp_map(vha, RESET_AL_PA);
+               vha->d_id = id;
+               qlt_update_vp_map(vha, SET_AL_PA);
+               spin_unlock_irqrestore(&ha->vport_slock, flags);
+       }
+}
+
  static int __init qlt_parse_ini_mode(void)
  {
         if (strcasecmp(qlini_mode, QLA2XXX_INI_MODE_STR_EXCLUSIVE) == 0)
diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h

index a7f90dcaae37d3eaad551544c6151785faf84cb9..d64420251194eb5fa634a36699ecf07c69e09edd 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_target.h
+++ b/drivers/scsi/qla2xxx/qla_target.h
@@ -378,6 +378,14 @@ static inline void adjust_corrupted_atio(struct atio_from_isp *atio)
         atio->u.isp24.fcp_cmnd.add_cdb_len = 0;
  }
  
+static inline int get_datalen_for_atio(struct atio_from_isp *atio)
+{
+       int len = atio->u.isp24.fcp_cmnd.add_cdb_len;
+
+       return (be32_to_cpu(get_unaligned((uint32_t *)
+           &atio->u.isp24.fcp_cmnd.add_cdb[len * 4])));
+}
+
  #define CTIO_TYPE7 0x12 /* Continue target I/O entry (for 24xx) */
  
  /*
@@ -667,7 +675,6 @@ struct qla_tgt_func_tmpl {
         int (*handle_cmd)(struct scsi_qla_host *, struct qla_tgt_cmd *,
                         unsigned char *, uint32_t, int, int, int);
         void (*handle_data)(struct qla_tgt_cmd *);
-       void (*handle_dif_err)(struct qla_tgt_cmd *);
         int (*handle_tmr)(struct qla_tgt_mgmt_cmd *, uint32_t, uint16_t,
                         uint32_t);
         void (*free_cmd)(struct qla_tgt_cmd *);
@@ -684,6 +691,9 @@ struct qla_tgt_func_tmpl {
         void (*clear_nacl_from_fcport_map)(struct fc_port *);
         void (*put_sess)(struct fc_port *);
         void (*shutdown_sess)(struct fc_port *);
+       int (*get_dif_tags)(struct qla_tgt_cmd *cmd, uint16_t *pfw_prot_opts);
+       int (*chk_dif_tags)(uint32_t tag);
+       void (*add_target)(struct scsi_qla_host *);
  };
  
  int qla2x00_wait_for_hba_online(struct scsi_qla_host *);
@@ -720,8 +730,8 @@ int qla2x00_wait_for_hba_online(struct scsi_qla_host *);
  #define QLA_TGT_ABORT_ALL               0xFFFE
  #define QLA_TGT_NEXUS_LOSS_SESS         0xFFFD
  #define QLA_TGT_NEXUS_LOSS              0xFFFC
-#define QLA_TGT_ABTS                                   0xFFFB
-#define QLA_TGT_2G_ABORT_TASK                  0xFFFA
+#define QLA_TGT_ABTS                   0xFFFB
+#define QLA_TGT_2G_ABORT_TASK          0xFFFA
  
  /* Notify Acknowledge flags */
  #define NOTIFY_ACK_RES_COUNT        BIT_8
@@ -845,6 +855,7 @@ enum trace_flags {
         TRC_CMD_FREE = BIT_17,
         TRC_DATA_IN = BIT_18,
         TRC_ABORT = BIT_19,
+       TRC_DIF_ERR = BIT_20,
  };
  
  struct qla_tgt_cmd {
@@ -862,7 +873,6 @@ struct qla_tgt_cmd {
         unsigned int sg_mapped:1;
         unsigned int free_sg:1;
         unsigned int write_data_transferred:1;
-       unsigned int ctx_dsd_alloced:1;
         unsigned int q_full:1;
         unsigned int term_exchg:1;
         unsigned int cmd_sent_to_fw:1;
@@ -885,11 +895,25 @@ struct qla_tgt_cmd {
         struct list_head cmd_list;
  
         struct atio_from_isp atio;
-       /* t10dif */
+
+       uint8_t ctx_dsd_alloced;
+
+       /* T10-DIF */
+#define DIF_ERR_NONE 0
+#define DIF_ERR_GRD 1
+#define DIF_ERR_REF 2
+#define DIF_ERR_APP 3
+       int8_t dif_err_code;
         struct scatterlist *prot_sg;
         uint32_t prot_sg_cnt;
-       uint32_t blk_sz;
+       uint32_t blk_sz, num_blks;
+       uint8_t scsi_status, sense_key, asc, ascq;
+
         struct crc_context *ctx;
+       uint8_t         *cdb;
+       uint64_t        lba;
+       uint16_t        a_guard, e_guard, a_app_tag, e_app_tag;
+       uint32_t        a_ref_tag, e_ref_tag;
  
         uint64_t jiffies_at_alloc;
         uint64_t jiffies_at_free;
@@ -1053,4 +1077,7 @@ extern int qlt_free_qfull_cmds(struct scsi_qla_host *);
  extern void qlt_logo_completion_handler(fc_port_t *, int);
  extern void qlt_do_generation_tick(struct scsi_qla_host *, int *);
  
+void qlt_send_resp_ctio(scsi_qla_host_t *, struct qla_tgt_cmd *, uint8_t,
+    uint8_t, uint8_t, uint8_t);
+
  #endif /* __QLA_TARGET_H */
diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h

index 3cb1964b7786e4e2add64d7c8f5788fd73b90134..45bc84e8e3bf50f798616de47a2f348d684222b4 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_version.h
+++ b/drivers/scsi/qla2xxx/qla_version.h
@@ -7,9 +7,9 @@
  /*
   * Driver version
   */
-#define QLA2XXX_VERSION      "8.07.00.38-k"
+#define QLA2XXX_VERSION      "9.00.00.00-k"
  
-#define QLA_DRIVER_MAJOR_VER   8
-#define QLA_DRIVER_MINOR_VER   7
+#define QLA_DRIVER_MAJOR_VER   9
+#define QLA_DRIVER_MINOR_VER   0
  #define QLA_DRIVER_PATCH_VER   0
  #define QLA_DRIVER_BETA_VER    0
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c

index 8e8ab0fa9672a6674d3cc9556beeccc44dfc70b2..7443e4efa3aed461f225f6b04bae9223f615dd0b 100644 (file)
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -531,6 +531,24 @@ static void tcm_qla2xxx_handle_data_work(struct work_struct *work)
                         return;
                 }
  
+               switch (cmd->dif_err_code) {
+               case DIF_ERR_GRD:
+                       cmd->se_cmd.pi_err =
+                           TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED;
+                       break;
+               case DIF_ERR_REF:
+                       cmd->se_cmd.pi_err =
+                           TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED;
+                       break;
+               case DIF_ERR_APP:
+                       cmd->se_cmd.pi_err =
+                           TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED;
+                       break;
+               case DIF_ERR_NONE:
+               default:
+                       break;
+               }
+
                 if (cmd->se_cmd.pi_err)
                         transport_generic_request_failure(&cmd->se_cmd,
                                 cmd->se_cmd.pi_err);
@@ -555,25 +573,23 @@ static void tcm_qla2xxx_handle_data(struct qla_tgt_cmd *cmd)
         queue_work_on(smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work);
  }
  
-static void tcm_qla2xxx_handle_dif_work(struct work_struct *work)
+static int tcm_qla2xxx_chk_dif_tags(uint32_t tag)
  {
-       struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work);
-
-       /* take an extra kref to prevent cmd free too early.
-        * need to wait for SCSI status/check condition to
-        * finish responding generate by transport_generic_request_failure.
-        */
-       kref_get(&cmd->se_cmd.cmd_kref);
-       transport_generic_request_failure(&cmd->se_cmd, cmd->se_cmd.pi_err);
+       return 0;
  }
  
-/*
- * Called from qla_target.c:qlt_do_ctio_completion()
- */
-static void tcm_qla2xxx_handle_dif_err(struct qla_tgt_cmd *cmd)
+static int tcm_qla2xxx_dif_tags(struct qla_tgt_cmd *cmd,
+    uint16_t *pfw_prot_opts)
  {
-       INIT_WORK(&cmd->work, tcm_qla2xxx_handle_dif_work);
-       queue_work(tcm_qla2xxx_free_wq, &cmd->work);
+       struct se_cmd *se_cmd = &cmd->se_cmd;
+
+       if (!(se_cmd->prot_checks & TARGET_DIF_CHECK_GUARD))
+               *pfw_prot_opts |= PO_DISABLE_GUARD_CHECK;
+
+       if (!(se_cmd->prot_checks & TARGET_DIF_CHECK_APPTAG))
+               *pfw_prot_opts |= PO_DIS_APP_TAG_VALD;
+
+       return 0;
  }
  
  /*
@@ -1610,7 +1626,6 @@ static void tcm_qla2xxx_update_sess(struct fc_port *sess, port_id_t s_id,
  static struct qla_tgt_func_tmpl tcm_qla2xxx_template = {
         .handle_cmd             = tcm_qla2xxx_handle_cmd,
         .handle_data            = tcm_qla2xxx_handle_data,
-       .handle_dif_err         = tcm_qla2xxx_handle_dif_err,
         .handle_tmr             = tcm_qla2xxx_handle_tmr,
         .free_cmd               = tcm_qla2xxx_free_cmd,
         .free_mcmd              = tcm_qla2xxx_free_mcmd,
@@ -1622,6 +1637,8 @@ static struct qla_tgt_func_tmpl tcm_qla2xxx_template = {
         .clear_nacl_from_fcport_map = tcm_qla2xxx_clear_nacl_from_fcport_map,
         .put_sess               = tcm_qla2xxx_put_sess,
         .shutdown_sess          = tcm_qla2xxx_shutdown_sess,
+       .get_dif_tags           = tcm_qla2xxx_dif_tags,
+       .chk_dif_tags           = tcm_qla2xxx_chk_dif_tags,
  };
  
  static int tcm_qla2xxx_init_lport(struct tcm_qla2xxx_lport *lport)
diff --git a/drivers/scsi/scsi_debugfs.c b/drivers/scsi/scsi_debugfs.c

new file mode 100644 (file)

index 0000000..a97c950
--- /dev/null
+++ b/drivers/scsi/scsi_debugfs.c
@@ -0,0 +1,13 @@
+#include <linux/seq_file.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_dbg.h>
+#include "scsi_debugfs.h"
+
+void scsi_show_rq(struct seq_file *m, struct request *rq)
+{
+       struct scsi_cmnd *cmd = container_of(scsi_req(rq), typeof(*cmd), req);
+       char buf[80];   /* receives the text form of cmd->cmnd */
+
+       __scsi_format_command(buf, sizeof(buf), cmd->cmnd, cmd->cmd_len);
+       seq_printf(m, ", .cmd=%s", buf);        /* append the result to @m */
+}
diff --git a/drivers/scsi/scsi_debugfs.h b/drivers/scsi/scsi_debugfs.h

new file mode 100644 (file)

index 0000000..951b043
--- /dev/null
+++ b/drivers/scsi/scsi_debugfs.h
@@ -0,0 +1,4 @@
+struct request;
+struct seq_file;
+
+void scsi_show_rq(struct seq_file *m, struct request *rq);
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c

index f2cafae150bcdb85292d86945cfb9d6e9fd7c340..2db412dd4b4477f973d25d24f543fb0601f2ba2a 100644 (file)
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1988,7 +1988,7 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
  
         req->rq_flags |= RQF_QUIET;
         req->timeout = 10 * HZ;
-       req->retries = 5;
+       rq->retries = 5;
  
         blk_execute_rq_nowait(req->q, NULL, req, 1, eh_lock_door_done);
  }
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c

index ba2286652ff647f285761e046f3699bed5c39eba..1c3e87d6c48f19132ea87eab5186a86464e248d9 100644 (file)
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -34,6 +34,7 @@
  
  #include <trace/events/scsi.h>
  
+#include "scsi_debugfs.h"
  #include "scsi_priv.h"
  #include "scsi_logging.h"
  
@@ -229,8 +230,8 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
   * @rq_flags:  flags for ->rq_flags
   * @resid:     optional residual length
   *
- * returns the req->errors value which is the scsi_cmnd result
- * field.
+ * Returns the scsi_cmnd result field if a command was executed, or a negative
+ * Linux error code if we didn't get that far.
   */
  int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
                  int data_direction, void *buffer, unsigned bufflen,
@@ -256,7 +257,7 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
  
         rq->cmd_len = COMMAND_SIZE(cmd[0]);
         memcpy(rq->cmd, cmd, rq->cmd_len);
-       req->retries = retries;
+       rq->retries = retries;
         req->timeout = timeout;
         req->cmd_flags |= flags;
         req->rq_flags |= rq_flags | RQF_QUIET | RQF_PREEMPT;
@@ -281,7 +282,7 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
                 memcpy(sense, rq->sense, SCSI_SENSE_BUFFERSIZE);
         if (sshdr)
                 scsi_normalize_sense(rq->sense, rq->sense_len, sshdr);
-       ret = req->errors;
+       ret = rq->result;
   out:
         blk_put_request(req);
  
@@ -496,7 +497,7 @@ static void scsi_run_queue(struct request_queue *q)
                 scsi_starved_list_run(sdev->host);
  
         if (q->mq_ops)
-               blk_mq_start_stopped_hw_queues(q, false);
+               blk_mq_run_hw_queues(q, false);
         else
                 blk_run_queue(q);
  }
@@ -667,7 +668,7 @@ static bool scsi_end_request(struct request *req, int error,
                     !list_empty(&sdev->host->starved_list))
                         kblockd_schedule_work(&sdev->requeue_work);
                 else
-                       blk_mq_start_stopped_hw_queues(q, true);
+                       blk_mq_run_hw_queues(q, true);
         } else {
                 unsigned long flags;
  
@@ -797,8 +798,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
                 /*
                  * __scsi_error_from_host_byte may have reset the host_byte
                  */
-               req->errors = cmd->result;
-
+               scsi_req(req)->result = cmd->result;
                 scsi_req(req)->resid_len = scsi_get_resid(cmd);
  
                 if (scsi_bidi_cmnd(cmd)) {
@@ -835,7 +835,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
         /*
          * Recovered errors need reporting, but they're always treated as
          * success, so fiddle the result code here.  For passthrough requests
-        * we already took a copy of the original into rq->errors which
+        * we already took a copy of the original into sreq->result which
          * is what gets returned to the user
          */
         if (sense_valid && (sshdr.sense_key == RECOVERED_ERROR)) {
@@ -1061,10 +1061,10 @@ int scsi_init_io(struct scsi_cmnd *cmd)
         struct scsi_device *sdev = cmd->device;
         struct request *rq = cmd->request;
         bool is_mq = (rq->mq_ctx != NULL);
-       int error;
+       int error = BLKPREP_KILL;
  
         if (WARN_ON_ONCE(!blk_rq_nr_phys_segments(rq)))
-               return -EINVAL;
+               goto err_exit;
  
         error = scsi_init_sgtable(rq, &cmd->sdb);
         if (error)
@@ -1177,7 +1177,7 @@ static int scsi_setup_scsi_cmnd(struct scsi_device *sdev, struct request *req)
         cmd->cmd_len = scsi_req(req)->cmd_len;
         cmd->cmnd = scsi_req(req)->cmd;
         cmd->transfersize = blk_rq_bytes(req);
-       cmd->allowed = req->retries;
+       cmd->allowed = scsi_req(req)->retries;
         return BLKPREP_OK;
  }
  
@@ -1281,7 +1281,7 @@ scsi_prep_return(struct request_queue *q, struct request *req, int ret)
         switch (ret) {
         case BLKPREP_KILL:
         case BLKPREP_INVALID:
-               req->errors = DID_NO_CONNECT << 16;
+               scsi_req(req)->result = DID_NO_CONNECT << 16;
                 /* release the command and kill it */
                 if (req->special) {
                         struct scsi_cmnd *cmd = req->special;
@@ -1905,7 +1905,7 @@ static int scsi_mq_prep_fn(struct request *req)
  static void scsi_mq_done(struct scsi_cmnd *cmd)
  {
         trace_scsi_dispatch_cmd_done(cmd);
-       blk_mq_complete_request(cmd->request, cmd->request->errors);
+       blk_mq_complete_request(cmd->request);
  }
  
  static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
@@ -1974,7 +1974,7 @@ out:
         case BLK_MQ_RQ_QUEUE_BUSY:
                 if (atomic_read(&sdev->device_busy) == 0 &&
                     !scsi_device_blocked(sdev))
-                       blk_mq_delay_queue(hctx, SCSI_QUEUE_DELAY);
+                       blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);
                 break;
         case BLK_MQ_RQ_QUEUE_ERROR:
                 /*
@@ -2154,10 +2154,13 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
         return q;
  }
  
-static struct blk_mq_ops scsi_mq_ops = {
+static const struct blk_mq_ops scsi_mq_ops = {
         .queue_rq       = scsi_queue_rq,
         .complete       = scsi_softirq_done,
         .timeout        = scsi_timeout,
+#ifdef CONFIG_BLK_DEBUG_FS
+       .show_rq        = scsi_show_rq,
+#endif
         .init_request   = scsi_init_request,
         .exit_request   = scsi_exit_request,
         .map_queues     = scsi_map_queues,
@@ -2932,6 +2935,8 @@ EXPORT_SYMBOL(scsi_target_resume);
  /**
   * scsi_internal_device_block - internal function to put a device temporarily into the SDEV_BLOCK state
   * @sdev:      device to block
+ * @wait:      Whether or not to wait until ongoing .queuecommand() /
+ *             .queue_rq() calls have finished.
   *
   * Block request made by scsi lld's to temporarily stop all
   * scsi commands on the specified device. May sleep.
@@ -2949,7 +2954,7 @@ EXPORT_SYMBOL(scsi_target_resume);
   * remove the rport mutex lock and unlock calls from srp_queuecommand().
   */
  int
-scsi_internal_device_block(struct scsi_device *sdev)
+scsi_internal_device_block(struct scsi_device *sdev, bool wait)
  {
         struct request_queue *q = sdev->request_queue;
         unsigned long flags;
@@ -2969,12 +2974,16 @@ scsi_internal_device_block(struct scsi_device *sdev)
          * request queue. 
          */
         if (q->mq_ops) {
-               blk_mq_quiesce_queue(q);
+               if (wait)
+                       blk_mq_quiesce_queue(q);
+               else
+                       blk_mq_stop_hw_queues(q);
         } else {
                 spin_lock_irqsave(q->queue_lock, flags);
                 blk_stop_queue(q);
                 spin_unlock_irqrestore(q->queue_lock, flags);
-               scsi_wait_for_queuecommand(sdev);
+               if (wait)
+                       scsi_wait_for_queuecommand(sdev);
         }
  
         return 0;
@@ -3036,7 +3045,7 @@ EXPORT_SYMBOL_GPL(scsi_internal_device_unblock);
  static void
  device_block(struct scsi_device *sdev, void *data)
  {
-       scsi_internal_device_block(sdev);
+       scsi_internal_device_block(sdev, true);
  }
  
  static int
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h

index 99bfc985e1903bcffd10762d52f1a72448aaed96..f11bd102d6d5d6b5a390762448002cd5b0f357d7 100644 (file)
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -188,8 +188,5 @@ static inline void scsi_dh_remove_device(struct scsi_device *sdev) { }
   */
  
  #define SCSI_DEVICE_BLOCK_MAX_TIMEOUT  600     /* units in seconds */
-extern int scsi_internal_device_block(struct scsi_device *sdev);
-extern int scsi_internal_device_unblock(struct scsi_device *sdev,
-                                       enum scsi_device_state new_state);
  
  #endif /* _SCSI_PRIV_H */
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c

index cdbb293aca08fd3b386b9c03ca02c6f27fe2056e..9fdbd50c31b4545d455496e9e27f213c9eb6d7f9 100644 (file)
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -184,9 +184,9 @@ static void sas_smp_request(struct request_queue *q, struct Scsi_Host *shost,
                                 blk_rq_bytes(req->next_rq);
                 handler = to_sas_internal(shost->transportt)->f->smp_handler;
                 ret = handler(shost, rphy, req);
-               req->errors = ret;
+               scsi_req(req)->result = ret;
  
-               blk_end_request_all(req, ret);
+               blk_end_request_all(req, 0);
  
                 spin_lock_irq(q->queue_lock);
         }
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c

index c7839f6c35ccc479c8f7a044407b35f203cd3102..0dc95e102e69736caccd9745e97cdf76842999fb 100644 (file)
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -418,6 +418,46 @@ provisioning_mode_store(struct device *dev, struct device_attribute *attr,
  }
  static DEVICE_ATTR_RW(provisioning_mode);
  
+static const char *zeroing_mode[] = {  /* sysfs names, by SD_ZERO_* value */
+       [SD_ZERO_WRITE]         = "write",
+       [SD_ZERO_WS]            = "writesame",
+       [SD_ZERO_WS16_UNMAP]    = "writesame_16_unmap",
+       [SD_ZERO_WS10_UNMAP]    = "writesame_10_unmap",
+};
+
+/* sysfs show: print the name of the disk's current zeroing mode to @buf. */
+static ssize_t zeroing_mode_show(struct device *dev,
+                                struct device_attribute *attr, char *buf)
+{
+       const char *name = zeroing_mode[to_scsi_disk(dev)->zeroing_mode];
+
+       return snprintf(buf, 20, "%s\n", name);
+}
+
+/*
+ * Set the zeroing mode from a name in zeroing_mode[]; sysfs_streq() lets a
+ * trailing newline (as written by "echo") match. Returns @count on success,
+ * -EACCES without CAP_SYS_ADMIN and -EINVAL for an unknown name.
+ */
+static ssize_t
+zeroing_mode_store(struct device *dev, struct device_attribute *attr,
+                  const char *buf, size_t count)
+{
+       struct scsi_disk *sdkp = to_scsi_disk(dev);
+       unsigned int mode;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EACCES;
+       for (mode = 0; mode < ARRAY_SIZE(zeroing_mode); mode++) {
+               if (sysfs_streq(buf, zeroing_mode[mode])) {
+                       sdkp->zeroing_mode = mode;
+                       return count;
+               }
+       }
+       return -EINVAL;
+}
+static DEVICE_ATTR_RW(zeroing_mode);
+
  static ssize_t
  max_medium_access_timeouts_show(struct device *dev,
                                 struct device_attribute *attr, char *buf)
@@ -496,6 +536,7 @@ static struct attribute *sd_disk_attrs[] = {
         &dev_attr_app_tag_own.attr,
         &dev_attr_thin_provisioning.attr,
         &dev_attr_provisioning_mode.attr,
+       &dev_attr_zeroing_mode.attr,
         &dev_attr_max_write_same_blocks.attr,
         &dev_attr_max_medium_access_timeouts.attr,
         NULL,
@@ -644,26 +685,11 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
         unsigned int logical_block_size = sdkp->device->sector_size;
         unsigned int max_blocks = 0;
  
-       q->limits.discard_zeroes_data = 0;
-
-       /*
-        * When LBPRZ is reported, discard alignment and granularity
-        * must be fixed to the logical block size. Otherwise the block
-        * layer will drop misaligned portions of the request which can
-        * lead to data corruption. If LBPRZ is not set, we honor the
-        * device preference.
-        */
-       if (sdkp->lbprz) {
-               q->limits.discard_alignment = 0;
-               q->limits.discard_granularity = logical_block_size;
-       } else {
-               q->limits.discard_alignment = sdkp->unmap_alignment *
-                       logical_block_size;
-               q->limits.discard_granularity =
-                       max(sdkp->physical_block_size,
-                           sdkp->unmap_granularity * logical_block_size);
-       }
-
+       q->limits.discard_alignment =
+               sdkp->unmap_alignment * logical_block_size;
+       q->limits.discard_granularity =
+               max(sdkp->physical_block_size,
+                   sdkp->unmap_granularity * logical_block_size);
         sdkp->provisioning_mode = mode;
  
         switch (mode) {
@@ -681,19 +707,16 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
         case SD_LBP_WS16:
                 max_blocks = min_not_zero(sdkp->max_ws_blocks,
                                           (u32)SD_MAX_WS16_BLOCKS);
-               q->limits.discard_zeroes_data = sdkp->lbprz;
                 break;
  
         case SD_LBP_WS10:
                 max_blocks = min_not_zero(sdkp->max_ws_blocks,
                                           (u32)SD_MAX_WS10_BLOCKS);
-               q->limits.discard_zeroes_data = sdkp->lbprz;
                 break;
  
         case SD_LBP_ZERO:
                 max_blocks = min_not_zero(sdkp->max_ws_blocks,
                                           (u32)SD_MAX_WS10_BLOCKS);
-               q->limits.discard_zeroes_data = 1;
                 break;
         }
  
@@ -701,93 +724,122 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
         queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
  }
  
-/**
- * sd_setup_discard_cmnd - unmap blocks on thinly provisioned device
- * @sdp: scsi device to operate on
- * @rq: Request to prepare
- *
- * Will issue either UNMAP or WRITE SAME(16) depending on preference
- * indicated by target device.
- **/
-static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
+static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
  {
-       struct request *rq = cmd->request;
         struct scsi_device *sdp = cmd->device;
-       struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
-       sector_t sector = blk_rq_pos(rq);
-       unsigned int nr_sectors = blk_rq_sectors(rq);
-       unsigned int len;
-       int ret;
+       struct request *rq = cmd->request;
+       u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
+       u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+       unsigned int data_len = 24;
         char *buf;
-       struct page *page;
  
-       sector >>= ilog2(sdp->sector_size) - 9;
-       nr_sectors >>= ilog2(sdp->sector_size) - 9;
-
-       page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
-       if (!page)
+       rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+       if (!rq->special_vec.bv_page)
                 return BLKPREP_DEFER;
+       rq->special_vec.bv_offset = 0;
+       rq->special_vec.bv_len = data_len;
+       rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
  
-       switch (sdkp->provisioning_mode) {
-       case SD_LBP_UNMAP:
-               buf = page_address(page);
+       cmd->cmd_len = 10;
+       cmd->cmnd[0] = UNMAP;
+       cmd->cmnd[8] = 24;
  
-               cmd->cmd_len = 10;
-               cmd->cmnd[0] = UNMAP;
-               cmd->cmnd[8] = 24;
+       buf = page_address(rq->special_vec.bv_page);
+       put_unaligned_be16(6 + 16, &buf[0]);
+       put_unaligned_be16(16, &buf[2]);
+       put_unaligned_be64(sector, &buf[8]);
+       put_unaligned_be32(nr_sectors, &buf[16]);
  
-               put_unaligned_be16(6 + 16, &buf[0]);
-               put_unaligned_be16(16, &buf[2]);
-               put_unaligned_be64(sector, &buf[8]);
-               put_unaligned_be32(nr_sectors, &buf[16]);
+       cmd->allowed = SD_MAX_RETRIES;
+       cmd->transfersize = data_len;
+       rq->timeout = SD_TIMEOUT;
+       scsi_req(rq)->resid_len = data_len;
  
-               len = 24;
-               break;
+       return scsi_init_io(cmd);
+}
  
-       case SD_LBP_WS16:
-               cmd->cmd_len = 16;
-               cmd->cmnd[0] = WRITE_SAME_16;
+static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, bool unmap)
+{
+       struct scsi_device *sdp = cmd->device;
+       struct request *rq = cmd->request;
+       u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
+       u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+       u32 data_len = sdp->sector_size;
+
+       rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+       if (!rq->special_vec.bv_page)
+               return BLKPREP_DEFER;
+       rq->special_vec.bv_offset = 0;
+       rq->special_vec.bv_len = data_len;
+       rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
+
+       cmd->cmd_len = 16;
+       cmd->cmnd[0] = WRITE_SAME_16;
+       if (unmap)
                 cmd->cmnd[1] = 0x8; /* UNMAP */
-               put_unaligned_be64(sector, &cmd->cmnd[2]);
-               put_unaligned_be32(nr_sectors, &cmd->cmnd[10]);
+       put_unaligned_be64(sector, &cmd->cmnd[2]);
+       put_unaligned_be32(nr_sectors, &cmd->cmnd[10]);
  
-               len = sdkp->device->sector_size;
-               break;
+       cmd->allowed = SD_MAX_RETRIES;
+       cmd->transfersize = data_len;
+       rq->timeout = unmap ? SD_TIMEOUT : SD_WRITE_SAME_TIMEOUT;
+       scsi_req(rq)->resid_len = data_len;
  
-       case SD_LBP_WS10:
-       case SD_LBP_ZERO:
-               cmd->cmd_len = 10;
-               cmd->cmnd[0] = WRITE_SAME;
-               if (sdkp->provisioning_mode == SD_LBP_WS10)
-                       cmd->cmnd[1] = 0x8; /* UNMAP */
-               put_unaligned_be32(sector, &cmd->cmnd[2]);
-               put_unaligned_be16(nr_sectors, &cmd->cmnd[7]);
+       return scsi_init_io(cmd);
+}
  
-               len = sdkp->device->sector_size;
-               break;
+static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap)
+{
+       struct scsi_device *sdp = cmd->device;
+       struct request *rq = cmd->request;
+       u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
+       u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+       u32 data_len = sdp->sector_size;
  
-       default:
-               ret = BLKPREP_INVALID;
-               goto out;
-       }
+       rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+       if (!rq->special_vec.bv_page)
+               return BLKPREP_DEFER;
+       rq->special_vec.bv_offset = 0;
+       rq->special_vec.bv_len = data_len;
+       rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
  
-       rq->timeout = SD_TIMEOUT;
+       cmd->cmd_len = 10;
+       cmd->cmnd[0] = WRITE_SAME;
+       if (unmap)
+               cmd->cmnd[1] = 0x8; /* UNMAP */
+       put_unaligned_be32(sector, &cmd->cmnd[2]);
+       put_unaligned_be16(nr_sectors, &cmd->cmnd[7]);
  
-       cmd->transfersize = len;
         cmd->allowed = SD_MAX_RETRIES;
+       cmd->transfersize = data_len;
+       rq->timeout = unmap ? SD_TIMEOUT : SD_WRITE_SAME_TIMEOUT;
+       scsi_req(rq)->resid_len = data_len;
  
-       rq->special_vec.bv_page = page;
-       rq->special_vec.bv_offset = 0;
-       rq->special_vec.bv_len = len;
+       return scsi_init_io(cmd);
+}
  
-       rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
-       scsi_req(rq)->resid_len = len;
+static int sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd)
+{
+       struct request *rq = cmd->request;
+       struct scsi_device *sdp = cmd->device;
+       struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+       u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
+       u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+
+       if (!(rq->cmd_flags & REQ_NOUNMAP)) {
+               switch (sdkp->zeroing_mode) {
+               case SD_ZERO_WS16_UNMAP:
+                       return sd_setup_write_same16_cmnd(cmd, true);
+               case SD_ZERO_WS10_UNMAP:
+                       return sd_setup_write_same10_cmnd(cmd, true);
+               }
+       }
  
-       ret = scsi_init_io(cmd);
-out:
-       if (ret != BLKPREP_OK)
-               __free_page(page);
-       return ret;
+       if (sdp->no_write_same)
+               return BLKPREP_INVALID;
+       if (sdkp->ws16 || sector > 0xffffffff || nr_sectors > 0xffff)
+               return sd_setup_write_same16_cmnd(cmd, false);
+       return sd_setup_write_same10_cmnd(cmd, false);
  }
  
  static void sd_config_write_same(struct scsi_disk *sdkp)
@@ -816,9 +868,20 @@ static void sd_config_write_same(struct scsi_disk *sdkp)
                 sdkp->max_ws_blocks = 0;
         }
  
+       if (sdkp->lbprz && sdkp->lbpws)
+               sdkp->zeroing_mode = SD_ZERO_WS16_UNMAP;
+       else if (sdkp->lbprz && sdkp->lbpws10)
+               sdkp->zeroing_mode = SD_ZERO_WS10_UNMAP;
+       else if (sdkp->max_ws_blocks)
+               sdkp->zeroing_mode = SD_ZERO_WS;
+       else
+               sdkp->zeroing_mode = SD_ZERO_WRITE;
+
  out:
         blk_queue_max_write_same_sectors(q, sdkp->max_ws_blocks *
                                          (logical_block_size >> 9));
+       blk_queue_max_write_zeroes_sectors(q, sdkp->max_ws_blocks *
+                                        (logical_block_size >> 9));
  }
  
  /**
@@ -1155,7 +1218,20 @@ static int sd_init_command(struct scsi_cmnd *cmd)
  
         switch (req_op(rq)) {
         case REQ_OP_DISCARD:
-               return sd_setup_discard_cmnd(cmd);
+               switch (scsi_disk(rq->rq_disk)->provisioning_mode) {
+               case SD_LBP_UNMAP:
+                       return sd_setup_unmap_cmnd(cmd);
+               case SD_LBP_WS16:
+                       return sd_setup_write_same16_cmnd(cmd, true);
+               case SD_LBP_WS10:
+                       return sd_setup_write_same10_cmnd(cmd, true);
+               case SD_LBP_ZERO:
+                       return sd_setup_write_same10_cmnd(cmd, false);
+               default:
+                       return BLKPREP_INVALID;
+               }
+       case REQ_OP_WRITE_ZEROES:
+               return sd_setup_write_zeroes_cmnd(cmd);
         case REQ_OP_WRITE_SAME:
                 return sd_setup_write_same_cmnd(cmd);
         case REQ_OP_FLUSH:
@@ -1783,6 +1859,8 @@ static int sd_done(struct scsi_cmnd *SCpnt)
  {
         int result = SCpnt->result;
         unsigned int good_bytes = result ? 0 : scsi_bufflen(SCpnt);
+       unsigned int sector_size = SCpnt->device->sector_size;
+       unsigned int resid;
         struct scsi_sense_hdr sshdr;
         struct scsi_disk *sdkp = scsi_disk(SCpnt->request->rq_disk);
         struct request *req = SCpnt->request;
@@ -1793,6 +1871,7 @@ static int sd_done(struct scsi_cmnd *SCpnt)
  
         switch (req_op(req)) {
         case REQ_OP_DISCARD:
+       case REQ_OP_WRITE_ZEROES:
         case REQ_OP_WRITE_SAME:
         case REQ_OP_ZONE_RESET:
                 if (!result) {
@@ -1813,6 +1892,21 @@ static int sd_done(struct scsi_cmnd *SCpnt)
                         scsi_set_resid(SCpnt, blk_rq_bytes(req));
                 }
                 break;
+       default:
+               /*
+                * In case of bogus fw or device, we could end up having
+                * an unaligned partial completion. Check this here and force
+                * alignment.
+                */
+               resid = scsi_get_resid(SCpnt);
+               if (resid & (sector_size - 1)) {
+                       sd_printk(KERN_INFO, sdkp,
+                               "Unaligned partial completion (resid=%u, sector_sz=%u)\n",
+                               resid, sector_size);
+                       resid = min(scsi_bufflen(SCpnt),
+                                   round_up(resid, sector_size));
+                       scsi_set_resid(SCpnt, resid);
+               }
         }
  
         if (result) {
@@ -2085,6 +2179,22 @@ static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp,
  
  #define READ_CAPACITY_RETRIES_ON_RESET 10
  
+/*
+ * Check that the 512-byte sector count covering logical blocks 0..@lba
+ * fits into sector_t. Only a 32-bit sector_t (CONFIG_LBDAF unset) can be
+ * too small, in which case the count must not exceed U32_MAX. The value
+ * is computed as a u64, which is why logical_to_sectors() is not used.
+ */
+static bool sd_addressable_capacity(u64 lba, unsigned int sector_size)
+{
+       /* Shift that converts logical blocks into 512-byte sectors. */
+       int shift = ilog2(sector_size) - 9;
+       u64 end_sector = (lba + 1ULL) << shift;
+
+       /* A sector_t wider than 32 bits is always large enough here. */
+       return sizeof(sector_t) != 4 || end_sector <= U32_MAX;
+}
+
  static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp,
                                                 unsigned char *buffer)
  {
@@ -2150,7 +2260,7 @@ static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp,
                 return -ENODEV;
         }
  
-       if ((sizeof(sdkp->capacity) == 4) && (lba >= 0xffffffffULL)) {
+       if (!sd_addressable_capacity(lba, sector_size)) {
                 sd_printk(KERN_ERR, sdkp, "Too big for this kernel. Use a "
                         "kernel compiled with support for large block "
                         "devices.\n");
@@ -2239,7 +2349,7 @@ static int read_capacity_10(struct scsi_disk *sdkp, struct scsi_device *sdp,
                 return sector_size;
         }
  
-       if ((sizeof(sdkp->capacity) == 4) && (lba == 0xffffffff)) {
+       if (!sd_addressable_capacity(lba, sector_size)) {
                 sd_printk(KERN_ERR, sdkp, "Too big for this kernel. Use a "
                         "kernel compiled with support for large block "
                         "devices.\n");
@@ -2735,7 +2845,7 @@ static void sd_read_block_limits(struct scsi_disk *sdkp)
                                 sd_config_discard(sdkp, SD_LBP_WS16);
  
                 } else {        /* LBP VPD page tells us what to use */
-                       if (sdkp->lbpu && sdkp->max_unmap_blocks && !sdkp->lbprz)
+                       if (sdkp->lbpu && sdkp->max_unmap_blocks)
                                 sd_config_discard(sdkp, SD_LBP_UNMAP);
                         else if (sdkp->lbpws)
                                 sd_config_discard(sdkp, SD_LBP_WS16);
@@ -2939,7 +3049,8 @@ static int sd_revalidate_disk(struct gendisk *disk)
                 q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks);
                 rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
         } else
-               rw_max = BLK_DEF_MAX_SECTORS;
+               rw_max = min_not_zero(logical_to_sectors(sdp, dev_max),
+                                     (sector_t)BLK_DEF_MAX_SECTORS);
  
         /* Combine with controller limits */
         q->limits.max_sectors = min(rw_max, queue_max_hw_sectors(q));
@@ -3075,23 +3186,6 @@ static void sd_probe_async(void *data, async_cookie_t cookie)
         put_device(&sdkp->dev);
  }
  
-struct sd_devt {
-       int idx;
-       struct disk_devt disk_devt;
-};
-
-static void sd_devt_release(struct disk_devt *disk_devt)
-{
-       struct sd_devt *sd_devt = container_of(disk_devt, struct sd_devt,
-                       disk_devt);
-
-       spin_lock(&sd_index_lock);
-       ida_remove(&sd_index_ida, sd_devt->idx);
-       spin_unlock(&sd_index_lock);
-
-       kfree(sd_devt);
-}
-
  /**
   *     sd_probe - called during driver initialization and whenever a
   *     new scsi device is attached to the system. It is called once
@@ -3113,7 +3207,6 @@ static void sd_devt_release(struct disk_devt *disk_devt)
  static int sd_probe(struct device *dev)
  {
         struct scsi_device *sdp = to_scsi_device(dev);
-       struct sd_devt *sd_devt;
         struct scsi_disk *sdkp;
         struct gendisk *gd;
         int index;
@@ -3139,13 +3232,9 @@ static int sd_probe(struct device *dev)
         if (!sdkp)
                 goto out;
  
-       sd_devt = kzalloc(sizeof(*sd_devt), GFP_KERNEL);
-       if (!sd_devt)
-               goto out_free;
-
         gd = alloc_disk(SD_MINORS);
         if (!gd)
-               goto out_free_devt;
+               goto out_free;
  
         do {
                 if (!ida_pre_get(&sd_index_ida, GFP_KERNEL))
@@ -3161,11 +3250,6 @@ static int sd_probe(struct device *dev)
                 goto out_put;
         }
  
-       atomic_set(&sd_devt->disk_devt.count, 1);
-       sd_devt->disk_devt.release = sd_devt_release;
-       sd_devt->idx = index;
-       gd->disk_devt = &sd_devt->disk_devt;
-
         error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN);
         if (error) {
                 sdev_printk(KERN_WARNING, sdp, "SCSI disk (sd) name length exceeded.\n");
@@ -3205,12 +3289,11 @@ static int sd_probe(struct device *dev)
         return 0;
  
   out_free_index:
-       put_disk_devt(&sd_devt->disk_devt);
-       sd_devt = NULL;
+       spin_lock(&sd_index_lock);
+       ida_remove(&sd_index_ida, index);
+       spin_unlock(&sd_index_lock);
   out_put:
         put_disk(gd);
- out_free_devt:
-       kfree(sd_devt);
   out_free:
         kfree(sdkp);
   out:
@@ -3271,7 +3354,10 @@ static void scsi_disk_release(struct device *dev)
         struct scsi_disk *sdkp = to_scsi_disk(dev);
         struct gendisk *disk = sdkp->disk;
         
-       put_disk_devt(disk->disk_devt);
+       spin_lock(&sd_index_lock);
+       ida_remove(&sd_index_ida, sdkp->index);
+       spin_unlock(&sd_index_lock);
+
         disk->private_data = NULL;
         put_disk(disk);
         put_device(&sdkp->device->sdev_gendev);
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h

index 4dac35e96a75baf833acbfe017c3edef8f8f23b8..a2c4b5c35379d1747a4857fa80bd95b1cca3c9e4 100644 (file)
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -59,6 +59,13 @@ enum {
         SD_LBP_DISABLE,         /* Discard disabled due to failed cmd */
  };
  
+enum {
+       SD_ZERO_WRITE = 0,      /* Use WRITE(10/16) command */
+       SD_ZERO_WS,             /* Use WRITE SAME(10/16) command */
+       SD_ZERO_WS16_UNMAP,     /* Use WRITE SAME(16) with UNMAP */
+       SD_ZERO_WS10_UNMAP,     /* Use WRITE SAME(10) with UNMAP */
+};
+
  struct scsi_disk {
         struct scsi_driver *driver;     /* always &sd_template */
         struct scsi_device *device;
@@ -89,6 +96,7 @@ struct scsi_disk {
         u8              write_prot;
         u8              protection_type;/* Data Integrity Field */
         u8              provisioning_mode;
+       u8              zeroing_mode;
         unsigned        ATO : 1;        /* state of disk ATO bit */
         unsigned        cache_override : 1; /* temp override of WCE,RCD */
         unsigned        WCE : 1;        /* state of disk WCE bit */
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c

index 92620c8ea8ad93722c466652a4b82cd43013d8a8..1994f7799fceddfb7705a6ab4b36a370ece0fcbd 100644 (file)
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -329,6 +329,7 @@ void sd_zbc_complete(struct scsi_cmnd *cmd,
  
         switch (req_op(rq)) {
         case REQ_OP_WRITE:
+       case REQ_OP_WRITE_ZEROES:
         case REQ_OP_WRITE_SAME:
         case REQ_OP_ZONE_RESET:
  
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c

index 29b86505f796d9fa57c19b958c1c7ae4ef31b298..504504beaa5e0bb2033dafc95ed2e59f779d94ec 100644 (file)
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -996,6 +996,8 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg)
                 result = get_user(val, ip);
                 if (result)
                         return result;
+               if (val > SG_MAX_CDB_SIZE)
+                       return -ENOMEM;
                 sfp->next_cmd_len = (val > 0) ? val : 0;
                 return 0;
         case SG_GET_VERSION_NUM:
@@ -1298,7 +1300,7 @@ sg_rq_end_io(struct request *rq, int uptodate)
                 pr_info("%s: device detaching\n", __func__);
  
         sense = req->sense;
-       result = rq->errors;
+       result = req->result;
         resid = req->resid_len;
  
         SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sdp,
@@ -1716,7 +1718,7 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
  
         srp->rq = rq;
         rq->end_io_data = srp;
-       rq->retries = SG_DEFAULT_RETRIES;
+       req->retries = SG_DEFAULT_RETRIES;
  
         if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE))
                 return 0;
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c

index 0b29b9329b1c2f5c8207188884498f314e0a414a..a8f630213a1a0fce250e23b00a9452676389b159 100644 (file)
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -836,6 +836,7 @@ static void get_capabilities(struct scsi_cd *cd)
         unsigned char *buffer;
         struct scsi_mode_data data;
         struct scsi_sense_hdr sshdr;
+       unsigned int ms_len = 128;
         int rc, n;
  
         static const char *loadmech[] =
@@ -862,10 +863,11 @@ static void get_capabilities(struct scsi_cd *cd)
         scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr);
  
         /* ask for mode page 0x2a */
-       rc = scsi_mode_sense(cd->device, 0, 0x2a, buffer, 128,
+       rc = scsi_mode_sense(cd->device, 0, 0x2a, buffer, ms_len,
                              SR_TIMEOUT, 3, &data, NULL);
  
-       if (!scsi_status_is_good(rc)) {
+       if (!scsi_status_is_good(rc) || data.length > ms_len ||
+           data.header_length + data.block_descriptor_length > data.length) {
                 /* failed, drive doesn't have capabilities mode page */
                 cd->cdi.speed = 1;
                 cd->cdi.mask |= (CDC_CD_R | CDC_CD_RW | CDC_DVD_R |
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c

index e5ef78a6848ef11f0eee8e9c462f5d8daecd59dc..1ea34d6f54370f0beece52e2de1d7b3aa76516fe 100644 (file)
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -480,7 +480,7 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req)
                 atomic64_add(ktime_to_ns(now), &STp->stats->tot_write_time);
                 atomic64_add(ktime_to_ns(now), &STp->stats->tot_io_time);
                 atomic64_inc(&STp->stats->write_cnt);
-               if (req->errors) {
+               if (scsi_req(req)->result) {
                         atomic64_add(atomic_read(&STp->stats->last_write_size)
                                 - STp->buffer->cmdstat.residual,
                                 &STp->stats->write_byte_cnt);
@@ -494,7 +494,7 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req)
                 atomic64_add(ktime_to_ns(now), &STp->stats->tot_read_time);
                 atomic64_add(ktime_to_ns(now), &STp->stats->tot_io_time);
                 atomic64_inc(&STp->stats->read_cnt);
-               if (req->errors) {
+               if (scsi_req(req)->result) {
                         atomic64_add(atomic_read(&STp->stats->last_read_size)
                                 - STp->buffer->cmdstat.residual,
                                 &STp->stats->read_byte_cnt);
@@ -518,7 +518,7 @@ static void st_scsi_execute_end(struct request *req, int uptodate)
         struct scsi_tape *STp = SRpnt->stp;
         struct bio *tmp;
  
-       STp->buffer->cmdstat.midlevel_result = SRpnt->result = req->errors;
+       STp->buffer->cmdstat.midlevel_result = SRpnt->result = rq->result;
         STp->buffer->cmdstat.residual = rq->resid_len;
  
         st_do_stats(STp, req);
@@ -579,7 +579,7 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
         memset(rq->cmd, 0, BLK_MAX_CDB);
         memcpy(rq->cmd, cmd, rq->cmd_len);
         req->timeout = timeout;
-       req->retries = retries;
+       rq->retries = retries;
         req->end_io_data = SRpnt;
  
         blk_execute_rq_nowait(req->q, NULL, req, 1, st_scsi_execute_end);
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c

index 638e5f427c901fddee96a53328cd8cc76f1368be..016639d7fef176da5e54ddc5d4d32e110ea0656a 100644 (file)
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -400,8 +400,6 @@ MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to subchannels")
   */
  static int storvsc_timeout = 180;
  
-static int msft_blist_flags = BLIST_TRY_VPD_PAGES;
-
  #if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
  static struct scsi_transport_template *fc_transport_template;
  #endif
@@ -1383,6 +1381,22 @@ static int storvsc_do_io(struct hv_device *device,
         return ret;
  }
  
+static int storvsc_device_alloc(struct scsi_device *sdevice)
+{
+       /*
+        * Set blist flag to permit the reading of the VPD pages even when
+        * the target may claim SPC-2 compliance. MSFT targets currently
+        * claim SPC-2 compliance while they implement post SPC-2 features.
+        * With this flag we can correctly handle WRITE_SAME_16 issues.
+        *
+        * Hypervisor reports SCSI_UNKNOWN type for DVD ROM device but
+        * still supports REPORT LUN.
+        */
+       sdevice->sdev_bflags = BLIST_REPORTLUN2 | BLIST_TRY_VPD_PAGES;
+
+       return 0;
+}
+
  static int storvsc_device_configure(struct scsi_device *sdevice)
  {
  
@@ -1395,14 +1409,6 @@ static int storvsc_device_configure(struct scsi_device *sdevice)
  
         sdevice->no_write_same = 1;
  
-       /*
-        * Add blist flags to permit the reading of the VPD pages even when
-        * the target may claim SPC-2 compliance. MSFT targets currently
-        * claim SPC-2 compliance while they implement post SPC-2 features.
-        * With this patch we can correctly handle WRITE_SAME_16 issues.
-        */
-       sdevice->sdev_bflags |= msft_blist_flags;
-
         /*
          * If the host is WIN8 or WIN8 R2, claim conformance to SPC-3
          * if the device is a MSFT virtual device.  If the host is
@@ -1661,6 +1667,7 @@ static struct scsi_host_template scsi_driver = {
         .eh_host_reset_handler =        storvsc_host_reset_handler,
         .proc_name =            "storvsc_host",
         .eh_timed_out =         storvsc_eh_timed_out,
+       .slave_alloc =          storvsc_device_alloc,
         .slave_configure =      storvsc_device_configure,
         .cmd_per_lun =          255,
         .this_id =              -1,
diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h

index 318e4a1f76c92bab27954b5cc29a3d374c8eb6e7..54deeb754db5fccf7d918b604916b30d8defa8e5 100644 (file)
--- a/drivers/scsi/ufs/ufs.h
+++ b/drivers/scsi/ufs/ufs.h
@@ -146,7 +146,7 @@ enum attr_idn {
  /* Descriptor idn for Query requests */
  enum desc_idn {
         QUERY_DESC_IDN_DEVICE           = 0x0,
-       QUERY_DESC_IDN_CONFIGURAION     = 0x1,
+       QUERY_DESC_IDN_CONFIGURATION    = 0x1,
         QUERY_DESC_IDN_UNIT             = 0x2,
         QUERY_DESC_IDN_RFU_0            = 0x3,
         QUERY_DESC_IDN_INTERCONNECT     = 0x4,
@@ -162,19 +162,13 @@ enum desc_header_offset {
         QUERY_DESC_DESC_TYPE_OFFSET     = 0x01,
  };
  
-enum ufs_desc_max_size {
-       QUERY_DESC_DEVICE_MAX_SIZE              = 0x40,
-       QUERY_DESC_CONFIGURAION_MAX_SIZE        = 0x90,
-       QUERY_DESC_UNIT_MAX_SIZE                = 0x23,
-       QUERY_DESC_INTERCONNECT_MAX_SIZE        = 0x06,
-       /*
-        * Max. 126 UNICODE characters (2 bytes per character) plus 2 bytes
-        * of descriptor header.
-        */
-       QUERY_DESC_STRING_MAX_SIZE              = 0xFE,
-       QUERY_DESC_GEOMETRY_MAX_SIZE            = 0x44,
-       QUERY_DESC_POWER_MAX_SIZE               = 0x62,
-       QUERY_DESC_RFU_MAX_SIZE                 = 0x00,
+enum ufs_desc_def_size {
+       QUERY_DESC_DEVICE_DEF_SIZE              = 0x40,
+       QUERY_DESC_CONFIGURATION_DEF_SIZE       = 0x90,
+       QUERY_DESC_UNIT_DEF_SIZE                = 0x23,
+       QUERY_DESC_INTERCONNECT_DEF_SIZE        = 0x06,
+       QUERY_DESC_GEOMETRY_DEF_SIZE            = 0x44,
+       QUERY_DESC_POWER_DEF_SIZE               = 0x62,
  };
  
  /* Unit descriptor parameters offsets in bytes*/
diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c

index a72a4ba78125b09a135c781533d46b1cbf0bc042..8e5e6c04c035e1e134d84670ff19459597df35b1 100644 (file)
--- a/drivers/scsi/ufs/ufshcd-pltfrm.c
+++ b/drivers/scsi/ufs/ufshcd-pltfrm.c
@@ -309,8 +309,8 @@ int ufshcd_pltfrm_init(struct platform_device *pdev,
  
         mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
         mmio_base = devm_ioremap_resource(dev, mem_res);
-       if (IS_ERR(*(void **)&mmio_base)) {
-               err = PTR_ERR(*(void **)&mmio_base);
+       if (IS_ERR(mmio_base)) {
+               err = PTR_ERR(mmio_base);
                 goto out;
         }
  
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c

index dc6efbd1be8ef344bb994589054c24843e442230..096e95b911bd7b29abf2c06a58191b76ce0a4139 100644 (file)
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -100,19 +100,6 @@
  #define ufshcd_hex_dump(prefix_str, buf, len) \
  print_hex_dump(KERN_ERR, prefix_str, DUMP_PREFIX_OFFSET, 16, 4, buf, len, false)
  
-static u32 ufs_query_desc_max_size[] = {
-       QUERY_DESC_DEVICE_MAX_SIZE,
-       QUERY_DESC_CONFIGURAION_MAX_SIZE,
-       QUERY_DESC_UNIT_MAX_SIZE,
-       QUERY_DESC_RFU_MAX_SIZE,
-       QUERY_DESC_INTERCONNECT_MAX_SIZE,
-       QUERY_DESC_STRING_MAX_SIZE,
-       QUERY_DESC_RFU_MAX_SIZE,
-       QUERY_DESC_GEOMETRY_MAX_SIZE,
-       QUERY_DESC_POWER_MAX_SIZE,
-       QUERY_DESC_RFU_MAX_SIZE,
-};
-
  enum {
         UFSHCD_MAX_CHANNEL      = 0,
         UFSHCD_MAX_ID           = 1,
@@ -2857,7 +2844,7 @@ static int __ufshcd_query_descriptor(struct ufs_hba *hba,
                 goto out;
         }
  
-       if (*buf_len <= QUERY_DESC_MIN_SIZE || *buf_len > QUERY_DESC_MAX_SIZE) {
+       if (*buf_len < QUERY_DESC_MIN_SIZE || *buf_len > QUERY_DESC_MAX_SIZE) {
                 dev_err(hba->dev, "%s: descriptor buffer size (%d) is out of range\n",
                                 __func__, *buf_len);
                 err = -EINVAL;
@@ -2937,6 +2924,92 @@ static int ufshcd_query_descriptor_retry(struct ufs_hba *hba,
         return err;
  }
  
+/**
+ * ufshcd_read_desc_length - read the specified descriptor length from header
+ * @hba: Pointer to adapter instance
+ * @desc_id: descriptor idn value
+ * @desc_index: descriptor index
+ * @desc_length: pointer to variable to read the length of descriptor
+ *
+ * Return 0 in case of success, non-zero otherwise
+ */
+static int ufshcd_read_desc_length(struct ufs_hba *hba,
+       enum desc_idn desc_id,
+       int desc_index,
+       int *desc_length)
+{
+       int ret;
+       u8 header[QUERY_DESC_HDR_SIZE];
+       int header_len = QUERY_DESC_HDR_SIZE;
+
+       if (desc_id >= QUERY_DESC_IDN_MAX)
+               return -EINVAL;
+
+       ret = ufshcd_query_descriptor_retry(hba, UPIU_QUERY_OPCODE_READ_DESC,
+                                       desc_id, desc_index, 0, header,
+                                       &header_len);
+
+       if (ret) {
+               dev_err(hba->dev, "%s: Failed to get descriptor header id %d",
+                       __func__, desc_id);
+               return ret;
+       } else if (desc_id != header[QUERY_DESC_DESC_TYPE_OFFSET]) {
+               dev_warn(hba->dev, "%s: descriptor header id %d and desc_id %d mismatch",
+                       __func__, header[QUERY_DESC_DESC_TYPE_OFFSET],
+                       desc_id);
+               ret = -EINVAL;
+       }
+
+       *desc_length = header[QUERY_DESC_LENGTH_OFFSET];
+       return ret;
+
+}
+
+/**
+ * ufshcd_map_desc_id_to_length - map descriptor IDN to its length
+ * @hba: Pointer to adapter instance
+ * @desc_id: descriptor idn value
+ * @desc_len: mapped desc length (out)
+ *
+ * Return 0 in case of success, non-zero otherwise
+ */
+int ufshcd_map_desc_id_to_length(struct ufs_hba *hba,
+       enum desc_idn desc_id, int *desc_len)
+{
+       switch (desc_id) {
+       case QUERY_DESC_IDN_DEVICE:
+               *desc_len = hba->desc_size.dev_desc;
+               break;
+       case QUERY_DESC_IDN_POWER:
+               *desc_len = hba->desc_size.pwr_desc;
+               break;
+       case QUERY_DESC_IDN_GEOMETRY:
+               *desc_len = hba->desc_size.geom_desc;
+               break;
+       case QUERY_DESC_IDN_CONFIGURATION:
+               *desc_len = hba->desc_size.conf_desc;
+               break;
+       case QUERY_DESC_IDN_UNIT:
+               *desc_len = hba->desc_size.unit_desc;
+               break;
+       case QUERY_DESC_IDN_INTERCONNECT:
+               *desc_len = hba->desc_size.interc_desc;
+               break;
+       case QUERY_DESC_IDN_STRING:
+               *desc_len = QUERY_DESC_MAX_SIZE;
+               break;
+       case QUERY_DESC_IDN_RFU_0:
+       case QUERY_DESC_IDN_RFU_1:
+               *desc_len = 0;
+               break;
+       default:
+               *desc_len = 0;
+               return -EINVAL;
+       }
+       return 0;
+}
+EXPORT_SYMBOL(ufshcd_map_desc_id_to_length);
+
  /**
   * ufshcd_read_desc_param - read the specified descriptor parameter
   * @hba: Pointer to adapter instance
@@ -2951,42 +3024,49 @@ static int ufshcd_query_descriptor_retry(struct ufs_hba *hba,
  static int ufshcd_read_desc_param(struct ufs_hba *hba,
                                   enum desc_idn desc_id,
                                   int desc_index,
-                                 u32 param_offset,
+                                 u8 param_offset,
                                   u8 *param_read_buf,
-                                 u32 param_size)
+                                 u8 param_size)
  {
         int ret;
         u8 *desc_buf;
-       u32 buff_len;
+       int buff_len;
         bool is_kmalloc = true;
  
-       /* safety checks */
-       if (desc_id >= QUERY_DESC_IDN_MAX)
+       /* Safety check */
+       if (desc_id >= QUERY_DESC_IDN_MAX || !param_size)
                 return -EINVAL;
  
-       buff_len = ufs_query_desc_max_size[desc_id];
-       if ((param_offset + param_size) > buff_len)
-               return -EINVAL;
+       /* Get the max length of descriptor from structure filled up at probe
+        * time.
+        */
+       ret = ufshcd_map_desc_id_to_length(hba, desc_id, &buff_len);
  
-       if (!param_offset && (param_size == buff_len)) {
-               /* memory space already available to hold full descriptor */
-               desc_buf = param_read_buf;
-               is_kmalloc = false;
-       } else {
-               /* allocate memory to hold full descriptor */
+       /* Sanity checks */
+       if (ret || !buff_len) {
+               dev_err(hba->dev, "%s: Failed to get full descriptor length",
+                       __func__);
+               return ret;
+       }
+
+       /* Check whether we need temp memory */
+       if (param_offset != 0 || param_size < buff_len) {
                 desc_buf = kmalloc(buff_len, GFP_KERNEL);
                 if (!desc_buf)
                         return -ENOMEM;
+       } else {
+               desc_buf = param_read_buf;
+               is_kmalloc = false;
         }
  
+       /* Request for full descriptor */
         ret = ufshcd_query_descriptor_retry(hba, UPIU_QUERY_OPCODE_READ_DESC,
-                                       desc_id, desc_index, 0, desc_buf,
-                                       &buff_len);
+                                       desc_id, desc_index, 0,
+                                       desc_buf, &buff_len);
  
         if (ret) {
                 dev_err(hba->dev, "%s: Failed reading descriptor. desc_id %d, desc_index %d, param_offset %d, ret %d",
                         __func__, desc_id, desc_index, param_offset, ret);
-
                 goto out;
         }
  
@@ -2998,25 +3078,9 @@ static int ufshcd_read_desc_param(struct ufs_hba *hba,
                 goto out;
         }
  
-       /*
-        * While reading variable size descriptors (like string descriptor),
-        * some UFS devices may report the "LENGTH" (field in "Transaction
-        * Specific fields" of Query Response UPIU) same as what was requested
-        * in Query Request UPIU instead of reporting the actual size of the
-        * variable size descriptor.
-        * Although it's safe to ignore the "LENGTH" field for variable size
-        * descriptors as we can always derive the length of the descriptor from
-        * the descriptor header fields. Hence this change impose the length
-        * match check only for fixed size descriptors (for which we always
-        * request the correct size as part of Query Request UPIU).
-        */
-       if ((desc_id != QUERY_DESC_IDN_STRING) &&
-           (buff_len != desc_buf[QUERY_DESC_LENGTH_OFFSET])) {
-               dev_err(hba->dev, "%s: desc_buf length mismatch: buff_len %d, buff_len(desc_header) %d",
-                       __func__, buff_len, desc_buf[QUERY_DESC_LENGTH_OFFSET]);
-               ret = -EINVAL;
-               goto out;
-       }
+       /* Check wherher we will not copy more data, than available */
+       if (is_kmalloc && param_size > buff_len)
+               param_size = buff_len;
  
         if (is_kmalloc)
                 memcpy(param_read_buf, &desc_buf[param_offset], param_size);
@@ -4598,8 +4662,6 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
                 }
                 if (ufshcd_is_clkscaling_supported(hba))
                         hba->clk_scaling.active_reqs--;
-               if (ufshcd_is_clkscaling_supported(hba))
-                       hba->clk_scaling.active_reqs--;
         }
  
         /* clear corresponding bits of completed commands */
@@ -5919,8 +5981,8 @@ static int ufshcd_set_icc_levels_attr(struct ufs_hba *hba, u32 icc_level)
  static void ufshcd_init_icc_levels(struct ufs_hba *hba)
  {
         int ret;
-       int buff_len = QUERY_DESC_POWER_MAX_SIZE;
-       u8 desc_buf[QUERY_DESC_POWER_MAX_SIZE];
+       int buff_len = hba->desc_size.pwr_desc;
+       u8 desc_buf[hba->desc_size.pwr_desc];
  
         ret = ufshcd_read_power_desc(hba, desc_buf, buff_len);
         if (ret) {
@@ -6017,11 +6079,10 @@ static int ufs_get_device_desc(struct ufs_hba *hba,
  {
         int err;
         u8 model_index;
-       u8 str_desc_buf[QUERY_DESC_STRING_MAX_SIZE + 1] = {0};
-       u8 desc_buf[QUERY_DESC_DEVICE_MAX_SIZE];
+       u8 str_desc_buf[QUERY_DESC_MAX_SIZE + 1] = {0};
+       u8 desc_buf[hba->desc_size.dev_desc];
  
-       err = ufshcd_read_device_desc(hba, desc_buf,
-                                       QUERY_DESC_DEVICE_MAX_SIZE);
+       err = ufshcd_read_device_desc(hba, desc_buf, hba->desc_size.dev_desc);
         if (err) {
                 dev_err(hba->dev, "%s: Failed reading Device Desc. err = %d\n",
                         __func__, err);
@@ -6038,14 +6099,14 @@ static int ufs_get_device_desc(struct ufs_hba *hba,
         model_index = desc_buf[DEVICE_DESC_PARAM_PRDCT_NAME];
  
         err = ufshcd_read_string_desc(hba, model_index, str_desc_buf,
-                                       QUERY_DESC_STRING_MAX_SIZE, ASCII_STD);
+                               QUERY_DESC_MAX_SIZE, ASCII_STD);
         if (err) {
                 dev_err(hba->dev, "%s: Failed reading Product Name. err = %d\n",
                         __func__, err);
                 goto out;
         }
  
-       str_desc_buf[QUERY_DESC_STRING_MAX_SIZE] = '\0';
+       str_desc_buf[QUERY_DESC_MAX_SIZE] = '\0';
         strlcpy(dev_desc->model, (str_desc_buf + QUERY_DESC_HDR_SIZE),
                 min_t(u8, str_desc_buf[QUERY_DESC_LENGTH_OFFSET],
                       MAX_MODEL_LEN));
@@ -6251,6 +6312,51 @@ static void ufshcd_clear_dbg_ufs_stats(struct ufs_hba *hba)
         hba->req_abort_count = 0;
  }
  
+static void ufshcd_init_desc_sizes(struct ufs_hba *hba)
+{
+       int err;
+
+       err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_DEVICE, 0,
+               &hba->desc_size.dev_desc);
+       if (err)
+               hba->desc_size.dev_desc = QUERY_DESC_DEVICE_DEF_SIZE;
+
+       err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_POWER, 0,
+               &hba->desc_size.pwr_desc);
+       if (err)
+               hba->desc_size.pwr_desc = QUERY_DESC_POWER_DEF_SIZE;
+
+       err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_INTERCONNECT, 0,
+               &hba->desc_size.interc_desc);
+       if (err)
+               hba->desc_size.interc_desc = QUERY_DESC_INTERCONNECT_DEF_SIZE;
+
+       err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_CONFIGURATION, 0,
+               &hba->desc_size.conf_desc);
+       if (err)
+               hba->desc_size.conf_desc = QUERY_DESC_CONFIGURATION_DEF_SIZE;
+
+       err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_UNIT, 0,
+               &hba->desc_size.unit_desc);
+       if (err)
+               hba->desc_size.unit_desc = QUERY_DESC_UNIT_DEF_SIZE;
+
+       err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_GEOMETRY, 0,
+               &hba->desc_size.geom_desc);
+       if (err)
+               hba->desc_size.geom_desc = QUERY_DESC_GEOMETRY_DEF_SIZE;
+}
+
+static void ufshcd_def_desc_sizes(struct ufs_hba *hba)
+{
+       hba->desc_size.dev_desc = QUERY_DESC_DEVICE_DEF_SIZE;
+       hba->desc_size.pwr_desc = QUERY_DESC_POWER_DEF_SIZE;
+       hba->desc_size.interc_desc = QUERY_DESC_INTERCONNECT_DEF_SIZE;
+       hba->desc_size.conf_desc = QUERY_DESC_CONFIGURATION_DEF_SIZE;
+       hba->desc_size.unit_desc = QUERY_DESC_UNIT_DEF_SIZE;
+       hba->desc_size.geom_desc = QUERY_DESC_GEOMETRY_DEF_SIZE;
+}
+
  /**
   * ufshcd_probe_hba - probe hba to detect device and initialize
   * @hba: per-adapter instance
@@ -6285,6 +6391,9 @@ static int ufshcd_probe_hba(struct ufs_hba *hba)
         if (ret)
                 goto out;
  
+       /* Init check for device descriptor sizes */
+       ufshcd_init_desc_sizes(hba);
+
         ret = ufs_get_device_desc(hba, &card);
         if (ret) {
                 dev_err(hba->dev, "%s: Failed getting device info. err = %d\n",
@@ -6320,6 +6429,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba)
  
         /* set the state as operational after switching to desired gear */
         hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
+
         /*
          * If we are in error handling context or in power management callbacks
          * context, no need to scan the host
@@ -7530,7 +7640,7 @@ static inline ssize_t ufshcd_pm_lvl_store(struct device *dev,
         if (kstrtoul(buf, 0, &value))
                 return -EINVAL;
  
-       if ((value < UFS_PM_LVL_0) || (value >= UFS_PM_LVL_MAX))
+       if (value >= UFS_PM_LVL_MAX)
                 return -EINVAL;
  
         spin_lock_irqsave(hba->host->host_lock, flags);
@@ -7774,6 +7884,9 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
         hba->mmio_base = mmio_base;
         hba->irq = irq;
  
+       /* Set descriptor lengths to specification defaults */
+       ufshcd_def_desc_sizes(hba);
+
         err = ufshcd_hba_init(hba);
         if (err)
                 goto out_error;
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h

index 7630600217a2ef91e7d3629e5a4d2a0716c24c2a..cdc8bd05f7dfcf7189a4a616bb90c2ea3a8e2ece 100644 (file)
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -220,6 +220,15 @@ struct ufs_dev_cmd {
         struct ufs_query query;
  };
  
+struct ufs_desc_size {
+       int dev_desc;
+       int pwr_desc;
+       int geom_desc;
+       int interc_desc;
+       int unit_desc;
+       int conf_desc;
+};
+
  /**
   * struct ufs_clk_info - UFS clock related info
   * @list: list headed by hba->clk_list_head
@@ -483,6 +492,7 @@ struct ufs_stats {
   * @clk_list_head: UFS host controller clocks list node head
   * @pwr_info: holds current power mode
   * @max_pwr_info: keeps the device max valid pwm
+ * @desc_size: descriptor sizes reported by device
   * @urgent_bkops_lvl: keeps track of urgent bkops level for device
   * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for
   *  device is known or not.
@@ -666,6 +676,7 @@ struct ufs_hba {
         bool is_urgent_bkops_lvl_checked;
  
         struct rw_semaphore clk_scaling_lock;
+       struct ufs_desc_size desc_size;
  };
  
  /* Returns true if clocks can be gated. Otherwise false */
@@ -832,6 +843,10 @@ int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode,
         enum flag_idn idn, bool *flag_res);
  int ufshcd_hold(struct ufs_hba *hba, bool async);
  void ufshcd_release(struct ufs_hba *hba);
+
+int ufshcd_map_desc_id_to_length(struct ufs_hba *hba, enum desc_idn desc_id,
+       int *desc_length);
+
  u32 ufshcd_get_local_unipro_ver(struct ufs_hba *hba);
  
  /* Wrapper functions for safely calling variant operations */
diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c

index ef474a7487449b4c1d51f82643988eb08fc1ed86..c374e3b5c678d215bfa9e7ed33e2d033e5d4bfb3 100644 (file)
--- a/drivers/scsi/vmw_pvscsi.c
+++ b/drivers/scsi/vmw_pvscsi.c
@@ -1487,7 +1487,7 @@ static int pvscsi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                 irq_flag &= ~PCI_IRQ_MSI;
  
         error = pci_alloc_irq_vectors(adapter->dev, 1, 1, irq_flag);
-       if (error)
+       if (error < 0)
                 goto out_reset_adapter;
  
         adapter->use_req_threshold = pvscsi_setup_req_threshold(adapter, true);
diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c

index 7cbad0d45b9c8053dc77b7ec696966ed6318324d..6ba270e0494ddc689eb7be7a12eecc819bc1ac7b 100644 (file)
--- a/drivers/staging/android/ashmem.c
+++ b/drivers/staging/android/ashmem.c
@@ -409,6 +409,7 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma)
                         ret = PTR_ERR(vmfile);
                         goto out;
                 }
+               vmfile->f_mode |= FMODE_LSEEK;
                 asma->file = vmfile;
         }
         get_file(asma->file);
diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c

index b7b87ecefcdfc712b74a79298594cdaa94ab68bc..9fca8d225ee092e92e1fb71a7a6faff5ee1a831c 100644 (file)
--- a/drivers/staging/lustre/lnet/lnet/lib-socket.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c
@@ -532,7 +532,7 @@ lnet_sock_accept(struct socket **newsockp, struct socket *sock)
  
         newsock->ops = sock->ops;
  
-       rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
+       rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false);
         if (rc == -EAGAIN) {
                 /* Nothing ready, so wait for activity */
                 init_waitqueue_entry(&wait, current);
@@ -540,7 +540,7 @@ lnet_sock_accept(struct socket **newsockp, struct socket *sock)
                 set_current_state(TASK_INTERRUPTIBLE);
                 schedule();
                 remove_wait_queue(sk_sleep(sock->sk), &wait);
-               rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
+               rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false);
         }
  
         if (rc)
diff --git a/drivers/staging/lustre/lustre/include/lustre_disk.h b/drivers/staging/lustre/lustre/include/lustre_disk.h

index 8886458748c1316f27c5825da73c818605cdce0d..a676bccabd436599512975b2153e4df7cf9e1217 100644 (file)
--- a/drivers/staging/lustre/lustre/include/lustre_disk.h
+++ b/drivers/staging/lustre/lustre/include/lustre_disk.h
@@ -133,13 +133,9 @@ struct lustre_sb_info {
         struct obd_export        *lsi_osd_exp;
         char                      lsi_osd_type[16];
         char                      lsi_fstype[16];
-       struct backing_dev_info   lsi_bdi;     /* each client mountpoint needs
-                                               * own backing_dev_info
-                                               */
  };
  
  #define LSI_UMOUNT_FAILOVER          0x00200000
-#define LSI_BDI_INITIALIZED          0x00400000
  
  #define     s2lsi(sb)  ((struct lustre_sb_info *)((sb)->s_fs_info))
  #define     s2lsi_nocast(sb) ((sb)->s_fs_info)
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c

index b229cbc7bb3341dba2adaf3792072c67efcc33ea..d483c44aafe5d3cd16b62978877e216b9712ddee 100644 (file)
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c
@@ -863,15 +863,6 @@ void ll_lli_init(struct ll_inode_info *lli)
         mutex_init(&lli->lli_layout_mutex);
  }
  
-static inline int ll_bdi_register(struct backing_dev_info *bdi)
-{
-       static atomic_t ll_bdi_num = ATOMIC_INIT(0);
-
-       bdi->name = "lustre";
-       return bdi_register(bdi, NULL, "lustre-%d",
-                           atomic_inc_return(&ll_bdi_num));
-}
-
  int ll_fill_super(struct super_block *sb, struct vfsmount *mnt)
  {
         struct lustre_profile *lprof = NULL;
@@ -881,6 +872,7 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt)
         char  *profilenm = get_profile_name(sb);
         struct config_llog_instance *cfg;
         int    err;
+       static atomic_t ll_bdi_num = ATOMIC_INIT(0);
  
         CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
  
@@ -903,16 +895,11 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt)
         if (err)
                 goto out_free;
  
-       err = bdi_init(&lsi->lsi_bdi);
-       if (err)
-               goto out_free;
-       lsi->lsi_flags |= LSI_BDI_INITIALIZED;
-       lsi->lsi_bdi.capabilities = 0;
-       err = ll_bdi_register(&lsi->lsi_bdi);
+       err = super_setup_bdi_name(sb, "lustre-%d",
+                                  atomic_inc_return(&ll_bdi_num));
         if (err)
                 goto out_free;
  
-       sb->s_bdi = &lsi->lsi_bdi;
         /* kernel >= 2.6.38 store dentry operations in sb->s_d_op. */
         sb->s_d_op = &ll_d_ops;
  
@@ -1033,11 +1020,6 @@ void ll_put_super(struct super_block *sb)
         if (profilenm)
                 class_del_profile(profilenm);
  
-       if (lsi->lsi_flags & LSI_BDI_INITIALIZED) {
-               bdi_destroy(&lsi->lsi_bdi);
-               lsi->lsi_flags &= ~LSI_BDI_INITIALIZED;
-       }
-
         ll_free_sbi(sb);
         lsi->lsi_llsbi = NULL;
  
diff --git a/drivers/staging/octeon/ethernet-rx.c b/drivers/staging/octeon/ethernet-rx.c

index 7f8cf875157c60009d51b22c71cc47b402394091..65a2856319948e4c3fb184814334c651f4f9eed4 100644 (file)
--- a/drivers/staging/octeon/ethernet-rx.c
+++ b/drivers/staging/octeon/ethernet-rx.c
@@ -336,7 +336,6 @@ static int cvm_oct_poll(struct oct_rx_group *rx_group, int budget)
                 if (likely((port < TOTAL_NUMBER_OF_PORTS) &&
                            cvm_oct_device[port])) {
                         struct net_device *dev = cvm_oct_device[port];
-                       struct octeon_ethernet *priv = netdev_priv(dev);
  
                         /*
                          * Only accept packets for devices that are
diff --git a/drivers/staging/vc04_services/Kconfig b/drivers/staging/vc04_services/Kconfig

index e61e4ca064a8ab43b4dc4954b22fa3a52310379f..74094fff4367813a03e5fb59357a1e1054ea002d 100644 (file)
--- a/drivers/staging/vc04_services/Kconfig
+++ b/drivers/staging/vc04_services/Kconfig
@@ -1,6 +1,7 @@
  config BCM2835_VCHIQ
         tristate "Videocore VCHIQ"
         depends on HAS_DMA
+       depends on OF
         depends on RASPBERRYPI_FIRMWARE || (COMPILE_TEST && !RASPBERRYPI_FIRMWARE)
         default y
         help
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c

index a91802432f2f47d1b163ba9f8e2da90dabe28e62..e3f9ed3690b7a86103472de987c03fd76becd59b 100644 (file)
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -485,8 +485,7 @@ static void iscsit_get_rx_pdu(struct iscsi_conn *);
  
  int iscsit_queue_rsp(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
  {
-       iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state);
-       return 0;
+       return iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state);
  }
  EXPORT_SYMBOL(iscsit_queue_rsp);
  
diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c

index bf40f03755ddc50697652ccde864d40df840fa0b..5798810197ecf6f5fcf6f5d219de0f45e607b20c 100644 (file)
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -167,10 +167,7 @@ static struct se_tpg_np *lio_target_call_addnptotpg(
         struct iscsi_portal_group *tpg;
         struct iscsi_tpg_np *tpg_np;
         char *str, *str2, *ip_str, *port_str;
-       struct sockaddr_storage sockaddr;
-       struct sockaddr_in *sock_in;
-       struct sockaddr_in6 *sock_in6;
-       unsigned long port;
+       struct sockaddr_storage sockaddr = { };
         int ret;
         char buf[MAX_PORTAL_LEN + 1];
  
@@ -182,21 +179,19 @@ static struct se_tpg_np *lio_target_call_addnptotpg(
         memset(buf, 0, MAX_PORTAL_LEN + 1);
         snprintf(buf, MAX_PORTAL_LEN + 1, "%s", name);
  
-       memset(&sockaddr, 0, sizeof(struct sockaddr_storage));
-
         str = strstr(buf, "[");
         if (str) {
-               const char *end;
-
                 str2 = strstr(str, "]");
                 if (!str2) {
                         pr_err("Unable to locate trailing \"]\""
                                 " in IPv6 iSCSI network portal address\n");
                         return ERR_PTR(-EINVAL);
                 }
-               str++; /* Skip over leading "[" */
+
+               ip_str = str + 1; /* Skip over leading "[" */
                 *str2 = '\0'; /* Terminate the unbracketed IPv6 address */
                 str2++; /* Skip over the \0 */
+
                 port_str = strstr(str2, ":");
                 if (!port_str) {
                         pr_err("Unable to locate \":port\""
@@ -205,23 +200,8 @@ static struct se_tpg_np *lio_target_call_addnptotpg(
                 }
                 *port_str = '\0'; /* Terminate string for IP */
                 port_str++; /* Skip over ":" */
-
-               ret = kstrtoul(port_str, 0, &port);
-               if (ret < 0) {
-                       pr_err("kstrtoul() failed for port_str: %d\n", ret);
-                       return ERR_PTR(ret);
-               }
-               sock_in6 = (struct sockaddr_in6 *)&sockaddr;
-               sock_in6->sin6_family = AF_INET6;
-               sock_in6->sin6_port = htons((unsigned short)port);
-               ret = in6_pton(str, -1,
-                               (void *)&sock_in6->sin6_addr.in6_u, -1, &end);
-               if (ret <= 0) {
-                       pr_err("in6_pton returned: %d\n", ret);
-                       return ERR_PTR(-EINVAL);
-               }
         } else {
-               str = ip_str = &buf[0];
+               ip_str = &buf[0];
                 port_str = strstr(ip_str, ":");
                 if (!port_str) {
                         pr_err("Unable to locate \":port\""
@@ -230,17 +210,15 @@ static struct se_tpg_np *lio_target_call_addnptotpg(
                 }
                 *port_str = '\0'; /* Terminate string for IP */
                 port_str++; /* Skip over ":" */
+       }
  
-               ret = kstrtoul(port_str, 0, &port);
-               if (ret < 0) {
-                       pr_err("kstrtoul() failed for port_str: %d\n", ret);
-                       return ERR_PTR(ret);
-               }
-               sock_in = (struct sockaddr_in *)&sockaddr;
-               sock_in->sin_family = AF_INET;
-               sock_in->sin_port = htons((unsigned short)port);
-               sock_in->sin_addr.s_addr = in_aton(ip_str);
+       ret = inet_pton_with_scope(&init_net, AF_UNSPEC, ip_str,
+                       port_str, &sockaddr);
+       if (ret) {
+               pr_err("malformed ip/port passed: %s\n", name);
+               return ERR_PTR(ret);
         }
+
         tpg = container_of(se_tpg, struct iscsi_portal_group, tpg_se_tpg);
         ret = iscsit_get_tpg(tpg);
         if (ret < 0)
@@ -1398,11 +1376,10 @@ static u32 lio_sess_get_initiator_sid(
  static int lio_queue_data_in(struct se_cmd *se_cmd)
  {
         struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+       struct iscsi_conn *conn = cmd->conn;
  
         cmd->i_state = ISTATE_SEND_DATAIN;
-       cmd->conn->conn_transport->iscsit_queue_data_in(cmd->conn, cmd);
-
-       return 0;
+       return conn->conn_transport->iscsit_queue_data_in(conn, cmd);
  }
  
  static int lio_write_pending(struct se_cmd *se_cmd)
@@ -1431,16 +1408,14 @@ static int lio_write_pending_status(struct se_cmd *se_cmd)
  static int lio_queue_status(struct se_cmd *se_cmd)
  {
         struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+       struct iscsi_conn *conn = cmd->conn;
  
         cmd->i_state = ISTATE_SEND_STATUS;
  
         if (cmd->se_cmd.scsi_status || cmd->sense_reason) {
-               iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state);
-               return 0;
+               return iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
         }
-       cmd->conn->conn_transport->iscsit_queue_status(cmd->conn, cmd);
-
-       return 0;
+       return conn->conn_transport->iscsit_queue_status(conn, cmd);
  }
  
  static void lio_queue_tm_rsp(struct se_cmd *se_cmd)
diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c

index e65bf78ceef3740fc1923c1b3ed446aa2996b82d..fce627628200cf9917a8212da7a17ccff3d6136c 100644 (file)
--- a/drivers/target/iscsi/iscsi_target_parameters.c
+++ b/drivers/target/iscsi/iscsi_target_parameters.c
@@ -781,22 +781,6 @@ static void iscsi_check_proposer_for_optional_reply(struct iscsi_param *param)
         } else if (IS_TYPE_NUMBER(param)) {
                 if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH))
                         SET_PSTATE_REPLY_OPTIONAL(param);
-               /*
-                * The GlobalSAN iSCSI Initiator for MacOSX does
-                * not respond to MaxBurstLength, FirstBurstLength,
-                * DefaultTime2Wait or DefaultTime2Retain parameter keys.
-                * So, we set them to 'reply optional' here, and assume the
-                * the defaults from iscsi_parameters.h if the initiator
-                * is not RFC compliant and the keys are not negotiated.
-                */
-               if (!strcmp(param->name, MAXBURSTLENGTH))
-                       SET_PSTATE_REPLY_OPTIONAL(param);
-               if (!strcmp(param->name, FIRSTBURSTLENGTH))
-                       SET_PSTATE_REPLY_OPTIONAL(param);
-               if (!strcmp(param->name, DEFAULTTIME2WAIT))
-                       SET_PSTATE_REPLY_OPTIONAL(param);
-               if (!strcmp(param->name, DEFAULTTIME2RETAIN))
-                       SET_PSTATE_REPLY_OPTIONAL(param);
                 /*
                  * Required for gPXE iSCSI boot client
                  */
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c

index 5041a9c8bdcbfd9bf9eb9368e850bdb8792a6be9..7d3e2fcc26a0da82629102693a99750622afed95 100644 (file)
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -567,7 +567,7 @@ static void iscsit_remove_cmd_from_immediate_queue(
         }
  }
  
-void iscsit_add_cmd_to_response_queue(
+int iscsit_add_cmd_to_response_queue(
         struct iscsi_cmd *cmd,
         struct iscsi_conn *conn,
         u8 state)
@@ -578,7 +578,7 @@ void iscsit_add_cmd_to_response_queue(
         if (!qr) {
                 pr_err("Unable to allocate memory for"
                         " struct iscsi_queue_req\n");
-               return;
+               return -ENOMEM;
         }
         INIT_LIST_HEAD(&qr->qr_list);
         qr->cmd = cmd;
@@ -590,6 +590,7 @@ void iscsit_add_cmd_to_response_queue(
         spin_unlock_bh(&conn->response_queue_lock);
  
         wake_up(&conn->queues_wq);
+       return 0;
  }
  
  struct iscsi_queue_req *iscsit_get_cmd_from_response_queue(struct iscsi_conn *conn)
@@ -737,21 +738,23 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown)
  {
         struct se_cmd *se_cmd = NULL;
         int rc;
+       bool op_scsi = false;
         /*
          * Determine if a struct se_cmd is associated with
          * this struct iscsi_cmd.
          */
         switch (cmd->iscsi_opcode) {
         case ISCSI_OP_SCSI_CMD:
-               se_cmd = &cmd->se_cmd;
-               __iscsit_free_cmd(cmd, true, shutdown);
+               op_scsi = true;
                 /*
                  * Fallthrough
                  */
         case ISCSI_OP_SCSI_TMFUNC:
-               rc = transport_generic_free_cmd(&cmd->se_cmd, shutdown);
-               if (!rc && shutdown && se_cmd && se_cmd->se_sess) {
-                       __iscsit_free_cmd(cmd, true, shutdown);
+               se_cmd = &cmd->se_cmd;
+               __iscsit_free_cmd(cmd, op_scsi, shutdown);
+               rc = transport_generic_free_cmd(se_cmd, shutdown);
+               if (!rc && shutdown && se_cmd->se_sess) {
+                       __iscsit_free_cmd(cmd, op_scsi, shutdown);
                         target_put_sess_cmd(se_cmd);
                 }
                 break;
diff --git a/drivers/target/iscsi/iscsi_target_util.h b/drivers/target/iscsi/iscsi_target_util.h

index 8ff08856516aba68394fc07661ec71b635c8b6a2..9e4197af8708e1056a08f4c01d423581a075bef1 100644 (file)
--- a/drivers/target/iscsi/iscsi_target_util.h
+++ b/drivers/target/iscsi/iscsi_target_util.h
@@ -31,7 +31,7 @@ extern int iscsit_find_cmd_for_recovery(struct iscsi_session *, struct iscsi_cmd
                         struct iscsi_conn_recovery **, itt_t);
  extern void iscsit_add_cmd_to_immediate_queue(struct iscsi_cmd *, struct iscsi_conn *, u8);
  extern struct iscsi_queue_req *iscsit_get_cmd_from_immediate_queue(struct iscsi_conn *);
-extern void iscsit_add_cmd_to_response_queue(struct iscsi_cmd *, struct iscsi_conn *, u8);
+extern int iscsit_add_cmd_to_response_queue(struct iscsi_cmd *, struct iscsi_conn *, u8);
  extern struct iscsi_queue_req *iscsit_get_cmd_from_response_queue(struct iscsi_conn *);
  extern void iscsit_remove_cmd_from_tx_queues(struct iscsi_cmd *, struct iscsi_conn *);
  extern bool iscsit_conn_all_queues_empty(struct iscsi_conn *);
diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c

index f5e330099bfca713f4cb12bd2dc77826fdad1b3b..fc4a9c303d559f95b1216857efd8ae6ce77b96ca 100644 (file)
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -43,7 +43,7 @@
  #include "target_core_ua.h"
  
  static sense_reason_t core_alua_check_transition(int state, int valid,
-                                                int *primary);
+                                                int *primary, int explicit);
  static int core_alua_set_tg_pt_secondary_state(
                 struct se_lun *lun, int explicit, int offline);
  
@@ -197,8 +197,7 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd)
                 /*
                  * Set the ASYMMETRIC ACCESS State
                  */
-               buf[off++] |= (atomic_read(
-                       &tg_pt_gp->tg_pt_gp_alua_access_state) & 0xff);
+               buf[off++] |= tg_pt_gp->tg_pt_gp_alua_access_state & 0xff;
                 /*
                  * Set supported ASYMMETRIC ACCESS State bits
                  */
@@ -335,8 +334,8 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd)
                  * the state is a primary or secondary target port asymmetric
                  * access state.
                  */
-               rc = core_alua_check_transition(alua_access_state,
-                                               valid_states, &primary);
+               rc = core_alua_check_transition(alua_access_state, valid_states,
+                                               &primary, 1);
                 if (rc) {
                         /*
                          * If the SET TARGET PORT GROUPS attempts to establish
@@ -691,7 +690,7 @@ target_alua_state_check(struct se_cmd *cmd)
  
         if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)
                 return 0;
-       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA)
                 return 0;
  
         /*
@@ -710,7 +709,7 @@ target_alua_state_check(struct se_cmd *cmd)
  
         spin_lock(&lun->lun_tg_pt_gp_lock);
         tg_pt_gp = lun->lun_tg_pt_gp;
-       out_alua_state = atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state);
+       out_alua_state = tg_pt_gp->tg_pt_gp_alua_access_state;
         nonop_delay_msecs = tg_pt_gp->tg_pt_gp_nonop_delay_msecs;
  
         // XXX: keeps using tg_pt_gp witout reference after unlock
@@ -762,7 +761,7 @@ target_alua_state_check(struct se_cmd *cmd)
   * Check implicit and explicit ALUA state change request.
   */
  static sense_reason_t
-core_alua_check_transition(int state, int valid, int *primary)
+core_alua_check_transition(int state, int valid, int *primary, int explicit)
  {
         /*
          * OPTIMIZED, NON-OPTIMIZED, STANDBY and UNAVAILABLE are
@@ -804,11 +803,14 @@ core_alua_check_transition(int state, int valid, int *primary)
                 *primary = 0;
                 break;
         case ALUA_ACCESS_STATE_TRANSITION:
-               /*
-                * Transitioning is set internally, and
-                * cannot be selected manually.
-                */
-               goto not_supported;
+               if (!(valid & ALUA_T_SUP) || explicit)
+                       /*
+                        * Transitioning is set internally and by tcmu daemon,
+                        * and cannot be selected through a STPG.
+                        */
+                       goto not_supported;
+               *primary = 0;
+               break;
         default:
                 pr_err("Unknown ALUA access state: 0x%02x\n", state);
                 return TCM_INVALID_PARAMETER_LIST;
@@ -908,7 +910,7 @@ static int core_alua_write_tpg_metadata(
  }
  
  /*
- * Called with tg_pt_gp->tg_pt_gp_md_mutex held
+ * Called with tg_pt_gp->tg_pt_gp_transition_mutex held
   */
  static int core_alua_update_tpg_primary_metadata(
         struct t10_alua_tg_pt_gp *tg_pt_gp)
@@ -931,7 +933,7 @@ static int core_alua_update_tpg_primary_metadata(
                         "alua_access_state=0x%02x\n"
                         "alua_access_status=0x%02x\n",
                         tg_pt_gp->tg_pt_gp_id,
-                       tg_pt_gp->tg_pt_gp_alua_pending_state,
+                       tg_pt_gp->tg_pt_gp_alua_access_state,
                         tg_pt_gp->tg_pt_gp_alua_access_status);
  
         snprintf(path, ALUA_METADATA_PATH_LEN,
@@ -1010,100 +1012,42 @@ static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp)
         spin_unlock(&tg_pt_gp->tg_pt_gp_lock);
  }
  
-static void core_alua_do_transition_tg_pt_work(struct work_struct *work)
-{
-       struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(work,
-               struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work.work);
-       struct se_device *dev = tg_pt_gp->tg_pt_gp_dev;
-       bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status ==
-                        ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG);
-
-       /*
-        * Update the ALUA metadata buf that has been allocated in
-        * core_alua_do_port_transition(), this metadata will be written
-        * to struct file.
-        *
-        * Note that there is the case where we do not want to update the
-        * metadata when the saved metadata is being parsed in userspace
-        * when setting the existing port access state and access status.
-        *
-        * Also note that the failure to write out the ALUA metadata to
-        * struct file does NOT affect the actual ALUA transition.
-        */
-       if (tg_pt_gp->tg_pt_gp_write_metadata) {
-               mutex_lock(&tg_pt_gp->tg_pt_gp_md_mutex);
-               core_alua_update_tpg_primary_metadata(tg_pt_gp);
-               mutex_unlock(&tg_pt_gp->tg_pt_gp_md_mutex);
-       }
-       /*
-        * Set the current primary ALUA access state to the requested new state
-        */
-       atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
-                  tg_pt_gp->tg_pt_gp_alua_pending_state);
-
-       pr_debug("Successful %s ALUA transition TG PT Group: %s ID: %hu"
-               " from primary access state %s to %s\n", (explicit) ? "explicit" :
-               "implicit", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item),
-               tg_pt_gp->tg_pt_gp_id,
-               core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_previous_state),
-               core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_pending_state));
-
-       core_alua_queue_state_change_ua(tg_pt_gp);
-
-       spin_lock(&dev->t10_alua.tg_pt_gps_lock);
-       atomic_dec(&tg_pt_gp->tg_pt_gp_ref_cnt);
-       spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
-
-       if (tg_pt_gp->tg_pt_gp_transition_complete)
-               complete(tg_pt_gp->tg_pt_gp_transition_complete);
-}
-
  static int core_alua_do_transition_tg_pt(
         struct t10_alua_tg_pt_gp *tg_pt_gp,
         int new_state,
         int explicit)
  {
-       struct se_device *dev = tg_pt_gp->tg_pt_gp_dev;
-       DECLARE_COMPLETION_ONSTACK(wait);
+       int prev_state;
  
+       mutex_lock(&tg_pt_gp->tg_pt_gp_transition_mutex);
         /* Nothing to be done here */
-       if (atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == new_state)
+       if (tg_pt_gp->tg_pt_gp_alua_access_state == new_state) {
+               mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
                 return 0;
+       }
  
-       if (new_state == ALUA_ACCESS_STATE_TRANSITION)
+       if (explicit && new_state == ALUA_ACCESS_STATE_TRANSITION) {
+               mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
                 return -EAGAIN;
-
-       /*
-        * Flush any pending transitions
-        */
-       if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs &&
-           atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) ==
-           ALUA_ACCESS_STATE_TRANSITION) {
-               /* Just in case */
-               tg_pt_gp->tg_pt_gp_alua_pending_state = new_state;
-               tg_pt_gp->tg_pt_gp_transition_complete = &wait;
-               flush_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work);
-               wait_for_completion(&wait);
-               tg_pt_gp->tg_pt_gp_transition_complete = NULL;
-               return 0;
         }
  
         /*
          * Save the old primary ALUA access state, and set the current state
          * to ALUA_ACCESS_STATE_TRANSITION.
          */
-       tg_pt_gp->tg_pt_gp_alua_previous_state =
-               atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state);
-       tg_pt_gp->tg_pt_gp_alua_pending_state = new_state;
-
-       atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
-                       ALUA_ACCESS_STATE_TRANSITION);
+       prev_state = tg_pt_gp->tg_pt_gp_alua_access_state;
+       tg_pt_gp->tg_pt_gp_alua_access_state = ALUA_ACCESS_STATE_TRANSITION;
         tg_pt_gp->tg_pt_gp_alua_access_status = (explicit) ?
                                 ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG :
                                 ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA;
  
         core_alua_queue_state_change_ua(tg_pt_gp);
  
+       if (new_state == ALUA_ACCESS_STATE_TRANSITION) {
+               mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
+               return 0;
+       }
+
         /*
          * Check for the optional ALUA primary state transition delay
          */
@@ -1111,27 +1055,36 @@ static int core_alua_do_transition_tg_pt(
                 msleep_interruptible(tg_pt_gp->tg_pt_gp_trans_delay_msecs);
  
         /*
-        * Take a reference for workqueue item
+        * Set the current primary ALUA access state to the requested new state
          */
-       spin_lock(&dev->t10_alua.tg_pt_gps_lock);
-       atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt);
-       spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
+       tg_pt_gp->tg_pt_gp_alua_access_state = new_state;
  
-       if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs) {
-               unsigned long transition_tmo;
-
-               transition_tmo = tg_pt_gp->tg_pt_gp_implicit_trans_secs * HZ;
-               queue_delayed_work(tg_pt_gp->tg_pt_gp_dev->tmr_wq,
-                                  &tg_pt_gp->tg_pt_gp_transition_work,
-                                  transition_tmo);
-       } else {
-               tg_pt_gp->tg_pt_gp_transition_complete = &wait;
-               queue_delayed_work(tg_pt_gp->tg_pt_gp_dev->tmr_wq,
-                                  &tg_pt_gp->tg_pt_gp_transition_work, 0);
-               wait_for_completion(&wait);
-               tg_pt_gp->tg_pt_gp_transition_complete = NULL;
+       /*
+        * Update the ALUA metadata buf that has been allocated in
+        * core_alua_do_port_transition(), this metadata will be written
+        * to struct file.
+        *
+        * Note that there is the case where we do not want to update the
+        * metadata when the saved metadata is being parsed in userspace
+        * when setting the existing port access state and access status.
+        *
+        * Also note that the failure to write out the ALUA metadata to
+        * struct file does NOT affect the actual ALUA transition.
+        */
+       if (tg_pt_gp->tg_pt_gp_write_metadata) {
+               core_alua_update_tpg_primary_metadata(tg_pt_gp);
         }
  
+       pr_debug("Successful %s ALUA transition TG PT Group: %s ID: %hu"
+               " from primary access state %s to %s\n", (explicit) ? "explicit" :
+               "implicit", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item),
+               tg_pt_gp->tg_pt_gp_id,
+               core_alua_dump_state(prev_state),
+               core_alua_dump_state(new_state));
+
+       core_alua_queue_state_change_ua(tg_pt_gp);
+
+       mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
         return 0;
  }
  
@@ -1149,8 +1102,12 @@ int core_alua_do_port_transition(
         struct t10_alua_tg_pt_gp *tg_pt_gp;
         int primary, valid_states, rc = 0;
  
+       if (l_dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA)
+               return -ENODEV;
+
         valid_states = l_tg_pt_gp->tg_pt_gp_alua_supported_states;
-       if (core_alua_check_transition(new_state, valid_states, &primary) != 0)
+       if (core_alua_check_transition(new_state, valid_states, &primary,
+                                      explicit) != 0)
                 return -EINVAL;
  
         local_lu_gp_mem = l_dev->dev_alua_lu_gp_mem;
@@ -1692,14 +1649,12 @@ struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp(struct se_device *dev,
         }
         INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_list);
         INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_lun_list);
-       mutex_init(&tg_pt_gp->tg_pt_gp_md_mutex);
+       mutex_init(&tg_pt_gp->tg_pt_gp_transition_mutex);
         spin_lock_init(&tg_pt_gp->tg_pt_gp_lock);
         atomic_set(&tg_pt_gp->tg_pt_gp_ref_cnt, 0);
-       INIT_DELAYED_WORK(&tg_pt_gp->tg_pt_gp_transition_work,
-                         core_alua_do_transition_tg_pt_work);
         tg_pt_gp->tg_pt_gp_dev = dev;
-       atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
-               ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED);
+       tg_pt_gp->tg_pt_gp_alua_access_state =
+                       ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED;
         /*
          * Enable both explicit and implicit ALUA support by default
          */
@@ -1804,8 +1759,6 @@ void core_alua_free_tg_pt_gp(
         dev->t10_alua.alua_tg_pt_gps_counter--;
         spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
  
-       flush_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work);
-
         /*
          * Allow a struct t10_alua_tg_pt_gp_member * referenced by
          * core_alua_get_tg_pt_gp_by_name() in
@@ -1945,8 +1898,8 @@ ssize_t core_alua_show_tg_pt_gp_info(struct se_lun *lun, char *page)
                         "Primary Access Status: %s\nTG Port Secondary Access"
                         " State: %s\nTG Port Secondary Access Status: %s\n",
                         config_item_name(tg_pt_ci), tg_pt_gp->tg_pt_gp_id,
-                       core_alua_dump_state(atomic_read(
-                                       &tg_pt_gp->tg_pt_gp_alua_access_state)),
+                       core_alua_dump_state(
+                               tg_pt_gp->tg_pt_gp_alua_access_state),
                         core_alua_dump_status(
                                 tg_pt_gp->tg_pt_gp_alua_access_status),
                         atomic_read(&lun->lun_tg_pt_secondary_offline) ?
@@ -1973,7 +1926,7 @@ ssize_t core_alua_store_tg_pt_gp_info(
         unsigned char buf[TG_PT_GROUP_NAME_BUF];
         int move = 0;
  
-       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH ||
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA ||
             (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE))
                 return -ENODEV;
  
@@ -2230,7 +2183,7 @@ ssize_t core_alua_store_offline_bit(
         unsigned long tmp;
         int ret;
  
-       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH ||
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA ||
             (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE))
                 return -ENODEV;
  
@@ -2316,7 +2269,8 @@ ssize_t core_alua_store_secondary_write_metadata(
  
  int core_setup_alua(struct se_device *dev)
  {
-       if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) &&
+       if (!(dev->transport->transport_flags &
+            TRANSPORT_FLAG_PASSTHROUGH_ALUA) &&
             !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) {
                 struct t10_alua_lu_gp_member *lu_gp_mem;
  
diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c

index 54b36c9835be3ae2127cb1f447321eba73b824ac..70657fd564406b3c137b02c3f61824f387f554aa 100644 (file)
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -421,6 +421,10 @@ static int target_fabric_tf_ops_check(const struct target_core_fabric_ops *tfo)
                 pr_err("Missing tfo->aborted_task()\n");
                 return -EINVAL;
         }
+       if (!tfo->check_stop_free) {
+               pr_err("Missing tfo->check_stop_free()\n");
+               return -EINVAL;
+       }
         /*
          * We at least require tfo->fabric_make_wwn(), tfo->fabric_drop_wwn()
          * tfo->fabric_make_tpg() and tfo->fabric_drop_tpg() in
@@ -2388,7 +2392,7 @@ static ssize_t target_tg_pt_gp_alua_access_state_show(struct config_item *item,
                 char *page)
  {
         return sprintf(page, "%d\n",
-               atomic_read(&to_tg_pt_gp(item)->tg_pt_gp_alua_access_state));
+                      to_tg_pt_gp(item)->tg_pt_gp_alua_access_state);
  }
  
  static ssize_t target_tg_pt_gp_alua_access_state_store(struct config_item *item,
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c

index c754ae33bf7b154a5ce368c066ad336f2c4ee5d5..d2f089cfa9aedcd63f204aa898d775c16330d150 100644 (file)
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -851,7 +851,7 @@ bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
         attrib->unmap_granularity = q->limits.discard_granularity / block_size;
         attrib->unmap_granularity_alignment = q->limits.discard_alignment /
                                                                 block_size;
-       attrib->unmap_zeroes_data = q->limits.discard_zeroes_data;
+       attrib->unmap_zeroes_data = 0;
         return true;
  }
  EXPORT_SYMBOL(target_configure_unmap_from_queue);
diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c

index d8a16ca6baa507b235cbec2fbff56a648874fd2e..d1e6cab8e3d3f0a95cb801f2680c2b8e2474de1d 100644 (file)
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c
@@ -92,6 +92,11 @@ static int target_fabric_mappedlun_link(
                 pr_err("Source se_lun->lun_se_dev does not exist\n");
                 return -EINVAL;
         }
+       if (lun->lun_shutdown) {
+               pr_err("Unable to create mappedlun symlink because"
+                       " lun->lun_shutdown=true\n");
+               return -EINVAL;
+       }
         se_tpg = lun->lun_tpg;
  
         nacl_ci = &lun_acl_ci->ci_parent->ci_group->cg_item;
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c

index a8f8e53f2f574852de573a08a86ad1c25b4cf332..a93d94e68ab5faf5eea22d88ea8d9fe1b26aa897 100644 (file)
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -154,7 +154,7 @@ static void pscsi_tape_read_blocksize(struct se_device *dev,
  
         buf = kzalloc(12, GFP_KERNEL);
         if (!buf)
-               return;
+               goto out_free;
  
         memset(cdb, 0, MAX_COMMAND_SIZE);
         cdb[0] = MODE_SENSE;
@@ -169,9 +169,10 @@ static void pscsi_tape_read_blocksize(struct se_device *dev,
          * If MODE_SENSE still returns zero, set the default value to 1024.
          */
         sdev->sector_size = (buf[9] << 16) | (buf[10] << 8) | (buf[11]);
+out_free:
         if (!sdev->sector_size)
                 sdev->sector_size = 1024;
-out_free:
+
         kfree(buf);
  }
  
@@ -314,9 +315,10 @@ static int pscsi_add_device_to_list(struct se_device *dev,
                                 sd->lun, sd->queue_depth);
         }
  
-       dev->dev_attrib.hw_block_size = sd->sector_size;
+       dev->dev_attrib.hw_block_size =
+               min_not_zero((int)sd->sector_size, 512);
         dev->dev_attrib.hw_max_sectors =
-               min_t(int, sd->host->max_sectors, queue_max_hw_sectors(q));
+               min_not_zero(sd->host->max_sectors, queue_max_hw_sectors(q));
         dev->dev_attrib.hw_queue_depth = sd->queue_depth;
  
         /*
@@ -339,8 +341,10 @@ static int pscsi_add_device_to_list(struct se_device *dev,
         /*
          * For TYPE_TAPE, attempt to determine blocksize with MODE_SENSE.
          */
-       if (sd->type == TYPE_TAPE)
+       if (sd->type == TYPE_TAPE) {
                 pscsi_tape_read_blocksize(dev, sd);
+               dev->dev_attrib.hw_block_size = sd->sector_size;
+       }
         return 0;
  }
  
@@ -406,7 +410,7 @@ static int pscsi_create_type_disk(struct se_device *dev, struct scsi_device *sd)
  /*
   * Called with struct Scsi_Host->host_lock called.
   */
-static int pscsi_create_type_rom(struct se_device *dev, struct scsi_device *sd)
+static int pscsi_create_type_nondisk(struct se_device *dev, struct scsi_device *sd)
         __releases(sh->host_lock)
  {
         struct pscsi_hba_virt *phv = dev->se_hba->hba_ptr;
@@ -433,28 +437,6 @@ static int pscsi_create_type_rom(struct se_device *dev, struct scsi_device *sd)
         return 0;
  }
  
-/*
- * Called with struct Scsi_Host->host_lock called.
- */
-static int pscsi_create_type_other(struct se_device *dev,
-               struct scsi_device *sd)
-       __releases(sh->host_lock)
-{
-       struct pscsi_hba_virt *phv = dev->se_hba->hba_ptr;
-       struct Scsi_Host *sh = sd->host;
-       int ret;
-
-       spin_unlock_irq(sh->host_lock);
-       ret = pscsi_add_device_to_list(dev, sd);
-       if (ret)
-               return ret;
-
-       pr_debug("CORE_PSCSI[%d] - Added Type: %s for %d:%d:%d:%llu\n",
-               phv->phv_host_id, scsi_device_type(sd->type), sh->host_no,
-               sd->channel, sd->id, sd->lun);
-       return 0;
-}
-
  static int pscsi_configure_device(struct se_device *dev)
  {
         struct se_hba *hba = dev->se_hba;
@@ -542,11 +524,8 @@ static int pscsi_configure_device(struct se_device *dev)
                 case TYPE_DISK:
                         ret = pscsi_create_type_disk(dev, sd);
                         break;
-               case TYPE_ROM:
-                       ret = pscsi_create_type_rom(dev, sd);
-                       break;
                 default:
-                       ret = pscsi_create_type_other(dev, sd);
+                       ret = pscsi_create_type_nondisk(dev, sd);
                         break;
                 }
  
@@ -611,8 +590,7 @@ static void pscsi_free_device(struct se_device *dev)
                 else if (pdv->pdv_lld_host)
                         scsi_host_put(pdv->pdv_lld_host);
  
-               if ((sd->type == TYPE_DISK) || (sd->type == TYPE_ROM))
-                       scsi_device_put(sd);
+               scsi_device_put(sd);
  
                 pdv->pdv_sd = NULL;
         }
@@ -1030,7 +1008,7 @@ pscsi_execute_cmd(struct se_cmd *cmd)
                 req->timeout = PS_TIMEOUT_DISK;
         else
                 req->timeout = PS_TIMEOUT_OTHER;
-       req->retries = PS_RETRY;
+       scsi_req(req)->retries = PS_RETRY;
  
         blk_execute_rq_nowait(pdv->pdv_sd->request_queue, NULL, req,
                         (cmd->sam_task_attr == TCM_HEAD_TAG),
@@ -1064,7 +1042,6 @@ static sector_t pscsi_get_blocks(struct se_device *dev)
         if (pdv->pdv_bd && pdv->pdv_bd->bd_part)
                 return pdv->pdv_bd->bd_part->nr_sects;
  
-       dump_stack();
         return 0;
  }
  
@@ -1073,7 +1050,7 @@ static void pscsi_req_done(struct request *req, int uptodate)
         struct se_cmd *cmd = req->end_io_data;
         struct pscsi_plugin_task *pt = cmd->priv;
  
-       pt->pscsi_result = req->errors;
+       pt->pscsi_result = scsi_req(req)->result;
         pt->pscsi_resid = scsi_req(req)->resid_len;
  
         cmd->scsi_status = status_byte(pt->pscsi_result) << 1;
@@ -1103,7 +1080,8 @@ static void pscsi_req_done(struct request *req, int uptodate)
  static const struct target_backend_ops pscsi_ops = {
         .name                   = "pscsi",
         .owner                  = THIS_MODULE,
-       .transport_flags        = TRANSPORT_FLAG_PASSTHROUGH,
+       .transport_flags        = TRANSPORT_FLAG_PASSTHROUGH |
+                                 TRANSPORT_FLAG_PASSTHROUGH_ALUA,
         .attach_hba             = pscsi_attach_hba,
         .detach_hba             = pscsi_detach_hba,
         .pmode_enable_hba       = pscsi_pmode_enable_hba,
diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c

index 68d8aef7ab78d4084b57e6fd0fa0b0afce7251df..c194063f169b13ce44bf014894960693530e25d7 100644 (file)
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -1105,9 +1105,15 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                         return ret;
                 break;
         case VERIFY:
+       case VERIFY_16:
                 size = 0;
-               sectors = transport_get_sectors_10(cdb);
-               cmd->t_task_lba = transport_lba_32(cdb);
+               if (cdb[0] == VERIFY) {
+                       sectors = transport_get_sectors_10(cdb);
+                       cmd->t_task_lba = transport_lba_32(cdb);
+               } else {
+                       sectors = transport_get_sectors_16(cdb);
+                       cmd->t_task_lba = transport_lba_64(cdb);
+               }
                 cmd->execute_cmd = sbc_emulate_noop;
                 goto check_lba;
         case REZERO_UNIT:
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c

index c0dbfa0165750523e552b93fdbb0c64c94cdab2d..dfaef4d3b2d2698088754c08f1846bc893815237 100644 (file)
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -602,7 +602,8 @@ int core_tpg_add_lun(
         if (ret)
                 goto out_kill_ref;
  
-       if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) &&
+       if (!(dev->transport->transport_flags &
+            TRANSPORT_FLAG_PASSTHROUGH_ALUA) &&
             !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE))
                 target_attach_tg_pt_gp(lun, dev->t10_alua.default_tg_pt_gp);
  
@@ -641,6 +642,8 @@ void core_tpg_remove_lun(
          */
         struct se_device *dev = rcu_dereference_raw(lun->lun_se_dev);
  
+       lun->lun_shutdown = true;
+
         core_clear_lun_from_tpg(lun, tpg);
         /*
          * Wait for any active I/O references to percpu se_lun->lun_ref to
@@ -662,6 +665,8 @@ void core_tpg_remove_lun(
         }
         if (!(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE))
                 hlist_del_rcu(&lun->link);
+
+       lun->lun_shutdown = false;
         mutex_unlock(&tpg->tpg_lun_mutex);
  
         percpu_ref_exit(&lun->lun_ref);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c

index 434d9d693989179f72abca120e01155d664d0c87..a0cd56ee5fe984f7ddf27c41157f7314343d23c1 100644 (file)
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -64,8 +64,9 @@ struct kmem_cache *t10_alua_lba_map_cache;
  struct kmem_cache *t10_alua_lba_map_mem_cache;
  
  static void transport_complete_task_attr(struct se_cmd *cmd);
+static int translate_sense_reason(struct se_cmd *cmd, sense_reason_t reason);
  static void transport_handle_queue_full(struct se_cmd *cmd,
-               struct se_device *dev);
+               struct se_device *dev, int err, bool write_pending);
  static int transport_put_cmd(struct se_cmd *cmd);
  static void target_complete_ok_work(struct work_struct *work);
  
@@ -636,8 +637,7 @@ static int transport_cmd_check_stop_to_fabric(struct se_cmd *cmd)
          * Fabric modules are expected to return '1' here if the se_cmd being
          * passed is released at this point, or zero if not being released.
          */
-       return cmd->se_tfo->check_stop_free ? cmd->se_tfo->check_stop_free(cmd)
-               : 0;
+       return cmd->se_tfo->check_stop_free(cmd);
  }
  
  static void transport_lun_remove_cmd(struct se_cmd *cmd)
@@ -805,7 +805,8 @@ void target_qf_do_work(struct work_struct *work)
  
                 if (cmd->t_state == TRANSPORT_COMPLETE_QF_WP)
                         transport_write_pending_qf(cmd);
-               else if (cmd->t_state == TRANSPORT_COMPLETE_QF_OK)
+               else if (cmd->t_state == TRANSPORT_COMPLETE_QF_OK ||
+                        cmd->t_state == TRANSPORT_COMPLETE_QF_ERR)
                         transport_complete_qf(cmd);
         }
  }
@@ -1720,7 +1721,7 @@ void transport_generic_request_failure(struct se_cmd *cmd,
                 }
                 trace_target_cmd_complete(cmd);
                 ret = cmd->se_tfo->queue_status(cmd);
-               if (ret == -EAGAIN || ret == -ENOMEM)
+               if (ret)
                         goto queue_full;
                 goto check_stop;
         default:
@@ -1731,7 +1732,7 @@ void transport_generic_request_failure(struct se_cmd *cmd,
         }
  
         ret = transport_send_check_condition_and_sense(cmd, sense_reason, 0);
-       if (ret == -EAGAIN || ret == -ENOMEM)
+       if (ret)
                 goto queue_full;
  
  check_stop:
@@ -1740,8 +1741,7 @@ check_stop:
         return;
  
  queue_full:
-       cmd->t_state = TRANSPORT_COMPLETE_QF_OK;
-       transport_handle_queue_full(cmd, cmd->se_dev);
+       transport_handle_queue_full(cmd, cmd->se_dev, ret, false);
  }
  EXPORT_SYMBOL(transport_generic_request_failure);
  
@@ -1978,13 +1978,29 @@ static void transport_complete_qf(struct se_cmd *cmd)
         int ret = 0;
  
         transport_complete_task_attr(cmd);
+       /*
+        * If a fabric driver ->write_pending() or ->queue_data_in() callback
+        * has returned neither -ENOMEM nor -EAGAIN, assume it's fatal and
+        * the same callbacks should not be retried.  Return CHECK_CONDITION
+        * if a scsi_status is not already set.
+        *
+        * If a fabric driver ->queue_status() has returned non zero, always
+        * keep retrying no matter what.
+        */
+       if (cmd->t_state == TRANSPORT_COMPLETE_QF_ERR) {
+               if (cmd->scsi_status)
+                       goto queue_status;
  
-       if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) {
-               trace_target_cmd_complete(cmd);
-               ret = cmd->se_tfo->queue_status(cmd);
-               goto out;
+               cmd->se_cmd_flags |= SCF_EMULATED_TASK_SENSE;
+               cmd->scsi_status = SAM_STAT_CHECK_CONDITION;
+               cmd->scsi_sense_length  = TRANSPORT_SENSE_BUFFER;
+               translate_sense_reason(cmd, TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE);
+               goto queue_status;
         }
  
+       if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE)
+               goto queue_status;
+
         switch (cmd->data_direction) {
         case DMA_FROM_DEVICE:
                 if (cmd->scsi_status)
@@ -2008,19 +2024,33 @@ queue_status:
                 break;
         }
  
-out:
         if (ret < 0) {
-               transport_handle_queue_full(cmd, cmd->se_dev);
+               transport_handle_queue_full(cmd, cmd->se_dev, ret, false);
                 return;
         }
         transport_lun_remove_cmd(cmd);
         transport_cmd_check_stop_to_fabric(cmd);
  }
  
-static void transport_handle_queue_full(
-       struct se_cmd *cmd,
-       struct se_device *dev)
+static void transport_handle_queue_full(struct se_cmd *cmd, struct se_device *dev,
+                                       int err, bool write_pending)
  {
+       /*
+        * -EAGAIN or -ENOMEM signals retry of ->write_pending() and/or
+        * ->queue_data_in() callbacks from new process context.
+        *
+        * Otherwise for other errors, transport_complete_qf() will send
+        * CHECK_CONDITION via ->queue_status() instead of attempting to
+        * retry associated fabric driver data-transfer callbacks.
+        */
+       if (err == -EAGAIN || err == -ENOMEM) {
+               cmd->t_state = (write_pending) ? TRANSPORT_COMPLETE_QF_WP :
+                                                TRANSPORT_COMPLETE_QF_OK;
+       } else {
+               pr_warn_ratelimited("Got unknown fabric queue status: %d\n", err);
+               cmd->t_state = TRANSPORT_COMPLETE_QF_ERR;
+       }
+
         spin_lock_irq(&dev->qf_cmd_lock);
         list_add_tail(&cmd->se_qf_node, &cmd->se_dev->qf_cmd_list);
         atomic_inc_mb(&dev->dev_qf_count);
@@ -2084,7 +2114,7 @@ static void target_complete_ok_work(struct work_struct *work)
                 WARN_ON(!cmd->scsi_status);
                 ret = transport_send_check_condition_and_sense(
                                         cmd, 0, 1);
-               if (ret == -EAGAIN || ret == -ENOMEM)
+               if (ret)
                         goto queue_full;
  
                 transport_lun_remove_cmd(cmd);
@@ -2110,7 +2140,7 @@ static void target_complete_ok_work(struct work_struct *work)
                 } else if (rc) {
                         ret = transport_send_check_condition_and_sense(cmd,
                                                 rc, 0);
-                       if (ret == -EAGAIN || ret == -ENOMEM)
+                       if (ret)
                                 goto queue_full;
  
                         transport_lun_remove_cmd(cmd);
@@ -2135,7 +2165,7 @@ queue_rsp:
                 if (target_read_prot_action(cmd)) {
                         ret = transport_send_check_condition_and_sense(cmd,
                                                 cmd->pi_err, 0);
-                       if (ret == -EAGAIN || ret == -ENOMEM)
+                       if (ret)
                                 goto queue_full;
  
                         transport_lun_remove_cmd(cmd);
@@ -2145,7 +2175,7 @@ queue_rsp:
  
                 trace_target_cmd_complete(cmd);
                 ret = cmd->se_tfo->queue_data_in(cmd);
-               if (ret == -EAGAIN || ret == -ENOMEM)
+               if (ret)
                         goto queue_full;
                 break;
         case DMA_TO_DEVICE:
@@ -2158,7 +2188,7 @@ queue_rsp:
                         atomic_long_add(cmd->data_length,
                                         &cmd->se_lun->lun_stats.tx_data_octets);
                         ret = cmd->se_tfo->queue_data_in(cmd);
-                       if (ret == -EAGAIN || ret == -ENOMEM)
+                       if (ret)
                                 goto queue_full;
                         break;
                 }
@@ -2167,7 +2197,7 @@ queue_rsp:
  queue_status:
                 trace_target_cmd_complete(cmd);
                 ret = cmd->se_tfo->queue_status(cmd);
-               if (ret == -EAGAIN || ret == -ENOMEM)
+               if (ret)
                         goto queue_full;
                 break;
         default:
@@ -2181,8 +2211,8 @@ queue_status:
  queue_full:
         pr_debug("Handling complete_ok QUEUE_FULL: se_cmd: %p,"
                 " data_direction: %d\n", cmd, cmd->data_direction);
-       cmd->t_state = TRANSPORT_COMPLETE_QF_OK;
-       transport_handle_queue_full(cmd, cmd->se_dev);
+
+       transport_handle_queue_full(cmd, cmd->se_dev, ret, false);
  }
  
  void target_free_sgl(struct scatterlist *sgl, int nents)
@@ -2450,18 +2480,14 @@ transport_generic_new_cmd(struct se_cmd *cmd)
         spin_unlock_irqrestore(&cmd->t_state_lock, flags);
  
         ret = cmd->se_tfo->write_pending(cmd);
-       if (ret == -EAGAIN || ret == -ENOMEM)
+       if (ret)
                 goto queue_full;
  
-       /* fabric drivers should only return -EAGAIN or -ENOMEM as error */
-       WARN_ON(ret);
-
-       return (!ret) ? 0 : TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+       return 0;
  
  queue_full:
         pr_debug("Handling write_pending QUEUE__FULL: se_cmd: %p\n", cmd);
-       cmd->t_state = TRANSPORT_COMPLETE_QF_WP;
-       transport_handle_queue_full(cmd, cmd->se_dev);
+       transport_handle_queue_full(cmd, cmd->se_dev, ret, true);
         return 0;
  }
  EXPORT_SYMBOL(transport_generic_new_cmd);
@@ -2471,10 +2497,10 @@ static void transport_write_pending_qf(struct se_cmd *cmd)
         int ret;
  
         ret = cmd->se_tfo->write_pending(cmd);
-       if (ret == -EAGAIN || ret == -ENOMEM) {
+       if (ret) {
                 pr_debug("Handling write_pending QUEUE__FULL: se_cmd: %p\n",
                          cmd);
-               transport_handle_queue_full(cmd, cmd->se_dev);
+               transport_handle_queue_full(cmd, cmd->se_dev, ret, true);
         }
  }
  
@@ -3012,6 +3038,8 @@ static int __transport_check_aborted_status(struct se_cmd *cmd, int send_status)
         __releases(&cmd->t_state_lock)
         __acquires(&cmd->t_state_lock)
  {
+       int ret;
+
         assert_spin_locked(&cmd->t_state_lock);
         WARN_ON_ONCE(!irqs_disabled());
  
@@ -3035,7 +3063,9 @@ static int __transport_check_aborted_status(struct se_cmd *cmd, int send_status)
         trace_target_cmd_complete(cmd);
  
         spin_unlock_irq(&cmd->t_state_lock);
-       cmd->se_tfo->queue_status(cmd);
+       ret = cmd->se_tfo->queue_status(cmd);
+       if (ret)
+               transport_handle_queue_full(cmd, cmd->se_dev, ret, false);
         spin_lock_irq(&cmd->t_state_lock);
  
         return 1;
@@ -3056,6 +3086,7 @@ EXPORT_SYMBOL(transport_check_aborted_status);
  void transport_send_task_abort(struct se_cmd *cmd)
  {
         unsigned long flags;
+       int ret;
  
         spin_lock_irqsave(&cmd->t_state_lock, flags);
         if (cmd->se_cmd_flags & (SCF_SENT_CHECK_CONDITION)) {
@@ -3091,7 +3122,9 @@ send_abort:
                  cmd->t_task_cdb[0], cmd->tag);
  
         trace_target_cmd_complete(cmd);
-       cmd->se_tfo->queue_status(cmd);
+       ret = cmd->se_tfo->queue_status(cmd);
+       if (ret)
+               transport_handle_queue_full(cmd, cmd->se_dev, ret, false);
  }
  
  static void target_tmr_work(struct work_struct *work)
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c

index c3adefe95e50f7f7054e272e15fc5e37663d11c9..f615c3bbb73e8b7a2a7bf3f5039efd84c724cdf2 100644 (file)
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -28,6 +28,7 @@
  #include <linux/stringify.h>
  #include <linux/bitops.h>
  #include <linux/highmem.h>
+#include <linux/configfs.h>
  #include <net/genetlink.h>
  #include <scsi/scsi_common.h>
  #include <scsi/scsi_proto.h>
@@ -112,6 +113,7 @@ struct tcmu_dev {
         spinlock_t commands_lock;
  
         struct timer_list timeout;
+       unsigned int cmd_time_out;
  
         char dev_config[TCMU_CONFIG_LEN];
  };
@@ -172,7 +174,9 @@ static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd)
  
         tcmu_cmd->se_cmd = se_cmd;
         tcmu_cmd->tcmu_dev = udev;
-       tcmu_cmd->deadline = jiffies + msecs_to_jiffies(TCMU_TIME_OUT);
+       if (udev->cmd_time_out)
+               tcmu_cmd->deadline = jiffies +
+                                       msecs_to_jiffies(udev->cmd_time_out);
  
         idr_preload(GFP_KERNEL);
         spin_lock_irq(&udev->commands_lock);
@@ -307,24 +311,50 @@ static void free_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd)
                    DATA_BLOCK_BITS);
  }
  
-static void gather_data_area(struct tcmu_dev *udev, unsigned long *cmd_bitmap,
-               struct scatterlist *data_sg, unsigned int data_nents)
+static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
+                            bool bidi)
  {
+       struct se_cmd *se_cmd = cmd->se_cmd;
         int i, block;
         int block_remaining = 0;
         void *from, *to;
         size_t copy_bytes, from_offset;
-       struct scatterlist *sg;
+       struct scatterlist *sg, *data_sg;
+       unsigned int data_nents;
+       DECLARE_BITMAP(bitmap, DATA_BLOCK_BITS);
+
+       bitmap_copy(bitmap, cmd->data_bitmap, DATA_BLOCK_BITS);
+
+       if (!bidi) {
+               data_sg = se_cmd->t_data_sg;
+               data_nents = se_cmd->t_data_nents;
+       } else {
+               uint32_t count;
+
+               /*
+                * For bidi case, the first count blocks are for Data-Out
+                * buffer blocks, and before gathering the Data-In buffer
+                * the Data-Out buffer blocks should be discarded.
+                */
+               count = DIV_ROUND_UP(se_cmd->data_length, DATA_BLOCK_SIZE);
+               while (count--) {
+                       block = find_first_bit(bitmap, DATA_BLOCK_BITS);
+                       clear_bit(block, bitmap);
+               }
+
+               data_sg = se_cmd->t_bidi_data_sg;
+               data_nents = se_cmd->t_bidi_data_nents;
+       }
  
         for_each_sg(data_sg, sg, data_nents, i) {
                 int sg_remaining = sg->length;
                 to = kmap_atomic(sg_page(sg)) + sg->offset;
                 while (sg_remaining > 0) {
                         if (block_remaining == 0) {
-                               block = find_first_bit(cmd_bitmap,
+                               block = find_first_bit(bitmap,
                                                 DATA_BLOCK_BITS);
                                 block_remaining = DATA_BLOCK_SIZE;
-                               clear_bit(block, cmd_bitmap);
+                               clear_bit(block, bitmap);
                         }
                         copy_bytes = min_t(size_t, sg_remaining,
                                         block_remaining);
@@ -390,6 +420,27 @@ static bool is_ring_space_avail(struct tcmu_dev *udev, size_t cmd_size, size_t d
         return true;
  }
  
+static inline size_t tcmu_cmd_get_data_length(struct tcmu_cmd *tcmu_cmd)
+{
+       struct se_cmd *se_cmd = tcmu_cmd->se_cmd;
+       size_t data_length = round_up(se_cmd->data_length, DATA_BLOCK_SIZE);
+
+       if (se_cmd->se_cmd_flags & SCF_BIDI) {
+               BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents));
+               data_length += round_up(se_cmd->t_bidi_data_sg->length,
+                               DATA_BLOCK_SIZE);
+       }
+
+       return data_length;
+}
+
+static inline uint32_t tcmu_cmd_get_block_cnt(struct tcmu_cmd *tcmu_cmd)
+{
+       size_t data_length = tcmu_cmd_get_data_length(tcmu_cmd);
+
+       return data_length / DATA_BLOCK_SIZE;
+}
+
  static sense_reason_t
  tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
  {
@@ -403,7 +454,7 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
         uint32_t cmd_head;
         uint64_t cdb_off;
         bool copy_to_data_area;
-       size_t data_length;
+       size_t data_length = tcmu_cmd_get_data_length(tcmu_cmd);
         DECLARE_BITMAP(old_bitmap, DATA_BLOCK_BITS);
  
         if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags))
@@ -417,8 +468,7 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
          * expensive to tell how many regions are freed in the bitmap
         */
         base_command_size = max(offsetof(struct tcmu_cmd_entry,
-                               req.iov[se_cmd->t_bidi_data_nents +
-                                       se_cmd->t_data_nents]),
+                               req.iov[tcmu_cmd_get_block_cnt(tcmu_cmd)]),
                                 sizeof(struct tcmu_cmd_entry));
         command_size = base_command_size
                 + round_up(scsi_command_size(se_cmd->t_task_cdb), TCMU_OP_ALIGN_SIZE);
@@ -429,11 +479,6 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
  
         mb = udev->mb_addr;
         cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
-       data_length = se_cmd->data_length;
-       if (se_cmd->se_cmd_flags & SCF_BIDI) {
-               BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents));
-               data_length += se_cmd->t_bidi_data_sg->length;
-       }
         if ((command_size > (udev->cmdr_size / 2)) ||
             data_length > udev->data_size) {
                 pr_warn("TCMU: Request of size %zu/%zu is too big for %u/%zu "
@@ -451,7 +496,11 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
  
                 pr_debug("sleeping for ring space\n");
                 spin_unlock_irq(&udev->cmdr_lock);
-               ret = schedule_timeout(msecs_to_jiffies(TCMU_TIME_OUT));
+               if (udev->cmd_time_out)
+                       ret = schedule_timeout(
+                                       msecs_to_jiffies(udev->cmd_time_out));
+               else
+                       ret = schedule_timeout(msecs_to_jiffies(TCMU_TIME_OUT));
                 finish_wait(&udev->wait_cmdr, &__wait);
                 if (!ret) {
                         pr_warn("tcmu: command timed out\n");
@@ -503,11 +552,14 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
         entry->req.iov_dif_cnt = 0;
  
         /* Handle BIDI commands */
-       iov_cnt = 0;
-       alloc_and_scatter_data_area(udev, se_cmd->t_bidi_data_sg,
-               se_cmd->t_bidi_data_nents, &iov, &iov_cnt, false);
-       entry->req.iov_bidi_cnt = iov_cnt;
-
+       if (se_cmd->se_cmd_flags & SCF_BIDI) {
+               iov_cnt = 0;
+               iov++;
+               alloc_and_scatter_data_area(udev, se_cmd->t_bidi_data_sg,
+                               se_cmd->t_bidi_data_nents, &iov, &iov_cnt,
+                               false);
+               entry->req.iov_bidi_cnt = iov_cnt;
+       }
         /* cmd's data_bitmap is what changed in process */
         bitmap_xor(tcmu_cmd->data_bitmap, old_bitmap, udev->data_bitmap,
                         DATA_BLOCK_BITS);
@@ -526,8 +578,9 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
         /* TODO: only if FLUSH and FUA? */
         uio_event_notify(&udev->uio_info);
  
-       mod_timer(&udev->timeout,
-               round_jiffies_up(jiffies + msecs_to_jiffies(TCMU_TIME_OUT)));
+       if (udev->cmd_time_out)
+               mod_timer(&udev->timeout, round_jiffies_up(jiffies +
+                         msecs_to_jiffies(udev->cmd_time_out)));
  
         return TCM_NO_SENSE;
  }
@@ -583,19 +636,11 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry *
                                se_cmd->scsi_sense_length);
                 free_data_area(udev, cmd);
         } else if (se_cmd->se_cmd_flags & SCF_BIDI) {
-               DECLARE_BITMAP(bitmap, DATA_BLOCK_BITS);
-
                 /* Get Data-In buffer before clean up */
-               bitmap_copy(bitmap, cmd->data_bitmap, DATA_BLOCK_BITS);
-               gather_data_area(udev, bitmap,
-                       se_cmd->t_bidi_data_sg, se_cmd->t_bidi_data_nents);
+               gather_data_area(udev, cmd, true);
                 free_data_area(udev, cmd);
         } else if (se_cmd->data_direction == DMA_FROM_DEVICE) {
-               DECLARE_BITMAP(bitmap, DATA_BLOCK_BITS);
-
-               bitmap_copy(bitmap, cmd->data_bitmap, DATA_BLOCK_BITS);
-               gather_data_area(udev, bitmap,
-                       se_cmd->t_data_sg, se_cmd->t_data_nents);
+               gather_data_area(udev, cmd, false);
                 free_data_area(udev, cmd);
         } else if (se_cmd->data_direction == DMA_TO_DEVICE) {
                 free_data_area(udev, cmd);
@@ -742,6 +787,7 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
         }
  
         udev->hba = hba;
+       udev->cmd_time_out = TCMU_TIME_OUT;
  
         init_waitqueue_head(&udev->wait_cmdr);
         spin_lock_init(&udev->cmdr_lock);
@@ -960,7 +1006,8 @@ static int tcmu_configure_device(struct se_device *dev)
         if (dev->dev_attrib.hw_block_size == 0)
                 dev->dev_attrib.hw_block_size = 512;
         /* Other attributes can be configured in userspace */
-       dev->dev_attrib.hw_max_sectors = 128;
+       if (!dev->dev_attrib.hw_max_sectors)
+               dev->dev_attrib.hw_max_sectors = 128;
         dev->dev_attrib.hw_queue_depth = 128;
  
         ret = tcmu_netlink_event(TCMU_CMD_ADDED_DEVICE, udev->uio_info.name,
@@ -997,6 +1044,11 @@ static void tcmu_dev_call_rcu(struct rcu_head *p)
         kfree(udev);
  }
  
+static bool tcmu_dev_configured(struct tcmu_dev *udev)
+{
+       return udev->uio_info.uio_dev ? true : false;
+}
+
  static void tcmu_free_device(struct se_device *dev)
  {
         struct tcmu_dev *udev = TCMU_DEV(dev);
@@ -1018,8 +1070,7 @@ static void tcmu_free_device(struct se_device *dev)
         spin_unlock_irq(&udev->commands_lock);
         WARN_ON(!all_expired);
  
-       /* Device was configured */
-       if (udev->uio_info.uio_dev) {
+       if (tcmu_dev_configured(udev)) {
                 tcmu_netlink_event(TCMU_CMD_REMOVED_DEVICE, udev->uio_info.name,
                                    udev->uio_info.uio_dev->minor);
  
@@ -1031,16 +1082,42 @@ static void tcmu_free_device(struct se_device *dev)
  }
  
  enum {
-       Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_err,
+       Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_hw_max_sectors,
+       Opt_err,
  };
  
  static match_table_t tokens = {
         {Opt_dev_config, "dev_config=%s"},
         {Opt_dev_size, "dev_size=%u"},
         {Opt_hw_block_size, "hw_block_size=%u"},
+       {Opt_hw_max_sectors, "hw_max_sectors=%u"},
         {Opt_err, NULL}
  };
  
+static int tcmu_set_dev_attrib(substring_t *arg, u32 *dev_attrib)
+{
+       unsigned long tmp_ul;
+       char *arg_p;
+       int ret;
+
+       arg_p = match_strdup(arg);
+       if (!arg_p)
+               return -ENOMEM;
+
+       ret = kstrtoul(arg_p, 0, &tmp_ul);
+       kfree(arg_p);
+       if (ret < 0) {
+               pr_err("kstrtoul() failed for dev attrib\n");
+               return ret;
+       }
+       if (!tmp_ul) {
+               pr_err("dev attrib must be nonzero\n");
+               return -EINVAL;
+       }
+       *dev_attrib = tmp_ul;
+       return 0;
+}
+
  static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
                 const char *page, ssize_t count)
  {
@@ -1048,7 +1125,6 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
         char *orig, *ptr, *opts, *arg_p;
         substring_t args[MAX_OPT_ARGS];
         int ret = 0, token;
-       unsigned long tmp_ul;
  
         opts = kstrdup(page, GFP_KERNEL);
         if (!opts)
@@ -1082,26 +1158,19 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
                                 pr_err("kstrtoul() failed for dev_size=\n");
                         break;
                 case Opt_hw_block_size:
-                       arg_p = match_strdup(&args[0]);
-                       if (!arg_p) {
-                               ret = -ENOMEM;
-                               break;
-                       }
-                       ret = kstrtoul(arg_p, 0, &tmp_ul);
-                       kfree(arg_p);
-                       if (ret < 0) {
-                               pr_err("kstrtoul() failed for hw_block_size=\n");
-                               break;
-                       }
-                       if (!tmp_ul) {
-                               pr_err("hw_block_size must be nonzero\n");
-                               break;
-                       }
-                       dev->dev_attrib.hw_block_size = tmp_ul;
+                       ret = tcmu_set_dev_attrib(&args[0],
+                                       &(dev->dev_attrib.hw_block_size));
+                       break;
+               case Opt_hw_max_sectors:
+                       ret = tcmu_set_dev_attrib(&args[0],
+                                       &(dev->dev_attrib.hw_max_sectors));
                         break;
                 default:
                         break;
                 }
+
+               if (ret)
+                       break;
         }
  
         kfree(orig);
@@ -1134,7 +1203,43 @@ tcmu_parse_cdb(struct se_cmd *cmd)
         return passthrough_parse_cdb(cmd, tcmu_queue_cmd);
  }
  
-static const struct target_backend_ops tcmu_ops = {
+static ssize_t tcmu_cmd_time_out_show(struct config_item *item, char *page)
+{
+       struct se_dev_attrib *da = container_of(to_config_group(item),
+                                       struct se_dev_attrib, da_group);
+       struct tcmu_dev *udev = container_of(da->da_dev,
+                                       struct tcmu_dev, se_dev);
+
+       return snprintf(page, PAGE_SIZE, "%lu\n", udev->cmd_time_out / MSEC_PER_SEC);
+}
+
+static ssize_t tcmu_cmd_time_out_store(struct config_item *item, const char *page,
+                                      size_t count)
+{
+       struct se_dev_attrib *da = container_of(to_config_group(item),
+                                       struct se_dev_attrib, da_group);
+       struct tcmu_dev *udev = container_of(da->da_dev,
+                                       struct tcmu_dev, se_dev);
+       u32 val;
+       int ret;
+
+       if (da->da_dev->export_count) {
+               pr_err("Unable to set tcmu cmd_time_out while exports exist\n");
+               return -EINVAL;
+       }
+
+       ret = kstrtou32(page, 0, &val);
+       if (ret < 0)
+               return ret;
+
+       udev->cmd_time_out = val * MSEC_PER_SEC;
+       return count;
+}
+CONFIGFS_ATTR(tcmu_, cmd_time_out);
+
+static struct configfs_attribute **tcmu_attrs;
+
+static struct target_backend_ops tcmu_ops = {
         .name                   = "user",
         .owner                  = THIS_MODULE,
         .transport_flags        = TRANSPORT_FLAG_PASSTHROUGH,
@@ -1148,12 +1253,12 @@ static const struct target_backend_ops tcmu_ops = {
         .show_configfs_dev_params = tcmu_show_configfs_dev_params,
         .get_device_type        = sbc_get_device_type,
         .get_blocks             = tcmu_get_blocks,
-       .tb_dev_attrib_attrs    = passthrough_attrib_attrs,
+       .tb_dev_attrib_attrs    = NULL,
  };
  
  static int __init tcmu_module_init(void)
  {
-       int ret;
+       int ret, i, len = 0;
  
         BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0);
  
@@ -1175,12 +1280,31 @@ static int __init tcmu_module_init(void)
                 goto out_unreg_device;
         }
  
+       for (i = 0; passthrough_attrib_attrs[i] != NULL; i++) {
+               len += sizeof(struct configfs_attribute *);
+       }
+       len += sizeof(struct configfs_attribute *) * 2;
+
+       tcmu_attrs = kzalloc(len, GFP_KERNEL);
+       if (!tcmu_attrs) {
+               ret = -ENOMEM;
+               goto out_unreg_genl;
+       }
+
+       for (i = 0; passthrough_attrib_attrs[i] != NULL; i++) {
+               tcmu_attrs[i] = passthrough_attrib_attrs[i];
+       }
+       tcmu_attrs[i] = &tcmu_attr_cmd_time_out;
+       tcmu_ops.tb_dev_attrib_attrs = tcmu_attrs;
+
         ret = transport_backend_register(&tcmu_ops);
         if (ret)
-               goto out_unreg_genl;
+               goto out_attrs;
  
         return 0;
  
+out_attrs:
+       kfree(tcmu_attrs);
  out_unreg_genl:
         genl_unregister_family(&tcmu_genl_family);
  out_unreg_device:
@@ -1194,6 +1318,7 @@ out_free_cache:
  static void __exit tcmu_module_exit(void)
  {
         target_backend_unregister(&tcmu_ops);
+       kfree(tcmu_attrs);
         genl_unregister_family(&tcmu_genl_family);
         root_device_unregister(tcmu_root_device);
         kmem_cache_destroy(tcmu_cmd_cache);
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c

index 91048eeca28b2dc3d81f234ed60bbfdd796f8277..69d0f430b2d190756de94d4b6b6334d1af2dfa50 100644 (file)
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -107,8 +107,6 @@ struct cpufreq_cooling_device {
  };
  static DEFINE_IDA(cpufreq_ida);
  
-static unsigned int cpufreq_dev_count;
-
  static DEFINE_MUTEX(cooling_list_lock);
  static LIST_HEAD(cpufreq_dev_list);
  
@@ -395,13 +393,20 @@ static int get_static_power(struct cpufreq_cooling_device *cpufreq_device,
  
         opp = dev_pm_opp_find_freq_exact(cpufreq_device->cpu_dev, freq_hz,
                                          true);
+       if (IS_ERR(opp)) {
+               dev_warn_ratelimited(cpufreq_device->cpu_dev,
+                                    "Failed to find OPP for frequency %lu: %ld\n",
+                                    freq_hz, PTR_ERR(opp));
+               return -EINVAL;
+       }
+
         voltage = dev_pm_opp_get_voltage(opp);
         dev_pm_opp_put(opp);
  
         if (voltage == 0) {
-               dev_warn_ratelimited(cpufreq_device->cpu_dev,
-                                    "Failed to get voltage for frequency %lu: %ld\n",
-                                    freq_hz, IS_ERR(opp) ? PTR_ERR(opp) : 0);
+               dev_err_ratelimited(cpufreq_device->cpu_dev,
+                                   "Failed to get voltage for frequency %lu\n",
+                                   freq_hz);
                 return -EINVAL;
         }
  
@@ -693,9 +698,9 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev,
  
         *state = cpufreq_cooling_get_level(cpu, target_freq);
         if (*state == THERMAL_CSTATE_INVALID) {
-               dev_warn_ratelimited(&cdev->device,
-                                    "Failed to convert %dKHz for cpu %d into a cdev state\n",
-                                    target_freq, cpu);
+               dev_err_ratelimited(&cdev->device,
+                                   "Failed to convert %dKHz for cpu %d into a cdev state\n",
+                                   target_freq, cpu);
                 return -EINVAL;
         }
  
@@ -771,6 +776,7 @@ __cpufreq_cooling_register(struct device_node *np,
         unsigned int freq, i, num_cpus;
         int ret;
         struct thermal_cooling_device_ops *cooling_ops;
+       bool first;
  
         if (!alloc_cpumask_var(&temp_mask, GFP_KERNEL))
                 return ERR_PTR(-ENOMEM);
@@ -874,13 +880,14 @@ __cpufreq_cooling_register(struct device_node *np,
         cpufreq_dev->cool_dev = cool_dev;
  
         mutex_lock(&cooling_list_lock);
+       /* Register the notifier for first cpufreq cooling device */
+       first = list_empty(&cpufreq_dev_list);
         list_add(&cpufreq_dev->node, &cpufreq_dev_list);
+       mutex_unlock(&cooling_list_lock);
  
-       /* Register the notifier for first cpufreq cooling device */
-       if (!cpufreq_dev_count++)
+       if (first)
                 cpufreq_register_notifier(&thermal_cpufreq_notifier_block,
                                           CPUFREQ_POLICY_NOTIFIER);
-       mutex_unlock(&cooling_list_lock);
  
         goto put_policy;
  
@@ -1021,6 +1028,7 @@ EXPORT_SYMBOL(of_cpufreq_power_cooling_register);
  void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
  {
         struct cpufreq_cooling_device *cpufreq_dev;
+       bool last;
  
         if (!cdev)
                 return;
@@ -1028,14 +1036,15 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
         cpufreq_dev = cdev->devdata;
  
         mutex_lock(&cooling_list_lock);
+       list_del(&cpufreq_dev->node);
         /* Unregister the notifier for the last cpufreq cooling device */
-       if (!--cpufreq_dev_count)
+       last = list_empty(&cpufreq_dev_list);
+       mutex_unlock(&cooling_list_lock);
+
+       if (last)
                 cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block,
                                             CPUFREQ_POLICY_NOTIFIER);
  
-       list_del(&cpufreq_dev->node);
-       mutex_unlock(&cooling_list_lock);
-
         thermal_cooling_device_unregister(cpufreq_dev->cool_dev);
         ida_simple_remove(&cpufreq_ida, cpufreq_dev->id);
         kfree(cpufreq_dev->dyn_power_table);
diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c

index 7743a78d472397bbf9a10c0b1ac2d7e4b163a6a6..4bf4ad58cffda0172a48138248f4b406790c58e8 100644 (file)
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -186,16 +186,22 @@ get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq)
                 return 0;
  
         opp = dev_pm_opp_find_freq_exact(dev, freq, true);
-       if (IS_ERR(opp) && (PTR_ERR(opp) == -ERANGE))
+       if (PTR_ERR(opp) == -ERANGE)
                 opp = dev_pm_opp_find_freq_exact(dev, freq, false);
  
+       if (IS_ERR(opp)) {
+               dev_err_ratelimited(dev, "Failed to find OPP for frequency %lu: %ld\n",
+                                   freq, PTR_ERR(opp));
+               return 0;
+       }
+
         voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */
         dev_pm_opp_put(opp);
  
         if (voltage == 0) {
-               dev_warn_ratelimited(dev,
-                                    "Failed to get voltage for frequency %lu: %ld\n",
-                                    freq, IS_ERR(opp) ? PTR_ERR(opp) : 0);
+               dev_err_ratelimited(dev,
+                                   "Failed to get voltage for frequency %lu\n",
+                                   freq);
                 return 0;
         }
  
diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c

index 1bacbc3b19a05cc7b685ddf93b14d5ca10d67acf..e94aea8c0d0535cbc05933bb02067263e62e8292 100644 (file)
--- a/drivers/tty/n_hdlc.c
+++ b/drivers/tty/n_hdlc.c
@@ -114,7 +114,7 @@
  #define DEFAULT_TX_BUF_COUNT 3
  
  struct n_hdlc_buf {
-       struct n_hdlc_buf *link;
+       struct list_head  list_item;
         int               count;
         char              buf[1];
  };
@@ -122,8 +122,7 @@ struct n_hdlc_buf {
  #define        N_HDLC_BUF_SIZE (sizeof(struct n_hdlc_buf) + maxframe)
  
  struct n_hdlc_buf_list {
-       struct n_hdlc_buf *head;
-       struct n_hdlc_buf *tail;
+       struct list_head  list;
         int               count;
         spinlock_t        spinlock;
  };
@@ -136,7 +135,6 @@ struct n_hdlc_buf_list {
   * @backup_tty - TTY to use if tty gets closed
   * @tbusy - reentrancy flag for tx wakeup code
   * @woke_up - FIXME: describe this field
- * @tbuf - currently transmitting tx buffer
   * @tx_buf_list - list of pending transmit frame buffers
   * @rx_buf_list - list of received frame buffers
   * @tx_free_buf_list - list unused transmit frame buffers
@@ -149,7 +147,6 @@ struct n_hdlc {
         struct tty_struct       *backup_tty;
         int                     tbusy;
         int                     woke_up;
-       struct n_hdlc_buf       *tbuf;
         struct n_hdlc_buf_list  tx_buf_list;
         struct n_hdlc_buf_list  rx_buf_list;
         struct n_hdlc_buf_list  tx_free_buf_list;
@@ -159,6 +156,8 @@ struct n_hdlc {
  /*
   * HDLC buffer list manipulation functions
   */
+static void n_hdlc_buf_return(struct n_hdlc_buf_list *buf_list,
+                                               struct n_hdlc_buf *buf);
  static void n_hdlc_buf_put(struct n_hdlc_buf_list *list,
                            struct n_hdlc_buf *buf);
  static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *list);
@@ -208,16 +207,9 @@ static void flush_tx_queue(struct tty_struct *tty)
  {
         struct n_hdlc *n_hdlc = tty2n_hdlc(tty);
         struct n_hdlc_buf *buf;
-       unsigned long flags;
  
         while ((buf = n_hdlc_buf_get(&n_hdlc->tx_buf_list)))
                 n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, buf);
-       spin_lock_irqsave(&n_hdlc->tx_buf_list.spinlock, flags);
-       if (n_hdlc->tbuf) {
-               n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, n_hdlc->tbuf);
-               n_hdlc->tbuf = NULL;
-       }
-       spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock, flags);
  }
  
  static struct tty_ldisc_ops n_hdlc_ldisc = {
@@ -283,7 +275,6 @@ static void n_hdlc_release(struct n_hdlc *n_hdlc)
                 } else
                         break;
         }
-       kfree(n_hdlc->tbuf);
         kfree(n_hdlc);
         
  }      /* end of n_hdlc_release() */
@@ -402,13 +393,7 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty)
         n_hdlc->woke_up = 0;
         spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock, flags);
  
-       /* get current transmit buffer or get new transmit */
-       /* buffer from list of pending transmit buffers */
-               
-       tbuf = n_hdlc->tbuf;
-       if (!tbuf)
-               tbuf = n_hdlc_buf_get(&n_hdlc->tx_buf_list);
-               
+       tbuf = n_hdlc_buf_get(&n_hdlc->tx_buf_list);
         while (tbuf) {
                 if (debuglevel >= DEBUG_LEVEL_INFO)     
                         printk("%s(%d)sending frame %p, count=%d\n",
@@ -420,7 +405,7 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty)
  
                 /* rollback was possible and has been done */
                 if (actual == -ERESTARTSYS) {
-                       n_hdlc->tbuf = tbuf;
+                       n_hdlc_buf_return(&n_hdlc->tx_buf_list, tbuf);
                         break;
                 }
                 /* if transmit error, throw frame away by */
@@ -435,10 +420,7 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty)
                                         
                         /* free current transmit buffer */
                         n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, tbuf);
-                       
-                       /* this tx buffer is done */
-                       n_hdlc->tbuf = NULL;
-                       
+
                         /* wait up sleeping writers */
                         wake_up_interruptible(&tty->write_wait);
         
@@ -448,10 +430,12 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty)
                         if (debuglevel >= DEBUG_LEVEL_INFO)     
                                 printk("%s(%d)frame %p pending\n",
                                         __FILE__,__LINE__,tbuf);
-                                       
-                       /* buffer not accepted by driver */
-                       /* set this buffer as pending buffer */
-                       n_hdlc->tbuf = tbuf;
+
+                       /*
+                        * the buffer was not accepted by driver,
+                        * return it back into tx queue
+                        */
+                       n_hdlc_buf_return(&n_hdlc->tx_buf_list, tbuf);
                         break;
                 }
         }
@@ -749,7 +733,8 @@ static int n_hdlc_tty_ioctl(struct tty_struct *tty, struct file *file,
         int error = 0;
         int count;
         unsigned long flags;
-       
+       struct n_hdlc_buf *buf = NULL;
+
         if (debuglevel >= DEBUG_LEVEL_INFO)     
                 printk("%s(%d)n_hdlc_tty_ioctl() called %d\n",
                         __FILE__,__LINE__,cmd);
@@ -763,8 +748,10 @@ static int n_hdlc_tty_ioctl(struct tty_struct *tty, struct file *file,
                 /* report count of read data available */
                 /* in next available frame (if any) */
                 spin_lock_irqsave(&n_hdlc->rx_buf_list.spinlock,flags);
-               if (n_hdlc->rx_buf_list.head)
-                       count = n_hdlc->rx_buf_list.head->count;
+               buf = list_first_entry_or_null(&n_hdlc->rx_buf_list.list,
+                                               struct n_hdlc_buf, list_item);
+               if (buf)
+                       count = buf->count;
                 else
                         count = 0;
                 spin_unlock_irqrestore(&n_hdlc->rx_buf_list.spinlock,flags);
@@ -776,8 +763,10 @@ static int n_hdlc_tty_ioctl(struct tty_struct *tty, struct file *file,
                 count = tty_chars_in_buffer(tty);
                 /* add size of next output frame in queue */
                 spin_lock_irqsave(&n_hdlc->tx_buf_list.spinlock,flags);
-               if (n_hdlc->tx_buf_list.head)
-                       count += n_hdlc->tx_buf_list.head->count;
+               buf = list_first_entry_or_null(&n_hdlc->tx_buf_list.list,
+                                               struct n_hdlc_buf, list_item);
+               if (buf)
+                       count += buf->count;
                 spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock,flags);
                 error = put_user(count, (int __user *)arg);
                 break;
@@ -825,14 +814,14 @@ static unsigned int n_hdlc_tty_poll(struct tty_struct *tty, struct file *filp,
                 poll_wait(filp, &tty->write_wait, wait);
  
                 /* set bits for operations that won't block */
-               if (n_hdlc->rx_buf_list.head)
+               if (!list_empty(&n_hdlc->rx_buf_list.list))
                         mask |= POLLIN | POLLRDNORM;    /* readable */
                 if (test_bit(TTY_OTHER_CLOSED, &tty->flags))
                         mask |= POLLHUP;
                 if (tty_hung_up_p(filp))
                         mask |= POLLHUP;
                 if (!tty_is_writelocked(tty) &&
-                               n_hdlc->tx_free_buf_list.head)
+                               !list_empty(&n_hdlc->tx_free_buf_list.list))
                         mask |= POLLOUT | POLLWRNORM;   /* writable */
         }
         return mask;
@@ -856,7 +845,12 @@ static struct n_hdlc *n_hdlc_alloc(void)
         spin_lock_init(&n_hdlc->tx_free_buf_list.spinlock);
         spin_lock_init(&n_hdlc->rx_buf_list.spinlock);
         spin_lock_init(&n_hdlc->tx_buf_list.spinlock);
-       
+
+       INIT_LIST_HEAD(&n_hdlc->rx_free_buf_list.list);
+       INIT_LIST_HEAD(&n_hdlc->tx_free_buf_list.list);
+       INIT_LIST_HEAD(&n_hdlc->rx_buf_list.list);
+       INIT_LIST_HEAD(&n_hdlc->tx_buf_list.list);
+
         /* allocate free rx buffer list */
         for(i=0;i<DEFAULT_RX_BUF_COUNT;i++) {
                 buf = kmalloc(N_HDLC_BUF_SIZE, GFP_KERNEL);
@@ -883,54 +877,66 @@ static struct n_hdlc *n_hdlc_alloc(void)
         
  }      /* end of n_hdlc_alloc() */
  
+/**
+ * n_hdlc_buf_return - put the HDLC buffer after the head of the specified list
+ * @buf_list - pointer to the buffer list
+ * @buf - pointer to the buffer
+ */
+static void n_hdlc_buf_return(struct n_hdlc_buf_list *buf_list,
+                                               struct n_hdlc_buf *buf)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&buf_list->spinlock, flags);
+
+       list_add(&buf->list_item, &buf_list->list);
+       buf_list->count++;
+
+       spin_unlock_irqrestore(&buf_list->spinlock, flags);
+}
+
  /**
   * n_hdlc_buf_put - add specified HDLC buffer to tail of specified list
- * @list - pointer to buffer list
+ * @buf_list - pointer to buffer list
   * @buf        - pointer to buffer
   */
-static void n_hdlc_buf_put(struct n_hdlc_buf_list *list,
+static void n_hdlc_buf_put(struct n_hdlc_buf_list *buf_list,
                            struct n_hdlc_buf *buf)
  {
         unsigned long flags;
-       spin_lock_irqsave(&list->spinlock,flags);
-       
-       buf->link=NULL;
-       if (list->tail)
-               list->tail->link = buf;
-       else
-               list->head = buf;
-       list->tail = buf;
-       (list->count)++;
-       
-       spin_unlock_irqrestore(&list->spinlock,flags);
-       
+
+       spin_lock_irqsave(&buf_list->spinlock, flags);
+
+       list_add_tail(&buf->list_item, &buf_list->list);
+       buf_list->count++;
+
+       spin_unlock_irqrestore(&buf_list->spinlock, flags);
  }      /* end of n_hdlc_buf_put() */
  
  /**
   * n_hdlc_buf_get - remove and return an HDLC buffer from list
- * @list - pointer to HDLC buffer list
+ * @buf_list - pointer to HDLC buffer list
   * 
   * Remove and return an HDLC buffer from the head of the specified HDLC buffer
   * list.
   * Returns a pointer to HDLC buffer if available, otherwise %NULL.
   */
-static struct n_hdlc_buf* n_hdlc_buf_get(struct n_hdlc_buf_list *list)
+static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *buf_list)
  {
         unsigned long flags;
         struct n_hdlc_buf *buf;
-       spin_lock_irqsave(&list->spinlock,flags);
-       
-       buf = list->head;
+
+       spin_lock_irqsave(&buf_list->spinlock, flags);
+
+       buf = list_first_entry_or_null(&buf_list->list,
+                                               struct n_hdlc_buf, list_item);
         if (buf) {
-               list->head = buf->link;
-               (list->count)--;
+               list_del(&buf->list_item);
+               buf_list->count--;
         }
-       if (!list->head)
-               list->tail = NULL;
-       
-       spin_unlock_irqrestore(&list->spinlock,flags);
+
+       spin_unlock_irqrestore(&buf_list->spinlock, flags);
         return buf;
-       
  }      /* end of n_hdlc_buf_get() */
  
  static char hdlc_banner[] __initdata =
diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c

index 6ee55a2d47bb429f73cf9d2b2711b109f6746a3d..e65808c482f1847d09d819a24defb0e5cf6508b5 100644 (file)
--- a/drivers/tty/serial/8250/8250_dw.c
+++ b/drivers/tty/serial/8250/8250_dw.c
@@ -257,7 +257,7 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
  {
         unsigned int baud = tty_termios_baud_rate(termios);
         struct dw8250_data *d = p->private_data;
-       unsigned int rate;
+       long rate;
         int ret;
  
         if (IS_ERR(d->clk) || !old)
@@ -265,7 +265,12 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
  
         clk_disable_unprepare(d->clk);
         rate = clk_round_rate(d->clk, baud * 16);
-       ret = clk_set_rate(d->clk, rate);
+       if (rate < 0)
+               ret = rate;
+       else if (rate == 0)
+               ret = -ENOENT;
+       else
+               ret = clk_set_rate(d->clk, rate);
         clk_prepare_enable(d->clk);
  
         if (!ret)
diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig

index a65fb8197aecb6af029bc4469d5920618dd39359..0e3f529d50e9d07bd684c92ddda4878ac553b9e7 100644 (file)
--- a/drivers/tty/serial/8250/Kconfig
+++ b/drivers/tty/serial/8250/Kconfig
@@ -128,9 +128,13 @@ config SERIAL_8250_PCI
           by the parport_serial driver, enabled with CONFIG_PARPORT_SERIAL.
  
  config SERIAL_8250_EXAR
-        tristate "8250/16550 PCI device support"
-        depends on SERIAL_8250_PCI
+       tristate "8250/16550 Exar/Commtech PCI/PCIe device support"
+       depends on SERIAL_8250_PCI
         default SERIAL_8250
+       help
+         This builds support for XR17C1xx, XR17V3xx and some Commtech
+         422x PCIe serial cards that are not covered by the more generic
+         SERIAL_8250_PCI option.
  
  config SERIAL_8250_HP300
         tristate
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c

index 8789ea423ccfd1054d9d5433538d0a8e466525dd..b0a377725d636c11cddad62470d821a31f1fff96 100644 (file)
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -2373,7 +2373,7 @@ static int __init pl011_console_match(struct console *co, char *name, int idx,
         if (strcmp(name, "qdf2400_e44") == 0) {
                 pr_info_once("UART: Working around QDF2400 SoC erratum 44");
                 qdf2400_e44_present = true;
-       } else if (strcmp(name, "pl011") != 0 || strcmp(name, "ttyAMA") != 0) {
+       } else if (strcmp(name, "pl011") != 0) {
                 return -ENODEV;
         }
  
@@ -2452,18 +2452,37 @@ static void pl011_early_write(struct console *con, const char *s, unsigned n)
         uart_console_write(&dev->port, s, n, pl011_putc);
  }
  
+/*
+ * On non-ACPI systems, earlycon is enabled by specifying
+ * "earlycon=pl011,<address>" on the kernel command line.
+ *
+ * On ACPI ARM64 systems, an "early" console is enabled via the SPCR table,
+ * by specifying only "earlycon" on the command line.  Because it requires
+ * SPCR, the console starts after ACPI is parsed, which is later than a
+ * traditional early console.
+ *
+ * To get the traditional early console that starts before ACPI is parsed,
+ * specify the full "earlycon=pl011,<address>" option.
+ */
  static int __init pl011_early_console_setup(struct earlycon_device *device,
                                             const char *opt)
  {
         if (!device->port.membase)
                 return -ENODEV;
  
-       device->con->write = qdf2400_e44_present ?
-                               qdf2400_e44_early_write : pl011_early_write;
+       /* On QDF2400 SOCs affected by Erratum 44, the "qdf2400_e44" must
+        * also be specified, e.g. "earlycon=pl011,<address>,qdf2400_e44".
+        */
+       if (!strcmp(device->options, "qdf2400_e44"))
+               device->con->write = qdf2400_e44_early_write;
+       else
+               device->con->write = pl011_early_write;
+
         return 0;
  }
  OF_EARLYCON_DECLARE(pl011, "arm,pl011", pl011_early_console_setup);
  OF_EARLYCON_DECLARE(pl011, "arm,sbsa-uart", pl011_early_console_setup);
+EARLYCON_DECLARE(qdf2400_e44, pl011_early_console_setup);
  
  #else
  #define AMBA_CONSOLE   NULL
diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c

index dcebb28ffbc412d5282ef32b7f3c133ff984ab46..1f50a83ef958609c3f27473b135e84e65303330d 100644 (file)
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -1951,6 +1951,11 @@ static void atmel_flush_buffer(struct uart_port *port)
                 atmel_uart_writel(port, ATMEL_PDC_TCR, 0);
                 atmel_port->pdc_tx.ofs = 0;
         }
+       /*
+        * in uart_flush_buffer(), the xmit circular buffer has just
+        * been cleared, so we have to reset tx_len accordingly.
+        */
+       atmel_port->tx_len = 0;
  }
  
  /*
@@ -2483,6 +2488,9 @@ static void atmel_console_write(struct console *co, const char *s, u_int count)
         pdc_tx = atmel_uart_readl(port, ATMEL_PDC_PTSR) & ATMEL_PDC_TXTEN;
         atmel_uart_writel(port, ATMEL_PDC_PTCR, ATMEL_PDC_TXTDIS);
  
+       /* Make sure that tx path is actually able to send characters */
+       atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN);
+
         uart_console_write(port, s, count, atmel_console_putchar);
  
         /*
diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c

index 6989b227d1349baeb0cb05ed9f7cff8572e36324..be94246b6fcca1874161470035b9d9bd91237f88 100644 (file)
--- a/drivers/tty/serial/mxs-auart.c
+++ b/drivers/tty/serial/mxs-auart.c
@@ -1088,7 +1088,7 @@ static void mxs_auart_settermios(struct uart_port *u,
                                         AUART_LINECTRL_BAUD_DIV_MAX);
                 baud_max = u->uartclk * 32 / AUART_LINECTRL_BAUD_DIV_MIN;
                 baud = uart_get_baud_rate(u, termios, old, baud_min, baud_max);
-               div = u->uartclk * 32 / baud;
+               div = DIV_ROUND_CLOSEST(u->uartclk * 32, baud);
         }
  
         ctrl |= AUART_LINECTRL_BAUD_DIVFRAC(div & 0x3F);
diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c

index b4f86c219db1e0f46047471ee47588f7a1cd0567..7a17aedbf902e05034129a832941f27fe5dc38c8 100644 (file)
--- a/drivers/tty/serial/samsung.c
+++ b/drivers/tty/serial/samsung.c
@@ -1031,8 +1031,10 @@ static int s3c64xx_serial_startup(struct uart_port *port)
         if (ourport->dma) {
                 ret = s3c24xx_serial_request_dma(ourport);
                 if (ret < 0) {
-                       dev_warn(port->dev, "DMA request failed\n");
-                       return ret;
+                       dev_warn(port->dev,
+                                "DMA request failed, DMA will not be used\n");
+                       devm_kfree(port->dev, ourport->dma);
+                       ourport->dma = NULL;
                 }
         }
  
diff --git a/drivers/tty/serial/st-asc.c b/drivers/tty/serial/st-asc.c

index bcf1d33e6ffe0b3cb9952e88046658df1358543b..c334bcc59c649eedc2933ac29c4dc1ef45ae21d2 100644 (file)
--- a/drivers/tty/serial/st-asc.c
+++ b/drivers/tty/serial/st-asc.c
@@ -575,12 +575,13 @@ static void asc_set_termios(struct uart_port *port, struct ktermios *termios,
                         pinctrl_select_state(ascport->pinctrl,
                                              ascport->states[NO_HW_FLOWCTRL]);
  
-                       gpiod = devm_get_gpiod_from_child(port->dev, "rts",
-                                                         &np->fwnode);
-                       if (!IS_ERR(gpiod)) {
-                               gpiod_direction_output(gpiod, 0);
+                       gpiod = devm_fwnode_get_gpiod_from_child(port->dev,
+                                                                "rts",
+                                                                &np->fwnode,
+                                                                GPIOD_OUT_LOW,
+                                                                np->name);
+                       if (!IS_ERR(gpiod))
                                 ascport->rts = gpiod;
-                       }
                 }
         }
  
diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c

index 68947f6de5ad6339adea804182597229c3eb1d38..e4603b09863a8fa5ffd4467e81a463538f0533bb 100644 (file)
--- a/drivers/tty/tty_ldisc.c
+++ b/drivers/tty/tty_ldisc.c
@@ -271,10 +271,13 @@ const struct file_operations tty_ldiscs_proc_fops = {
  
  struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty)
  {
+       struct tty_ldisc *ld;
+
         ldsem_down_read(&tty->ldisc_sem, MAX_SCHEDULE_TIMEOUT);
-       if (!tty->ldisc)
+       ld = tty->ldisc;
+       if (!ld)
                 ldsem_up_read(&tty->ldisc_sem);
-       return tty->ldisc;
+       return ld;
  }
  EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait);
  
diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c

index c5f0fc906136b580b23df4e3708fd04d2c01e27b..8af8d9542663379ef1367ceb871f4753314bc984 100644 (file)
--- a/drivers/tty/vt/keyboard.c
+++ b/drivers/tty/vt/keyboard.c
@@ -28,7 +28,6 @@
  #include <linux/module.h>
  #include <linux/sched/signal.h>
  #include <linux/sched/debug.h>
-#include <linux/sched/debug.h>
  #include <linux/tty.h>
  #include <linux/tty_flip.h>
  #include <linux/mm.h>
diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c

index f03692ec552056845c6fa50947e38abca47ea66b..8fb309a0ff6b5dae0c6f867cd323585aeff17252 100644 (file)
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -1381,7 +1381,7 @@ static int usbtmc_probe(struct usb_interface *intf,
  
         dev_dbg(&intf->dev, "%s called\n", __func__);
  
-       data = kmalloc(sizeof(*data), GFP_KERNEL);
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
         if (!data)
                 return -ENOMEM;
  
@@ -1444,6 +1444,13 @@ static int usbtmc_probe(struct usb_interface *intf,
                         break;
                 }
         }
+
+       if (!data->bulk_out || !data->bulk_in) {
+               dev_err(&intf->dev, "bulk endpoints not found\n");
+               retcode = -ENODEV;
+               goto err_put;
+       }
+
         /* Find int endpoint */
         for (n = 0; n < iface_desc->desc.bNumEndpoints; n++) {
                 endpoint = &iface_desc->endpoint[n].desc;
@@ -1469,8 +1476,10 @@ static int usbtmc_probe(struct usb_interface *intf,
         if (data->iin_ep_present) {
                 /* allocate int urb */
                 data->iin_urb = usb_alloc_urb(0, GFP_KERNEL);
-               if (!data->iin_urb)
+               if (!data->iin_urb) {
+                       retcode = -ENOMEM;
                         goto error_register;
+               }
  
                 /* Protect interrupt in endpoint data until iin_urb is freed */
                 kref_get(&data->kref);
@@ -1478,8 +1487,10 @@ static int usbtmc_probe(struct usb_interface *intf,
                 /* allocate buffer for interrupt in */
                 data->iin_buffer = kmalloc(data->iin_wMaxPacketSize,
                                         GFP_KERNEL);
-               if (!data->iin_buffer)
+               if (!data->iin_buffer) {
+                       retcode = -ENOMEM;
                         goto error_register;
+               }
  
                 /* fill interrupt urb */
                 usb_fill_int_urb(data->iin_urb, data->usb_dev,
@@ -1512,6 +1523,7 @@ error_register:
         sysfs_remove_group(&intf->dev.kobj, &capability_attr_grp);
         sysfs_remove_group(&intf->dev.kobj, &data_attr_grp);
         usbtmc_free_int(data);
+err_put:
         kref_put(&data->kref, usbtmc_delete);
         return retcode;
  }
diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c

index 25dbd8c7aec73345d357c2a75ff0cde26c918217..4be52c602e9b7a7de6a76ecb4be686cb12ec9950 100644 (file)
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -280,6 +280,16 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
  
                         /*
                          * Adjust bInterval for quirked devices.
+                        */
+                       /*
+                        * This quirk fixes bIntervals reported in ms.
+                        */
+                       if (to_usb_device(ddev)->quirks &
+                               USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL) {
+                               n = clamp(fls(d->bInterval) + 3, i, j);
+                               i = j = n;
+                       }
+                       /*
                          * This quirk fixes bIntervals reported in
                          * linear microframes.
                          */
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c

index 612fab6e54fb84f2d84372d202c77823fc3be921..79bdca5cb9c7ae8f01b1f4a25c7ceacd9c716e2c 100644 (file)
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -520,8 +520,10 @@ static int rh_call_control (struct usb_hcd *hcd, struct urb *urb)
          */
         tbuf_size =  max_t(u16, sizeof(struct usb_hub_descriptor), wLength);
         tbuf = kzalloc(tbuf_size, GFP_KERNEL);
-       if (!tbuf)
-               return -ENOMEM;
+       if (!tbuf) {
+               status = -ENOMEM;
+               goto err_alloc;
+       }
  
         bufp = tbuf;
  
@@ -734,6 +736,7 @@ error:
         }
  
         kfree(tbuf);
+ err_alloc:
  
         /* any errors get returned through the urb completion */
         spin_lock_irq(&hcd_root_hub_lock);
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c

index f0dd08198d7426b9973bb676bca12b8cb76d7e7b..5286bf67869a83e1d7e1d3f1ca0ebc87db5cf7a4 100644 (file)
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -4275,7 +4275,7 @@ static void hub_set_initial_usb2_lpm_policy(struct usb_device *udev)
         struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent);
         int connect_type = USB_PORT_CONNECT_TYPE_UNKNOWN;
  
-       if (!udev->usb2_hw_lpm_capable)
+       if (!udev->usb2_hw_lpm_capable || !udev->bos)
                 return;
  
         if (hub)
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c

index 24f9f98968a5d860f83920287a5b7deb4c98bed6..96b21b0dac1e8c15fb20c19c85d13a58ab95b285 100644 (file)
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -170,6 +170,14 @@ static const struct usb_device_id usb_quirk_list[] = {
         /* M-Systems Flash Disk Pioneers */
         { USB_DEVICE(0x08ec, 0x1000), .driver_info = USB_QUIRK_RESET_RESUME },
  
+       /* Baum Vario Ultra */
+       { USB_DEVICE(0x0904, 0x6101), .driver_info =
+                       USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL },
+       { USB_DEVICE(0x0904, 0x6102), .driver_info =
+                       USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL },
+       { USB_DEVICE(0x0904, 0x6103), .driver_info =
+                       USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL },
+
         /* Keytouch QWERTY Panel keyboard */
         { USB_DEVICE(0x0926, 0x3333), .driver_info =
                         USB_QUIRK_CONFIG_INTF_STRINGS },
diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c

index 2092e46b1380e91712f3aab90434ba157dde809e..f8d0747810e78d7cc7930fed1b71cfc9c5aeb048 100644 (file)
--- a/drivers/usb/dwc3/dwc3-omap.c
+++ b/drivers/usb/dwc3/dwc3-omap.c
@@ -250,6 +250,7 @@ static void dwc3_omap_set_mailbox(struct dwc3_omap *omap,
                 val = dwc3_omap_read_utmi_ctrl(omap);
                 val |= USBOTGSS_UTMI_OTG_CTRL_IDDIG;
                 dwc3_omap_write_utmi_ctrl(omap, val);
+               break;
  
         case OMAP_DWC3_VBUS_OFF:
                 val = dwc3_omap_read_utmi_ctrl(omap);
@@ -392,7 +393,7 @@ static void dwc3_omap_set_utmi_mode(struct dwc3_omap *omap)
  {
         u32                     reg;
         struct device_node      *node = omap->dev->of_node;
-       int                     utmi_mode = 0;
+       u32                     utmi_mode = 0;
  
         reg = dwc3_omap_read_utmi_ctrl(omap);
  
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c

index 4db97ecae8859ba0bc03c91c4948fc7af205390f..79e7a3480d51b07abf3988a51f1790f6caec7ca3 100644 (file)
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -171,6 +171,7 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req,
                 int status)
  {
         struct dwc3                     *dwc = dep->dwc;
+       unsigned int                    unmap_after_complete = false;
  
         req->started = false;
         list_del(&req->list);
@@ -180,11 +181,19 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req,
         if (req->request.status == -EINPROGRESS)
                 req->request.status = status;
  
-       if (dwc->ep0_bounced && dep->number <= 1)
+       /*
+        * NOTICE we don't want to unmap before calling ->complete() if we're
+        * dealing with a bounced ep0 request. If we unmap it here, we would end
+        * up overwriting the contents of req->buf and this could confuse the
+        * gadget driver.
+        */
+       if (dwc->ep0_bounced && dep->number <= 1) {
                 dwc->ep0_bounced = false;
-
-       usb_gadget_unmap_request_by_dev(dwc->sysdev,
-                       &req->request, req->direction);
+               unmap_after_complete = true;
+       } else {
+               usb_gadget_unmap_request_by_dev(dwc->sysdev,
+                               &req->request, req->direction);
+       }
  
         trace_dwc3_gadget_giveback(req);
  
@@ -192,6 +201,10 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req,
         usb_gadget_giveback_request(&dep->endpoint, &req->request);
         spin_lock(&dwc->lock);
  
+       if (unmap_after_complete)
+               usb_gadget_unmap_request_by_dev(dwc->sysdev,
+                               &req->request, req->direction);
+
         if (dep->number > 1)
                 pm_runtime_put(dwc->dev);
  }
@@ -1342,6 +1355,68 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep,
                 if (r == req) {
                         /* wait until it is processed */
                         dwc3_stop_active_transfer(dwc, dep->number, true);
+
+                       /*
+                        * If request was already started, this means we had to
+                        * stop the transfer. With that we also need to ignore
+                        * all TRBs used by the request, however TRBs can only
+                        * be modified after completion of END_TRANSFER
+                        * command. So what we do here is that we wait for
+                        * END_TRANSFER completion and only after that, we jump
+                        * over TRBs by clearing HWO and incrementing dequeue
+                        * pointer.
+                        *
+                        * Note that we have 2 possible types of transfers here:
+                        *
+                        * i) Linear buffer request
+                        * ii) SG-list based request
+                        *
+                        * SG-list based requests will have r->num_pending_sgs
+                        * set to a valid number (> 0). Linear requests
+                        * normally use a single TRB.
+                        *
+                        * For each of these two cases, if r->unaligned flag is
+                        * set, one extra TRB has been used to align transfer
+                        * size to wMaxPacketSize.
+                        *
+                        * All of these cases need to be taken into
+                        * consideration so we don't mess up our TRB ring
+                        * pointers.
+                        */
+                       wait_event_lock_irq(dep->wait_end_transfer,
+                                       !(dep->flags & DWC3_EP_END_TRANSFER_PENDING),
+                                       dwc->lock);
+
+                       if (!r->trb)
+                               goto out1;
+
+                       if (r->num_pending_sgs) {
+                               struct dwc3_trb *trb;
+                               int i = 0;
+
+                               for (i = 0; i < r->num_pending_sgs; i++) {
+                                       trb = r->trb + i;
+                                       trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
+                                       dwc3_ep_inc_deq(dep);
+                               }
+
+                               if (r->unaligned) {
+                                       trb = r->trb + r->num_pending_sgs + 1;
+                                       trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
+                                       dwc3_ep_inc_deq(dep);
+                               }
+                       } else {
+                               struct dwc3_trb *trb = r->trb;
+
+                               trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
+                               dwc3_ep_inc_deq(dep);
+
+                               if (r->unaligned) {
+                                       trb = r->trb + 1;
+                                       trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
+                                       dwc3_ep_inc_deq(dep);
+                               }
+                       }
                         goto out1;
                 }
                 dev_err(dwc->dev, "request %p was not queued to %s\n",
@@ -1352,6 +1427,7 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep,
  
  out1:
         /* giveback the request */
+       dep->queued_requests--;
         dwc3_gadget_giveback(dep, req, -ECONNRESET);
  
  out0:
@@ -2126,12 +2202,12 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep,
                 return 1;
         }
  
-       if ((trb->ctrl & DWC3_TRB_CTRL_HWO) && status != -ESHUTDOWN)
-               return 1;
-
         count = trb->size & DWC3_TRB_SIZE_MASK;
         req->remaining += count;
  
+       if ((trb->ctrl & DWC3_TRB_CTRL_HWO) && status != -ESHUTDOWN)
+               return 1;
+
         if (dep->direction) {
                 if (count) {
                         trb_status = DWC3_TRB_SIZE_TRBSTS(trb->size);
@@ -3228,15 +3304,10 @@ void dwc3_gadget_exit(struct dwc3 *dwc)
  
  int dwc3_gadget_suspend(struct dwc3 *dwc)
  {
-       int ret;
-
         if (!dwc->gadget_driver)
                 return 0;
  
-       ret = dwc3_gadget_run_stop(dwc, false, false);
-       if (ret < 0)
-               return ret;
-
+       dwc3_gadget_run_stop(dwc, false, false);
         dwc3_disconnect_gadget(dwc);
         __dwc3_gadget_stop(dwc);
  
diff --git a/drivers/usb/dwc3/gadget.h b/drivers/usb/dwc3/gadget.h

index 3129bcf74d7d8de7ffe8d23923a7ba34d4e5cff9..265e223ab64554f6be78d37d15e36a86bb571c74 100644 (file)
--- a/drivers/usb/dwc3/gadget.h
+++ b/drivers/usb/dwc3/gadget.h
@@ -28,23 +28,23 @@ struct dwc3;
  #define gadget_to_dwc(g)       (container_of(g, struct dwc3, gadget))
  
  /* DEPCFG parameter 1 */
-#define DWC3_DEPCFG_INT_NUM(n)         ((n) << 0)
+#define DWC3_DEPCFG_INT_NUM(n)         (((n) & 0x1f) << 0)
  #define DWC3_DEPCFG_XFER_COMPLETE_EN   (1 << 8)
  #define DWC3_DEPCFG_XFER_IN_PROGRESS_EN        (1 << 9)
  #define DWC3_DEPCFG_XFER_NOT_READY_EN  (1 << 10)
  #define DWC3_DEPCFG_FIFO_ERROR_EN      (1 << 11)
  #define DWC3_DEPCFG_STREAM_EVENT_EN    (1 << 13)
-#define DWC3_DEPCFG_BINTERVAL_M1(n)    ((n) << 16)
+#define DWC3_DEPCFG_BINTERVAL_M1(n)    (((n) & 0xff) << 16)
  #define DWC3_DEPCFG_STREAM_CAPABLE     (1 << 24)
-#define DWC3_DEPCFG_EP_NUMBER(n)       ((n) << 25)
+#define DWC3_DEPCFG_EP_NUMBER(n)       (((n) & 0x1f) << 25)
  #define DWC3_DEPCFG_BULK_BASED         (1 << 30)
  #define DWC3_DEPCFG_FIFO_BASED         (1 << 31)
  
  /* DEPCFG parameter 0 */
-#define DWC3_DEPCFG_EP_TYPE(n)         ((n) << 1)
-#define DWC3_DEPCFG_MAX_PACKET_SIZE(n) ((n) << 3)
-#define DWC3_DEPCFG_FIFO_NUMBER(n)     ((n) << 17)
-#define DWC3_DEPCFG_BURST_SIZE(n)      ((n) << 22)
+#define DWC3_DEPCFG_EP_TYPE(n)         (((n) & 0x3) << 1)
+#define DWC3_DEPCFG_MAX_PACKET_SIZE(n) (((n) & 0x7ff) << 3)
+#define DWC3_DEPCFG_FIFO_NUMBER(n)     (((n) & 0x1f) << 17)
+#define DWC3_DEPCFG_BURST_SIZE(n)      (((n) & 0xf) << 22)
  #define DWC3_DEPCFG_DATA_SEQ_NUM(n)    ((n) << 26)
  /* This applies for core versions earlier than 1.94a */
  #define DWC3_DEPCFG_IGN_SEQ_NUM                (1 << 31)
diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c

index 78c44979dde382ca8c58e9f9f84024180ce9663d..cbff3b02840df901ca0ca03c646b14f5f6085719 100644 (file)
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -269,6 +269,7 @@ static ssize_t gadget_dev_desc_UDC_store(struct config_item *item,
                 ret = unregister_gadget(gi);
                 if (ret)
                         goto err;
+               kfree(name);
         } else {
                 if (gi->composite.gadget_driver.udc_name) {
                         ret = -EBUSY;
diff --git a/drivers/usb/gadget/function/f_acm.c b/drivers/usb/gadget/function/f_acm.c

index a30766ca422644ce91be7660b8822b4a7357f7f3..5e3828d9dac7f3af922456d141191ddd0733bbaf 100644 (file)
--- a/drivers/usb/gadget/function/f_acm.c
+++ b/drivers/usb/gadget/function/f_acm.c
@@ -535,13 +535,15 @@ static int acm_notify_serial_state(struct f_acm *acm)
  {
         struct usb_composite_dev *cdev = acm->port.func.config->cdev;
         int                     status;
+       __le16                  serial_state;
  
         spin_lock(&acm->lock);
         if (acm->notify_req) {
                 dev_dbg(&cdev->gadget->dev, "acm ttyGS%d serial state %04x\n",
                         acm->port_num, acm->serial_state);
+               serial_state = cpu_to_le16(acm->serial_state);
                 status = acm_cdc_notify(acm, USB_CDC_NOTIFY_SERIAL_STATE,
-                               0, &acm->serial_state, sizeof(acm->serial_state));
+                               0, &serial_state, sizeof(acm->serial_state));
         } else {
                 acm->pending = true;
                 status = 0;
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c

index a5b7cd6156987a80ad74ba73bde16a30de4e2bf9..a0085571824d9b4352c7245625a30c3248d789f5 100644 (file)
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1834,11 +1834,14 @@ static int ffs_func_eps_enable(struct ffs_function *func)
         spin_lock_irqsave(&func->ffs->eps_lock, flags);
         while(count--) {
                 struct usb_endpoint_descriptor *ds;
+               struct usb_ss_ep_comp_descriptor *comp_desc = NULL;
+               int needs_comp_desc = false;
                 int desc_idx;
  
-               if (ffs->gadget->speed == USB_SPEED_SUPER)
+               if (ffs->gadget->speed == USB_SPEED_SUPER) {
                         desc_idx = 2;
-               else if (ffs->gadget->speed == USB_SPEED_HIGH)
+                       needs_comp_desc = true;
+               } else if (ffs->gadget->speed == USB_SPEED_HIGH)
                         desc_idx = 1;
                 else
                         desc_idx = 0;
@@ -1855,6 +1858,14 @@ static int ffs_func_eps_enable(struct ffs_function *func)
  
                 ep->ep->driver_data = ep;
                 ep->ep->desc = ds;
+
+               comp_desc = (struct usb_ss_ep_comp_descriptor *)(ds +
+                               USB_DT_ENDPOINT_SIZE);
+               ep->ep->maxburst = comp_desc->bMaxBurst + 1;
+
+               if (needs_comp_desc)
+                       ep->ep->comp_desc = comp_desc;
+
                 ret = usb_ep_enable(ep->ep);
                 if (likely(!ret)) {
                         epfile->ep = ep;
@@ -2253,7 +2264,7 @@ static int __ffs_data_do_os_desc(enum ffs_os_desc_type type,
  
                 if (len < sizeof(*d) ||
                     d->bFirstInterfaceNumber >= ffs->interfaces_count ||
-                   d->Reserved1)
+                   !d->Reserved1)
                         return -EINVAL;
                 for (i = 0; i < ARRAY_SIZE(d->Reserved2); ++i)
                         if (d->Reserved2[i])
diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c

index 89b48bcc377a16d426d6f5826a2e1ce5301124b6..5eea44823ca06d06955eb2bc51782cac6cd345ec 100644 (file)
--- a/drivers/usb/gadget/function/f_hid.c
+++ b/drivers/usb/gadget/function/f_hid.c
@@ -367,7 +367,7 @@ try_again:
         count  = min_t(unsigned, count, hidg->report_length);
  
         spin_unlock_irqrestore(&hidg->write_spinlock, flags);
-       status = copy_from_user(hidg->req->buf, buffer, count);
+       status = copy_from_user(req->buf, buffer, count);
  
         if (status != 0) {
                 ERROR(hidg->func.config->cdev,
@@ -378,9 +378,9 @@ try_again:
  
         spin_lock_irqsave(&hidg->write_spinlock, flags);
  
-       /* we our function has been disabled by host */
+       /* when our function has been disabled by host */
         if (!hidg->req) {
-               free_ep_req(hidg->in_ep, hidg->req);
+               free_ep_req(hidg->in_ep, req);
                 /*
                  * TODO
                  * Should we fail with error here?
@@ -394,7 +394,7 @@ try_again:
         req->complete = f_hidg_req_complete;
         req->context  = hidg;
  
-       status = usb_ep_queue(hidg->in_ep, hidg->req, GFP_ATOMIC);
+       status = usb_ep_queue(hidg->in_ep, req, GFP_ATOMIC);
         if (status < 0) {
                 ERROR(hidg->func.config->cdev,
                         "usb_ep_queue error on int endpoint %zd\n", status);
diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c

index d2351139342f6200209078e769e04f5ea1eb2d1f..a82e2bd5ea34d97996cb79ba5b72d09aed86e09a 100644 (file)
--- a/drivers/usb/gadget/function/f_tcm.c
+++ b/drivers/usb/gadget/function/f_tcm.c
@@ -373,7 +373,7 @@ static void bot_cleanup_old_alt(struct f_uas *fu)
         usb_ep_free_request(fu->ep_in, fu->bot_req_in);
         usb_ep_free_request(fu->ep_out, fu->bot_req_out);
         usb_ep_free_request(fu->ep_out, fu->cmd.req);
-       usb_ep_free_request(fu->ep_out, fu->bot_status.req);
+       usb_ep_free_request(fu->ep_in, fu->bot_status.req);
  
         kfree(fu->cmd.buf);
  
diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c

index 27ed51b5082f66de17c41761f260a4b96dcc0f33..f8a1881609a2c808f690f6d31ea6bcaf118b8bb4 100644 (file)
--- a/drivers/usb/gadget/function/f_uvc.c
+++ b/drivers/usb/gadget/function/f_uvc.c
@@ -258,13 +258,6 @@ uvc_function_setup(struct usb_function *f, const struct usb_ctrlrequest *ctrl)
         memcpy(&uvc_event->req, ctrl, sizeof(uvc_event->req));
         v4l2_event_queue(&uvc->vdev, &v4l2_event);
  
-       /* Pass additional setup data to userspace */
-       if (uvc->event_setup_out && uvc->event_length) {
-               uvc->control_req->length = uvc->event_length;
-               return usb_ep_queue(uvc->func.config->cdev->gadget->ep0,
-                       uvc->control_req, GFP_ATOMIC);
-       }
-
         return 0;
  }
  
@@ -601,6 +594,14 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f)
         opts->streaming_maxpacket = clamp(opts->streaming_maxpacket, 1U, 3072U);
         opts->streaming_maxburst = min(opts->streaming_maxburst, 15U);
  
+       /* For SS, wMaxPacketSize has to be 1024 if bMaxBurst is not 0 */
+       if (opts->streaming_maxburst &&
+           (opts->streaming_maxpacket % 1024) != 0) {
+               opts->streaming_maxpacket = roundup(opts->streaming_maxpacket, 1024);
+               INFO(cdev, "overriding streaming_maxpacket to %d\n",
+                    opts->streaming_maxpacket);
+       }
+
         /* Fill in the FS/HS/SS Video Streaming specific descriptors from the
          * module parameters.
          *
@@ -632,7 +633,7 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f)
         uvc_ss_streaming_comp.bMaxBurst = opts->streaming_maxburst;
         uvc_ss_streaming_comp.wBytesPerInterval =
                 cpu_to_le16(max_packet_size * max_packet_mult *
-                           opts->streaming_maxburst);
+                           (opts->streaming_maxburst + 1));
  
         /* Allocate endpoints. */
         ep = usb_ep_autoconfig(cdev->gadget, &uvc_control_ep);
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c

index a2615d64d07c1967d7cd2c25ab2e046747f6bd7d..a2c916869293720e378ced6b265532846eca52a3 100644 (file)
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -84,8 +84,7 @@ static int ep_open(struct inode *, struct file *);
  
  /* /dev/gadget/$CHIP represents ep0 and the whole device */
  enum ep0_state {
-       /* DISBLED is the initial state.
-        */
+       /* DISABLED is the initial state. */
         STATE_DEV_DISABLED = 0,
  
         /* Only one open() of /dev/gadget/$CHIP; only one file tracks
@@ -1782,8 +1781,10 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
  
         spin_lock_irq (&dev->lock);
         value = -EINVAL;
-       if (dev->buf)
+       if (dev->buf) {
+               kfree(kbuf);
                 goto fail;
+       }
         dev->buf = kbuf;
  
         /* full or low speed config */
diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c

index 11bbce28bc231b701bef74cc38b99e5dbd3cc6ca..2035906b8ced173c2e869a3272334d4265acf79c 100644 (file)
--- a/drivers/usb/gadget/udc/atmel_usba_udc.c
+++ b/drivers/usb/gadget/udc/atmel_usba_udc.c
@@ -610,7 +610,7 @@ usba_ep_enable(struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc)
  {
         struct usba_ep *ep = to_usba_ep(_ep);
         struct usba_udc *udc = ep->udc;
-       unsigned long flags, ept_cfg, maxpacket;
+       unsigned long flags, maxpacket;
         unsigned int nr_trans;
  
         DBG(DBG_GADGET, "%s: ep_enable: desc=%p\n", ep->ep.name, desc);
@@ -630,7 +630,7 @@ usba_ep_enable(struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc)
         ep->is_in = 0;
  
         DBG(DBG_ERR, "%s: EPT_CFG = 0x%lx (maxpacket = %lu)\n",
-                       ep->ep.name, ept_cfg, maxpacket);
+                       ep->ep.name, ep->ept_cfg, maxpacket);
  
         if (usb_endpoint_dir_in(desc)) {
                 ep->is_in = 1;
diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c

index c60abe3a68f9cf48c21831d40bc32c4e9390b892..8cabc5944d5f1d834db7dd2186777cd79536016b 100644 (file)
--- a/drivers/usb/gadget/udc/dummy_hcd.c
+++ b/drivers/usb/gadget/udc/dummy_hcd.c
@@ -1031,6 +1031,8 @@ static int dummy_udc_probe(struct platform_device *pdev)
         int             rc;
  
         dum = *((void **)dev_get_platdata(&pdev->dev));
+       /* Clear usb_gadget region for new registration to udc-core */
+       memzero_explicit(&dum->gadget, sizeof(struct usb_gadget));
         dum->gadget.name = gadget_name;
         dum->gadget.ops = &dummy_ops;
         dum->gadget.max_speed = USB_SPEED_SUPER;
diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c

index 85504419ab312e58a83c52d524bfdebadf5e1ed7..3828c2ec8623b155c9948ae90dcebdc4a0106ce6 100644 (file)
--- a/drivers/usb/gadget/udc/net2280.c
+++ b/drivers/usb/gadget/udc/net2280.c
@@ -1146,15 +1146,15 @@ static int scan_dma_completions(struct net2280_ep *ep)
          */
         while (!list_empty(&ep->queue)) {
                 struct net2280_request  *req;
-               u32                     tmp;
+               u32 req_dma_count;
  
                 req = list_entry(ep->queue.next,
                                 struct net2280_request, queue);
                 if (!req->valid)
                         break;
                 rmb();
-               tmp = le32_to_cpup(&req->td->dmacount);
-               if ((tmp & BIT(VALID_BIT)) != 0)
+               req_dma_count = le32_to_cpup(&req->td->dmacount);
+               if ((req_dma_count & BIT(VALID_BIT)) != 0)
                         break;
  
                 /* SHORT_PACKET_TRANSFERRED_INTERRUPT handles "usb-short"
@@ -1163,40 +1163,41 @@ static int scan_dma_completions(struct net2280_ep *ep)
                  */
                 if (unlikely(req->td->dmadesc == 0)) {
                         /* paranoia */
-                       tmp = readl(&ep->dma->dmacount);
-                       if (tmp & DMA_BYTE_COUNT_MASK)
+                       u32 const ep_dmacount = readl(&ep->dma->dmacount);
+
+                       if (ep_dmacount & DMA_BYTE_COUNT_MASK)
                                 break;
                         /* single transfer mode */
-                       dma_done(ep, req, tmp, 0);
+                       dma_done(ep, req, req_dma_count, 0);
                         num_completed++;
                         break;
                 } else if (!ep->is_in &&
                            (req->req.length % ep->ep.maxpacket) &&
                            !(ep->dev->quirks & PLX_PCIE)) {
  
-                       tmp = readl(&ep->regs->ep_stat);
+                       u32 const ep_stat = readl(&ep->regs->ep_stat);
                         /* AVOID TROUBLE HERE by not issuing short reads from
                          * your gadget driver.  That helps avoids errata 0121,
                          * 0122, and 0124; not all cases trigger the warning.
                          */
-                       if ((tmp & BIT(NAK_OUT_PACKETS)) == 0) {
+                       if ((ep_stat & BIT(NAK_OUT_PACKETS)) == 0) {
                                 ep_warn(ep->dev, "%s lost packet sync!\n",
                                                 ep->ep.name);
                                 req->req.status = -EOVERFLOW;
                         } else {
-                               tmp = readl(&ep->regs->ep_avail);
-                               if (tmp) {
+                               u32 const ep_avail = readl(&ep->regs->ep_avail);
+                               if (ep_avail) {
                                         /* fifo gets flushed later */
                                         ep->out_overflow = 1;
                                         ep_dbg(ep->dev,
                                                 "%s dma, discard %d len %d\n",
-                                               ep->ep.name, tmp,
+                                               ep->ep.name, ep_avail,
                                                 req->req.length);
                                         req->req.status = -EOVERFLOW;
                                 }
                         }
                 }
-               dma_done(ep, req, tmp, 0);
+               dma_done(ep, req, req_dma_count, 0);
                 num_completed++;
         }
  
diff --git a/drivers/usb/gadget/udc/pch_udc.c b/drivers/usb/gadget/udc/pch_udc.c

index a97da645c1b9eaecc5e5bb0bcb5ad25857c43ec8..8a365aad66fe2e38eaf0a869ae0f774349f694f2 100644 (file)
--- a/drivers/usb/gadget/udc/pch_udc.c
+++ b/drivers/usb/gadget/udc/pch_udc.c
@@ -1523,7 +1523,6 @@ static void pch_udc_free_dma_chain(struct pch_udc_dev *dev,
                 td = phys_to_virt(addr);
                 addr2 = (dma_addr_t)td->next;
                 pci_pool_free(dev->data_requests, td, addr);
-               td->next = 0x00;
                 addr = addr2;
         }
         req->chain_len = 1;
diff --git a/drivers/usb/gadget/udc/pxa27x_udc.c b/drivers/usb/gadget/udc/pxa27x_udc.c

index e1335ad5bce9f2c96dc729e14ede027994b8b381..832c4fdbe98512a2b70b6b9e9424acc21a09b83b 100644 (file)
--- a/drivers/usb/gadget/udc/pxa27x_udc.c
+++ b/drivers/usb/gadget/udc/pxa27x_udc.c
@@ -2534,9 +2534,10 @@ static int pxa_udc_remove(struct platform_device *_dev)
         usb_del_gadget_udc(&udc->gadget);
         pxa_cleanup_debugfs(udc);
  
-       if (!IS_ERR_OR_NULL(udc->transceiver))
+       if (!IS_ERR_OR_NULL(udc->transceiver)) {
                 usb_unregister_notifier(udc->transceiver, &pxa27x_udc_phy);
-       usb_put_phy(udc->transceiver);
+               usb_put_phy(udc->transceiver);
+       }
  
         udc->transceiver = NULL;
         the_controller = NULL;
diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c

index 414e3c376dbbd59587dc3398f4a90872a5aae19c..5302f988e7e670eec3fbd66f0058a49e91f22b76 100644 (file)
--- a/drivers/usb/host/ohci-at91.c
+++ b/drivers/usb/host/ohci-at91.c
@@ -350,7 +350,7 @@ static int ohci_at91_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
  
                 case USB_PORT_FEAT_SUSPEND:
                         dev_dbg(hcd->self.controller, "SetPortFeat: SUSPEND\n");
-                       if (valid_port(wIndex)) {
+                       if (valid_port(wIndex) && ohci_at91->sfr_regmap) {
                                 ohci_at91_port_suspend(ohci_at91->sfr_regmap,
                                                        1);
                                 return 0;
@@ -393,7 +393,7 @@ static int ohci_at91_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
  
                 case USB_PORT_FEAT_SUSPEND:
                         dev_dbg(hcd->self.controller, "ClearPortFeature: SUSPEND\n");
-                       if (valid_port(wIndex)) {
+                       if (valid_port(wIndex) && ohci_at91->sfr_regmap) {
                                 ohci_at91_port_suspend(ohci_at91->sfr_regmap,
                                                        0);
                                 return 0;
diff --git a/drivers/usb/host/xhci-dbg.c b/drivers/usb/host/xhci-dbg.c

index 363d125300eacfbef6287644f89af6abe007c920..2b4a00fa735dfef5c51b4a9f583e5783e87ad957 100644 (file)
--- a/drivers/usb/host/xhci-dbg.c
+++ b/drivers/usb/host/xhci-dbg.c
@@ -109,7 +109,7 @@ static void xhci_print_cap_regs(struct xhci_hcd *xhci)
         xhci_dbg(xhci, "RTSOFF 0x%x:\n", temp & RTSOFF_MASK);
  
         /* xhci 1.1 controllers have the HCCPARAMS2 register */
-       if (hci_version > 100) {
+       if (hci_version > 0x100) {
                 temp = readl(&xhci->cap_regs->hcc_params2);
                 xhci_dbg(xhci, "HCC PARAMS2 0x%x:\n", (unsigned int) temp);
                 xhci_dbg(xhci, "  HC %s Force save context capability",
diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c

index 9066ec9e0c2e7aacefabcd41f8f77805d635afe2..67d5dc79b6b50e6fbaa30cddface8602dded27ee 100644 (file)
--- a/drivers/usb/host/xhci-mtk.c
+++ b/drivers/usb/host/xhci-mtk.c
@@ -382,7 +382,6 @@ static int usb_wakeup_of_property_parse(struct xhci_hcd_mtk *mtk,
  
  static int xhci_mtk_setup(struct usb_hcd *hcd);
  static const struct xhci_driver_overrides xhci_mtk_overrides __initconst = {
-       .extra_priv_size = sizeof(struct xhci_hcd),
         .reset = xhci_mtk_setup,
  };
  
@@ -678,13 +677,13 @@ static int xhci_mtk_probe(struct platform_device *pdev)
                 goto power_off_phys;
         }
  
-       if (HCC_MAX_PSA(xhci->hcc_params) >= 4)
-               xhci->shared_hcd->can_do_streams = 1;
-
         ret = usb_add_hcd(hcd, irq, IRQF_SHARED);
         if (ret)
                 goto put_usb3_hcd;
  
+       if (HCC_MAX_PSA(xhci->hcc_params) >= 4)
+               xhci->shared_hcd->can_do_streams = 1;
+
         ret = usb_add_hcd(xhci->shared_hcd, irq, IRQF_SHARED);
         if (ret)
                 goto dealloc_usb2_hcd;
diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c

index 6d33b42ffcf5224a1b347666c77b010df9001d91..6ed468fa7d5e593ca2e0fdabc6048fb7f2dd9f5d 100644 (file)
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -286,6 +286,8 @@ static int xhci_plat_remove(struct platform_device *dev)
         struct xhci_hcd *xhci = hcd_to_xhci(hcd);
         struct clk *clk = xhci->clk;
  
+       xhci->xhc_state |= XHCI_STATE_REMOVING;
+
         usb_remove_hcd(xhci->shared_hcd);
         usb_phy_shutdown(hcd->usb_phy);
  
@@ -342,6 +344,7 @@ MODULE_DEVICE_TABLE(acpi, usb_xhci_acpi_match);
  static struct platform_driver usb_xhci_driver = {
         .probe  = xhci_plat_probe,
         .remove = xhci_plat_remove,
+       .shutdown       = usb_hcd_platform_shutdown,
         .driver = {
                 .name = "xhci-hcd",
                 .pm = DEV_PM_OPS,
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c

index d9936c771fa074593e77aad4aa86f0771388e6b1..a3309aa02993dfa79e52a2b93a87b9efa289d498 100644 (file)
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1989,6 +1989,9 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td,
                 case TRB_NORMAL:
                         td->urb->actual_length = requested - remaining;
                         goto finish_td;
+               case TRB_STATUS:
+                       td->urb->actual_length = requested;
+                       goto finish_td;
                 default:
                         xhci_warn(xhci, "WARN: unexpected TRB Type %d\n",
                                   trb_type);
diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c

index a59fafb4b329f532be52773c53a874b00cae789a..74436f8ca5382f736dbf352c21b7c2f83a8438ba 100644 (file)
--- a/drivers/usb/host/xhci-tegra.c
+++ b/drivers/usb/host/xhci-tegra.c
@@ -1308,7 +1308,6 @@ static int tegra_xhci_setup(struct usb_hcd *hcd)
  }
  
  static const struct xhci_driver_overrides tegra_xhci_overrides __initconst = {
-       .extra_priv_size = sizeof(struct xhci_hcd),
         .reset = tegra_xhci_setup,
  };
  
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c

index 6d6c46000e56cc76895a34f9d3980c949030b8a2..953fd8f62df0787b0479286c12513656f141614f 100644 (file)
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -868,7 +868,7 @@ static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci)
  
         spin_lock_irqsave(&xhci->lock, flags);
  
-       /* disble usb3 ports Wake bits*/
+       /* disable usb3 ports Wake bits */
         port_index = xhci->num_usb3_ports;
         port_array = xhci->usb3_ports;
         while (port_index--) {
@@ -879,7 +879,7 @@ static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci)
                         writel(t2, port_array[port_index]);
         }
  
-       /* disble usb2 ports Wake bits*/
+       /* disable usb2 ports Wake bits */
         port_index = xhci->num_usb2_ports;
         port_array = xhci->usb2_ports;
         while (port_index--) {
@@ -1477,6 +1477,7 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
         struct xhci_ring *ep_ring;
         struct xhci_virt_ep *ep;
         struct xhci_command *command;
+       struct xhci_virt_device *vdev;
  
         xhci = hcd_to_xhci(hcd);
         spin_lock_irqsave(&xhci->lock, flags);
@@ -1485,15 +1486,27 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
  
         /* Make sure the URB hasn't completed or been unlinked already */
         ret = usb_hcd_check_unlink_urb(hcd, urb, status);
-       if (ret || !urb->hcpriv)
+       if (ret)
                 goto done;
+
+       /* give back URB now if we can't queue it for cancel */
+       vdev = xhci->devs[urb->dev->slot_id];
+       urb_priv = urb->hcpriv;
+       if (!vdev || !urb_priv)
+               goto err_giveback;
+
+       ep_index = xhci_get_endpoint_index(&urb->ep->desc);
+       ep = &vdev->eps[ep_index];
+       ep_ring = xhci_urb_to_transfer_ring(xhci, urb);
+       if (!ep || !ep_ring)
+               goto err_giveback;
+
         temp = readl(&xhci->op_regs->status);
         if (temp == 0xffffffff || (xhci->xhc_state & XHCI_STATE_HALTED)) {
                 xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
                                 "HW died, freeing TD.");
-               urb_priv = urb->hcpriv;
                 for (i = urb_priv->num_tds_done;
-                    i < urb_priv->num_tds && xhci->devs[urb->dev->slot_id];
+                    i < urb_priv->num_tds;
                      i++) {
                         td = &urb_priv->td[i];
                         if (!list_empty(&td->td_list))
@@ -1501,23 +1514,9 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
                         if (!list_empty(&td->cancelled_td_list))
                                 list_del_init(&td->cancelled_td_list);
                 }
-
-               usb_hcd_unlink_urb_from_ep(hcd, urb);
-               spin_unlock_irqrestore(&xhci->lock, flags);
-               usb_hcd_giveback_urb(hcd, urb, -ESHUTDOWN);
-               xhci_urb_free_priv(urb_priv);
-               return ret;
+               goto err_giveback;
         }
  
-       ep_index = xhci_get_endpoint_index(&urb->ep->desc);
-       ep = &xhci->devs[urb->dev->slot_id]->eps[ep_index];
-       ep_ring = xhci_urb_to_transfer_ring(xhci, urb);
-       if (!ep_ring) {
-               ret = -EINVAL;
-               goto done;
-       }
-
-       urb_priv = urb->hcpriv;
         i = urb_priv->num_tds_done;
         if (i < urb_priv->num_tds)
                 xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
@@ -1554,6 +1553,14 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
  done:
         spin_unlock_irqrestore(&xhci->lock, flags);
         return ret;
+
+err_giveback:
+       if (urb_priv)
+               xhci_urb_free_priv(urb_priv);
+       usb_hcd_unlink_urb_from_ep(hcd, urb);
+       spin_unlock_irqrestore(&xhci->lock, flags);
+       usb_hcd_giveback_urb(hcd, urb, -ESHUTDOWN);
+       return ret;
  }
  
  /* Drop an endpoint from a new bandwidth configuration for this device.
diff --git a/drivers/usb/misc/idmouse.c b/drivers/usb/misc/idmouse.c

index 8b9fd7534f698b937b5a89113acc12c716368075..502bfe30a077a20120616af74b378c62bc2ec6d9 100644 (file)
--- a/drivers/usb/misc/idmouse.c
+++ b/drivers/usb/misc/idmouse.c
@@ -347,6 +347,9 @@ static int idmouse_probe(struct usb_interface *interface,
         if (iface_desc->desc.bInterfaceClass != 0x0A)
                 return -ENODEV;
  
+       if (iface_desc->desc.bNumEndpoints < 1)
+               return -ENODEV;
+
         /* allocate memory for our device state and initialize it */
         dev = kzalloc(sizeof(*dev), GFP_KERNEL);
         if (dev == NULL)
diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c

index 095778ff984de25838b1e0eda53e0794dd382f8c..37c63cb39714b86ada39d3232565ef1595ed71c6 100644 (file)
--- a/drivers/usb/misc/iowarrior.c
+++ b/drivers/usb/misc/iowarrior.c
@@ -781,12 +781,6 @@ static int iowarrior_probe(struct usb_interface *interface,
         iface_desc = interface->cur_altsetting;
         dev->product_id = le16_to_cpu(udev->descriptor.idProduct);
  
-       if (iface_desc->desc.bNumEndpoints < 1) {
-               dev_err(&interface->dev, "Invalid number of endpoints\n");
-               retval = -EINVAL;
-               goto error;
-       }
-
         /* set up the endpoint information */
         for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) {
                 endpoint = &iface_desc->endpoint[i].desc;
@@ -797,6 +791,21 @@ static int iowarrior_probe(struct usb_interface *interface,
                         /* this one will match for the IOWarrior56 only */
                         dev->int_out_endpoint = endpoint;
         }
+
+       if (!dev->int_in_endpoint) {
+               dev_err(&interface->dev, "no interrupt-in endpoint found\n");
+               retval = -ENODEV;
+               goto error;
+       }
+
+       if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) {
+               if (!dev->int_out_endpoint) {
+                       dev_err(&interface->dev, "no interrupt-out endpoint found\n");
+                       retval = -ENODEV;
+                       goto error;
+               }
+       }
+
         /* we have to check the report_size often, so remember it in the endianness suitable for our machine */
         dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint);
         if ((dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) &&
diff --git a/drivers/usb/misc/lvstest.c b/drivers/usb/misc/lvstest.c

index 77176511658f3328f830ecdbd056f072a6c84f8a..d3d12475326663def2ce1f389f06f040d2a56126 100644 (file)
--- a/drivers/usb/misc/lvstest.c
+++ b/drivers/usb/misc/lvstest.c
@@ -366,6 +366,10 @@ static int lvs_rh_probe(struct usb_interface *intf,
  
         hdev = interface_to_usbdev(intf);
         desc = intf->cur_altsetting;
+
+       if (desc->desc.bNumEndpoints < 1)
+               return -ENODEV;
+
         endpoint = &desc->endpoint[0].desc;
  
         /* valid only for SS root hub */
diff --git a/drivers/usb/misc/usb251xb.c b/drivers/usb/misc/usb251xb.c

index 4e18600dc9b43e2603ec2c8f371545e63b75a404..91f66d68bcb7b55bd000e18ad551d8b6bc27ff7c 100644 (file)
--- a/drivers/usb/misc/usb251xb.c
+++ b/drivers/usb/misc/usb251xb.c
@@ -375,18 +375,24 @@ static int usb251xb_get_ofdata(struct usb251xb *hub,
         if (of_get_property(np, "dynamic-power-switching", NULL))
                 hub->conf_data2 |= BIT(7);
  
-       if (of_get_property(np, "oc-delay-100us", NULL)) {
-               hub->conf_data2 &= ~BIT(5);
-               hub->conf_data2 &= ~BIT(4);
-       } else if (of_get_property(np, "oc-delay-4ms", NULL)) {
-               hub->conf_data2 &= ~BIT(5);
-               hub->conf_data2 |= BIT(4);
-       } else if (of_get_property(np, "oc-delay-8ms", NULL)) {
-               hub->conf_data2 |= BIT(5);
-               hub->conf_data2 &= ~BIT(4);
-       } else if (of_get_property(np, "oc-delay-16ms", NULL)) {
-               hub->conf_data2 |= BIT(5);
-               hub->conf_data2 |= BIT(4);
+       if (!of_property_read_u32(np, "oc-delay-us", property_u32)) {
+               if (*property_u32 == 100) {
+                       /* 100 us*/
+                       hub->conf_data2 &= ~BIT(5);
+                       hub->conf_data2 &= ~BIT(4);
+               } else if (*property_u32 == 4000) {
+                       /* 4 ms */
+                       hub->conf_data2 &= ~BIT(5);
+                       hub->conf_data2 |= BIT(4);
+               } else if (*property_u32 == 16000) {
+                       /* 16 ms */
+                       hub->conf_data2 |= BIT(5);
+                       hub->conf_data2 |= BIT(4);
+               } else {
+                       /* 8 ms (DEFAULT) */
+                       hub->conf_data2 |= BIT(5);
+                       hub->conf_data2 &= ~BIT(4);
+               }
         }
  
         if (of_get_property(np, "compound-device", NULL))
@@ -432,30 +438,9 @@ static int usb251xb_get_ofdata(struct usb251xb *hub,
                 }
         }
  
-       hub->max_power_sp = USB251XB_DEF_MAX_POWER_SELF;
-       if (!of_property_read_u32(np, "max-sp-power", property_u32))
-               hub->max_power_sp = min_t(u8, be32_to_cpu(*property_u32) / 2,
-                                         250);
-
-       hub->max_power_bp = USB251XB_DEF_MAX_POWER_BUS;
-       if (!of_property_read_u32(np, "max-bp-power", property_u32))
-               hub->max_power_bp = min_t(u8, be32_to_cpu(*property_u32) / 2,
-                                         250);
-
-       hub->max_current_sp = USB251XB_DEF_MAX_CURRENT_SELF;
-       if (!of_property_read_u32(np, "max-sp-current", property_u32))
-               hub->max_current_sp = min_t(u8, be32_to_cpu(*property_u32) / 2,
-                                           250);
-
-       hub->max_current_bp = USB251XB_DEF_MAX_CURRENT_BUS;
-       if (!of_property_read_u32(np, "max-bp-current", property_u32))
-               hub->max_current_bp = min_t(u8, be32_to_cpu(*property_u32) / 2,
-                                           250);
-
         hub->power_on_time = USB251XB_DEF_POWER_ON_TIME;
-       if (!of_property_read_u32(np, "power-on-time", property_u32))
-               hub->power_on_time = min_t(u8, be32_to_cpu(*property_u32) / 2,
-                                          255);
+       if (!of_property_read_u32(np, "power-on-time-ms", property_u32))
+               hub->power_on_time = min_t(u8, *property_u32 / 2, 255);
  
         if (of_property_read_u16_array(np, "language-id", &hub->lang_id, 1))
                 hub->lang_id = USB251XB_DEF_LANGUAGE_ID;
@@ -492,6 +477,10 @@ static int usb251xb_get_ofdata(struct usb251xb *hub,
         /* The following parameters are currently not exposed to devicetree, but
          * may be as soon as needed.
          */
+       hub->max_power_sp = USB251XB_DEF_MAX_POWER_SELF;
+       hub->max_power_bp = USB251XB_DEF_MAX_POWER_BUS;
+       hub->max_current_sp = USB251XB_DEF_MAX_CURRENT_SELF;
+       hub->max_current_bp = USB251XB_DEF_MAX_CURRENT_BUS;
         hub->bat_charge_en = USB251XB_DEF_BATTERY_CHARGING_ENABLE;
         hub->boost_up = USB251XB_DEF_BOOST_UP;
         hub->boost_x = USB251XB_DEF_BOOST_X;
diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c

index e45a3a680db8f6490257c0f6decdfd6d1d8247d8..07014cad6dbe357bca938561eaf976dbbee902d4 100644 (file)
--- a/drivers/usb/misc/uss720.c
+++ b/drivers/usb/misc/uss720.c
@@ -709,6 +709,11 @@ static int uss720_probe(struct usb_interface *intf,
  
         interface = intf->cur_altsetting;
  
+       if (interface->desc.bNumEndpoints < 3) {
+               usb_put_dev(usbdev);
+               return -ENODEV;
+       }
+
         /*
          * Allocate parport interface 
          */
diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c

index d8bae6ca890475b16bf7d45058ae35713ca46656..0c3664ab705eed549e73d53dea13976066182ee1 100644 (file)
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -2490,8 +2490,8 @@ static int musb_remove(struct platform_device *pdev)
         musb_host_cleanup(musb);
         musb_gadget_cleanup(musb);
  
-       spin_lock_irqsave(&musb->lock, flags);
         musb_platform_disable(musb);
+       spin_lock_irqsave(&musb->lock, flags);
         musb_disable_interrupts(musb);
         musb_writeb(musb->mregs, MUSB_DEVCTL, 0);
         spin_unlock_irqrestore(&musb->lock, flags);
diff --git a/drivers/usb/musb/musb_cppi41.c b/drivers/usb/musb/musb_cppi41.c

index 00e272bfee39a94520f16aeef03beed6a08ea43c..355655f8a3fbc9c4e3d8acd03eefa31a5736fb2f 100644 (file)
--- a/drivers/usb/musb/musb_cppi41.c
+++ b/drivers/usb/musb/musb_cppi41.c
@@ -238,8 +238,27 @@ static void cppi41_dma_callback(void *private_data,
                         transferred < cppi41_channel->packet_sz)
                 cppi41_channel->prog_len = 0;
  
-       if (cppi41_channel->is_tx)
-               empty = musb_is_tx_fifo_empty(hw_ep);
+       if (cppi41_channel->is_tx) {
+               u8 type;
+
+               if (is_host_active(musb))
+                       type = hw_ep->out_qh->type;
+               else
+                       type = hw_ep->ep_in.type;
+
+               if (type == USB_ENDPOINT_XFER_ISOC)
+                       /*
+                        * Don't use the early-TX-interrupt workaround below
+                        * for Isoch transfter. Since Isoch are periodic
+                        * transfer, by the time the next transfer is
+                        * scheduled, the current one should be done already.
+                        *
+                        * This avoids audio playback underrun issue.
+                        */
+                       empty = true;
+               else
+                       empty = musb_is_tx_fifo_empty(hw_ep);
+       }
  
         if (!cppi41_channel->is_tx || empty) {
                 cppi41_trans_done(cppi41_channel);
diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c

index 7c047c4a2565cca25690c47d85a2bb9eb29f728e..9c7ee26ef388062bdc5e1f1fb2097fc010882950 100644 (file)
--- a/drivers/usb/musb/musb_dsps.c
+++ b/drivers/usb/musb/musb_dsps.c
@@ -933,7 +933,7 @@ static int dsps_probe(struct platform_device *pdev)
         if (usb_get_dr_mode(&pdev->dev) == USB_DR_MODE_PERIPHERAL) {
                 ret = dsps_setup_optional_vbus_irq(pdev, glue);
                 if (ret)
-                       return ret;
+                       goto err_iounmap;
         }
  
         platform_set_drvdata(pdev, glue);
@@ -946,6 +946,8 @@ static int dsps_probe(struct platform_device *pdev)
  
  err:
         pm_runtime_disable(&pdev->dev);
+err_iounmap:
+       iounmap(glue->usbss_base);
         return ret;
  }
  
@@ -956,6 +958,7 @@ static int dsps_remove(struct platform_device *pdev)
         platform_device_unregister(glue->musb);
  
         pm_runtime_disable(&pdev->dev);
+       iounmap(glue->usbss_base);
  
         return 0;
  }
diff --git a/drivers/usb/phy/phy-isp1301.c b/drivers/usb/phy/phy-isp1301.c

index db68156568e6e7bc209eaf942eaa9c51bd4e15ad..f333024660b4d0d4c6c8f54e5b209285de9a169a 100644 (file)
--- a/drivers/usb/phy/phy-isp1301.c
+++ b/drivers/usb/phy/phy-isp1301.c
@@ -33,6 +33,12 @@ static const struct i2c_device_id isp1301_id[] = {
  };
  MODULE_DEVICE_TABLE(i2c, isp1301_id);
  
+static const struct of_device_id isp1301_of_match[] = {
+       {.compatible = "nxp,isp1301" },
+       { },
+};
+MODULE_DEVICE_TABLE(of, isp1301_of_match);
+
  static struct i2c_client *isp1301_i2c_client;
  
  static int __isp1301_write(struct isp1301 *isp, u8 reg, u8 value, u8 clear)
@@ -130,6 +136,7 @@ static int isp1301_remove(struct i2c_client *client)
  static struct i2c_driver isp1301_driver = {
         .driver = {
                 .name = DRV_NAME,
+               .of_match_table = isp1301_of_match,
         },
         .probe = isp1301_probe,
         .remove = isp1301_remove,
diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c

index ab78111e09680f47267cbe5fc7fe9a25ca5eea21..6537d3ca2797d8573236578e3088f6dbce1ce1b5 100644 (file)
--- a/drivers/usb/serial/digi_acceleport.c
+++ b/drivers/usb/serial/digi_acceleport.c
@@ -1500,7 +1500,7 @@ static int digi_read_oob_callback(struct urb *urb)
                 return -1;
  
         /* handle each oob command */
-       for (i = 0; i < urb->actual_length - 4; i += 4) {
+       for (i = 0; i < urb->actual_length - 3; i += 4) {
                 opcode = buf[i];
                 line = buf[i + 1];
                 status = buf[i + 2];
diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c

index ceaeebaa6f90587b6d8ea01e53daaad243be8b6d..a76b95d32157871f5e2964b629784a7642da8480 100644 (file)
--- a/drivers/usb/serial/io_ti.c
+++ b/drivers/usb/serial/io_ti.c
@@ -1674,6 +1674,12 @@ static void edge_interrupt_callback(struct urb *urb)
         function    = TIUMP_GET_FUNC_FROM_CODE(data[0]);
         dev_dbg(dev, "%s - port_number %d, function %d, info 0x%x\n", __func__,
                 port_number, function, data[1]);
+
+       if (port_number >= edge_serial->serial->num_ports) {
+               dev_err(dev, "bad port number %d\n", port_number);
+               goto exit;
+       }
+
         port = edge_serial->serial->port[port_number];
         edge_port = usb_get_serial_port_data(port);
         if (!edge_port) {
@@ -1755,7 +1761,7 @@ static void edge_bulk_in_callback(struct urb *urb)
  
         port_number = edge_port->port->port_number;
  
-       if (edge_port->lsr_event) {
+       if (urb->actual_length > 0 && edge_port->lsr_event) {
                 edge_port->lsr_event = 0;
                 dev_dbg(dev, "%s ===== Port %u LSR Status = %02x, Data = %02x ======\n",
                         __func__, port_number, edge_port->lsr_mask, *data);
diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c

index a180b17d24323b074aee19e33bf0f497ad271d8a..dd706953b4660905cc5abf5e85477bfb6bdb4149 100644 (file)
--- a/drivers/usb/serial/omninet.c
+++ b/drivers/usb/serial/omninet.c
@@ -31,7 +31,6 @@
  #define BT_IGNITIONPRO_ID      0x2000
  
  /* function prototypes */
-static int  omninet_open(struct tty_struct *tty, struct usb_serial_port *port);
  static void omninet_process_read_urb(struct urb *urb);
  static void omninet_write_bulk_callback(struct urb *urb);
  static int  omninet_write(struct tty_struct *tty, struct usb_serial_port *port,
@@ -60,7 +59,6 @@ static struct usb_serial_driver zyxel_omninet_device = {
         .attach =               omninet_attach,
         .port_probe =           omninet_port_probe,
         .port_remove =          omninet_port_remove,
-       .open =                 omninet_open,
         .write =                omninet_write,
         .write_room =           omninet_write_room,
         .write_bulk_callback =  omninet_write_bulk_callback,
@@ -140,17 +138,6 @@ static int omninet_port_remove(struct usb_serial_port *port)
         return 0;
  }
  
-static int omninet_open(struct tty_struct *tty, struct usb_serial_port *port)
-{
-       struct usb_serial       *serial = port->serial;
-       struct usb_serial_port  *wport;
-
-       wport = serial->port[1];
-       tty_port_tty_set(&wport->port, tty);
-
-       return usb_serial_generic_open(tty, port);
-}
-
  #define OMNINET_HEADERLEN      4
  #define OMNINET_BULKOUTSIZE    64
  #define OMNINET_PAYLOADSIZE    (OMNINET_BULKOUTSIZE - OMNINET_HEADERLEN)
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c

index 42cc72e54c051b2115c358bcee8bfc534258d206..af67a0de6b5d475d2be95952ddfb2354546a0fbb 100644 (file)
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -233,6 +233,14 @@ static void option_instat_callback(struct urb *urb);
  #define BANDRICH_PRODUCT_1012                  0x1012
  
  #define QUALCOMM_VENDOR_ID                     0x05C6
+/* These Quectel products use Qualcomm's vendor ID */
+#define QUECTEL_PRODUCT_UC20                   0x9003
+#define QUECTEL_PRODUCT_UC15                   0x9090
+
+#define QUECTEL_VENDOR_ID                      0x2c7c
+/* These Quectel products use Quectel's vendor ID */
+#define QUECTEL_PRODUCT_EC21                   0x0121
+#define QUECTEL_PRODUCT_EC25                   0x0125
  
  #define CMOTECH_VENDOR_ID                      0x16d8
  #define CMOTECH_PRODUCT_6001                   0x6001
@@ -1161,7 +1169,14 @@ static const struct usb_device_id option_ids[] = {
         { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */
         { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */
         { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */
-       { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9003), /* Quectel UC20 */
+       /* Quectel products using Qualcomm vendor ID */
+       { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC15)},
+       { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC20),
+         .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
+       /* Quectel products using Quectel vendor ID */
+       { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21),
+         .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
+       { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC25),
           .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
         { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) },
         { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) },
diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c

index 696458db7e3c45e661a9825d05df0fe25dc0a832..38b3f0d8cd580f2366136003934d00a475b1d7f1 100644 (file)
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -169,6 +169,8 @@ static const struct usb_device_id id_table[] = {
         {DEVICE_SWI(0x413c, 0x81a9)},   /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */
         {DEVICE_SWI(0x413c, 0x81b1)},   /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */
         {DEVICE_SWI(0x413c, 0x81b3)},   /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */
+       {DEVICE_SWI(0x413c, 0x81b5)},   /* Dell Wireless 5811e QDL */
+       {DEVICE_SWI(0x413c, 0x81b6)},   /* Dell Wireless 5811e QDL */
  
         /* Huawei devices */
         {DEVICE_HWI(0x03f0, 0x581d)},   /* HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) */
diff --git a/drivers/usb/serial/safe_serial.c b/drivers/usb/serial/safe_serial.c

index 93c6c9b08daae534234ca75121f696cec521857d..8a069aa154eda461ae12807d2518bbfd0bb27bf4 100644 (file)
--- a/drivers/usb/serial/safe_serial.c
+++ b/drivers/usb/serial/safe_serial.c
@@ -200,6 +200,11 @@ static void safe_process_read_urb(struct urb *urb)
         if (!safe)
                 goto out;
  
+       if (length < 2) {
+               dev_err(&port->dev, "malformed packet\n");
+               return;
+       }
+
         fcs = fcs_compute10(data, length, CRC10_INITFCS);
         if (fcs) {
                 dev_err(&port->dev, "%s - bad CRC %x\n", __func__, fcs);
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h

index 16cc18369111d039ffededa7559075a869638708..9129f6cb823074a555a90f74611dac3f0164a60d 100644 (file)
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -2071,6 +2071,20 @@ UNUSUAL_DEV(  0x1370, 0x6828, 0x0110, 0x0110,
                 USB_SC_DEVICE, USB_PR_DEVICE, NULL,
                 US_FL_IGNORE_RESIDUE ),
  
+/*
+ * Reported by Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
+ * The INIC-3619 bridge is used in the StarTech SLSODDU33B
+ * SATA-USB enclosure for slimline optical drives.
+ *
+ * The quirk enables MakeMKV to properly exchange keys with
+ * an installed BD drive.
+ */
+UNUSUAL_DEV(  0x13fd, 0x3609, 0x0209, 0x0209,
+               "Initio Corporation",
+               "INIC-3619",
+               USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+               US_FL_IGNORE_RESIDUE ),
+
  /* Reported by Qinglin Ye <yestyle@gmail.com> */
  UNUSUAL_DEV(  0x13fe, 0x3600, 0x0100, 0x0100,
                 "Kingston",
diff --git a/drivers/usb/wusbcore/wa-hc.c b/drivers/usb/wusbcore/wa-hc.c

index 252c7bd9218afd5db373325b55b561c304ab0e88..d01496fd27fe88460988745a57c32c0cf5f14840 100644 (file)
--- a/drivers/usb/wusbcore/wa-hc.c
+++ b/drivers/usb/wusbcore/wa-hc.c
@@ -39,6 +39,9 @@ int wa_create(struct wahc *wa, struct usb_interface *iface,
         int result;
         struct device *dev = &iface->dev;
  
+       if (iface->cur_altsetting->desc.bNumEndpoints < 3)
+               return -ENODEV;
+
         result = wa_rpipes_create(wa);
         if (result < 0)
                 goto error_rpipes_create;
diff --git a/drivers/uwb/hwa-rc.c b/drivers/uwb/hwa-rc.c

index 0aa6c3c29d17260f684b8bf79944eff43af04427..35a1e777b4497ad0ad1cfa18db0834354732b128 100644 (file)
--- a/drivers/uwb/hwa-rc.c
+++ b/drivers/uwb/hwa-rc.c
@@ -823,6 +823,9 @@ static int hwarc_probe(struct usb_interface *iface,
         struct hwarc *hwarc;
         struct device *dev = &iface->dev;
  
+       if (iface->cur_altsetting->desc.bNumEndpoints < 1)
+               return -ENODEV;
+
         result = -ENOMEM;
         uwb_rc = uwb_rc_alloc();
         if (uwb_rc == NULL) {
diff --git a/drivers/uwb/i1480/dfu/usb.c b/drivers/uwb/i1480/dfu/usb.c

index 2bfc846ac071341ace37aa50dc126fc5b08a519f..6345e85822a42457f11c607effb3fbb66ce8c89f 100644 (file)
--- a/drivers/uwb/i1480/dfu/usb.c
+++ b/drivers/uwb/i1480/dfu/usb.c
@@ -362,6 +362,9 @@ int i1480_usb_probe(struct usb_interface *iface, const struct usb_device_id *id)
                                  result);
         }
  
+       if (iface->cur_altsetting->desc.bNumEndpoints < 1)
+               return -ENODEV;
+
         result = -ENOMEM;
         i1480_usb = kzalloc(sizeof(*i1480_usb), GFP_KERNEL);
         if (i1480_usb == NULL) {
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c

index 609f4f982c74c59a5b4fd87dfd83367765ebbc1e..561084ab387f3fd7c8ae3fa3e91c27d8329f7fe4 100644 (file)
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -403,6 +403,7 @@ static void vfio_group_release(struct kref *kref)
         struct iommu_group *iommu_group = group->iommu_group;
  
         WARN_ON(!list_empty(&group->device_list));
+       WARN_ON(group->notifier.head);
  
         list_for_each_entry_safe(unbound, tmp,
                                  &group->unbound_list, unbound_next) {
@@ -1573,6 +1574,10 @@ static int vfio_group_fops_open(struct inode *inode, struct file *filep)
                 return -EBUSY;
         }
  
+       /* Warn if previous user didn't cleanup and re-init to drop them */
+       if (WARN_ON(group->notifier.head))
+               BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
+
         filep->private_data = group;
  
         return 0;
@@ -1584,9 +1589,6 @@ static int vfio_group_fops_release(struct inode *inode, struct file *filep)
  
         filep->private_data = NULL;
  
-       /* Any user didn't unregister? */
-       WARN_ON(group->notifier.head);
-
         vfio_group_try_dissolve_container(group);
  
         atomic_dec(&group->opened);
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c

index c26fa1f3ed8606e65870f05aa47b7eca5fdf381e..32d2633092a37edf64ec4b9d2afc9fa4f12ea77d 100644 (file)
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -1182,8 +1182,7 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain,
         return NULL;
  }
  
-static bool vfio_iommu_has_resv_msi(struct iommu_group *group,
-                                   phys_addr_t *base)
+static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base)
  {
         struct list_head group_resv_regions;
         struct iommu_resv_region *region, *next;
@@ -1192,7 +1191,7 @@ static bool vfio_iommu_has_resv_msi(struct iommu_group *group,
         INIT_LIST_HEAD(&group_resv_regions);
         iommu_get_group_resv_regions(group, &group_resv_regions);
         list_for_each_entry(region, &group_resv_regions, list) {
-               if (region->type & IOMMU_RESV_MSI) {
+               if (region->type == IOMMU_RESV_SW_MSI) {
                         *base = region->start;
                         ret = true;
                         goto out;
@@ -1283,7 +1282,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
         if (ret)
                 goto out_domain;
  
-       resv_msi = vfio_iommu_has_resv_msi(iommu_group, &resv_msi_base);
+       resv_msi = vfio_iommu_has_sw_msi(iommu_group, &resv_msi_base);
  
         INIT_LIST_HEAD(&domain->group_list);
         list_add(&group->next, &domain->group_list);
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c

index ce5e63d2c66aac7d019c422ec294cab025e94e5e..44eed8eb0725b25e3c9765e19387e7c338ab9bbb 100644 (file)
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -223,6 +223,46 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
         return len;
  }
  
+static int
+vhost_transport_cancel_pkt(struct vsock_sock *vsk)
+{
+       struct vhost_vsock *vsock;
+       struct virtio_vsock_pkt *pkt, *n;
+       int cnt = 0;
+       LIST_HEAD(freeme);
+
+       /* Find the vhost_vsock according to guest context id  */
+       vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
+       if (!vsock)
+               return -ENODEV;
+
+       spin_lock_bh(&vsock->send_pkt_list_lock);
+       list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) {
+               if (pkt->vsk != vsk)
+                       continue;
+               list_move(&pkt->list, &freeme);
+       }
+       spin_unlock_bh(&vsock->send_pkt_list_lock);
+
+       list_for_each_entry_safe(pkt, n, &freeme, list) {
+               if (pkt->reply)
+                       cnt++;
+               list_del(&pkt->list);
+               virtio_transport_free_pkt(pkt);
+       }
+
+       if (cnt) {
+               struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
+               int new_cnt;
+
+               new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
+               if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
+                       vhost_poll_queue(&tx_vq->poll);
+       }
+
+       return 0;
+}
+
  static struct virtio_vsock_pkt *
  vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
                       unsigned int out, unsigned int in)
@@ -675,6 +715,7 @@ static struct virtio_transport vhost_transport = {
                 .release                  = virtio_transport_release,
                 .connect                  = virtio_transport_connect,
                 .shutdown                 = virtio_transport_shutdown,
+               .cancel_pkt               = vhost_transport_cancel_pkt,
  
                 .dgram_enqueue            = virtio_transport_dgram_enqueue,
                 .dgram_dequeue            = virtio_transport_dgram_dequeue,
diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c

index d7efcb632f7d9dde08b0a494c455725b05d55af0..002f1ce22bd02032062924b19d645ebc25302c93 100644 (file)
--- a/drivers/video/backlight/pwm_bl.c
+++ b/drivers/video/backlight/pwm_bl.c
@@ -297,14 +297,15 @@ static int pwm_backlight_probe(struct platform_device *pdev)
         }
  
         /*
-        * If the GPIO is configured as input, change the direction to output
-        * and set the GPIO as active.
+        * If the GPIO is not known to be already configured as output, that
+        * is, if gpiod_get_direction returns either GPIOF_DIR_IN or -EINVAL,
+        * change the direction to output and set the GPIO as active.
          * Do not force the GPIO to active when it was already output as it
          * could cause backlight flickering or we would enable the backlight too
          * early. Leave the decision of the initial backlight state for later.
          */
         if (pb->enable_gpio &&
-           gpiod_get_direction(pb->enable_gpio) == GPIOF_DIR_IN)
+           gpiod_get_direction(pb->enable_gpio) != GPIOF_DIR_OUT)
                 gpiod_direction_output(pb->enable_gpio, 1);
  
         pb->power_supply = devm_regulator_get(&pdev->dev, "power");
diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c

index 8c4dc1e1f94fdb53ad7acf2808fa400c03787857..b827a8113e26803d8caee69e4ea59d6ceae56ea0 100644 (file)
--- a/drivers/video/fbdev/efifb.c
+++ b/drivers/video/fbdev/efifb.c
@@ -10,6 +10,7 @@
  #include <linux/efi.h>
  #include <linux/errno.h>
  #include <linux/fb.h>
+#include <linux/pci.h>
  #include <linux/platform_device.h>
  #include <linux/screen_info.h>
  #include <video/vga.h>
@@ -143,6 +144,8 @@ static struct attribute *efifb_attrs[] = {
  };
  ATTRIBUTE_GROUPS(efifb);
  
+static bool pci_dev_disabled;  /* FB base matches BAR of a disabled device */
+
  static int efifb_probe(struct platform_device *dev)
  {
         struct fb_info *info;
@@ -152,7 +155,7 @@ static int efifb_probe(struct platform_device *dev)
         unsigned int size_total;
         char *option = NULL;
  
-       if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI)
+       if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI || pci_dev_disabled)
                 return -ENODEV;
  
         if (fb_get_options("efifb", &option))
@@ -360,3 +363,64 @@ static struct platform_driver efifb_driver = {
  };
  
  builtin_platform_driver(efifb_driver);
+
+#if defined(CONFIG_PCI) && !defined(CONFIG_X86)
+
+static bool pci_bar_found;     /* did we find a BAR matching the efifb base? */
+
+static void claim_efifb_bar(struct pci_dev *dev, int idx)
+{
+       u16 word;
+
+       pci_bar_found = true;
+
+       pci_read_config_word(dev, PCI_COMMAND, &word);
+       if (!(word & PCI_COMMAND_MEMORY)) {
+               pci_dev_disabled = true;
+               dev_err(&dev->dev,
+                       "BAR %d: assigned to efifb but device is disabled!\n",
+                       idx);
+               return;
+       }
+
+       if (pci_claim_resource(dev, idx)) {
+               pci_dev_disabled = true;
+               dev_err(&dev->dev,
+                       "BAR %d: failed to claim resource for efifb!\n", idx);
+               return;
+       }
+
+       dev_info(&dev->dev, "BAR %d: assigned to efifb\n", idx);
+}
+
+static void efifb_fixup_resources(struct pci_dev *dev)
+{
+       u64 base = screen_info.lfb_base;
+       u64 size = screen_info.lfb_size;
+       int i;
+
+       if (pci_bar_found || screen_info.orig_video_isVGA != VIDEO_TYPE_EFI)
+               return;
+
+       if (screen_info.capabilities & VIDEO_CAPABILITY_64BIT_BASE)
+               base |= (u64)screen_info.ext_lfb_base << 32;
+
+       if (!base)
+               return;
+
+       for (i = 0; i < PCI_STD_RESOURCE_END; i++) {
+               struct resource *res = &dev->resource[i];
+
+               if (!(res->flags & IORESOURCE_MEM))
+                       continue;
+
+               if (res->start <= base && res->end >= base + size - 1) {
+                       claim_efifb_bar(dev, i);
+                       break;
+               }
+       }
+}
+DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_ANY_ID, PCI_ANY_ID, PCI_BASE_CLASS_DISPLAY,
+                              16, efifb_fixup_resources);
+
+#endif
diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c

index 1abba07b84b3efd014b28bfd0ef61f3cad7cbeda..f4cbfb3b8a0980030e2a0c2bbd78348968d6b8a1 100644 (file)
--- a/drivers/video/fbdev/omap/omapfb_main.c
+++ b/drivers/video/fbdev/omap/omapfb_main.c
@@ -1608,19 +1608,6 @@ static int omapfb_find_ctrl(struct omapfb_device *fbdev)
         return 0;
  }
  
-static void check_required_callbacks(struct omapfb_device *fbdev)
-{
-#define _C(x) (fbdev->ctrl->x != NULL)
-#define _P(x) (fbdev->panel->x != NULL)
-       BUG_ON(fbdev->ctrl == NULL || fbdev->panel == NULL);
-       BUG_ON(!(_C(init) && _C(cleanup) && _C(get_caps) &&
-                _C(set_update_mode) && _C(setup_plane) && _C(enable_plane) &&
-                _P(init) && _P(cleanup) && _P(enable) && _P(disable) &&
-                _P(get_caps)));
-#undef _P
-#undef _C
-}
-
  /*
   * Called by LDM binding to probe and attach a new device.
   * Initialization sequence:
@@ -1705,8 +1692,6 @@ static int omapfb_do_probe(struct platform_device *pdev,
                 omapfb_ops.fb_mmap = omapfb_mmap;
         init_state++;
  
-       check_required_callbacks(fbdev);
-
         r = planes_init(fbdev);
         if (r)
                 goto cleanup;
diff --git a/drivers/video/fbdev/ssd1307fb.c b/drivers/video/fbdev/ssd1307fb.c

index bd017b57c47f8af4ff1558cedaa8589a5f0ce9ff..f599520374ddf575bba1236b81bec2c4c2d21c49 100644 (file)
--- a/drivers/video/fbdev/ssd1307fb.c
+++ b/drivers/video/fbdev/ssd1307fb.c
@@ -578,10 +578,14 @@ static int ssd1307fb_probe(struct i2c_client *client,
  
         par->vbat_reg = devm_regulator_get_optional(&client->dev, "vbat");
         if (IS_ERR(par->vbat_reg)) {
-               dev_err(&client->dev, "failed to get VBAT regulator: %ld\n",
-                       PTR_ERR(par->vbat_reg));
                 ret = PTR_ERR(par->vbat_reg);
-               goto fb_alloc_error;
+               if (ret == -ENODEV) {
+                       par->vbat_reg = NULL;
+               } else {
+                       dev_err(&client->dev, "failed to get VBAT regulator: %d\n",
+                               ret);
+                       goto fb_alloc_error;
+               }
         }
  
         if (of_property_read_u32(node, "solomon,width", &par->width))
@@ -668,10 +672,13 @@ static int ssd1307fb_probe(struct i2c_client *client,
                 udelay(4);
         }
  
-       ret = regulator_enable(par->vbat_reg);
-       if (ret) {
-               dev_err(&client->dev, "failed to enable VBAT: %d\n", ret);
-               goto reset_oled_error;
+       if (par->vbat_reg) {
+               ret = regulator_enable(par->vbat_reg);
+               if (ret) {
+                       dev_err(&client->dev, "failed to enable VBAT: %d\n",
+                               ret);
+                       goto reset_oled_error;
+               }
         }
  
         ret = ssd1307fb_init(par);
@@ -710,7 +717,8 @@ panel_init_error:
                 pwm_put(par->pwm);
         };
  regulator_enable_error:
-       regulator_disable(par->vbat_reg);
+       if (par->vbat_reg)
+               regulator_disable(par->vbat_reg);
  reset_oled_error:
         fb_deferred_io_cleanup(info);
  fb_alloc_error:
diff --git a/drivers/video/fbdev/xen-fbfront.c b/drivers/video/fbdev/xen-fbfront.c

index d0115a7af0a9ae0f3171a66bf716f0f21c3e91b1..3ee309c50b2d015fb3d463dd88f6b99253beea12 100644 (file)
--- a/drivers/video/fbdev/xen-fbfront.c
+++ b/drivers/video/fbdev/xen-fbfront.c
@@ -643,7 +643,6 @@ static void xenfb_backend_changed(struct xenbus_device *dev,
                 break;
  
         case XenbusStateInitWait:
-InitWait:
                 xenbus_switch_state(dev, XenbusStateConnected);
                 break;
  
@@ -654,7 +653,8 @@ InitWait:
                  * get Connected twice here.
                  */
                 if (dev->state != XenbusStateConnected)
-                       goto InitWait; /* no InitWait seen yet, fudge it */
+                       /* no InitWait seen yet, fudge it */
+                       xenbus_switch_state(dev, XenbusStateConnected);
  
                 if (xenbus_read_unsigned(info->xbdev->otherend,
                                          "request-update", 0))
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c

index 400d70b6937948cc5a8aac698efc64fcf49c1dde..48230a5e12f262b67d28d87adc713f462e8ec5fc 100644 (file)
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -232,6 +232,12 @@ static int virtio_dev_probe(struct device *_d)
                 if (device_features & (1ULL << i))
                         __virtio_set_bit(dev, i);
  
+       if (drv->validate) {
+               err = drv->validate(dev);
+               if (err)
+                       goto err;
+       }
+
         err = virtio_finalize_features(dev);
         if (err)
                 goto err;
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c

index 4e1191508228cd86f6c3ee8174f4320c89e14686..34adf9b9c0538815db33f62ed842de49be5222e7 100644 (file)
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -242,11 +242,11 @@ static inline void update_stat(struct virtio_balloon *vb, int idx,
  
  #define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT)
  
-static void update_balloon_stats(struct virtio_balloon *vb)
+static unsigned int update_balloon_stats(struct virtio_balloon *vb)
  {
         unsigned long events[NR_VM_EVENT_ITEMS];
         struct sysinfo i;
-       int idx = 0;
+       unsigned int idx = 0;
         long available;
  
         all_vm_events(events);
@@ -254,18 +254,22 @@ static void update_balloon_stats(struct virtio_balloon *vb)
  
         available = si_mem_available();
  
+#ifdef CONFIG_VM_EVENT_COUNTERS
         update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN,
                                 pages_to_bytes(events[PSWPIN]));
         update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT,
                                 pages_to_bytes(events[PSWPOUT]));
         update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
         update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
+#endif
         update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE,
                                 pages_to_bytes(i.freeram));
         update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT,
                                 pages_to_bytes(i.totalram));
         update_stat(vb, idx++, VIRTIO_BALLOON_S_AVAIL,
                                 pages_to_bytes(available));
+
+       return idx;
  }
  
  /*
@@ -291,14 +295,14 @@ static void stats_handle_request(struct virtio_balloon *vb)
  {
         struct virtqueue *vq;
         struct scatterlist sg;
-       unsigned int len;
+       unsigned int len, num_stats;
  
-       update_balloon_stats(vb);
+       num_stats = update_balloon_stats(vb);
  
         vq = vb->stats_vq;
         if (!virtqueue_get_buf(vq, &len))
                 return;
-       sg_init_one(&sg, vb->stats, sizeof(vb->stats));
+       sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats);
         virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
         virtqueue_kick(vq);
  }
@@ -423,13 +427,16 @@ static int init_vqs(struct virtio_balloon *vb)
         vb->deflate_vq = vqs[1];
         if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
                 struct scatterlist sg;
+               unsigned int num_stats;
                 vb->stats_vq = vqs[2];
  
                 /*
                  * Prime this virtqueue with one buffer so the hypervisor can
                  * use it to signal us later (it can't be broken yet!).
                  */
-               sg_init_one(&sg, vb->stats, sizeof vb->stats);
+               num_stats = update_balloon_stats(vb);
+
+               sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats);
                 if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL)
                     < 0)
                         BUG();
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c

index df548a6fb844f701d65301503d998a05e6d19703..698d5d06fa039ca1a27b151a3dcf5d322784e3f0 100644 (file)
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -33,8 +33,10 @@ void vp_synchronize_vectors(struct virtio_device *vdev)
         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
         int i;
  
-       synchronize_irq(pci_irq_vector(vp_dev->pci_dev, 0));
-       for (i = 1; i < vp_dev->msix_vectors; i++)
+       if (vp_dev->intx_enabled)
+               synchronize_irq(vp_dev->pci_dev->irq);
+
+       for (i = 0; i < vp_dev->msix_vectors; ++i)
                 synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i));
  }
  
@@ -60,13 +62,16 @@ static irqreturn_t vp_config_changed(int irq, void *opaque)
  static irqreturn_t vp_vring_interrupt(int irq, void *opaque)
  {
         struct virtio_pci_device *vp_dev = opaque;
+       struct virtio_pci_vq_info *info;
         irqreturn_t ret = IRQ_NONE;
-       struct virtqueue *vq;
+       unsigned long flags;
  
-       list_for_each_entry(vq, &vp_dev->vdev.vqs, list) {
-               if (vq->callback && vring_interrupt(irq, vq) == IRQ_HANDLED)
+       spin_lock_irqsave(&vp_dev->lock, flags);
+       list_for_each_entry(info, &vp_dev->virtqueues, node) {
+               if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
                         ret = IRQ_HANDLED;
         }
+       spin_unlock_irqrestore(&vp_dev->lock, flags);
  
         return ret;
  }
@@ -97,185 +102,244 @@ static irqreturn_t vp_interrupt(int irq, void *opaque)
         return vp_vring_interrupt(irq, opaque);
  }
  
-static void vp_remove_vqs(struct virtio_device *vdev)
+static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
+                                  bool per_vq_vectors, struct irq_affinity *desc)
  {
         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-       struct virtqueue *vq, *n;
+       const char *name = dev_name(&vp_dev->vdev.dev);
+       unsigned i, v;
+       int err = -ENOMEM;
  
-       list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
-               if (vp_dev->msix_vector_map) {
-                       int v = vp_dev->msix_vector_map[vq->index];
+       vp_dev->msix_vectors = nvectors;
  
-                       if (v != VIRTIO_MSI_NO_VECTOR)
-                               free_irq(pci_irq_vector(vp_dev->pci_dev, v),
-                                       vq);
-               }
-               vp_dev->del_vq(vq);
+       vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names,
+                                    GFP_KERNEL);
+       if (!vp_dev->msix_names)
+               goto error;
+       vp_dev->msix_affinity_masks
+               = kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks,
+                         GFP_KERNEL);
+       if (!vp_dev->msix_affinity_masks)
+               goto error;
+       for (i = 0; i < nvectors; ++i)
+               if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
+                                       GFP_KERNEL))
+                       goto error;
+
+       err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors,
+                                            nvectors, PCI_IRQ_MSIX |
+                                            (desc ? PCI_IRQ_AFFINITY : 0),
+                                            desc);
+       if (err < 0)
+               goto error;
+       vp_dev->msix_enabled = 1;
+
+       /* Set the vector used for configuration */
+       v = vp_dev->msix_used_vectors;
+       snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
+                "%s-config", name);
+       err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
+                         vp_config_changed, 0, vp_dev->msix_names[v],
+                         vp_dev);
+       if (err)
+               goto error;
+       ++vp_dev->msix_used_vectors;
+
+       v = vp_dev->config_vector(vp_dev, v);
+       /* Verify we had enough resources to assign the vector */
+       if (v == VIRTIO_MSI_NO_VECTOR) {
+               err = -EBUSY;
+               goto error;
         }
+
+       if (!per_vq_vectors) {
+               /* Shared vector for all VQs */
+               v = vp_dev->msix_used_vectors;
+               snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
+                        "%s-virtqueues", name);
+               err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
+                                 vp_vring_interrupt, 0, vp_dev->msix_names[v],
+                                 vp_dev);
+               if (err)
+                       goto error;
+               ++vp_dev->msix_used_vectors;
+       }
+       return 0;
+error:
+       return err;
+}
+
+static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index,
+                                    void (*callback)(struct virtqueue *vq),
+                                    const char *name,
+                                    u16 msix_vec)
+{
+       struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+       struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL);
+       struct virtqueue *vq;
+       unsigned long flags;
+
+       /* fill out our structure that represents an active queue */
+       if (!info)
+               return ERR_PTR(-ENOMEM);
+
+       vq = vp_dev->setup_vq(vp_dev, info, index, callback, name,
+                             msix_vec);
+       if (IS_ERR(vq))
+               goto out_info;
+
+       info->vq = vq;
+       if (callback) {
+               spin_lock_irqsave(&vp_dev->lock, flags);
+               list_add(&info->node, &vp_dev->virtqueues);
+               spin_unlock_irqrestore(&vp_dev->lock, flags);
+       } else {
+               INIT_LIST_HEAD(&info->node);
+       }
+
+       vp_dev->vqs[index] = info;
+       return vq;
+
+out_info:
+       kfree(info);
+       return vq;
+}
+
+static void vp_del_vq(struct virtqueue *vq)
+{
+       struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
+       struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
+       unsigned long flags;
+
+       spin_lock_irqsave(&vp_dev->lock, flags);
+       list_del(&info->node);
+       spin_unlock_irqrestore(&vp_dev->lock, flags);
+
+       vp_dev->del_vq(info);
+       kfree(info);
  }
  
  /* the config->del_vqs() implementation */
  void vp_del_vqs(struct virtio_device *vdev)
  {
         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+       struct virtqueue *vq, *n;
         int i;
  
-       if (WARN_ON_ONCE(list_empty_careful(&vdev->vqs)))
-               return;
+       list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
+               if (vp_dev->per_vq_vectors) {
+                       int v = vp_dev->vqs[vq->index]->msix_vector;
  
-       vp_remove_vqs(vdev);
+                       if (v != VIRTIO_MSI_NO_VECTOR) {
+                               int irq = pci_irq_vector(vp_dev->pci_dev, v);
+
+                               irq_set_affinity_hint(irq, NULL);
+                               free_irq(irq, vq);
+                       }
+               }
+               vp_del_vq(vq);
+       }
+       vp_dev->per_vq_vectors = false;
+
+       if (vp_dev->intx_enabled) {
+               free_irq(vp_dev->pci_dev->irq, vp_dev);
+               vp_dev->intx_enabled = 0;
+       }
  
-       if (vp_dev->pci_dev->msix_enabled) {
-               for (i = 0; i < vp_dev->msix_vectors; i++)
+       for (i = 0; i < vp_dev->msix_used_vectors; ++i)
+               free_irq(pci_irq_vector(vp_dev->pci_dev, i), vp_dev);
+
+       for (i = 0; i < vp_dev->msix_vectors; i++)
+               if (vp_dev->msix_affinity_masks[i])
                         free_cpumask_var(vp_dev->msix_affinity_masks[i]);
  
+       if (vp_dev->msix_enabled) {
                 /* Disable the vector used for configuration */
                 vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR);
  
-               kfree(vp_dev->msix_affinity_masks);
-               kfree(vp_dev->msix_names);
-               kfree(vp_dev->msix_vector_map);
+               pci_free_irq_vectors(vp_dev->pci_dev);
+               vp_dev->msix_enabled = 0;
         }
  
-       free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev);
-       pci_free_irq_vectors(vp_dev->pci_dev);
+       vp_dev->msix_vectors = 0;
+       vp_dev->msix_used_vectors = 0;
+       kfree(vp_dev->msix_names);
+       vp_dev->msix_names = NULL;
+       kfree(vp_dev->msix_affinity_masks);
+       vp_dev->msix_affinity_masks = NULL;
+       kfree(vp_dev->vqs);
+       vp_dev->vqs = NULL;
  }
  
  static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
                 struct virtqueue *vqs[], vq_callback_t *callbacks[],
-               const char * const names[], struct irq_affinity *desc)
+               const char * const names[], bool per_vq_vectors,
+               struct irq_affinity *desc)
  {
         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-       const char *name = dev_name(&vp_dev->vdev.dev);
-       int i, err = -ENOMEM, allocated_vectors, nvectors;
-       unsigned flags = PCI_IRQ_MSIX;
-       bool shared = false;
         u16 msix_vec;
+       int i, err, nvectors, allocated_vectors;
  
-       if (desc) {
-               flags |= PCI_IRQ_AFFINITY;
-               desc->pre_vectors++; /* virtio config vector */
-       }
-
-       nvectors = 1;
-       for (i = 0; i < nvqs; i++)
-               if (callbacks[i])
-                       nvectors++;
-
-       /* Try one vector per queue first. */
-       err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors,
-                       nvectors, flags, desc);
-       if (err < 0) {
-               /* Fallback to one vector for config, one shared for queues. */
-               shared = true;
-               err = pci_alloc_irq_vectors(vp_dev->pci_dev, 2, 2,
-                               PCI_IRQ_MSIX);
-               if (err < 0)
-                       return err;
-       }
-       if (err < 0)
-               return err;
-
-       vp_dev->msix_vectors = nvectors;
-       vp_dev->msix_names = kmalloc_array(nvectors,
-                       sizeof(*vp_dev->msix_names), GFP_KERNEL);
-       if (!vp_dev->msix_names)
-               goto out_free_irq_vectors;
-
-       vp_dev->msix_affinity_masks = kcalloc(nvectors,
-                       sizeof(*vp_dev->msix_affinity_masks), GFP_KERNEL);
-       if (!vp_dev->msix_affinity_masks)
-               goto out_free_msix_names;
+       vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL);
+       if (!vp_dev->vqs)
+               return -ENOMEM;
  
-       for (i = 0; i < nvectors; ++i) {
-               if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
-                               GFP_KERNEL))
-                       goto out_free_msix_affinity_masks;
+       if (per_vq_vectors) {
+               /* Best option: one for change interrupt, one per vq. */
+               nvectors = 1;
+               for (i = 0; i < nvqs; ++i)
+                       if (callbacks[i])
+                               ++nvectors;
+       } else {
+               /* Second best: one for change, shared for all vqs. */
+               nvectors = 2;
         }
  
-       /* Set the vector used for configuration */
-       snprintf(vp_dev->msix_names[0], sizeof(*vp_dev->msix_names),
-                "%s-config", name);
-       err = request_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_config_changed,
-                       0, vp_dev->msix_names[0], vp_dev);
+       err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors,
+                                     per_vq_vectors ? desc : NULL);
         if (err)
-               goto out_free_msix_affinity_masks;
+               goto error_find;
  
-       /* Verify we had enough resources to assign the vector */
-       if (vp_dev->config_vector(vp_dev, 0) == VIRTIO_MSI_NO_VECTOR) {
-               err = -EBUSY;
-               goto out_free_config_irq;
-       }
-
-       vp_dev->msix_vector_map = kmalloc_array(nvqs,
-                       sizeof(*vp_dev->msix_vector_map), GFP_KERNEL);
-       if (!vp_dev->msix_vector_map)
-               goto out_disable_config_irq;
-
-       allocated_vectors = 1; /* vector 0 is the config interrupt */
+       vp_dev->per_vq_vectors = per_vq_vectors;
+       allocated_vectors = vp_dev->msix_used_vectors;
         for (i = 0; i < nvqs; ++i) {
                 if (!names[i]) {
                         vqs[i] = NULL;
                         continue;
                 }
  
-               if (callbacks[i])
-                       msix_vec = allocated_vectors;
-               else
+               if (!callbacks[i])
                         msix_vec = VIRTIO_MSI_NO_VECTOR;
-
-               vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i],
-                               msix_vec);
+               else if (vp_dev->per_vq_vectors)
+                       msix_vec = allocated_vectors++;
+               else
+                       msix_vec = VP_MSIX_VQ_VECTOR;
+               vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i],
+                                    msix_vec);
                 if (IS_ERR(vqs[i])) {
                         err = PTR_ERR(vqs[i]);
-                       goto out_remove_vqs;
+                       goto error_find;
                 }
  
-               if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
-                       vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR;
+               if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
                         continue;
-               }
  
-               snprintf(vp_dev->msix_names[i + 1],
-                        sizeof(*vp_dev->msix_names), "%s-%s",
+               /* allocate per-vq irq if available and necessary */
+               snprintf(vp_dev->msix_names[msix_vec],
+                        sizeof *vp_dev->msix_names,
+                        "%s-%s",
                          dev_name(&vp_dev->vdev.dev), names[i]);
                 err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec),
-                                 vring_interrupt, IRQF_SHARED,
-                                 vp_dev->msix_names[i + 1], vqs[i]);
-               if (err) {
-                       /* don't free this irq on error */
-                       vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR;
-                       goto out_remove_vqs;
-               }
-               vp_dev->msix_vector_map[i] = msix_vec;
-
-               /*
-                * Use a different vector for each queue if they are available,
-                * else share the same vector for all VQs.
-                */
-               if (!shared)
-                       allocated_vectors++;
+                                 vring_interrupt, 0,
+                                 vp_dev->msix_names[msix_vec],
+                                 vqs[i]);
+               if (err)
+                       goto error_find;
         }
-
         return 0;
  
-out_remove_vqs:
-       vp_remove_vqs(vdev);
-       kfree(vp_dev->msix_vector_map);
-out_disable_config_irq:
-       vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR);
-out_free_config_irq:
-       free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev);
-out_free_msix_affinity_masks:
-       for (i = 0; i < nvectors; i++) {
-               if (vp_dev->msix_affinity_masks[i])
-                       free_cpumask_var(vp_dev->msix_affinity_masks[i]);
-       }
-       kfree(vp_dev->msix_affinity_masks);
-out_free_msix_names:
-       kfree(vp_dev->msix_names);
-out_free_irq_vectors:
-       pci_free_irq_vectors(vp_dev->pci_dev);
+error_find:
+       vp_del_vqs(vdev);
         return err;
  }
  
@@ -286,29 +350,33 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs,
         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
         int i, err;
  
+       vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL);
+       if (!vp_dev->vqs)
+               return -ENOMEM;
+
         err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED,
                         dev_name(&vdev->dev), vp_dev);
         if (err)
-               return err;
+               goto out_del_vqs;
  
+       vp_dev->intx_enabled = 1;
+       vp_dev->per_vq_vectors = false;
         for (i = 0; i < nvqs; ++i) {
                 if (!names[i]) {
                         vqs[i] = NULL;
                         continue;
                 }
-               vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i],
-                               VIRTIO_MSI_NO_VECTOR);
+               vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i],
+                                    VIRTIO_MSI_NO_VECTOR);
                 if (IS_ERR(vqs[i])) {
                         err = PTR_ERR(vqs[i]);
-                       goto out_remove_vqs;
+                       goto out_del_vqs;
                 }
         }
  
         return 0;
-
-out_remove_vqs:
-       vp_remove_vqs(vdev);
-       free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev);
+out_del_vqs:
+       vp_del_vqs(vdev);
         return err;
  }
  
@@ -319,9 +387,15 @@ int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
  {
         int err;
  
-       err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, desc);
+       /* Try MSI-X with one vector per queue. */
+       err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, desc);
         if (!err)
                 return 0;
+       /* Fallback: MSI-X with one vector for config, one shared for queues. */
+       err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, desc);
+       if (!err)
+               return 0;
+       /* Finally fall back to regular interrupts. */
         return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names);
  }
  
@@ -341,15 +415,16 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
  {
         struct virtio_device *vdev = vq->vdev;
         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+       struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
+       struct cpumask *mask;
+       unsigned int irq;
  
         if (!vq->callback)
                 return -EINVAL;
  
-       if (vp_dev->pci_dev->msix_enabled) {
-               int vec = vp_dev->msix_vector_map[vq->index];
-               struct cpumask *mask = vp_dev->msix_affinity_masks[vec];
-               unsigned int irq = pci_irq_vector(vp_dev->pci_dev, vec);
-
+       if (vp_dev->msix_enabled) {
+               mask = vp_dev->msix_affinity_masks[info->msix_vector];
+               irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector);
                 if (cpu == -1)
                         irq_set_affinity_hint(irq, NULL);
                 else {
@@ -364,12 +439,13 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
  const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index)
  {
         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-       unsigned int *map = vp_dev->msix_vector_map;
  
-       if (!map || map[index] == VIRTIO_MSI_NO_VECTOR)
+       if (!vp_dev->per_vq_vectors ||
+           vp_dev->vqs[index]->msix_vector == VIRTIO_MSI_NO_VECTOR)
                 return NULL;
  
-       return pci_irq_get_affinity(vp_dev->pci_dev, map[index]);
+       return pci_irq_get_affinity(vp_dev->pci_dev,
+                                   vp_dev->vqs[index]->msix_vector);
  }
  
  #ifdef CONFIG_PM_SLEEP
@@ -440,6 +516,8 @@ static int virtio_pci_probe(struct pci_dev *pci_dev,
         vp_dev->vdev.dev.parent = &pci_dev->dev;
         vp_dev->vdev.dev.release = virtio_pci_release_dev;
         vp_dev->pci_dev = pci_dev;
+       INIT_LIST_HEAD(&vp_dev->virtqueues);
+       spin_lock_init(&vp_dev->lock);
  
         /* enable the device */
         rc = pci_enable_device(pci_dev);
diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h

index ac8c9d7889646ab3cc28bb51accd0d3840d5a34f..e96334aec1e0d70842d1a9fc53462ab728be87c0 100644 (file)
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -31,6 +31,17 @@
  #include <linux/highmem.h>
  #include <linux/spinlock.h>
  
+struct virtio_pci_vq_info {
+       /* the actual virtqueue */
+       struct virtqueue *vq;
+
+       /* the list node for the virtqueues list */
+       struct list_head node;
+
+       /* MSI-X vector (or none) */
+       unsigned msix_vector;
+};
+
  /* Our device structure */
  struct virtio_pci_device {
         struct virtio_device vdev;
@@ -64,25 +75,47 @@ struct virtio_pci_device {
         /* the IO mapping for the PCI config space */
         void __iomem *ioaddr;
  
+       /* a list of queues so we can dispatch IRQs */
+       spinlock_t lock;
+       struct list_head virtqueues;
+
+       /* array of all queues for house-keeping */
+       struct virtio_pci_vq_info **vqs;
+
+       /* MSI-X support */
+       int msix_enabled;
+       int intx_enabled;
         cpumask_var_t *msix_affinity_masks;
         /* Name strings for interrupts. This size should be enough,
          * and I'm too lazy to allocate each name separately. */
         char (*msix_names)[256];
-       /* Total Number of MSI-X vectors (including per-VQ ones). */
-       int msix_vectors;
-       /* Map of per-VQ MSI-X vectors, may be NULL */
-       unsigned *msix_vector_map;
+       /* Number of available vectors */
+       unsigned msix_vectors;
+       /* Vectors allocated, excluding per-vq vectors if any */
+       unsigned msix_used_vectors;
+
+       /* Whether we have vector per vq */
+       bool per_vq_vectors;
  
         struct virtqueue *(*setup_vq)(struct virtio_pci_device *vp_dev,
+                                     struct virtio_pci_vq_info *info,
                                       unsigned idx,
                                       void (*callback)(struct virtqueue *vq),
                                       const char *name,
                                       u16 msix_vec);
-       void (*del_vq)(struct virtqueue *vq);
+       void (*del_vq)(struct virtio_pci_vq_info *info);
  
         u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector);
  };
  
+/* Constants for MSI-X */
+/* Use first vector for configuration changes, second and the rest for
+ * virtqueues. Thus, we need at least 2 vectors for MSI. */
+enum {
+       VP_MSIX_CONFIG_VECTOR = 0,
+       VP_MSIX_VQ_VECTOR = 1,
+};
+
  /* Convert a generic virtio device to our structure */
  static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
  {
diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c

index f7362c5fe18a96a902bc81138b8ea796e03e79d9..4bfa48fb1324660f82ae6272d2e1ecc33522ba3e 100644 (file)
--- a/drivers/virtio/virtio_pci_legacy.c
+++ b/drivers/virtio/virtio_pci_legacy.c
@@ -112,6 +112,7 @@ static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
  }
  
  static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
+                                 struct virtio_pci_vq_info *info,
                                   unsigned index,
                                   void (*callback)(struct virtqueue *vq),
                                   const char *name,
@@ -129,6 +130,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
         if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN))
                 return ERR_PTR(-ENOENT);
  
+       info->msix_vector = msix_vec;
+
         /* create the vring */
         vq = vring_create_virtqueue(index, num,
                                     VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev,
@@ -159,13 +162,14 @@ out_deactivate:
         return ERR_PTR(err);
  }
  
-static void del_vq(struct virtqueue *vq)
+static void del_vq(struct virtio_pci_vq_info *info)
  {
+       struct virtqueue *vq = info->vq;
         struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
  
         iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
  
-       if (vp_dev->pci_dev->msix_enabled) {
+       if (vp_dev->msix_enabled) {
                 iowrite16(VIRTIO_MSI_NO_VECTOR,
                           vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
                 /* Flush the write out to device */
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c

index 7bc3004b840ef3e3dabb5c2e24af8a935f552eba..8978f109d2d79828e5b0c12649debc481dfacd7f 100644 (file)
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -293,6 +293,7 @@ static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
  }
  
  static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
+                                 struct virtio_pci_vq_info *info,
                                   unsigned index,
                                   void (*callback)(struct virtqueue *vq),
                                   const char *name,
@@ -322,6 +323,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
         /* get offset of notification word for this vq */
         off = vp_ioread16(&cfg->queue_notify_off);
  
+       info->msix_vector = msix_vec;
+
         /* create the vring */
         vq = vring_create_virtqueue(index, num,
                                     SMP_CACHE_BYTES, &vp_dev->vdev,
@@ -405,13 +408,14 @@ static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs,
         return 0;
  }
  
-static void del_vq(struct virtqueue *vq)
+static void del_vq(struct virtio_pci_vq_info *info)
  {
+       struct virtqueue *vq = info->vq;
         struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
  
         vp_iowrite16(vq->index, &vp_dev->common->queue_select);
  
-       if (vp_dev->pci_dev->msix_enabled) {
+       if (vp_dev->msix_enabled) {
                 vp_iowrite16(VIRTIO_MSI_NO_VECTOR,
                              &vp_dev->common->queue_msix_vector);
                 /* Flush the write out to device */
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c

index c77a0751a31173344de0c02c3f70d18ec259ca63..f3bf8f4e2d6cef09101b53aa9f1a69563b206287 100644 (file)
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -36,6 +36,7 @@
  #include <linux/spinlock.h>
  #include <linux/slab.h>
  #include <linux/highmem.h>
+#include <linux/refcount.h>
  
  #include <xen/xen.h>
  #include <xen/grant_table.h>
@@ -86,7 +87,7 @@ struct grant_map {
         int index;
         int count;
         int flags;
-       atomic_t users;
+       refcount_t users;
         struct unmap_notify notify;
         struct ioctl_gntdev_grant_ref *grants;
         struct gnttab_map_grant_ref   *map_ops;
@@ -166,7 +167,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
  
         add->index = 0;
         add->count = count;
-       atomic_set(&add->users, 1);
+       refcount_set(&add->users, 1);
  
         return add;
  
@@ -212,7 +213,7 @@ static void gntdev_put_map(struct gntdev_priv *priv, struct grant_map *map)
         if (!map)
                 return;
  
-       if (!atomic_dec_and_test(&map->users))
+       if (!refcount_dec_and_test(&map->users))
                 return;
  
         atomic_sub(map->count, &pages_mapped);
@@ -400,7 +401,7 @@ static void gntdev_vma_open(struct vm_area_struct *vma)
         struct grant_map *map = vma->vm_private_data;
  
         pr_debug("gntdev_vma_open %p\n", vma);
-       atomic_inc(&map->users);
+       refcount_inc(&map->users);
  }
  
  static void gntdev_vma_close(struct vm_area_struct *vma)
@@ -1004,7 +1005,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
                 goto unlock_out;
         }
  
-       atomic_inc(&map->users);
+       refcount_inc(&map->users);
  
         vma->vm_ops = &gntdev_vmops;
  
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c

index f8afc6dcc29f2769694308092a4b543e5e0bed49..e8cef1ad0fe31e0139903399d70730c7eafdc399 100644 (file)
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -681,3 +681,50 @@ xen_swiotlb_set_dma_mask(struct device *dev, u64 dma_mask)
         return 0;
  }
  EXPORT_SYMBOL_GPL(xen_swiotlb_set_dma_mask);
+
+/*
+ * Create userspace mapping for the DMA-coherent memory.
+ * This function should be called with the pages from the current domain only,
+ * passing pages mapped from other domains would lead to memory corruption.
+ */
+int
+xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+                    void *cpu_addr, dma_addr_t dma_addr, size_t size,
+                    unsigned long attrs)
+{
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+       if (__generic_dma_ops(dev)->mmap)
+               return __generic_dma_ops(dev)->mmap(dev, vma, cpu_addr,
+                                                   dma_addr, size, attrs);
+#endif
+       return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size);
+}
+EXPORT_SYMBOL_GPL(xen_swiotlb_dma_mmap);
+
+/*
+ * This function should be called with the pages from the current domain only,
+ * passing pages mapped from other domains would lead to memory corruption.
+ */
+int
+xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
+                       void *cpu_addr, dma_addr_t handle, size_t size,
+                       unsigned long attrs)
+{
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+       if (__generic_dma_ops(dev)->get_sgtable) {
+#if 0
+       /*
+        * This check verifies that the page belongs to the current domain and
+        * is not one mapped from another domain.
+        * This check is for debugging only and should not go into a production build.
+        */
+               unsigned long bfn = PHYS_PFN(dma_to_phys(dev, handle));
+               BUG_ON (!page_is_ram(bfn));
+#endif
+               return __generic_dma_ops(dev)->get_sgtable(dev, sgt, cpu_addr,
+                                                          handle, size, attrs);
+       }
+#endif
+       return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size);
+}
+EXPORT_SYMBOL_GPL(xen_swiotlb_get_sgtable);
diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c

index 4ce10bcca18b1f600c351675142240dbe94a4022..23e391d3ec015d0c5b38b21619898c282826f59c 100644 (file)
--- a/drivers/xen/xen-acpi-processor.c
+++ b/drivers/xen/xen-acpi-processor.c
@@ -27,10 +27,10 @@
  #include <linux/init.h>
  #include <linux/module.h>
  #include <linux/types.h>
+#include <linux/syscore_ops.h>
  #include <linux/acpi.h>
  #include <acpi/processor.h>
  #include <xen/xen.h>
-#include <xen/xen-ops.h>
  #include <xen/interface/platform.h>
  #include <asm/xen/hypercall.h>
  
@@ -408,7 +408,7 @@ static int check_acpi_ids(struct acpi_processor *pr_backup)
         acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
                             ACPI_UINT32_MAX,
                             read_acpi_id, NULL, NULL, NULL);
-       acpi_get_devices("ACPI0007", read_acpi_id, NULL, NULL);
+       acpi_get_devices(ACPI_PROCESSOR_DEVICE_HID, read_acpi_id, NULL, NULL);
  
  upload:
         if (!bitmap_equal(acpi_id_present, acpi_ids_done, nr_acpi_bits)) {
@@ -466,15 +466,33 @@ static int xen_upload_processor_pm_data(void)
         return rc;
  }
  
-static int xen_acpi_processor_resume(struct notifier_block *nb,
-                                    unsigned long action, void *data)
+static void xen_acpi_processor_resume_worker(struct work_struct *dummy)
  {
+       int rc;
+
         bitmap_zero(acpi_ids_done, nr_acpi_bits);
-       return xen_upload_processor_pm_data();
+
+       rc = xen_upload_processor_pm_data();
+       if (rc != 0)
+               pr_info("ACPI data upload failed, error = %d\n", rc);
+}
+
+static void xen_acpi_processor_resume(void)
+{
+       static DECLARE_WORK(wq, xen_acpi_processor_resume_worker);
+
+       /*
+        * xen_upload_processor_pm_data() calls non-atomic code.
+        * However, the context for xen_acpi_processor_resume is syscore
+        * with only the boot CPU online and in an atomic context.
+        *
+        * So defer the upload to a safer point.
+        */
+       schedule_work(&wq);
  }
  
-struct notifier_block xen_acpi_processor_resume_nb = {
-       .notifier_call = xen_acpi_processor_resume,
+static struct syscore_ops xap_syscore_ops = {
+       .resume = xen_acpi_processor_resume,
  };
  
  static int __init xen_acpi_processor_init(void)
@@ -527,7 +545,7 @@ static int __init xen_acpi_processor_init(void)
         if (rc)
                 goto err_unregister;
  
-       xen_resume_notifier_register(&xen_acpi_processor_resume_nb);
+       register_syscore_ops(&xap_syscore_ops);
  
         return 0;
  err_unregister:
@@ -544,7 +562,7 @@ static void __exit xen_acpi_processor_exit(void)
  {
         int i;
  
-       xen_resume_notifier_unregister(&xen_acpi_processor_resume_nb);
+       unregister_syscore_ops(&xap_syscore_ops);
         kfree(acpi_ids_done);
         kfree(acpi_id_present);
         kfree(acpi_id_cst_present);
diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c

index 4d343eed08f51e1a3d2a0628dccb256b95858fae..f3b089b7c0b62ab22fd2aba11111f9bcecea50cd 100644 (file)
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -55,7 +55,6 @@
  #include <linux/string.h>
  #include <linux/slab.h>
  #include <linux/miscdevice.h>
-#include <linux/init.h>
  
  #include <xen/xenbus.h>
  #include <xen/xen.h>
@@ -443,8 +442,10 @@ static int xenbus_write_transaction(unsigned msg_type,
                 return xenbus_command_reply(u, XS_ERROR, "ENOENT");
  
         rc = xenbus_dev_request_and_reply(&u->u.msg, u);
-       if (rc)
+       if (rc && trans) {
+               list_del(&trans->list);
                 kfree(trans);
+       }
  
  out:
         return rc;
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c

index a89f3cfe3c7d7fba5341ee5adac4a3a1ecd8adec..c202930086edb6fd22e0645cdc5264071af28d1f 100644 (file)
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -333,10 +333,6 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
                 goto err_names;
         init_rwsem(&v9ses->rename_sem);
  
-       rc = bdi_setup_and_register(&v9ses->bdi, "9p");
-       if (rc)
-               goto err_names;
-
         v9ses->uid = INVALID_UID;
         v9ses->dfltuid = V9FS_DEFUID;
         v9ses->dfltgid = V9FS_DEFGID;
@@ -345,7 +341,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
         if (IS_ERR(v9ses->clnt)) {
                 rc = PTR_ERR(v9ses->clnt);
                 p9_debug(P9_DEBUG_ERROR, "problem initializing 9p client\n");
-               goto err_bdi;
+               goto err_names;
         }
  
         v9ses->flags = V9FS_ACCESS_USER;
@@ -415,8 +411,6 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
  
  err_clnt:
         p9_client_destroy(v9ses->clnt);
-err_bdi:
-       bdi_destroy(&v9ses->bdi);
  err_names:
         kfree(v9ses->uname);
         kfree(v9ses->aname);
@@ -445,8 +439,6 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
         kfree(v9ses->uname);
         kfree(v9ses->aname);
  
-       bdi_destroy(&v9ses->bdi);
-
         spin_lock(&v9fs_sessionlist_lock);
         list_del(&v9ses->slist);
         spin_unlock(&v9fs_sessionlist_lock);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h

index 443d12e020436f2e7af92333ae89d3f068dc614a..76eaf49abd3aea3b56644f3a025ccbc48b89bae8 100644 (file)
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -114,7 +114,6 @@ struct v9fs_session_info {
         kuid_t uid;             /* if ACCESS_SINGLE, the uid that has access */
         struct p9_client *clnt; /* 9p client */
         struct list_head slist; /* list of sessions registered with v9fs */
-       struct backing_dev_info bdi;
         struct rw_semaphore rename_sem;
  };
  
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c

index de3ed862919691344e5080c855403d4236512cd8..a0965fb587a5f7321b7f428b018d88307d471163 100644 (file)
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -72,10 +72,12 @@ static int v9fs_set_super(struct super_block *s, void *data)
   *
   */
  
-static void
+static int
  v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
                 int flags, void *data)
  {
+       int ret;
+
         sb->s_maxbytes = MAX_LFS_FILESIZE;
         sb->s_blocksize_bits = fls(v9ses->maxdata - 1);
         sb->s_blocksize = 1 << sb->s_blocksize_bits;
@@ -85,7 +87,11 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
                 sb->s_xattr = v9fs_xattr_handlers;
         } else
                 sb->s_op = &v9fs_super_ops;
-       sb->s_bdi = &v9ses->bdi;
+
+       ret = super_setup_bdi(sb);
+       if (ret)
+               return ret;
+
         if (v9ses->cache)
                 sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_SIZE;
  
@@ -99,6 +105,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
  #endif
  
         save_mount_options(sb, data);
+       return 0;
  }
  
  /**
@@ -138,7 +145,9 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
                 retval = PTR_ERR(sb);
                 goto clunk_fid;
         }
-       v9fs_fill_super(sb, v9ses, flags, data);
+       retval = v9fs_fill_super(sb, v9ses, flags, data);
+       if (retval)
+               goto release_sb;
  
         if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
                 sb->s_d_op = &v9fs_cached_dentry_operations;
diff --git a/fs/afs/callback.c b/fs/afs/callback.c

index b29447e03ede0d638950fa0dd64d908004156ea6..25d404d22caebcfd6b6b60d6287e36258f1185eb 100644 (file)
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -362,7 +362,7 @@ static void afs_callback_updater(struct work_struct *work)
  {
         struct afs_server *server;
         struct afs_vnode *vnode, *xvnode;
-       time_t now;
+       time64_t now;
         long timeout;
         int ret;
  
@@ -370,7 +370,7 @@ static void afs_callback_updater(struct work_struct *work)
  
         _enter("");
  
-       now = get_seconds();
+       now = ktime_get_real_seconds();
  
         /* find the first vnode to update */
         spin_lock(&server->cb_lock);
@@ -424,7 +424,8 @@ static void afs_callback_updater(struct work_struct *work)
  
         /* and then reschedule */
         _debug("reschedule");
-       vnode->update_at = get_seconds() + afs_vnode_update_timeout;
+       vnode->update_at = ktime_get_real_seconds() +
+                       afs_vnode_update_timeout;
  
         spin_lock(&server->cb_lock);
  
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c

index 2edbdcbf6432add190464b5a5f414592953c944a..3062cceb5c2aebcc4a15e3c52d1b26ecea82f20d 100644 (file)
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -187,7 +187,6 @@ static int afs_deliver_cb_callback(struct afs_call *call)
         struct afs_callback *cb;
         struct afs_server *server;
         __be32 *bp;
-       u32 tmp;
         int ret, loop;
  
         _enter("{%u}", call->unmarshall);
@@ -249,9 +248,9 @@ static int afs_deliver_cb_callback(struct afs_call *call)
                 if (ret < 0)
                         return ret;
  
-               tmp = ntohl(call->tmp);
-               _debug("CB count: %u", tmp);
-               if (tmp != call->count && tmp != 0)
+               call->count2 = ntohl(call->tmp);
+               _debug("CB count: %u", call->count2);
+               if (call->count2 != call->count && call->count2 != 0)
                         return -EBADMSG;
                 call->offset = 0;
                 call->unmarshall++;
@@ -259,14 +258,14 @@ static int afs_deliver_cb_callback(struct afs_call *call)
         case 4:
                 _debug("extract CB array");
                 ret = afs_extract_data(call, call->buffer,
-                                      call->count * 3 * 4, false);
+                                      call->count2 * 3 * 4, false);
                 if (ret < 0)
                         return ret;
  
                 _debug("unmarshall CB array");
                 cb = call->request;
                 bp = call->buffer;
-               for (loop = call->count; loop > 0; loop--, cb++) {
+               for (loop = call->count2; loop > 0; loop--, cb++) {
                         cb->version     = ntohl(*bp++);
                         cb->expiry      = ntohl(*bp++);
                         cb->type        = ntohl(*bp++);
diff --git a/fs/afs/file.c b/fs/afs/file.c

index ba7b71fba34bcc4cd5f8b8a305ace06a388ac607..0d5b8508869bf0642a88d4c87b3feb49c1fab433 100644 (file)
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -30,6 +30,7 @@ static int afs_readpages(struct file *filp, struct address_space *mapping,
  
  const struct file_operations afs_file_operations = {
         .open           = afs_open,
+       .flush          = afs_flush,
         .release        = afs_release,
         .llseek         = generic_file_llseek,
         .read_iter      = generic_file_read_iter,
@@ -184,10 +185,13 @@ int afs_page_filler(void *data, struct page *page)
                 if (!req)
                         goto enomem;
  
+               /* We request a full page.  If the page is a partial one at the
+                * end of the file, the server will return a short read and the
+                * unmarshalling code will clear the unfilled space.
+                */
                 atomic_set(&req->usage, 1);
                 req->pos = (loff_t)page->index << PAGE_SHIFT;
-               req->len = min_t(size_t, i_size_read(inode) - req->pos,
-                                PAGE_SIZE);
+               req->len = PAGE_SIZE;
                 req->nr_pages = 1;
                 req->pages[0] = page;
                 get_page(page);
@@ -208,7 +212,13 @@ int afs_page_filler(void *data, struct page *page)
                         fscache_uncache_page(vnode->cache, page);
  #endif
                         BUG_ON(PageFsCache(page));
-                       goto error;
+
+                       if (ret == -EINTR ||
+                           ret == -ENOMEM ||
+                           ret == -ERESTARTSYS ||
+                           ret == -EAGAIN)
+                               goto error;
+                       goto io_error;
                 }
  
                 SetPageUptodate(page);
@@ -227,10 +237,12 @@ int afs_page_filler(void *data, struct page *page)
         _leave(" = 0");
         return 0;
  
+io_error:
+       SetPageError(page);
+       goto error;
  enomem:
         ret = -ENOMEM;
  error:
-       SetPageError(page);
         unlock_page(page);
         _leave(" = %d", ret);
         return ret;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c

index ac8e766978dc440e8690fbf44333d41f9894f92a..19f76ae36982df43be740c1bf73d396b1a81c77c 100644 (file)
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -16,6 +16,12 @@
  #include "internal.h"
  #include "afs_fs.h"
  
+/*
+ * We need somewhere to discard into in case the server helpfully returns more
+ * than we asked for in FS.FetchData{,64}.
+ */
+static u8 afs_discard_buffer[64];
+
  /*
   * decode an AFSFid block
   */
@@ -105,7 +111,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
                         vnode->vfs_inode.i_mode = mode;
                 }
  
-               vnode->vfs_inode.i_ctime.tv_sec = status->mtime_server;
+               vnode->vfs_inode.i_ctime.tv_sec = status->mtime_client;
                 vnode->vfs_inode.i_mtime        = vnode->vfs_inode.i_ctime;
                 vnode->vfs_inode.i_atime        = vnode->vfs_inode.i_ctime;
                 vnode->vfs_inode.i_version      = data_version;
@@ -139,7 +145,7 @@ static void xdr_decode_AFSCallBack(const __be32 **_bp, struct afs_vnode *vnode)
         vnode->cb_version       = ntohl(*bp++);
         vnode->cb_expiry        = ntohl(*bp++);
         vnode->cb_type          = ntohl(*bp++);
-       vnode->cb_expires       = vnode->cb_expiry + get_seconds();
+       vnode->cb_expires       = vnode->cb_expiry + ktime_get_real_seconds();
         *_bp = bp;
  }
  
@@ -315,7 +321,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
         void *buffer;
         int ret;
  
-       _enter("{%u,%zu/%u;%u/%llu}",
+       _enter("{%u,%zu/%u;%llu/%llu}",
                call->unmarshall, call->offset, call->count,
                req->remain, req->actual_len);
  
@@ -353,12 +359,6 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
  
                 req->actual_len |= ntohl(call->tmp);
                 _debug("DATA length: %llu", req->actual_len);
-               /* Check that the server didn't want to send us extra.  We
-                * might want to just discard instead, but that requires
-                * cooperation from AF_RXRPC.
-                */
-               if (req->actual_len > req->len)
-                       return -EBADMSG;
  
                 req->remain = req->actual_len;
                 call->offset = req->pos & (PAGE_SIZE - 1);
@@ -368,6 +368,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
                 call->unmarshall++;
  
         begin_page:
+               ASSERTCMP(req->index, <, req->nr_pages);
                 if (req->remain > PAGE_SIZE - call->offset)
                         size = PAGE_SIZE - call->offset;
                 else
@@ -378,7 +379,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
  
                 /* extract the returned data */
         case 3:
-               _debug("extract data %u/%llu %zu/%u",
+               _debug("extract data %llu/%llu %zu/%u",
                        req->remain, req->actual_len, call->offset, call->count);
  
                 buffer = kmap(req->pages[req->index]);
@@ -389,19 +390,40 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
                 if (call->offset == PAGE_SIZE) {
                         if (req->page_done)
                                 req->page_done(call, req);
+                       req->index++;
                         if (req->remain > 0) {
-                               req->index++;
                                 call->offset = 0;
+                               if (req->index >= req->nr_pages) {
+                                       call->unmarshall = 4;
+                                       goto begin_discard;
+                               }
                                 goto begin_page;
                         }
                 }
+               goto no_more_data;
+
+               /* Discard any excess data the server gave us */
+       begin_discard:
+       case 4:
+               size = min_t(loff_t, sizeof(afs_discard_buffer), req->remain);
+               call->count = size;
+               _debug("extract discard %llu/%llu %zu/%u",
+                      req->remain, req->actual_len, call->offset, call->count);
+
+               call->offset = 0;
+               ret = afs_extract_data(call, afs_discard_buffer, call->count, true);
+               req->remain -= call->offset;
+               if (ret < 0)
+                       return ret;
+               if (req->remain > 0)
+                       goto begin_discard;
  
         no_more_data:
                 call->offset = 0;
-               call->unmarshall++;
+               call->unmarshall = 5;
  
                 /* extract the metadata */
-       case 4:
+       case 5:
                 ret = afs_extract_data(call, call->buffer,
                                        (21 + 3 + 6) * 4, false);
                 if (ret < 0)
@@ -416,16 +438,17 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
                 call->offset = 0;
                 call->unmarshall++;
  
-       case 5:
+       case 6:
                 break;
         }
  
-       if (call->count < PAGE_SIZE) {
-               buffer = kmap(req->pages[req->index]);
-               memset(buffer + call->count, 0, PAGE_SIZE - call->count);
-               kunmap(req->pages[req->index]);
+       for (; req->index < req->nr_pages; req->index++) {
+               if (call->count < PAGE_SIZE)
+                       zero_user_segment(req->pages[req->index],
+                                         call->count, PAGE_SIZE);
                 if (req->page_done)
                         req->page_done(call, req);
+               call->count = 0;
         }
  
         _leave(" = 0 [done]");
@@ -711,8 +734,8 @@ int afs_fs_create(struct afs_server *server,
                 memset(bp, 0, padsz);
                 bp = (void *) bp + padsz;
         }
-       *bp++ = htonl(AFS_SET_MODE);
-       *bp++ = 0; /* mtime */
+       *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME);
+       *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
         *bp++ = 0; /* owner */
         *bp++ = 0; /* group */
         *bp++ = htonl(mode & S_IALLUGO); /* unix mode */
@@ -980,8 +1003,8 @@ int afs_fs_symlink(struct afs_server *server,
                 memset(bp, 0, c_padsz);
                 bp = (void *) bp + c_padsz;
         }
-       *bp++ = htonl(AFS_SET_MODE);
-       *bp++ = 0; /* mtime */
+       *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME);
+       *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
         *bp++ = 0; /* owner */
         *bp++ = 0; /* group */
         *bp++ = htonl(S_IRWXUGO); /* unix mode */
@@ -1180,8 +1203,8 @@ static int afs_fs_store_data64(struct afs_server *server,
         *bp++ = htonl(vnode->fid.vnode);
         *bp++ = htonl(vnode->fid.unique);
  
-       *bp++ = 0; /* mask */
-       *bp++ = 0; /* mtime */
+       *bp++ = htonl(AFS_SET_MTIME); /* mask */
+       *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
         *bp++ = 0; /* owner */
         *bp++ = 0; /* group */
         *bp++ = 0; /* unix mode */
@@ -1213,7 +1236,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
         _enter(",%x,{%x:%u},,",
                key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode);
  
-       size = to - offset;
+       size = (loff_t)to - (loff_t)offset;
         if (first != last)
                 size += (loff_t)(last - first) << PAGE_SHIFT;
         pos = (loff_t)first << PAGE_SHIFT;
@@ -1257,8 +1280,8 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
         *bp++ = htonl(vnode->fid.vnode);
         *bp++ = htonl(vnode->fid.unique);
  
-       *bp++ = 0; /* mask */
-       *bp++ = 0; /* mtime */
+       *bp++ = htonl(AFS_SET_MTIME); /* mask */
+       *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
         *bp++ = 0; /* owner */
         *bp++ = 0; /* group */
         *bp++ = 0; /* unix mode */
diff --git a/fs/afs/inode.c b/fs/afs/inode.c

index 1e4897a048d2ee0dee49b613f22336b7118ff9f8..aae55dd151087e16f123adc0ebe51e47e393b297 100644 (file)
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -54,8 +54,21 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
                 inode->i_fop    = &afs_dir_file_operations;
                 break;
         case AFS_FTYPE_SYMLINK:
-               inode->i_mode   = S_IFLNK | vnode->status.mode;
-               inode->i_op     = &page_symlink_inode_operations;
+               /* Symlinks with a mode of 0644 are actually mountpoints. */
+               if ((vnode->status.mode & 0777) == 0644) {
+                       inode->i_flags |= S_AUTOMOUNT;
+
+                       spin_lock(&vnode->lock);
+                       set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
+                       spin_unlock(&vnode->lock);
+
+                       inode->i_mode   = S_IFDIR | 0555;
+                       inode->i_op     = &afs_mntpt_inode_operations;
+                       inode->i_fop    = &afs_mntpt_file_operations;
+               } else {
+                       inode->i_mode   = S_IFLNK | vnode->status.mode;
+                       inode->i_op     = &page_symlink_inode_operations;
+               }
                 inode_nohighmem(inode);
                 break;
         default:
@@ -70,27 +83,15 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
  
         set_nlink(inode, vnode->status.nlink);
         inode->i_uid            = vnode->status.owner;
-       inode->i_gid            = GLOBAL_ROOT_GID;
+       inode->i_gid            = vnode->status.group;
         inode->i_size           = vnode->status.size;
-       inode->i_ctime.tv_sec   = vnode->status.mtime_server;
+       inode->i_ctime.tv_sec   = vnode->status.mtime_client;
         inode->i_ctime.tv_nsec  = 0;
         inode->i_atime          = inode->i_mtime = inode->i_ctime;
         inode->i_blocks         = 0;
         inode->i_generation     = vnode->fid.unique;
         inode->i_version        = vnode->status.data_version;
         inode->i_mapping->a_ops = &afs_fs_aops;
-
-       /* check to see whether a symbolic link is really a mountpoint */
-       if (vnode->status.type == AFS_FTYPE_SYMLINK) {
-               afs_mntpt_check_symlink(vnode, key);
-
-               if (test_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags)) {
-                       inode->i_mode   = S_IFDIR | vnode->status.mode;
-                       inode->i_op     = &afs_mntpt_inode_operations;
-                       inode->i_fop    = &afs_mntpt_file_operations;
-               }
-       }
-
         return 0;
  }
  
@@ -245,12 +246,13 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
                         vnode->cb_version = 0;
                         vnode->cb_expiry = 0;
                         vnode->cb_type = 0;
-                       vnode->cb_expires = get_seconds();
+                       vnode->cb_expires = ktime_get_real_seconds();
                 } else {
                         vnode->cb_version = cb->version;
                         vnode->cb_expiry = cb->expiry;
                         vnode->cb_type = cb->type;
-                       vnode->cb_expires = vnode->cb_expiry + get_seconds();
+                       vnode->cb_expires = vnode->cb_expiry +
+                               ktime_get_real_seconds();
                 }
         }
  
@@ -323,7 +325,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
             !test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
             !test_bit(AFS_VNODE_MODIFIED, &vnode->flags) &&
             !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
-               if (vnode->cb_expires < get_seconds() + 10) {
+               if (vnode->cb_expires < ktime_get_real_seconds() + 10) {
                         _debug("callback expired");
                         set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
                 } else {
@@ -444,7 +446,7 @@ void afs_evict_inode(struct inode *inode)
  
         mutex_lock(&vnode->permits_lock);
         permits = vnode->permits;
-       rcu_assign_pointer(vnode->permits, NULL);
+       RCU_INIT_POINTER(vnode->permits, NULL);
         mutex_unlock(&vnode->permits_lock);
         if (permits)
                 call_rcu(&permits->rcu, afs_zap_permits);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h

index 5dfa56903a2d4b6ff058160ef973efaaa5e690d8..393672997cc23d4e5688dc77421ae81b0df95b48 100644 (file)
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -11,6 +11,7 @@
  
  #include <linux/compiler.h>
  #include <linux/kernel.h>
+#include <linux/ktime.h>
  #include <linux/fs.h>
  #include <linux/pagemap.h>
  #include <linux/rxrpc.h>
@@ -90,7 +91,10 @@ struct afs_call {
         unsigned                request_size;   /* size of request data */
         unsigned                reply_max;      /* maximum size of reply */
         unsigned                first_offset;   /* offset into mapping[first] */
-       unsigned                last_to;        /* amount of mapping[last] */
+       union {
+               unsigned        last_to;        /* amount of mapping[last] */
+               unsigned        count2;         /* count used in unmarshalling */
+       };
         unsigned char           unmarshall;     /* unmarshalling phase */
         bool                    incoming;       /* T if incoming call */
         bool                    send_pages;     /* T if data from mapping should be sent */
@@ -127,12 +131,11 @@ struct afs_call_type {
   */
  struct afs_read {
         loff_t                  pos;            /* Where to start reading */
-       loff_t                  len;            /* How much to read */
+       loff_t                  len;            /* How much we're asking for */
         loff_t                  actual_len;     /* How much we're actually getting */
+       loff_t                  remain;         /* Amount remaining */
         atomic_t                usage;
-       unsigned int            remain;         /* Amount remaining */
         unsigned int            index;          /* Which page we're reading into */
-       unsigned int            pg_offset;      /* Offset in page we're at */
         unsigned int            nr_pages;
         void (*page_done)(struct afs_call *, struct afs_read *);
         struct page             *pages[];
@@ -247,7 +250,7 @@ struct afs_cache_vhash {
   */
  struct afs_vlocation {
         atomic_t                usage;
-       time_t                  time_of_death;  /* time at which put reduced usage to 0 */
+       time64_t                time_of_death;  /* time at which put reduced usage to 0 */
         struct list_head        link;           /* link in cell volume location list */
         struct list_head        grave;          /* link in master graveyard list */
         struct list_head        update;         /* link in master update list */
@@ -258,7 +261,7 @@ struct afs_vlocation {
         struct afs_cache_vlocation vldb;        /* volume information DB record */
         struct afs_volume       *vols[3];       /* volume access record pointer (index by type) */
         wait_queue_head_t       waitq;          /* status change waitqueue */
-       time_t                  update_at;      /* time at which record should be updated */
+       time64_t                update_at;      /* time at which record should be updated */
         spinlock_t              lock;           /* access lock */
         afs_vlocation_state_t   state;          /* volume location state */
         unsigned short          upd_rej_cnt;    /* ENOMEDIUM count during update */
@@ -271,7 +274,7 @@ struct afs_vlocation {
   */
  struct afs_server {
         atomic_t                usage;
-       time_t                  time_of_death;  /* time at which put reduced usage to 0 */
+       time64_t                time_of_death;  /* time at which put reduced usage to 0 */
         struct in_addr          addr;           /* server address */
         struct afs_cell         *cell;          /* cell in which server resides */
         struct list_head        link;           /* link in cell's server list */
@@ -315,7 +318,6 @@ struct afs_volume {
         unsigned short          rjservers;      /* number of servers discarded due to -ENOMEDIUM */
         struct afs_server       *servers[8];    /* servers on which volume resides (ordered) */
         struct rw_semaphore     server_sem;     /* lock for accessing current server */
-       struct backing_dev_info bdi;
  };
  
  /*
@@ -374,8 +376,8 @@ struct afs_vnode {
         struct rb_node          server_rb;      /* link in server->fs_vnodes */
         struct rb_node          cb_promise;     /* link in server->cb_promises */
         struct work_struct      cb_broken_work; /* work to be done on callback break */
-       time_t                  cb_expires;     /* time at which callback expires */
-       time_t                  cb_expires_at;  /* time used to order cb_promise */
+       time64_t                cb_expires;     /* time at which callback expires */
+       time64_t                cb_expires_at;  /* time used to order cb_promise */
         unsigned                cb_version;     /* callback version */
         unsigned                cb_expiry;      /* callback expiry time */
         afs_callback_type_t     cb_type;        /* type of callback */
@@ -557,7 +559,6 @@ extern const struct inode_operations afs_autocell_inode_operations;
  extern const struct file_operations afs_mntpt_file_operations;
  
  extern struct vfsmount *afs_d_automount(struct path *);
-extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *);
  extern void afs_mntpt_kill_timer(void);
  
  /*
@@ -718,6 +719,7 @@ extern int afs_writepages(struct address_space *, struct writeback_control *);
  extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
  extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
  extern int afs_writeback_all(struct afs_vnode *);
+extern int afs_flush(struct file *, fl_owner_t);
  extern int afs_fsync(struct file *, loff_t, loff_t, int);
  
  
diff --git a/fs/afs/misc.c b/fs/afs/misc.c

index 91ea1aa0d8b3ab0a817b525e9f9b3deec98f775f..100b207efc9eaddff4ed9f7e0e4415ed62ba2880 100644 (file)
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -84,6 +84,8 @@ int afs_abort_to_error(u32 abort_code)
         case RXKADDATALEN:      return -EKEYREJECTED;
         case RXKADILLEGALLEVEL: return -EKEYREJECTED;
  
+       case RXGEN_OPCODE:      return -ENOTSUPP;
+
         default:                return -EREMOTEIO;
         }
  }
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c

index d4fb0afc0097d4947d3c2013cf27f521b055d423..bd3b65cde282a24769f7c549c9fe52c85b6c8e4e 100644 (file)
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -46,59 +46,6 @@ static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out);
  
  static unsigned long afs_mntpt_expiry_timeout = 10 * 60;
  
-/*
- * check a symbolic link to see whether it actually encodes a mountpoint
- * - sets the AFS_VNODE_MOUNTPOINT flag on the vnode appropriately
- */
-int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key)
-{
-       struct page *page;
-       size_t size;
-       char *buf;
-       int ret;
-
-       _enter("{%x:%u,%u}",
-              vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
-
-       /* read the contents of the symlink into the pagecache */
-       page = read_cache_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0,
-                              afs_page_filler, key);
-       if (IS_ERR(page)) {
-               ret = PTR_ERR(page);
-               goto out;
-       }
-
-       ret = -EIO;
-       if (PageError(page))
-               goto out_free;
-
-       buf = kmap(page);
-
-       /* examine the symlink's contents */
-       size = vnode->status.size;
-       _debug("symlink to %*.*s", (int) size, (int) size, buf);
-
-       if (size > 2 &&
-           (buf[0] == '%' || buf[0] == '#') &&
-           buf[size - 1] == '.'
-           ) {
-               _debug("symlink is a mountpoint");
-               spin_lock(&vnode->lock);
-               set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
-               vnode->vfs_inode.i_flags |= S_AUTOMOUNT;
-               spin_unlock(&vnode->lock);
-       }
-
-       ret = 0;
-
-       kunmap(page);
-out_free:
-       put_page(page);
-out:
-       _leave(" = %d", ret);
-       return ret;
-}
-
  /*
   * no valid lookup procedure on this sort of dir
   */
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c

index 419ef05dcb5ec7149a3a0b5de657c75bbc6eabb4..8f76b13d55494bddec9e81203c0734a0f6d811d7 100644 (file)
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -259,67 +259,74 @@ void afs_flat_call_destructor(struct afs_call *call)
         call->buffer = NULL;
  }
  
+#define AFS_BVEC_MAX 8
+
+/*
+ * Load the given bvec with the next few pages.
+ */
+static void afs_load_bvec(struct afs_call *call, struct msghdr *msg,
+                         struct bio_vec *bv, pgoff_t first, pgoff_t last,
+                         unsigned offset)
+{
+       struct page *pages[AFS_BVEC_MAX];
+       unsigned int nr, n, i, to, bytes = 0;
+
+       nr = min_t(pgoff_t, last - first + 1, AFS_BVEC_MAX);
+       n = find_get_pages_contig(call->mapping, first, nr, pages);
+       ASSERTCMP(n, ==, nr);
+
+       msg->msg_flags |= MSG_MORE;
+       for (i = 0; i < nr; i++) {
+               to = PAGE_SIZE;
+               if (first + i >= last) {
+                       to = call->last_to;
+                       msg->msg_flags &= ~MSG_MORE;
+               }
+               bv[i].bv_page = pages[i];
+               bv[i].bv_len = to - offset;
+               bv[i].bv_offset = offset;
+               bytes += to - offset;
+               offset = 0;
+       }
+
+       iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC, bv, nr, bytes);
+}
+
  /*
   * attach the data from a bunch of pages on an inode to a call
   */
  static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
  {
-       struct page *pages[8];
-       unsigned count, n, loop, offset, to;
+       struct bio_vec bv[AFS_BVEC_MAX];
+       unsigned int bytes, nr, loop, offset;
         pgoff_t first = call->first, last = call->last;
         int ret;
  
-       _enter("");
-
         offset = call->first_offset;
         call->first_offset = 0;
  
         do {
-               _debug("attach %lx-%lx", first, last);
-
-               count = last - first + 1;
-               if (count > ARRAY_SIZE(pages))
-                       count = ARRAY_SIZE(pages);
-               n = find_get_pages_contig(call->mapping, first, count, pages);
-               ASSERTCMP(n, ==, count);
-
-               loop = 0;
-               do {
-                       struct bio_vec bvec = {.bv_page = pages[loop],
-                                              .bv_offset = offset};
-                       msg->msg_flags = 0;
-                       to = PAGE_SIZE;
-                       if (first + loop >= last)
-                               to = call->last_to;
-                       else
-                               msg->msg_flags = MSG_MORE;
-                       bvec.bv_len = to - offset;
-                       offset = 0;
-
-                       _debug("- range %u-%u%s",
-                              offset, to, msg->msg_flags ? " [more]" : "");
-                       iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC,
-                                     &bvec, 1, to - offset);
-
-                       /* have to change the state *before* sending the last
-                        * packet as RxRPC might give us the reply before it
-                        * returns from sending the request */
-                       if (first + loop >= last)
-                               call->state = AFS_CALL_AWAIT_REPLY;
-                       ret = rxrpc_kernel_send_data(afs_socket, call->rxcall,
-                                                    msg, to - offset);
-                       if (ret < 0)
-                               break;
-               } while (++loop < count);
-               first += count;
-
-               for (loop = 0; loop < count; loop++)
-                       put_page(pages[loop]);
+               afs_load_bvec(call, msg, bv, first, last, offset);
+               offset = 0;
+               bytes = msg->msg_iter.count;
+               nr = msg->msg_iter.nr_segs;
+
+               /* Have to change the state *before* sending the last
+                * packet as RxRPC might give us the reply before it
+                * returns from sending the request.
+                */
+               if (first + nr - 1 >= last)
+                       call->state = AFS_CALL_AWAIT_REPLY;
+               ret = rxrpc_kernel_send_data(afs_socket, call->rxcall,
+                                            msg, bytes);
+               for (loop = 0; loop < nr; loop++)
+                       put_page(bv[loop].bv_page);
                 if (ret < 0)
                         break;
+
+               first += nr;
         } while (first <= last);
  
-       _leave(" = %d", ret);
         return ret;
  }
  
@@ -333,6 +340,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
         struct rxrpc_call *rxcall;
         struct msghdr msg;
         struct kvec iov[1];
+       size_t offset;
+       u32 abort_code;
         int ret;
  
         _enter("%x,{%d},", addr->s_addr, ntohs(call->port));
@@ -381,9 +390,11 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
         msg.msg_controllen      = 0;
         msg.msg_flags           = (call->send_pages ? MSG_MORE : 0);
  
-       /* have to change the state *before* sending the last packet as RxRPC
-        * might give us the reply before it returns from sending the
-        * request */
+       /* We have to change the state *before* sending the last packet as
+        * rxrpc might give us the reply before it returns from sending the
+        * request.  Further, if the send fails, we may already have been given
+        * a notification and may have collected it.
+        */
         if (!call->send_pages)
                 call->state = AFS_CALL_AWAIT_REPLY;
         ret = rxrpc_kernel_send_data(afs_socket, rxcall,
@@ -405,7 +416,17 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
         return afs_wait_for_call_to_complete(call);
  
  error_do_abort:
-       rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, -ret, "KSD");
+       call->state = AFS_CALL_COMPLETE;
+       if (ret != -ECONNABORTED) {
+               rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT,
+                                       -ret, "KSD");
+       } else {
+               abort_code = 0;
+               offset = 0;
+               rxrpc_kernel_recv_data(afs_socket, rxcall, NULL, 0, &offset,
+                                      false, &abort_code);
+               ret = call->type->abort_to_error(abort_code);
+       }
  error_kill_call:
         afs_put_call(call);
         _leave(" = %d", ret);
@@ -452,16 +473,18 @@ static void afs_deliver_to_call(struct afs_call *call)
                 case -EINPROGRESS:
                 case -EAGAIN:
                         goto out;
+               case -ECONNABORTED:
+                       goto call_complete;
                 case -ENOTCONN:
                         abort_code = RX_CALL_DEAD;
                         rxrpc_kernel_abort_call(afs_socket, call->rxcall,
                                                 abort_code, -ret, "KNC");
-                       goto do_abort;
+                       goto save_error;
                 case -ENOTSUPP:
-                       abort_code = RX_INVALID_OPERATION;
+                       abort_code = RXGEN_OPCODE;
                         rxrpc_kernel_abort_call(afs_socket, call->rxcall,
                                                 abort_code, -ret, "KIV");
-                       goto do_abort;
+                       goto save_error;
                 case -ENODATA:
                 case -EBADMSG:
                 case -EMSGSIZE:
@@ -471,7 +494,7 @@ static void afs_deliver_to_call(struct afs_call *call)
                                 abort_code = RXGEN_SS_UNMARSHAL;
                         rxrpc_kernel_abort_call(afs_socket, call->rxcall,
                                                 abort_code, EBADMSG, "KUM");
-                       goto do_abort;
+                       goto save_error;
                 }
         }
  
@@ -482,8 +505,9 @@ out:
         _leave("");
         return;
  
-do_abort:
+save_error:
         call->error = ret;
+call_complete:
         call->state = AFS_CALL_COMPLETE;
         goto done;
  }
@@ -493,7 +517,6 @@ do_abort:
   */
  static int afs_wait_for_call_to_complete(struct afs_call *call)
  {
-       const char *abort_why;
         int ret;
  
         DECLARE_WAITQUEUE(myself, current);
@@ -512,13 +535,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
                         continue;
                 }
  
-               abort_why = "KWC";
-               ret = call->error;
-               if (call->state == AFS_CALL_COMPLETE)
-                       break;
-               abort_why = "KWI";
-               ret = -EINTR;
-               if (signal_pending(current))
+               if (call->state == AFS_CALL_COMPLETE ||
+                   signal_pending(current))
                         break;
                 schedule();
         }
@@ -526,13 +544,14 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
         remove_wait_queue(&call->waitq, &myself);
         __set_current_state(TASK_RUNNING);
  
-       /* kill the call */
+       /* Kill off the call if it's still live. */
         if (call->state < AFS_CALL_COMPLETE) {
-               _debug("call incomplete");
+               _debug("call interrupted");
                 rxrpc_kernel_abort_call(afs_socket, call->rxcall,
-                                       RX_CALL_DEAD, -ret, abort_why);
+                                       RX_USER_ABORT, -EINTR, "KWI");
         }
  
+       ret = call->error;
         _debug("call complete");
         afs_put_call(call);
         _leave(" = %d", ret);
diff --git a/fs/afs/security.c b/fs/afs/security.c

index 8d010422dc8962b72fb3af64f75fdedb8e892cc0..ecb86a6701801cb74745bc99b74f9d8a367a2792 100644 (file)
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -114,7 +114,7 @@ void afs_clear_permits(struct afs_vnode *vnode)
  
         mutex_lock(&vnode->permits_lock);
         permits = vnode->permits;
-       rcu_assign_pointer(vnode->permits, NULL);
+       RCU_INIT_POINTER(vnode->permits, NULL);
         mutex_unlock(&vnode->permits_lock);
  
         if (permits)
@@ -340,17 +340,22 @@ int afs_permission(struct inode *inode, int mask)
         } else {
                 if (!(access & AFS_ACE_LOOKUP))
                         goto permission_denied;
+               if ((mask & MAY_EXEC) && !(inode->i_mode & S_IXUSR))
+                       goto permission_denied;
                 if (mask & (MAY_EXEC | MAY_READ)) {
                         if (!(access & AFS_ACE_READ))
                                 goto permission_denied;
+                       if (!(inode->i_mode & S_IRUSR))
+                               goto permission_denied;
                 } else if (mask & MAY_WRITE) {
                         if (!(access & AFS_ACE_WRITE))
                                 goto permission_denied;
+                       if (!(inode->i_mode & S_IWUSR))
+                               goto permission_denied;
                 }
         }
  
         key_put(key);
-       ret = generic_permission(inode, mask);
         _leave(" = %d", ret);
         return ret;
  
diff --git a/fs/afs/server.c b/fs/afs/server.c

index d4066ab7dd5505b364a6506a1a2d932274bb5d9d..c001b1f2455fbf6dee4c9635c95590ada3890483 100644 (file)
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -242,7 +242,7 @@ void afs_put_server(struct afs_server *server)
         spin_lock(&afs_server_graveyard_lock);
         if (atomic_read(&server->usage) == 0) {
                 list_move_tail(&server->grave, &afs_server_graveyard);
-               server->time_of_death = get_seconds();
+               server->time_of_death = ktime_get_real_seconds();
                 queue_delayed_work(afs_wq, &afs_server_reaper,
                                    afs_server_timeout * HZ);
         }
@@ -277,9 +277,9 @@ static void afs_reap_server(struct work_struct *work)
         LIST_HEAD(corpses);
         struct afs_server *server;
         unsigned long delay, expiry;
-       time_t now;
+       time64_t now;
  
-       now = get_seconds();
+       now = ktime_get_real_seconds();
         spin_lock(&afs_server_graveyard_lock);
  
         while (!list_empty(&afs_server_graveyard)) {
diff --git a/fs/afs/super.c b/fs/afs/super.c

index fbdb022b75a27be11b5699203f8c04e32c499cf6..c79633e5cfd80ce6754650dfb1a26890cf035205 100644 (file)
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -319,7 +319,10 @@ static int afs_fill_super(struct super_block *sb,
         sb->s_blocksize_bits    = PAGE_SHIFT;
         sb->s_magic             = AFS_FS_MAGIC;
         sb->s_op                = &afs_super_ops;
-       sb->s_bdi               = &as->volume->bdi;
+       ret = super_setup_bdi(sb);
+       if (ret)
+               return ret;
+       sb->s_bdi->ra_pages     = VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
         strlcpy(sb->s_id, as->volume->vlocation->vldb.name, sizeof(sb->s_id));
  
         /* allocate the root inode and dentry */
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c

index d7d8dd8c0b3187e6fe7eaed8e6300cb06826ff81..37b7c3b342a6b5a1f2f0cd06c0538e8e1d7f9073 100644 (file)
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -340,7 +340,8 @@ static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
         struct afs_vlocation *xvl;
  
         /* wait at least 10 minutes before updating... */
-       vl->update_at = get_seconds() + afs_vlocation_update_timeout;
+       vl->update_at = ktime_get_real_seconds() +
+                       afs_vlocation_update_timeout;
  
         spin_lock(&afs_vlocation_updates_lock);
  
@@ -506,7 +507,7 @@ void afs_put_vlocation(struct afs_vlocation *vl)
         if (atomic_read(&vl->usage) == 0) {
                 _debug("buried");
                 list_move_tail(&vl->grave, &afs_vlocation_graveyard);
-               vl->time_of_death = get_seconds();
+               vl->time_of_death = ktime_get_real_seconds();
                 queue_delayed_work(afs_wq, &afs_vlocation_reap,
                                    afs_vlocation_timeout * HZ);
  
@@ -543,11 +544,11 @@ static void afs_vlocation_reaper(struct work_struct *work)
         LIST_HEAD(corpses);
         struct afs_vlocation *vl;
         unsigned long delay, expiry;
-       time_t now;
+       time64_t now;
  
         _enter("");
  
-       now = get_seconds();
+       now = ktime_get_real_seconds();
         spin_lock(&afs_vlocation_graveyard_lock);
  
         while (!list_empty(&afs_vlocation_graveyard)) {
@@ -622,13 +623,13 @@ static void afs_vlocation_updater(struct work_struct *work)
  {
         struct afs_cache_vlocation vldb;
         struct afs_vlocation *vl, *xvl;
-       time_t now;
+       time64_t now;
         long timeout;
         int ret;
  
         _enter("");
  
-       now = get_seconds();
+       now = ktime_get_real_seconds();
  
         /* find a record to update */
         spin_lock(&afs_vlocation_updates_lock);
@@ -684,7 +685,8 @@ static void afs_vlocation_updater(struct work_struct *work)
  
         /* and then reschedule */
         _debug("reschedule");
-       vl->update_at = get_seconds() + afs_vlocation_update_timeout;
+       vl->update_at = ktime_get_real_seconds() +
+                       afs_vlocation_update_timeout;
  
         spin_lock(&afs_vlocation_updates_lock);
  
diff --git a/fs/afs/volume.c b/fs/afs/volume.c

index 546f9d01710b5b66706331968edc557306ed848e..db73d6dad02b5f8f549cca6ccacdabaa9d458f63 100644 (file)
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -106,11 +106,6 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
         volume->cell            = params->cell;
         volume->vid             = vlocation->vldb.vid[params->type];
  
-       volume->bdi.ra_pages    = VM_MAX_READAHEAD*1024/PAGE_SIZE; 
-       ret = bdi_setup_and_register(&volume->bdi, "afs");
-       if (ret)
-               goto error_bdi;
-
         init_rwsem(&volume->server_sem);
  
         /* look up all the applicable server records */
@@ -156,8 +151,6 @@ error:
         return ERR_PTR(ret);
  
  error_discard:
-       bdi_destroy(&volume->bdi);
-error_bdi:
         up_write(&params->cell->vl_sem);
  
         for (loop = volume->nservers - 1; loop >= 0; loop--)
@@ -207,7 +200,6 @@ void afs_put_volume(struct afs_volume *volume)
         for (loop = volume->nservers - 1; loop >= 0; loop--)
                 afs_put_server(volume->servers[loop]);
  
-       bdi_destroy(&volume->bdi);
         kfree(volume);
  
         _leave(" [destroyed]");
diff --git a/fs/afs/write.c b/fs/afs/write.c

index c83c1a0e851fb34051c026bcea8e2a561299cf95..2d2fccd5044bcd9b02127246824c1221ec502484 100644 (file)
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -84,10 +84,9 @@ void afs_put_writeback(struct afs_writeback *wb)
   * partly or wholly fill a page that's under preparation for writing
   */
  static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
-                        loff_t pos, struct page *page)
+                        loff_t pos, unsigned int len, struct page *page)
  {
         struct afs_read *req;
-       loff_t i_size;
         int ret;
  
         _enter(",,%llu", (unsigned long long)pos);
@@ -99,14 +98,10 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
  
         atomic_set(&req->usage, 1);
         req->pos = pos;
+       req->len = len;
         req->nr_pages = 1;
         req->pages[0] = page;
-
-       i_size = i_size_read(&vnode->vfs_inode);
-       if (pos + PAGE_SIZE > i_size)
-               req->len = i_size - pos;
-       else
-               req->len = PAGE_SIZE;
+       get_page(page);
  
         ret = afs_vnode_fetch_data(vnode, key, req);
         afs_put_read(req);
@@ -159,12 +154,12 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
                 kfree(candidate);
                 return -ENOMEM;
         }
-       *pagep = page;
-       /* page won't leak in error case: it eventually gets cleaned off LRU */
  
         if (!PageUptodate(page) && len != PAGE_SIZE) {
-               ret = afs_fill_page(vnode, key, index << PAGE_SHIFT, page);
+               ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page);
                 if (ret < 0) {
+                       unlock_page(page);
+                       put_page(page);
                         kfree(candidate);
                         _leave(" = %d [prep]", ret);
                         return ret;
@@ -172,6 +167,9 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
                 SetPageUptodate(page);
         }
  
+       /* page won't leak in error case: it eventually gets cleaned off LRU */
+       *pagep = page;
+
  try_again:
         spin_lock(&vnode->writeback_lock);
  
@@ -233,7 +231,7 @@ flush_conflicting_wb:
         if (wb->state == AFS_WBACK_PENDING)
                 wb->state = AFS_WBACK_CONFLICTING;
         spin_unlock(&vnode->writeback_lock);
-       if (PageDirty(page)) {
+       if (clear_page_dirty_for_io(page)) {
                 ret = afs_write_back_from_locked_page(wb, page);
                 if (ret < 0) {
                         afs_put_writeback(candidate);
@@ -257,7 +255,9 @@ int afs_write_end(struct file *file, struct address_space *mapping,
                   struct page *page, void *fsdata)
  {
         struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+       struct key *key = file->private_data;
         loff_t i_size, maybe_i_size;
+       int ret;
  
         _enter("{%x:%u},{%lx}",
                vnode->fid.vid, vnode->fid.vnode, page->index);
@@ -273,6 +273,20 @@ int afs_write_end(struct file *file, struct address_space *mapping,
                 spin_unlock(&vnode->writeback_lock);
         }
  
+       if (!PageUptodate(page)) {
+               if (copied < len) {
+                       /* Try and load any missing data from the server.  The
+                        * unmarshalling routine will take care of clearing any
+                        * bits that are beyond the EOF.
+                        */
+                       ret = afs_fill_page(vnode, key, pos + copied,
+                                           len - copied, page);
+                       if (ret < 0)
+                               return ret;
+               }
+               SetPageUptodate(page);
+       }
+
         set_page_dirty(page);
         if (PageDirty(page))
                 _debug("dirtied");
@@ -307,10 +321,14 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error,
                 ASSERTCMP(pv.nr, ==, count);
  
                 for (loop = 0; loop < count; loop++) {
-                       ClearPageUptodate(pv.pages[loop]);
+                       struct page *page = pv.pages[loop];
+                       ClearPageUptodate(page);
                         if (error)
-                               SetPageError(pv.pages[loop]);
-                       end_page_writeback(pv.pages[loop]);
+                               SetPageError(page);
+                       if (PageWriteback(page))
+                               end_page_writeback(page);
+                       if (page->index >= first)
+                               first = page->index + 1;
                 }
  
                 __pagevec_release(&pv);
@@ -335,8 +353,6 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb,
         _enter(",%lx", primary_page->index);
  
         count = 1;
-       if (!clear_page_dirty_for_io(primary_page))
-               BUG();
         if (test_set_page_writeback(primary_page))
                 BUG();
  
@@ -502,17 +518,17 @@ static int afs_writepages_region(struct address_space *mapping,
                  */
                 lock_page(page);
  
-               if (page->mapping != mapping) {
+               if (page->mapping != mapping || !PageDirty(page)) {
                         unlock_page(page);
                         put_page(page);
                         continue;
                 }
  
-               if (wbc->sync_mode != WB_SYNC_NONE)
-                       wait_on_page_writeback(page);
-
-               if (PageWriteback(page) || !PageDirty(page)) {
+               if (PageWriteback(page)) {
                         unlock_page(page);
+                       if (wbc->sync_mode != WB_SYNC_NONE)
+                               wait_on_page_writeback(page);
+                       put_page(page);
                         continue;
                 }
  
@@ -523,6 +539,8 @@ static int afs_writepages_region(struct address_space *mapping,
                 wb->state = AFS_WBACK_WRITING;
                 spin_unlock(&wb->vnode->writeback_lock);
  
+               if (!clear_page_dirty_for_io(page))
+                       BUG();
                 ret = afs_write_back_from_locked_page(wb, page);
                 unlock_page(page);
                 put_page(page);
@@ -745,6 +763,20 @@ out:
         return ret;
  }
  
+/*
+ * Flush out all outstanding writes on a file opened for writing when it is
+ * closed.
+ */
+int afs_flush(struct file *file, fl_owner_t id)
+{
+       _enter("");
+
+       if ((file->f_mode & FMODE_WRITE) == 0)
+               return 0;
+
+       return vfs_fsync(file, 0);
+}
+
  /*
   * notification that a previously read-only page is about to become writable
   * - if it returns an error, the caller will deliver a bus error signal
diff --git a/fs/block_dev.c b/fs/block_dev.c

index 2eca00ec43706bb78955cd221a24593942625d57..9ccabe3bb7de1635ba3cac78751db68bb5ead93e 100644 (file)
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -885,6 +885,8 @@ static void bdev_evict_inode(struct inode *inode)
         spin_lock(&bdev_lock);
         list_del_init(&bdev->bd_list);
         spin_unlock(&bdev_lock);
+       /* Detach inode from wb early as bdi_put() may free bdi->wb */
+       inode_detach_wb(inode);
         if (bdev->bd_bdi != &noop_backing_dev_info) {
                 bdi_put(bdev->bd_bdi);
                 bdev->bd_bdi = &noop_backing_dev_info;
@@ -1451,7 +1453,6 @@ int revalidate_disk(struct gendisk *disk)
  
         if (disk->fops->revalidate_disk)
                 ret = disk->fops->revalidate_disk(disk);
-       blk_integrity_revalidate(disk);
         bdev = bdget_disk(disk, 0);
         if (!bdev)
                 return ret;
@@ -1556,8 +1557,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                 bdev->bd_disk = disk;
                 bdev->bd_queue = disk->queue;
                 bdev->bd_contains = bdev;
-               if (bdev->bd_bdi == &noop_backing_dev_info)
-                       bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
  
                 if (!partno) {
                         ret = -ENXIO;
@@ -1622,6 +1621,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                         }
                         bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
                 }
+
+               if (bdev->bd_bdi == &noop_backing_dev_info)
+                       bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
         } else {
                 if (bdev->bd_contains == bdev) {
                         ret = 0;
@@ -1653,8 +1655,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
         bdev->bd_disk = NULL;
         bdev->bd_part = NULL;
         bdev->bd_queue = NULL;
-       bdi_put(bdev->bd_bdi);
-       bdev->bd_bdi = &noop_backing_dev_info;
         if (bdev != bdev->bd_contains)
                 __blkdev_put(bdev->bd_contains, mode, 1);
         bdev->bd_contains = NULL;
@@ -1876,12 +1876,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
                 kill_bdev(bdev);
  
                 bdev_write_inode(bdev);
-               /*
-                * Detaching bdev inode from its wb in __destroy_inode()
-                * is too late: the queue which embeds its bdi (along with
-                * root wb) can be gone as soon as we put_disk() below.
-                */
-               inode_detach_wb(bdev->bd_inode);
         }
         if (bdev->bd_contains == bdev) {
                 if (disk->fops->release)
@@ -2074,7 +2068,6 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
                              loff_t len)
  {
         struct block_device *bdev = I_BDEV(bdev_file_inode(file));
-       struct request_queue *q = bdev_get_queue(bdev);
         struct address_space *mapping;
         loff_t end = start + len - 1;
         loff_t isize;
@@ -2110,18 +2103,13 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
         case FALLOC_FL_ZERO_RANGE:
         case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
                 error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
-                                           GFP_KERNEL, false);
+                                           GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
                 break;
         case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
-               /* Only punch if the device can do zeroing discard. */
-               if (!blk_queue_discard(q) || !q->limits.discard_zeroes_data)
-                       return -EOPNOTSUPP;
-               error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
-                                            GFP_KERNEL, 0);
+               error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
+                                            GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
                 break;
         case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
-               if (!blk_queue_discard(q))
-                       return -EOPNOTSUPP;
                 error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
                                              GFP_KERNEL, 0);
                 break;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h

index 29b7fc28c607232987cc3b28fbe9a92e0f766df7..3e21211e99c39571968f79c95d88eb3eaba6f262 100644 (file)
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -810,7 +810,6 @@ struct btrfs_fs_info {
         struct btrfs_super_block *super_for_commit;
         struct super_block *sb;
         struct inode *btree_inode;
-       struct backing_dev_info bdi;
         struct mutex tree_log_mutex;
         struct mutex transaction_kthread_mutex;
         struct mutex cleaner_mutex;
@@ -1259,7 +1258,7 @@ struct btrfs_root {
         atomic_t will_be_snapshoted;
  
         /* For qgroup metadata space reserve */
-       atomic_t qgroup_meta_rsv;
+       atomic64_t qgroup_meta_rsv;
  };
  static inline u32 btrfs_inode_sectorsize(const struct inode *inode)
  {
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c

index 08b74daf35d05f70dac01adbac73d10925f50879..061c1d1f774f289d854ea6114a74aaf9c2a67b84 100644 (file)
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1342,7 +1342,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
         atomic_set(&root->orphan_inodes, 0);
         atomic_set(&root->refs, 1);
         atomic_set(&root->will_be_snapshoted, 0);
-       atomic_set(&root->qgroup_meta_rsv, 0);
+       atomic64_set(&root->qgroup_meta_rsv, 0);
         root->log_transid = 0;
         root->log_transid_committed = -1;
         root->last_log_commit = 0;
@@ -1808,21 +1808,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
         return ret;
  }
  
-static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
-{
-       int err;
-
-       err = bdi_setup_and_register(bdi, "btrfs");
-       if (err)
-               return err;
-
-       bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
-       bdi->congested_fn       = btrfs_congested_fn;
-       bdi->congested_data     = info;
-       bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK;
-       return 0;
-}
-
  /*
   * called by the kthread helper functions to finally call the bio end_io
   * functions.  This is where read checksum verification actually happens
@@ -2601,16 +2586,10 @@ int open_ctree(struct super_block *sb,
                 goto fail;
         }
  
-       ret = setup_bdi(fs_info, &fs_info->bdi);
-       if (ret) {
-               err = ret;
-               goto fail_srcu;
-       }
-
         ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL);
         if (ret) {
                 err = ret;
-               goto fail_bdi;
+               goto fail_srcu;
         }
         fs_info->dirty_metadata_batch = PAGE_SIZE *
                                         (1 + ilog2(nr_cpu_ids));
@@ -2718,7 +2697,6 @@ int open_ctree(struct super_block *sb,
  
         sb->s_blocksize = 4096;
         sb->s_blocksize_bits = blksize_bits(4096);
-       sb->s_bdi = &fs_info->bdi;
  
         btrfs_init_btree_inode(fs_info);
  
@@ -2915,9 +2893,12 @@ int open_ctree(struct super_block *sb,
                 goto fail_sb_buffer;
         }
  
-       fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
-       fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
-                                   SZ_4M / PAGE_SIZE);
+       sb->s_bdi->congested_fn = btrfs_congested_fn;
+       sb->s_bdi->congested_data = fs_info;
+       sb->s_bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK;
+       sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
+       sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super);
+       sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE);
  
         sb->s_blocksize = sectorsize;
         sb->s_blocksize_bits = blksize_bits(sectorsize);
@@ -3285,8 +3266,6 @@ fail_delalloc_bytes:
         percpu_counter_destroy(&fs_info->delalloc_bytes);
  fail_dirty_metadata_bytes:
         percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
-fail_bdi:
-       bdi_destroy(&fs_info->bdi);
  fail_srcu:
         cleanup_srcu_struct(&fs_info->subvol_srcu);
  fail:
@@ -4007,7 +3986,6 @@ void close_ctree(struct btrfs_fs_info *fs_info)
         percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
         percpu_counter_destroy(&fs_info->delalloc_bytes);
         percpu_counter_destroy(&fs_info->bio_counter);
-       bdi_destroy(&fs_info->bdi);
         cleanup_srcu_struct(&fs_info->subvol_srcu);
  
         btrfs_free_stripe_hash_table(fs_info);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c

index 28e81922a21c1ecead950f50cf3e685ad03c57f6..27fdb250b4467f65a8c6a42d06835f3bb3a36aec 100644 (file)
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1714,7 +1714,8 @@ static int __process_pages_contig(struct address_space *mapping,
                          * can we find nothing at @index.
                          */
                         ASSERT(page_ops & PAGE_LOCK);
-                       return ret;
+                       err = -EAGAIN;
+                       goto out;
                 }
  
                 for (i = 0; i < ret; i++) {
@@ -2583,26 +2584,36 @@ static void end_bio_extent_readpage(struct bio *bio)
  
                 if (tree->ops) {
                         ret = tree->ops->readpage_io_failed_hook(page, mirror);
-                       if (!ret && !bio->bi_error)
-                               uptodate = 1;
-               } else {
+                       if (ret == -EAGAIN) {
+                               /*
+                                * Data inode's readpage_io_failed_hook() always
+                                * returns -EAGAIN.
+                                *
+                                * The generic bio_readpage_error handles errors
+                                * the following way: If possible, new read
+                                * requests are created and submitted and will
+                                * end up in end_bio_extent_readpage as well (if
+                                * we're lucky, not in the !uptodate case). In
+                                * that case it returns 0 and we just go on with
+                                * the next page in our bio. If it can't handle
+                                * the error it will return -EIO and we remain
+                                * responsible for that page.
+                                */
+                               ret = bio_readpage_error(bio, offset, page,
+                                                        start, end, mirror);
+                               if (ret == 0) {
+                                       uptodate = !bio->bi_error;
+                                       offset += len;
+                                       continue;
+                               }
+                       }
+
                         /*
-                        * The generic bio_readpage_error handles errors the
-                        * following way: If possible, new read requests are
-                        * created and submitted and will end up in
-                        * end_bio_extent_readpage as well (if we're lucky, not
-                        * in the !uptodate case). In that case it returns 0 and
-                        * we just go on with the next page in our bio. If it
-                        * can't handle the error it will return -EIO and we
-                        * remain responsible for that page.
+                        * Metadata's readpage_io_failed_hook() always returns
+                        * -EIO and fixes nothing.  -EIO is also returned if
+                        * the data inode's error could not be fixed.
                          */
-                       ret = bio_readpage_error(bio, offset, page, start, end,
-                                                mirror);
-                       if (ret == 0) {
-                               uptodate = !bio->bi_error;
-                               offset += len;
-                               continue;
-                       }
+                       ASSERT(ret == -EIO);
                 }
  readpage_ok:
                 if (likely(uptodate)) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

index c40060cc481f60440044d00ea4a76904cc4d9761..5e71f1ea3391b034dc8e6f55f62d82dbe76e9811 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6709,6 +6709,20 @@ static noinline int uncompress_inline(struct btrfs_path *path,
         max_size = min_t(unsigned long, PAGE_SIZE, max_size);
         ret = btrfs_decompress(compress_type, tmp, page,
                                extent_offset, inline_size, max_size);
+
+       /*
+        * decompression code contains a memset to fill in any space between the end
+        * of the uncompressed data and the end of max_size in case the decompressed
+        * data ends up shorter than ram_bytes.  That doesn't cover the hole between
+        * the end of an inline extent and the beginning of the next block, so we
+        * cover that region here.
+        */
+
+       if (max_size + pg_offset < PAGE_SIZE) {
+               char *map = kmap(page);
+               memset(map + pg_offset + max_size, 0, PAGE_SIZE - max_size - pg_offset);
+               kunmap(page);
+       }
         kfree(tmp);
         return ret;
  }
@@ -7896,7 +7910,6 @@ struct btrfs_retry_complete {
  static void btrfs_retry_endio_nocsum(struct bio *bio)
  {
         struct btrfs_retry_complete *done = bio->bi_private;
-       struct inode *inode;
         struct bio_vec *bvec;
         int i;
  
@@ -7904,12 +7917,12 @@ static void btrfs_retry_endio_nocsum(struct bio *bio)
                 goto end;
  
         ASSERT(bio->bi_vcnt == 1);
-       inode = bio->bi_io_vec->bv_page->mapping->host;
-       ASSERT(bio->bi_io_vec->bv_len == btrfs_inode_sectorsize(inode));
+       ASSERT(bio->bi_io_vec->bv_len == btrfs_inode_sectorsize(done->inode));
  
         done->uptodate = 1;
         bio_for_each_segment_all(bvec, bio, i)
-       clean_io_failure(BTRFS_I(done->inode), done->start, bvec->bv_page, 0);
+               clean_io_failure(BTRFS_I(done->inode), done->start,
+                                bvec->bv_page, 0);
  end:
         complete(&done->done);
         bio_put(bio);
@@ -7959,8 +7972,10 @@ next_block_or_try_again:
  
                 start += sectorsize;
  
-               if (nr_sectors--) {
+               nr_sectors--;
+               if (nr_sectors) {
                         pgoff += sectorsize;
+                       ASSERT(pgoff < PAGE_SIZE);
                         goto next_block_or_try_again;
                 }
         }
@@ -7972,9 +7987,7 @@ static void btrfs_retry_endio(struct bio *bio)
  {
         struct btrfs_retry_complete *done = bio->bi_private;
         struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
-       struct inode *inode;
         struct bio_vec *bvec;
-       u64 start;
         int uptodate;
         int ret;
         int i;
@@ -7984,11 +7997,8 @@ static void btrfs_retry_endio(struct bio *bio)
  
         uptodate = 1;
  
-       start = done->start;
-
         ASSERT(bio->bi_vcnt == 1);
-       inode = bio->bi_io_vec->bv_page->mapping->host;
-       ASSERT(bio->bi_io_vec->bv_len == btrfs_inode_sectorsize(inode));
+       ASSERT(bio->bi_io_vec->bv_len == btrfs_inode_sectorsize(done->inode));
  
         bio_for_each_segment_all(bvec, bio, i) {
                 ret = __readpage_endio_check(done->inode, io_bio, i,
@@ -8066,8 +8076,10 @@ next:
  
                 ASSERT(nr_sectors);
  
-               if (--nr_sectors) {
+               nr_sectors--;
+               if (nr_sectors) {
                         pgoff += sectorsize;
+                       ASSERT(pgoff < PAGE_SIZE);
                         goto next_block;
                 }
         }
@@ -10509,9 +10521,9 @@ out_inode:
  }
  
  __attribute__((const))
-static int dummy_readpage_io_failed_hook(struct page *page, int failed_mirror)
+static int btrfs_readpage_io_failed_hook(struct page *page, int failed_mirror)
  {
-       return 0;
+       return -EAGAIN;
  }
  
  static const struct inode_operations btrfs_dir_inode_operations = {
@@ -10556,7 +10568,7 @@ static const struct extent_io_ops btrfs_extent_io_ops = {
         .submit_bio_hook = btrfs_submit_bio_hook,
         .readpage_end_io_hook = btrfs_readpage_end_io_hook,
         .merge_bio_hook = btrfs_merge_bio_hook,
-       .readpage_io_failed_hook = dummy_readpage_io_failed_hook,
+       .readpage_io_failed_hook = btrfs_readpage_io_failed_hook,
  
         /* optional callbacks */
         .fill_delalloc = run_delalloc_range,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c

index a5da750c1087fdc118e3ba696962260a9a761fc6..afbea61d957e893db09effb75ec47c7d670e24e3 100644 (file)
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1042,9 +1042,12 @@ static void report_reserved_underflow(struct btrfs_fs_info *fs_info,
                                       struct btrfs_qgroup *qgroup,
                                       u64 num_bytes)
  {
-       btrfs_warn(fs_info,
+#ifdef CONFIG_BTRFS_DEBUG
+       WARN_ON(qgroup->reserved < num_bytes);
+       btrfs_debug(fs_info,
                 "qgroup %llu reserved space underflow, have: %llu, to free: %llu",
                 qgroup->qgroupid, qgroup->reserved, num_bytes);
+#endif
         qgroup->reserved = 0;
  }
  /*
@@ -1075,7 +1078,7 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
         qgroup->excl += sign * num_bytes;
         qgroup->excl_cmpr += sign * num_bytes;
         if (sign > 0) {
-               if (WARN_ON(qgroup->reserved < num_bytes))
+               if (qgroup->reserved < num_bytes)
                         report_reserved_underflow(fs_info, qgroup, num_bytes);
                 else
                         qgroup->reserved -= num_bytes;
@@ -1100,7 +1103,7 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
                 WARN_ON(sign < 0 && qgroup->excl < num_bytes);
                 qgroup->excl += sign * num_bytes;
                 if (sign > 0) {
-                       if (WARN_ON(qgroup->reserved < num_bytes))
+                       if (qgroup->reserved < num_bytes)
                                 report_reserved_underflow(fs_info, qgroup,
                                                           num_bytes);
                         else
@@ -2469,7 +2472,7 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
  
                 qg = unode_aux_to_qgroup(unode);
  
-               if (WARN_ON(qg->reserved < num_bytes))
+               if (qg->reserved < num_bytes)
                         report_reserved_underflow(fs_info, qg, num_bytes);
                 else
                         qg->reserved -= num_bytes;
@@ -2948,20 +2951,20 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
         ret = qgroup_reserve(root, num_bytes, enforce);
         if (ret < 0)
                 return ret;
-       atomic_add(num_bytes, &root->qgroup_meta_rsv);
+       atomic64_add(num_bytes, &root->qgroup_meta_rsv);
         return ret;
  }
  
  void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
  {
         struct btrfs_fs_info *fs_info = root->fs_info;
-       int reserved;
+       u64 reserved;
  
         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
             !is_fstree(root->objectid))
                 return;
  
-       reserved = atomic_xchg(&root->qgroup_meta_rsv, 0);
+       reserved = atomic64_xchg(&root->qgroup_meta_rsv, 0);
         if (reserved == 0)
                 return;
         btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved);
@@ -2976,8 +2979,8 @@ void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
                 return;
  
         BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
-       WARN_ON(atomic_read(&root->qgroup_meta_rsv) < num_bytes);
-       atomic_sub(num_bytes, &root->qgroup_meta_rsv);
+       WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes);
+       atomic64_sub(num_bytes, &root->qgroup_meta_rsv);
         btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes);
  }
  
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c

index 456c8901489b6c6b468901854bcdcbc53cb5cf13..a60d5bfb8a49e2bfc3faef10f4ea353a9da5f8b8 100644 (file)
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -6305,8 +6305,13 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
                 goto out;
         }
  
+       /*
+        * Check that we don't overflow at later allocations: we request
+        * clone_sources_count + 1 items, and the resulting size is compared
+        * against an unsigned long inside access_ok().
+        */
         if (arg->clone_sources_count >
-           ULLONG_MAX / sizeof(*arg->clone_sources)) {
+           ULONG_MAX / sizeof(struct clone_root) - 1) {
                 ret = -EINVAL;
                 goto out;
         }
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c

index da687dc79cce6155a278038a15775637c97ce3cc..72a053c9a7f097bfc9086ed6e0035e62dfd76bb1 100644 (file)
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -549,16 +549,19 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
                 case Opt_ssd:
                         btrfs_set_and_info(info, SSD,
                                            "use ssd allocation scheme");
+                       btrfs_clear_opt(info->mount_opt, NOSSD);
                         break;
                 case Opt_ssd_spread:
                         btrfs_set_and_info(info, SSD_SPREAD,
                                            "use spread ssd allocation scheme");
                         btrfs_set_opt(info->mount_opt, SSD);
+                       btrfs_clear_opt(info->mount_opt, NOSSD);
                         break;
                 case Opt_nossd:
                         btrfs_set_and_info(info, NOSSD,
                                              "not using ssd allocation scheme");
                         btrfs_clear_opt(info->mount_opt, SSD);
+                       btrfs_clear_opt(info->mount_opt, SSD_SPREAD);
                         break;
                 case Opt_barrier:
                         btrfs_clear_and_info(info, NOBARRIER,
@@ -1133,6 +1136,13 @@ static int btrfs_fill_super(struct super_block *sb,
  #endif
         sb->s_flags |= MS_I_VERSION;
         sb->s_iflags |= SB_I_CGROUPWB;
+
+       err = super_setup_bdi(sb);
+       if (err) {
+               btrfs_err(fs_info, "super_setup_bdi failed");
+               return err;
+       }
+
         err = open_ctree(sb, fs_devices, (char *)data);
         if (err) {
                 btrfs_err(fs_info, "open_ctree failed");
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c

index 73d56eef5e60f311225b06ad7adccedeed54a0db..ab8a66d852f91cb04206361a551b8c57760c9c40 100644 (file)
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6213,7 +6213,7 @@ int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
         for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
                 dev = bbio->stripes[dev_nr].dev;
                 if (!dev || !dev->bdev ||
-                   (bio_op(bio) == REQ_OP_WRITE && !dev->writeable)) {
+                   (bio_op(first_bio) == REQ_OP_WRITE && !dev->writeable)) {
                         bbio_error(bbio, first_bio, logical);
                         continue;
                 }
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c

index 1a3e1b40799a086037fe529e1b0ff02e302adb43..9ecb2fd348cb3c01727a903c8ae151582179991e 100644 (file)
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -578,7 +578,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
         writeback_stat = atomic_long_inc_return(&fsc->writeback_count);
         if (writeback_stat >
             CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
-               set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);
+               set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
  
         set_page_writeback(page);
         err = ceph_osdc_writepages(osdc, ceph_vino(inode),
@@ -700,7 +700,7 @@ static void writepages_finish(struct ceph_osd_request *req)
                         if (atomic_long_dec_return(&fsc->writeback_count) <
                              CONGESTION_OFF_THRESH(
                                         fsc->mount_options->congestion_kb))
-                               clear_bdi_congested(&fsc->backing_dev_info,
+                               clear_bdi_congested(inode_to_bdi(inode),
                                                     BLK_RW_ASYNC);
  
                         if (rc < 0)
@@ -979,7 +979,7 @@ get_more_pages:
                         if (atomic_long_inc_return(&fsc->writeback_count) >
                             CONGESTION_ON_THRESH(
                                     fsc->mount_options->congestion_kb)) {
-                               set_bdi_congested(&fsc->backing_dev_info,
+                               set_bdi_congested(inode_to_bdi(inode),
                                                   BLK_RW_ASYNC);
                         }
  
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c

index f2ae393e2c31a2b3dbca7a5f81eeb5b81afa5e1b..3ef11bc8d728d6129818428ba192830186e23804 100644 (file)
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -251,7 +251,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
                 goto out;
  
         snprintf(name, sizeof(name), "../../bdi/%s",
-                dev_name(fsc->backing_dev_info.dev));
+                dev_name(fsc->sb->s_bdi->dev));
         fsc->debugfs_bdi =
                 debugfs_create_symlink("bdi",
                                        fsc->client->debugfs_dir,
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c

index d449e1c03cbd791922148ad00c3d6d0f0e1599ce..d3119fe3ab45fdbdb534651ef68194815dcc544b 100644 (file)
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -2071,11 +2071,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
         if (inode_dirty_flags)
                 __mark_inode_dirty(inode, inode_dirty_flags);
  
-       if (ia_valid & ATTR_MODE) {
-               err = posix_acl_chmod(inode, attr->ia_mode);
-               if (err)
-                       goto out_put;
-       }
  
         if (mask) {
                 req->r_inode = inode;
@@ -2088,14 +2083,12 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
         dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err,
              ceph_cap_string(dirtied), mask);
  
-       ceph_mdsc_put_request(req);
-       if (mask & CEPH_SETATTR_SIZE)
-               __ceph_do_pending_vmtruncate(inode);
-       ceph_free_cap_flush(prealloc_cf);
-       return err;
-out_put:
         ceph_mdsc_put_request(req);
         ceph_free_cap_flush(prealloc_cf);
+
+       if (err >= 0 && (mask & CEPH_SETATTR_SIZE))
+               __ceph_do_pending_vmtruncate(inode);
+
         return err;
  }
  
@@ -2114,7 +2107,12 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
         if (err != 0)
                 return err;
  
-       return __ceph_setattr(inode, attr);
+       err = __ceph_setattr(inode, attr);
+
+       if (err >= 0 && (attr->ia_valid & ATTR_MODE))
+               err = posix_acl_chmod(inode, attr->ia_mode);
+
+       return err;
  }
  
  /*
diff --git a/fs/ceph/super.c b/fs/ceph/super.c

index 0ec8d0114e57ba80fdc46b1acdc9b7de7373e276..a8c81b2052ca9052f67cd217785f1c77a510d788 100644 (file)
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -579,10 +579,6 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
  
         atomic_long_set(&fsc->writeback_count, 0);
  
-       err = bdi_init(&fsc->backing_dev_info);
-       if (err < 0)
-               goto fail_client;
-
         err = -ENOMEM;
         /*
          * The number of concurrent works can be high but they don't need
@@ -590,7 +586,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
          */
         fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1);
         if (fsc->wb_wq == NULL)
-               goto fail_bdi;
+               goto fail_client;
         fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1);
         if (fsc->pg_inv_wq == NULL)
                 goto fail_wb_wq;
@@ -624,8 +620,6 @@ fail_pg_inv_wq:
         destroy_workqueue(fsc->pg_inv_wq);
  fail_wb_wq:
         destroy_workqueue(fsc->wb_wq);
-fail_bdi:
-       bdi_destroy(&fsc->backing_dev_info);
  fail_client:
         ceph_destroy_client(fsc->client);
  fail:
@@ -643,8 +637,6 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
         destroy_workqueue(fsc->pg_inv_wq);
         destroy_workqueue(fsc->trunc_wq);
  
-       bdi_destroy(&fsc->backing_dev_info);
-
         mempool_destroy(fsc->wb_pagevec_pool);
  
         destroy_mount_options(fsc->mount_options);
@@ -937,33 +929,32 @@ static int ceph_compare_super(struct super_block *sb, void *data)
   */
  static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
  
-static int ceph_register_bdi(struct super_block *sb,
-                            struct ceph_fs_client *fsc)
+static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc)
  {
         int err;
  
+       err = super_setup_bdi_name(sb, "ceph-%ld",
+                                  atomic_long_inc_return(&bdi_seq));
+       if (err)
+               return err;
+
         /* set ra_pages based on rasize mount option? */
         if (fsc->mount_options->rasize >= PAGE_SIZE)
-               fsc->backing_dev_info.ra_pages =
+               sb->s_bdi->ra_pages =
                         (fsc->mount_options->rasize + PAGE_SIZE - 1)
                         >> PAGE_SHIFT;
         else
-               fsc->backing_dev_info.ra_pages =
-                       VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
+               sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
  
         if (fsc->mount_options->rsize > fsc->mount_options->rasize &&
             fsc->mount_options->rsize >= PAGE_SIZE)
-               fsc->backing_dev_info.io_pages =
+               sb->s_bdi->io_pages =
                         (fsc->mount_options->rsize + PAGE_SIZE - 1)
                         >> PAGE_SHIFT;
         else if (fsc->mount_options->rsize == 0)
-               fsc->backing_dev_info.io_pages = ULONG_MAX;
+               sb->s_bdi->io_pages = ULONG_MAX;
  
-       err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld",
-                          atomic_long_inc_return(&bdi_seq));
-       if (!err)
-               sb->s_bdi = &fsc->backing_dev_info;
-       return err;
+       return 0;
  }
  
  static struct dentry *ceph_mount(struct file_system_type *fs_type,
@@ -1018,7 +1009,7 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type,
                 dout("get_sb got existing client %p\n", fsc);
         } else {
                 dout("get_sb using new client %p\n", fsc);
-               err = ceph_register_bdi(sb, fsc);
+               err = ceph_setup_bdi(sb, fsc);
                 if (err < 0) {
                         res = ERR_PTR(err);
                         goto out_splat;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h

index fe6b9cfc4013e63c8b6f6ea3b9b3bd1d4eb89364..176186b124575225537afe7049686041d3148c26 100644 (file)
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -92,8 +92,6 @@ struct ceph_fs_client {
         struct workqueue_struct *trunc_wq;
         atomic_long_t writeback_count;
  
-       struct backing_dev_info backing_dev_info;
-
  #ifdef CONFIG_DEBUG_FS
         struct dentry *debugfs_dentry_lru, *debugfs_caps;
         struct dentry *debugfs_congestion_kb;
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h

index 07ed81cf1552e6ae97be753de0720741e3f80068..cbd216b572390ca76e481aabf9a311e4b7749d7c 100644 (file)
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -68,7 +68,6 @@ struct cifs_sb_info {
         umode_t mnt_dir_mode;
         unsigned int mnt_cifs_flags;
         char   *mountdata; /* options received at mount time or via DFS refs */
-       struct backing_dev_info bdi;
         struct delayed_work prune_tlinks;
         struct rcu_head rcu;
         char *prepath;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c

index 15e1db8738aecad0c8a86888c0fa1ada5f9b7623..34fee9fb7e4fe2e27b19e6b10f2496fb944b6b13 100644 (file)
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -138,7 +138,12 @@ cifs_read_super(struct super_block *sb)
         sb->s_magic = CIFS_MAGIC_NUMBER;
         sb->s_op = &cifs_super_ops;
         sb->s_xattr = cifs_xattr_handlers;
-       sb->s_bdi = &cifs_sb->bdi;
+       rc = super_setup_bdi(sb);
+       if (rc)
+               goto out_no_root;
+       /* tune readahead according to rsize */
+       sb->s_bdi->ra_pages = cifs_sb->rsize / PAGE_SIZE;
+
         sb->s_blocksize = CIFS_MAX_MSGSIZE;
         sb->s_blocksize_bits = 14;      /* default 2**14 = CIFS_MAX_MSGSIZE */
         inode = cifs_root_iget(sb);
@@ -972,6 +977,86 @@ out:
         return rc;
  }
  
+ssize_t cifs_file_copychunk_range(unsigned int xid,
+                               struct file *src_file, loff_t off,
+                               struct file *dst_file, loff_t destoff,
+                               size_t len, unsigned int flags)
+{
+       struct inode *src_inode = file_inode(src_file);
+       struct inode *target_inode = file_inode(dst_file);
+       struct cifsFileInfo *smb_file_src;
+       struct cifsFileInfo *smb_file_target;
+       struct cifs_tcon *src_tcon;
+       struct cifs_tcon *target_tcon;
+       ssize_t rc;
+
+       cifs_dbg(FYI, "copychunk range\n");
+
+       if (src_inode == target_inode) {
+               rc = -EINVAL;
+               goto out;
+       }
+
+       if (!src_file->private_data || !dst_file->private_data) {
+               rc = -EBADF;
+               cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
+               goto out;
+       }
+
+       rc = -EXDEV;
+       smb_file_target = dst_file->private_data;
+       smb_file_src = src_file->private_data;
+       src_tcon = tlink_tcon(smb_file_src->tlink);
+       target_tcon = tlink_tcon(smb_file_target->tlink);
+
+       if (src_tcon->ses != target_tcon->ses) {
+               cifs_dbg(VFS, "source and target of copy not on same server\n");
+               goto out;
+       }
+
+       /*
+        * Note: cifs case is easier than btrfs since server responsible for
+        * checks for proper open modes and file type and if it wants
+        * server could even support copy of range where source = target
+        */
+       lock_two_nondirectories(target_inode, src_inode);
+
+       cifs_dbg(FYI, "about to flush pages\n");
+       /* should we flush first and last page first */
+       truncate_inode_pages(&target_inode->i_data, 0);
+
+       if (target_tcon->ses->server->ops->copychunk_range)
+               rc = target_tcon->ses->server->ops->copychunk_range(xid,
+                       smb_file_src, smb_file_target, off, len, destoff);
+       else
+               rc = -EOPNOTSUPP;
+
+       /* force revalidate of size and timestamps of target file now
+        * that target is updated on the server
+        */
+       CIFS_I(target_inode)->time = 0;
+       /* although unlocking in the reverse order from locking is not
+        * strictly necessary here it is a little cleaner to be consistent
+        */
+       unlock_two_nondirectories(src_inode, target_inode);
+
+out:
+       return rc;
+}
+
+static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off,
+                               struct file *dst_file, loff_t destoff,
+                               size_t len, unsigned int flags)
+{
+       unsigned int xid = get_xid();
+       ssize_t rc;
+
+       rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff,
+                                       len, flags);
+       free_xid(xid);
+       return rc;
+}
+
  const struct file_operations cifs_file_ops = {
         .read_iter = cifs_loose_read_iter,
         .write_iter = cifs_file_write_iter,
@@ -984,6 +1069,7 @@ const struct file_operations cifs_file_ops = {
         .splice_read = generic_file_splice_read,
         .llseek = cifs_llseek,
         .unlocked_ioctl = cifs_ioctl,
+       .copy_file_range = cifs_copy_file_range,
         .clone_file_range = cifs_clone_file_range,
         .setlease = cifs_setlease,
         .fallocate = cifs_fallocate,
@@ -1001,6 +1087,7 @@ const struct file_operations cifs_file_strict_ops = {
         .splice_read = generic_file_splice_read,
         .llseek = cifs_llseek,
         .unlocked_ioctl = cifs_ioctl,
+       .copy_file_range = cifs_copy_file_range,
         .clone_file_range = cifs_clone_file_range,
         .setlease = cifs_setlease,
         .fallocate = cifs_fallocate,
@@ -1018,6 +1105,7 @@ const struct file_operations cifs_file_direct_ops = {
         .mmap = cifs_file_mmap,
         .splice_read = generic_file_splice_read,
         .unlocked_ioctl  = cifs_ioctl,
+       .copy_file_range = cifs_copy_file_range,
         .clone_file_range = cifs_clone_file_range,
         .llseek = cifs_llseek,
         .setlease = cifs_setlease,
@@ -1035,6 +1123,7 @@ const struct file_operations cifs_file_nobrl_ops = {
         .splice_read = generic_file_splice_read,
         .llseek = cifs_llseek,
         .unlocked_ioctl = cifs_ioctl,
+       .copy_file_range = cifs_copy_file_range,
         .clone_file_range = cifs_clone_file_range,
         .setlease = cifs_setlease,
         .fallocate = cifs_fallocate,
@@ -1051,6 +1140,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
         .splice_read = generic_file_splice_read,
         .llseek = cifs_llseek,
         .unlocked_ioctl = cifs_ioctl,
+       .copy_file_range = cifs_copy_file_range,
         .clone_file_range = cifs_clone_file_range,
         .setlease = cifs_setlease,
         .fallocate = cifs_fallocate,
@@ -1067,6 +1157,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
         .mmap = cifs_file_mmap,
         .splice_read = generic_file_splice_read,
         .unlocked_ioctl  = cifs_ioctl,
+       .copy_file_range = cifs_copy_file_range,
         .clone_file_range = cifs_clone_file_range,
         .llseek = cifs_llseek,
         .setlease = cifs_setlease,
@@ -1078,6 +1169,7 @@ const struct file_operations cifs_dir_ops = {
         .release = cifs_closedir,
         .read    = generic_read_dir,
         .unlocked_ioctl  = cifs_ioctl,
+       .copy_file_range = cifs_copy_file_range,
         .clone_file_range = cifs_clone_file_range,
         .llseek = generic_file_llseek,
  };
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h

index da717fee30260be533f0992d5ed93114907f1c31..30bf89b1fd9a789ec02070f534d9d9f68b593650 100644 (file)
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -139,6 +139,11 @@ extern ssize_t     cifs_listxattr(struct dentry *, char *, size_t);
  # define cifs_listxattr NULL
  #endif
  
+extern ssize_t cifs_file_copychunk_range(unsigned int xid,
+                                       struct file *src_file, loff_t off,
+                                       struct file *dst_file, loff_t destoff,
+                                       size_t len, unsigned int flags);
+
  extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
  #ifdef CONFIG_CIFS_NFSD_EXPORT
  extern const struct export_operations cifs_export_ops;
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h

index d42dd3288647808216a6c22c56731dc960e525f8..37f5a41cc50cc523cd76c790100398d4db9e80ca 100644 (file)
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -243,6 +243,7 @@ struct smb_version_operations {
         /* verify the message */
         int (*check_message)(char *, unsigned int, struct TCP_Server_Info *);
         bool (*is_oplock_break)(char *, struct TCP_Server_Info *);
+       int (*handle_cancelled_mid)(char *, struct TCP_Server_Info *);
         void (*downgrade_oplock)(struct TCP_Server_Info *,
                                         struct cifsInodeInfo *, bool);
         /* process transaction2 response */
@@ -407,9 +408,10 @@ struct smb_version_operations {
         char * (*create_lease_buf)(u8 *, u8);
         /* parse lease context buffer and return oplock/epoch info */
         __u8 (*parse_lease_buf)(void *, unsigned int *);
-       int (*clone_range)(const unsigned int, struct cifsFileInfo *src_file,
-                       struct cifsFileInfo *target_file, u64 src_off, u64 len,
-                       u64 dest_off);
+       ssize_t (*copychunk_range)(const unsigned int,
+                       struct cifsFileInfo *src_file,
+                       struct cifsFileInfo *target_file,
+                       u64 src_off, u64 len, u64 dest_off);
         int (*duplicate_extents)(const unsigned int, struct cifsFileInfo *src,
                         struct cifsFileInfo *target_file, u64 src_off, u64 len,
                         u64 dest_off);
@@ -946,7 +948,6 @@ struct cifs_tcon {
         bool use_persistent:1; /* use persistent instead of durable handles */
  #ifdef CONFIG_CIFS_SMB2
         bool print:1;           /* set if connection to printer share */
-       bool bad_network_name:1; /* set if ret status STATUS_BAD_NETWORK_NAME */
         __le32 capabilities;
         __u32 share_flags;
         __u32 maximal_access;
@@ -1343,6 +1344,7 @@ struct mid_q_entry {
         void *callback_data;      /* general purpose pointer for callback */
         void *resp_buf;         /* pointer to received SMB header */
         int mid_state;  /* wish this were enum but can not pass to wait_event */
+       unsigned int mid_flags;
         __le16 command;         /* smb command code */
         bool large_buf:1;       /* if valid response, is pointer to large buf */
         bool multiRsp:1;        /* multiple trans2 responses for one request  */
@@ -1350,6 +1352,12 @@ struct mid_q_entry {
         bool decrypted:1;       /* decrypted entry */
  };
  
+struct close_cancelled_open {
+       struct cifs_fid         fid;
+       struct cifs_tcon        *tcon;
+       struct work_struct      work;
+};
+
  /*     Make code in transport.c a little cleaner by moving
         update of optional stats into function below */
  #ifdef CONFIG_CIFS_STATS2
@@ -1481,6 +1489,9 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
  #define   MID_RESPONSE_MALFORMED 0x10
  #define   MID_SHUTDOWN          0x20
  
+/* Flags */
+#define   MID_WAIT_CANCELLED    1 /* Cancelled while waiting for response */
+
  /* Types of response buffer returned from SendReceive2 */
  #define   CIFS_NO_BUFFER        0    /* Response buffer not returned */
  #define   CIFS_SMALL_BUFFER     1
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c

index 066950671929682399974fb4fb8641cf78c60855..5d21f00ae341b4ec002eb33dd79c7a16a1e1dfa4 100644 (file)
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1428,6 +1428,8 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
  
         length = cifs_discard_remaining_data(server);
         dequeue_mid(mid, rdata->result);
+       mid->resp_buf = server->smallbuf;
+       server->smallbuf = NULL;
         return length;
  }
  
@@ -1541,6 +1543,8 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
                 return cifs_readv_discard(server, mid);
  
         dequeue_mid(mid, false);
+       mid->resp_buf = server->smallbuf;
+       server->smallbuf = NULL;
         return length;
  }
  
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c

index 9ae695ae3ed7be3788db2a889e34cde8a3224c9c..b3c9d8c310f2e15c608b9afc9ec3eaf3ee86896f 100644 (file)
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -904,10 +904,19 @@ cifs_demultiplex_thread(void *p)
  
                 server->lstrp = jiffies;
                 if (mid_entry != NULL) {
+                       if ((mid_entry->mid_flags & MID_WAIT_CANCELLED) &&
+                            mid_entry->mid_state == MID_RESPONSE_RECEIVED &&
+                                       server->ops->handle_cancelled_mid)
+                               server->ops->handle_cancelled_mid(
+                                                       mid_entry->resp_buf,
+                                                       server);
+
                         if (!mid_entry->multiRsp || mid_entry->multiEnd)
                                 mid_entry->callback(mid_entry);
-               } else if (!server->ops->is_oplock_break ||
-                          !server->ops->is_oplock_break(buf, server)) {
+               } else if (server->ops->is_oplock_break &&
+                          server->ops->is_oplock_break(buf, server)) {
+                       cifs_dbg(FYI, "Received oplock break\n");
+               } else {
                         cifs_dbg(VFS, "No task to wake, unknown frame received! NumMids %d\n",
                                  atomic_read(&midCount));
                         cifs_dump_mem("Received Data is: ", buf,
@@ -3683,10 +3692,6 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info)
         int referral_walks_count = 0;
  #endif
  
-       rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs");
-       if (rc)
-               return rc;
-
  #ifdef CONFIG_CIFS_DFS_UPCALL
  try_mount_again:
         /* cleanup activities if we're chasing a referral */
@@ -3714,7 +3719,6 @@ try_mount_again:
         server = cifs_get_tcp_session(volume_info);
         if (IS_ERR(server)) {
                 rc = PTR_ERR(server);
-               bdi_destroy(&cifs_sb->bdi);
                 goto out;
         }
         if ((volume_info->max_credits < 20) ||
@@ -3744,6 +3748,9 @@ try_mount_again:
         if (IS_ERR(tcon)) {
                 rc = PTR_ERR(tcon);
                 tcon = NULL;
+               if (rc == -EACCES)
+                       goto mount_fail_check;
+
                 goto remote_path_check;
         }
  
@@ -3768,9 +3775,6 @@ try_mount_again:
         cifs_sb->wsize = server->ops->negotiate_wsize(tcon, volume_info);
         cifs_sb->rsize = server->ops->negotiate_rsize(tcon, volume_info);
  
-       /* tune readahead according to rsize */
-       cifs_sb->bdi.ra_pages = cifs_sb->rsize / PAGE_SIZE;
-
  remote_path_check:
  #ifdef CONFIG_CIFS_DFS_UPCALL
         /*
@@ -3887,7 +3891,6 @@ mount_fail_check:
                         cifs_put_smb_ses(ses);
                 else
                         cifs_put_tcp_session(server, 0);
-               bdi_destroy(&cifs_sb->bdi);
         }
  
  out:
@@ -4090,7 +4093,6 @@ cifs_umount(struct cifs_sb_info *cifs_sb)
         }
         spin_unlock(&cifs_sb->tlink_tree_lock);
  
-       bdi_destroy(&cifs_sb->bdi);
         kfree(cifs_sb->mountdata);
         kfree(cifs_sb->prepath);
         call_rcu(&cifs_sb->rcu, delayed_free);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c

index aa3debbba82648944a1e1426516abebb268b7ca0..21d4045357397de647b8fe0282ed1a436b7e3a7f 100644 (file)
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2597,7 +2597,7 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
                 wdata->credits = credits;
  
                 if (!wdata->cfile->invalidHandle ||
-                   !cifs_reopen_file(wdata->cfile, false))
+                   !(rc = cifs_reopen_file(wdata->cfile, false)))
                         rc = server->ops->async_writev(wdata,
                                         cifs_uncached_writedata_release);
                 if (rc) {
@@ -3022,7 +3022,7 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
                 rdata->credits = credits;
  
                 if (!rdata->cfile->invalidHandle ||
-                   !cifs_reopen_file(rdata->cfile, true))
+                   !(rc = cifs_reopen_file(rdata->cfile, true)))
                         rc = server->ops->async_readv(rdata);
  error:
                 if (rc) {
@@ -3617,7 +3617,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
                 }
  
                 if (!rdata->cfile->invalidHandle ||
-                   !cifs_reopen_file(rdata->cfile, true))
+                   !(rc = cifs_reopen_file(rdata->cfile, true)))
                         rc = server->ops->async_readv(rdata);
                 if (rc) {
                         add_credits_and_wake_if(server, rdata->credits, 0);
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c

index 001528781b6b0f80552c33f3d8e93fb9cfc27b86..265c45fe4ea5e5246f5304480c2b4b66bf9d6c73 100644 (file)
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -34,71 +34,14 @@
  #include "cifs_ioctl.h"
  #include <linux/btrfs.h>
  
-static int cifs_file_clone_range(unsigned int xid, struct file *src_file,
-                         struct file *dst_file)
-{
-       struct inode *src_inode = file_inode(src_file);
-       struct inode *target_inode = file_inode(dst_file);
-       struct cifsFileInfo *smb_file_src;
-       struct cifsFileInfo *smb_file_target;
-       struct cifs_tcon *src_tcon;
-       struct cifs_tcon *target_tcon;
-       int rc;
-
-       cifs_dbg(FYI, "ioctl clone range\n");
-
-       if (!src_file->private_data || !dst_file->private_data) {
-               rc = -EBADF;
-               cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
-               goto out;
-       }
-
-       rc = -EXDEV;
-       smb_file_target = dst_file->private_data;
-       smb_file_src = src_file->private_data;
-       src_tcon = tlink_tcon(smb_file_src->tlink);
-       target_tcon = tlink_tcon(smb_file_target->tlink);
-
-       if (src_tcon->ses != target_tcon->ses) {
-               cifs_dbg(VFS, "source and target of copy not on same server\n");
-               goto out;
-       }
-
-       /*
-        * Note: cifs case is easier than btrfs since server responsible for
-        * checks for proper open modes and file type and if it wants
-        * server could even support copy of range where source = target
-        */
-       lock_two_nondirectories(target_inode, src_inode);
-
-       cifs_dbg(FYI, "about to flush pages\n");
-       /* should we flush first and last page first */
-       truncate_inode_pages(&target_inode->i_data, 0);
-
-       if (target_tcon->ses->server->ops->clone_range)
-               rc = target_tcon->ses->server->ops->clone_range(xid,
-                       smb_file_src, smb_file_target, 0, src_inode->i_size, 0);
-       else
-               rc = -EOPNOTSUPP;
-
-       /* force revalidate of size and timestamps of target file now
-          that target is updated on the server */
-       CIFS_I(target_inode)->time = 0;
-       /* although unlocking in the reverse order from locking is not
-          strictly necessary here it is a little cleaner to be consistent */
-       unlock_two_nondirectories(src_inode, target_inode);
-out:
-       return rc;
-}
-
-static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
+static long cifs_ioctl_copychunk(unsigned int xid, struct file *dst_file,
                         unsigned long srcfd)
  {
         int rc;
         struct fd src_file;
         struct inode *src_inode;
  
-       cifs_dbg(FYI, "ioctl clone range\n");
+       cifs_dbg(FYI, "ioctl copychunk range\n");
         /* the destination must be opened for writing */
         if (!(dst_file->f_mode & FMODE_WRITE)) {
                 cifs_dbg(FYI, "file target not open for write\n");
@@ -129,7 +72,8 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
         if (S_ISDIR(src_inode->i_mode))
                 goto out_fput;
  
-       rc = cifs_file_clone_range(xid, src_file.file, dst_file);
+       rc = cifs_file_copychunk_range(xid, src_file.file, 0, dst_file, 0,
+                                       src_inode->i_size, 0);
  
  out_fput:
         fdput(src_file);
@@ -251,7 +195,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
                         }
                         break;
                 case CIFS_IOC_COPYCHUNK_FILE:
-                       rc = cifs_ioctl_clone(xid, filep, arg);
+                       rc = cifs_ioctl_copychunk(xid, filep, arg);
                         break;
                 case CIFS_IOC_SET_INTEGRITY:
                         if (pSMBFile == NULL)
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c

index cc93ba4da9b592468f36d37b80777e88b8e400e6..27bc360c7ffd7e1081f907c5f080dc4ba439fbfc 100644 (file)
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -1015,6 +1015,15 @@ cifs_dir_needs_close(struct cifsFileInfo *cfile)
         return !cfile->srch_inf.endOfSearch && !cfile->invalidHandle;
  }
  
+static bool
+cifs_can_echo(struct TCP_Server_Info *server)
+{
+       if (server->tcpStatus == CifsGood)
+               return true;
+
+       return false;
+}
+
  struct smb_version_operations smb1_operations = {
         .send_cancel = send_nt_cancel,
         .compare_fids = cifs_compare_fids,
@@ -1049,6 +1058,7 @@ struct smb_version_operations smb1_operations = {
         .get_dfs_refer = CIFSGetDFSRefer,
         .qfs_tcon = cifs_qfs_tcon,
         .is_path_accessible = cifs_is_path_accessible,
+       .can_echo = cifs_can_echo,
         .query_path_info = cifs_query_path_info,
         .query_file_info = cifs_query_file_info,
         .get_srv_inum = cifs_get_srv_inum,
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c

index fd516ea8b8f89c46cb1d9977d53fbbb9d16de9d3..1a04b3a5beb164cb22c166f5c7ac03b65e70d00b 100644 (file)
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -659,3 +659,49 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
         cifs_dbg(FYI, "Can not process oplock break for non-existent connection\n");
         return false;
  }
+
+void
+smb2_cancelled_close_fid(struct work_struct *work)
+{
+       struct close_cancelled_open *cancelled = container_of(work,
+                                       struct close_cancelled_open, work);
+
+       cifs_dbg(VFS, "Close unmatched open\n");
+
+       SMB2_close(0, cancelled->tcon, cancelled->fid.persistent_fid,
+                  cancelled->fid.volatile_fid);
+       cifs_put_tcon(cancelled->tcon);
+       kfree(cancelled);
+}
+
+int
+smb2_handle_cancelled_mid(char *buffer, struct TCP_Server_Info *server)
+{
+       struct smb2_sync_hdr *sync_hdr = get_sync_hdr(buffer);
+       struct smb2_create_rsp *rsp = (struct smb2_create_rsp *)buffer;
+       struct cifs_tcon *tcon;
+       struct close_cancelled_open *cancelled;
+
+       if (sync_hdr->Command != SMB2_CREATE ||
+           sync_hdr->Status != STATUS_SUCCESS)
+               return 0;
+
+       cancelled = kzalloc(sizeof(*cancelled), GFP_KERNEL);
+       if (!cancelled)
+               return -ENOMEM;
+
+       tcon = smb2_find_smb_tcon(server, sync_hdr->SessionId,
+                                 sync_hdr->TreeId);
+       if (!tcon) {
+               kfree(cancelled);
+               return -ENOENT;
+       }
+
+       cancelled->fid.persistent_fid = rsp->PersistentFileId;
+       cancelled->fid.volatile_fid = rsp->VolatileFileId;
+       cancelled->tcon = tcon;
+       INIT_WORK(&cancelled->work, smb2_cancelled_close_fid);
+       queue_work(cifsiod_wq, &cancelled->work);
+
+       return 0;
+}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c

index 0231108d9387a4af3448a2d89451a4c86a0bc17c..152e37f2ad9213462a2ae439296a6edb49d653f9 100644 (file)
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -21,6 +21,7 @@
  #include <linux/vfs.h>
  #include <linux/falloc.h>
  #include <linux/scatterlist.h>
+#include <linux/uuid.h>
  #include <crypto/aead.h>
  #include "cifsglob.h"
  #include "smb2pdu.h"
@@ -592,8 +593,8 @@ req_res_key_exit:
         return rc;
  }
  
-static int
-smb2_clone_range(const unsigned int xid,
+static ssize_t
+smb2_copychunk_range(const unsigned int xid,
                         struct cifsFileInfo *srcfile,
                         struct cifsFileInfo *trgtfile, u64 src_off,
                         u64 len, u64 dest_off)
@@ -605,13 +606,14 @@ smb2_clone_range(const unsigned int xid,
         struct cifs_tcon *tcon;
         int chunks_copied = 0;
         bool chunk_sizes_updated = false;
+       ssize_t bytes_written, total_bytes_written = 0;
  
         pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL);
  
         if (pcchunk == NULL)
                 return -ENOMEM;
  
-       cifs_dbg(FYI, "in smb2_clone_range - about to call request res key\n");
+       cifs_dbg(FYI, "in smb2_copychunk_range - about to call request res key\n");
         /* Request a key from the server to identify the source of the copy */
         rc = SMB2_request_res_key(xid, tlink_tcon(srcfile->tlink),
                                 srcfile->fid.persistent_fid,
@@ -669,14 +671,16 @@ smb2_clone_range(const unsigned int xid,
                         }
                         chunks_copied++;
  
-                       src_off += le32_to_cpu(retbuf->TotalBytesWritten);
-                       dest_off += le32_to_cpu(retbuf->TotalBytesWritten);
-                       len -= le32_to_cpu(retbuf->TotalBytesWritten);
+                       bytes_written = le32_to_cpu(retbuf->TotalBytesWritten);
+                       src_off += bytes_written;
+                       dest_off += bytes_written;
+                       len -= bytes_written;
+                       total_bytes_written += bytes_written;
  
-                       cifs_dbg(FYI, "Chunks %d PartialChunk %d Total %d\n",
+                       cifs_dbg(FYI, "Chunks %d PartialChunk %d Total %zu\n",
                                 le32_to_cpu(retbuf->ChunksWritten),
                                 le32_to_cpu(retbuf->ChunkBytesWritten),
-                               le32_to_cpu(retbuf->TotalBytesWritten));
+                               bytes_written);
                 } else if (rc == -EINVAL) {
                         if (ret_data_len != sizeof(struct copychunk_ioctl_rsp))
                                 goto cchunk_out;
@@ -713,7 +717,10 @@ smb2_clone_range(const unsigned int xid,
  cchunk_out:
         kfree(pcchunk);
         kfree(retbuf);
-       return rc;
+       if (rc)
+               return rc;
+       else
+               return total_bytes_written;
  }
  
  static int
@@ -2322,6 +2329,7 @@ struct smb_version_operations smb20_operations = {
         .clear_stats = smb2_clear_stats,
         .print_stats = smb2_print_stats,
         .is_oplock_break = smb2_is_valid_oplock_break,
+       .handle_cancelled_mid = smb2_handle_cancelled_mid,
         .downgrade_oplock = smb2_downgrade_oplock,
         .need_neg = smb2_need_neg,
         .negotiate = smb2_negotiate,
@@ -2377,7 +2385,7 @@ struct smb_version_operations smb20_operations = {
         .set_oplock_level = smb2_set_oplock_level,
         .create_lease_buf = smb2_create_lease_buf,
         .parse_lease_buf = smb2_parse_lease_buf,
-       .clone_range = smb2_clone_range,
+       .copychunk_range = smb2_copychunk_range,
         .wp_retry_size = smb2_wp_retry_size,
         .dir_needs_close = smb2_dir_needs_close,
         .get_dfs_refer = smb2_get_dfs_refer,
@@ -2404,6 +2412,7 @@ struct smb_version_operations smb21_operations = {
         .clear_stats = smb2_clear_stats,
         .print_stats = smb2_print_stats,
         .is_oplock_break = smb2_is_valid_oplock_break,
+       .handle_cancelled_mid = smb2_handle_cancelled_mid,
         .downgrade_oplock = smb2_downgrade_oplock,
         .need_neg = smb2_need_neg,
         .negotiate = smb2_negotiate,
@@ -2459,7 +2468,7 @@ struct smb_version_operations smb21_operations = {
         .set_oplock_level = smb21_set_oplock_level,
         .create_lease_buf = smb2_create_lease_buf,
         .parse_lease_buf = smb2_parse_lease_buf,
-       .clone_range = smb2_clone_range,
+       .copychunk_range = smb2_copychunk_range,
         .wp_retry_size = smb2_wp_retry_size,
         .dir_needs_close = smb2_dir_needs_close,
         .enum_snapshots = smb3_enum_snapshots,
@@ -2488,6 +2497,7 @@ struct smb_version_operations smb30_operations = {
         .print_stats = smb2_print_stats,
         .dump_share_caps = smb2_dump_share_caps,
         .is_oplock_break = smb2_is_valid_oplock_break,
+       .handle_cancelled_mid = smb2_handle_cancelled_mid,
         .downgrade_oplock = smb2_downgrade_oplock,
         .need_neg = smb2_need_neg,
         .negotiate = smb2_negotiate,
@@ -2545,7 +2555,7 @@ struct smb_version_operations smb30_operations = {
         .set_oplock_level = smb3_set_oplock_level,
         .create_lease_buf = smb3_create_lease_buf,
         .parse_lease_buf = smb3_parse_lease_buf,
-       .clone_range = smb2_clone_range,
+       .copychunk_range = smb2_copychunk_range,
         .duplicate_extents = smb2_duplicate_extents,
         .validate_negotiate = smb3_validate_negotiate,
         .wp_retry_size = smb2_wp_retry_size,
@@ -2582,6 +2592,7 @@ struct smb_version_operations smb311_operations = {
         .print_stats = smb2_print_stats,
         .dump_share_caps = smb2_dump_share_caps,
         .is_oplock_break = smb2_is_valid_oplock_break,
+       .handle_cancelled_mid = smb2_handle_cancelled_mid,
         .downgrade_oplock = smb2_downgrade_oplock,
         .need_neg = smb2_need_neg,
         .negotiate = smb2_negotiate,
@@ -2639,7 +2650,7 @@ struct smb_version_operations smb311_operations = {
         .set_oplock_level = smb3_set_oplock_level,
         .create_lease_buf = smb3_create_lease_buf,
         .parse_lease_buf = smb3_parse_lease_buf,
-       .clone_range = smb2_clone_range,
+       .copychunk_range = smb2_copychunk_range,
         .duplicate_extents = smb2_duplicate_extents,
  /*     .validate_negotiate = smb3_validate_negotiate, */ /* not used in 3.11 */
         .wp_retry_size = smb2_wp_retry_size,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c

index 7446496850a3bd5f21fb36e12b65ba5c78532612..02da648041fcd58d6e37431a60d596a5bfd4ca06 100644 (file)
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -562,8 +562,10 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
          * but for time being this is our only auth choice so doesn't matter.
          * We just found a server which sets blob length to zero expecting raw.
          */
-       if (blob_length == 0)
+       if (blob_length == 0) {
                 cifs_dbg(FYI, "missing security blob on negprot\n");
+               server->sec_ntlmssp = true;
+       }
  
         rc = cifs_enable_signing(server, ses->sign);
         if (rc)
@@ -1171,9 +1173,6 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
         else
                 return -EIO;
  
-       if (tcon && tcon->bad_network_name)
-               return -ENOENT;
-
         unc_path = kmalloc(MAX_SHARENAME_LENGTH * 2, GFP_KERNEL);
         if (unc_path == NULL)
                 return -ENOMEM;
@@ -1185,6 +1184,10 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
                 return -EINVAL;
         }
  
+       /* SMB2 TREE_CONNECT request must be called with TreeId == 0 */
+       if (tcon)
+               tcon->tid = 0;
+
         rc = small_smb2_init(SMB2_TREE_CONNECT, tcon, (void **) &req);
         if (rc) {
                 kfree(unc_path);
@@ -1273,8 +1276,6 @@ tcon_exit:
  tcon_error_exit:
         if (rsp->hdr.sync_hdr.Status == STATUS_BAD_NETWORK_NAME) {
                 cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree);
-               if (tcon)
-                       tcon->bad_network_name = true;
         }
         goto tcon_exit;
  }
@@ -2177,6 +2178,9 @@ void smb2_reconnect_server(struct work_struct *work)
         struct cifs_tcon *tcon, *tcon2;
         struct list_head tmp_list;
         int tcon_exist = false;
+       int rc;
+       int resched = false;
+
  
         /* Prevent simultaneous reconnects that can corrupt tcon->rlist list */
         mutex_lock(&server->reconnect_mutex);
@@ -2204,13 +2208,18 @@ void smb2_reconnect_server(struct work_struct *work)
         spin_unlock(&cifs_tcp_ses_lock);
  
         list_for_each_entry_safe(tcon, tcon2, &tmp_list, rlist) {
-               if (!smb2_reconnect(SMB2_INTERNAL_CMD, tcon))
+               rc = smb2_reconnect(SMB2_INTERNAL_CMD, tcon);
+               if (!rc)
                         cifs_reopen_persistent_handles(tcon);
+               else
+                       resched = true;
                 list_del_init(&tcon->rlist);
                 cifs_put_tcon(tcon);
         }
  
         cifs_dbg(FYI, "Reconnecting tcons finished\n");
+       if (resched)
+               queue_delayed_work(cifsiod_wq, &server->reconnect, 2 * HZ);
         mutex_unlock(&server->reconnect_mutex);
  
         /* now we can safely release srv struct */
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h

index 69e35873b1de734991fbe027fd68b97b505d01c3..6853454fc871a7947591effb0c98d55accd955dc 100644 (file)
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -48,6 +48,10 @@ extern struct mid_q_entry *smb2_setup_request(struct cifs_ses *ses,
                               struct smb_rqst *rqst);
  extern struct mid_q_entry *smb2_setup_async_request(
                         struct TCP_Server_Info *server, struct smb_rqst *rqst);
+extern struct cifs_ses *smb2_find_smb_ses(struct TCP_Server_Info *server,
+                                          __u64 ses_id);
+extern struct cifs_tcon *smb2_find_smb_tcon(struct TCP_Server_Info *server,
+                                               __u64 ses_id, __u32  tid);
  extern int smb2_calc_signature(struct smb_rqst *rqst,
                                 struct TCP_Server_Info *server);
  extern int smb3_calc_signature(struct smb_rqst *rqst,
@@ -164,6 +168,9 @@ extern int SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
  extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
                              const u64 persistent_fid, const u64 volatile_fid,
                              const __u8 oplock_level);
+extern int smb2_handle_cancelled_mid(char *buffer,
+                                       struct TCP_Server_Info *server);
+void smb2_cancelled_close_fid(struct work_struct *work);
  extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
                          u64 persistent_file_id, u64 volatile_file_id,
                          struct kstatfs *FSData);
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c

index 7c3bb1bd7eedfd044e0a7716951a39181a9e1bae..506b67fc93d9611ea859cf738126fb2d67e52e16 100644 (file)
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -115,23 +115,70 @@ smb3_crypto_shash_allocate(struct TCP_Server_Info *server)
         return 0;
  }
  
-struct cifs_ses *
-smb2_find_smb_ses(struct TCP_Server_Info *server, __u64 ses_id)
+static struct cifs_ses *
+smb2_find_smb_ses_unlocked(struct TCP_Server_Info *server, __u64 ses_id)
  {
         struct cifs_ses *ses;
  
-       spin_lock(&cifs_tcp_ses_lock);
         list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
                 if (ses->Suid != ses_id)
                         continue;
-               spin_unlock(&cifs_tcp_ses_lock);
                 return ses;
         }
+
+       return NULL;
+}
+
+struct cifs_ses *
+smb2_find_smb_ses(struct TCP_Server_Info *server, __u64 ses_id)
+{
+       struct cifs_ses *ses;
+
+       spin_lock(&cifs_tcp_ses_lock);
+       ses = smb2_find_smb_ses_unlocked(server, ses_id);
         spin_unlock(&cifs_tcp_ses_lock);
  
+       return ses;
+}
+
+static struct cifs_tcon *
+smb2_find_smb_sess_tcon_unlocked(struct cifs_ses *ses, __u32  tid)
+{
+       struct cifs_tcon *tcon;
+
+       list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
+               if (tcon->tid != tid)
+                       continue;
+               ++tcon->tc_count;
+               return tcon;
+       }
+
         return NULL;
  }
  
+/*
+ * Obtain tcon corresponding to the tid in the given
+ * cifs_ses
+ */
+
+struct cifs_tcon *
+smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32  tid)
+{
+       struct cifs_ses *ses;
+       struct cifs_tcon *tcon;
+
+       spin_lock(&cifs_tcp_ses_lock);
+       ses = smb2_find_smb_ses_unlocked(server, ses_id);
+       if (!ses) {
+               spin_unlock(&cifs_tcp_ses_lock);
+               return NULL;
+       }
+       tcon = smb2_find_smb_sess_tcon_unlocked(ses, tid);
+       spin_unlock(&cifs_tcp_ses_lock);
+
+       return tcon;
+}
+
  int
  smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
  {
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c

index 526f0533cb4e8ca0eb1209561bd4bf59db9af864..f6e13a977fc8e907e3fa89fc0a5d14a4ab0eccd5 100644 (file)
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -752,9 +752,11 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
  
         rc = wait_for_response(ses->server, midQ);
         if (rc != 0) {
+               cifs_dbg(FYI, "Cancelling wait for mid %llu\n", midQ->mid);
                 send_cancel(ses->server, rqst, midQ);
                 spin_lock(&GlobalMid_Lock);
                 if (midQ->mid_state == MID_REQUEST_SUBMITTED) {
+                       midQ->mid_flags |= MID_WAIT_CANCELLED;
                         midQ->callback = DeleteMidQEntry;
                         spin_unlock(&GlobalMid_Lock);
                         add_credits(ses->server, 1, optype);
diff --git a/fs/coda/inode.c b/fs/coda/inode.c

index 2dea594da19968288586138a35c22a81151914e7..6058df380cc00ed0d4be8cb81a0a87369cb9aac3 100644 (file)
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -183,10 +183,6 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
                 goto unlock_out;
         }
  
-       error = bdi_setup_and_register(&vc->bdi, "coda");
-       if (error)
-               goto unlock_out;
-
         vc->vc_sb = sb;
         mutex_unlock(&vc->vc_mutex);
  
@@ -197,7 +193,10 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
         sb->s_magic = CODA_SUPER_MAGIC;
         sb->s_op = &coda_super_operations;
         sb->s_d_op = &coda_dentry_operations;
-       sb->s_bdi = &vc->bdi;
+
+       error = super_setup_bdi(sb);
+       if (error)
+               goto error;
  
         /* get root fid from Venus: this needs the root inode */
         error = venus_rootfid(sb, &fid);
@@ -228,7 +227,6 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
  
  error:
         mutex_lock(&vc->vc_mutex);
-       bdi_destroy(&vc->bdi);
         vc->vc_sb = NULL;
         sb->s_fs_info = NULL;
  unlock_out:
@@ -240,7 +238,6 @@ static void coda_put_super(struct super_block *sb)
  {
         struct venus_comm *vcp = coda_vcp(sb);
         mutex_lock(&vcp->vc_mutex);
-       bdi_destroy(&vcp->bdi);
         vcp->vc_sb = NULL;
         sb->s_fs_info = NULL;
         mutex_unlock(&vcp->vc_mutex);
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c

index 02a7a9286449d467741d64e8e817bb2902309926..6d6eca394d4d4107b8459b828b563acbbc9f082e 100644 (file)
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -327,7 +327,6 @@ EXPORT_SYMBOL(fscrypt_decrypt_page);
  static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
  {
         struct dentry *dir;
-       struct fscrypt_info *ci;
         int dir_has_key, cached_with_key;
  
         if (flags & LOOKUP_RCU)
@@ -339,18 +338,11 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
                 return 0;
         }
  
-       ci = d_inode(dir)->i_crypt_info;
-       if (ci && ci->ci_keyring_key &&
-           (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
-                                         (1 << KEY_FLAG_REVOKED) |
-                                         (1 << KEY_FLAG_DEAD))))
-               ci = NULL;
-
         /* this should eventually be an flag in d_flags */
         spin_lock(&dentry->d_lock);
         cached_with_key = dentry->d_flags & DCACHE_ENCRYPTED_WITH_KEY;
         spin_unlock(&dentry->d_lock);
-       dir_has_key = (ci != NULL);
+       dir_has_key = (d_inode(dir)->i_crypt_info != NULL);
         dput(dir);
  
         /*
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c

index 13052b85c3930f071be764c5fbbeb091429002d3..37b49894c762344841117b2a0042c5e9ec8b7140 100644 (file)
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -350,7 +350,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
                 fname->disk_name.len = iname->len;
                 return 0;
         }
-       ret = fscrypt_get_crypt_info(dir);
+       ret = fscrypt_get_encryption_info(dir);
         if (ret && ret != -EOPNOTSUPP)
                 return ret;
  
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h

index fdbb8af32eafdb6bae492658d376abf15f8a3d69..e39696e644942a80d110035e2d49570840823af7 100644 (file)
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -67,7 +67,6 @@ struct fscrypt_info {
         u8 ci_filename_mode;
         u8 ci_flags;
         struct crypto_skcipher *ci_ctfm;
-       struct key *ci_keyring_key;
         u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE];
  };
  
@@ -101,7 +100,4 @@ extern int fscrypt_do_page_crypto(const struct inode *inode,
  extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
                                               gfp_t gfp_flags);
  
-/* keyinfo.c */
-extern int fscrypt_get_crypt_info(struct inode *);
-
  #endif /* _FSCRYPT_PRIVATE_H */
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c

index d5d896fa5a71675272131d797919924b2398a85a..8cdfddce2b34868f0cfe3f71da55d64187172a38 100644 (file)
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -95,6 +95,7 @@ static int validate_user_key(struct fscrypt_info *crypt_info,
         kfree(description);
         if (IS_ERR(keyring_key))
                 return PTR_ERR(keyring_key);
+       down_read(&keyring_key->sem);
  
         if (keyring_key->type != &key_type_logon) {
                 printk_once(KERN_WARNING
@@ -102,11 +103,9 @@ static int validate_user_key(struct fscrypt_info *crypt_info,
                 res = -ENOKEY;
                 goto out;
         }
-       down_read(&keyring_key->sem);
         ukp = user_key_payload_locked(keyring_key);
         if (ukp->datalen != sizeof(struct fscrypt_key)) {
                 res = -EINVAL;
-               up_read(&keyring_key->sem);
                 goto out;
         }
         master_key = (struct fscrypt_key *)ukp->data;
@@ -117,17 +116,11 @@ static int validate_user_key(struct fscrypt_info *crypt_info,
                                 "%s: key size incorrect: %d\n",
                                 __func__, master_key->size);
                 res = -ENOKEY;
-               up_read(&keyring_key->sem);
                 goto out;
         }
         res = derive_key_aes(ctx->nonce, master_key->raw, raw_key);
-       up_read(&keyring_key->sem);
-       if (res)
-               goto out;
-
-       crypt_info->ci_keyring_key = keyring_key;
-       return 0;
  out:
+       up_read(&keyring_key->sem);
         key_put(keyring_key);
         return res;
  }
@@ -169,12 +162,11 @@ static void put_crypt_info(struct fscrypt_info *ci)
         if (!ci)
                 return;
  
-       key_put(ci->ci_keyring_key);
         crypto_free_skcipher(ci->ci_ctfm);
         kmem_cache_free(fscrypt_info_cachep, ci);
  }
  
-int fscrypt_get_crypt_info(struct inode *inode)
+int fscrypt_get_encryption_info(struct inode *inode)
  {
         struct fscrypt_info *crypt_info;
         struct fscrypt_context ctx;
@@ -184,21 +176,15 @@ int fscrypt_get_crypt_info(struct inode *inode)
         u8 *raw_key = NULL;
         int res;
  
+       if (inode->i_crypt_info)
+               return 0;
+
         res = fscrypt_initialize(inode->i_sb->s_cop->flags);
         if (res)
                 return res;
  
         if (!inode->i_sb->s_cop->get_context)
                 return -EOPNOTSUPP;
-retry:
-       crypt_info = ACCESS_ONCE(inode->i_crypt_info);
-       if (crypt_info) {
-               if (!crypt_info->ci_keyring_key ||
-                               key_validate(crypt_info->ci_keyring_key) == 0)
-                       return 0;
-               fscrypt_put_encryption_info(inode, crypt_info);
-               goto retry;
-       }
  
         res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
         if (res < 0) {
@@ -229,7 +215,6 @@ retry:
         crypt_info->ci_data_mode = ctx.contents_encryption_mode;
         crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
         crypt_info->ci_ctfm = NULL;
-       crypt_info->ci_keyring_key = NULL;
         memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
                                 sizeof(crypt_info->ci_master_key));
  
@@ -273,14 +258,8 @@ retry:
         if (res)
                 goto out;
  
-       kzfree(raw_key);
-       raw_key = NULL;
-       if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) != NULL) {
-               put_crypt_info(crypt_info);
-               goto retry;
-       }
-       return 0;
-
+       if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) == NULL)
+               crypt_info = NULL;
  out:
         if (res == -ENOKEY)
                 res = 0;
@@ -288,6 +267,7 @@ out:
         kzfree(raw_key);
         return res;
  }
+EXPORT_SYMBOL(fscrypt_get_encryption_info);
  
  void fscrypt_put_encryption_info(struct inode *inode, struct fscrypt_info *ci)
  {
@@ -305,17 +285,3 @@ void fscrypt_put_encryption_info(struct inode *inode, struct fscrypt_info *ci)
         put_crypt_info(ci);
  }
  EXPORT_SYMBOL(fscrypt_put_encryption_info);
-
-int fscrypt_get_encryption_info(struct inode *inode)
-{
-       struct fscrypt_info *ci = inode->i_crypt_info;
-
-       if (!ci ||
-               (ci->ci_keyring_key &&
-                (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
-                                              (1 << KEY_FLAG_REVOKED) |
-                                              (1 << KEY_FLAG_DEAD)))))
-               return fscrypt_get_crypt_info(inode);
-       return 0;
-}
-EXPORT_SYMBOL(fscrypt_get_encryption_info);
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c

index 14b76da71269487f22b941f82dbf01d6adea07c7..4908906d54d562263093cd5245fcb14e36d18b8e 100644 (file)
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -33,17 +33,10 @@ static int create_encryption_context_from_policy(struct inode *inode,
                                 const struct fscrypt_policy *policy)
  {
         struct fscrypt_context ctx;
-       int res;
  
         if (!inode->i_sb->s_cop->set_context)
                 return -EOPNOTSUPP;
  
-       if (inode->i_sb->s_cop->prepare_context) {
-               res = inode->i_sb->s_cop->prepare_context(inode);
-               if (res)
-                       return res;
-       }
-
         ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
         memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
                                         FS_KEY_DESCRIPTOR_SIZE);
diff --git a/fs/dax.c b/fs/dax.c

index de622d4282a6507a9c4e4eb082ac6ff8286efb48..6433650be833876eea42cd36ca109e4298bff073 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -373,6 +373,22 @@ restart:
                 }
                 spin_lock_irq(&mapping->tree_lock);
  
+               if (!entry) {
+                       /*
+                        * We needed to drop the page_tree lock while calling
+                        * radix_tree_preload() and we didn't have an entry to
+                        * lock.  See if another thread inserted an entry at
+                        * our index during this time.
+                        */
+                       entry = __radix_tree_lookup(&mapping->page_tree, index,
+                                       NULL, &slot);
+                       if (entry) {
+                               radix_tree_preload_end();
+                               spin_unlock_irq(&mapping->tree_lock);
+                               goto restart;
+                       }
+               }
+
                 if (pmd_downgrade) {
                         radix_tree_delete(&mapping->page_tree, index);
                         mapping->nrexceptional--;
@@ -388,19 +404,12 @@ restart:
                 if (err) {
                         spin_unlock_irq(&mapping->tree_lock);
                         /*
-                        * Someone already created the entry?  This is a
-                        * normal failure when inserting PMDs in a range
-                        * that already contains PTEs.  In that case we want
-                        * to return -EEXIST immediately.
-                        */
-                       if (err == -EEXIST && !(size_flag & RADIX_DAX_PMD))
-                               goto restart;
-                       /*
-                        * Our insertion of a DAX PMD entry failed, most
-                        * likely because it collided with a PTE sized entry
-                        * at a different index in the PMD range.  We haven't
-                        * inserted anything into the radix tree and have no
-                        * waiters to wake.
+                        * Our insertion of a DAX entry failed, most likely
+                        * because we were inserting a PMD entry and it
+                        * collided with a PTE sized entry at a different
+                        * index in the PMD range.  We haven't inserted
+                        * anything into the radix tree and have no waiters to
+                        * wake.
                          */
                         return ERR_PTR(err);
                 }
@@ -982,7 +991,7 @@ int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
                 sector_t start_sector = dax.sector + (offset >> 9);
  
                 return blkdev_issue_zeroout(bdev, start_sector,
-                               length >> 9, GFP_NOFS, true);
+                               length >> 9, GFP_NOFS, 0);
         } else {
                 if (dax_map_atomic(bdev, &dax) < 0)
                         return PTR_ERR(dax.addr);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c

index 7d398d300e972c3604727ac8d6d4fbf7302318a6..9382db998ec9549319f47b55ccd561abf1169874 100644 (file)
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -743,7 +743,7 @@ static int tcp_accept_from_sock(struct connection *con)
         newsock->type = con->sock->type;
         newsock->ops = con->sock->ops;
  
-       result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK);
+       result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK, true);
         if (result < 0)
                 goto accept_err;
  
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h

index 95c1c8d3453922be63c7c0d13e2bd8acd18d7e12..9c351bf757b20e037f39aeadf0fa0ed12f963db6 100644 (file)
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -349,7 +349,6 @@ struct ecryptfs_mount_crypt_stat {
  struct ecryptfs_sb_info {
         struct super_block *wsi_sb;
         struct ecryptfs_mount_crypt_stat mount_crypt_stat;
-       struct backing_dev_info bdi;
  };
  
  /* file private data. */
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c

index 151872dcc1f402ef47273b6da96a5d15a3f251ba..9014479d01600be356b87c284c675ada55b6a723 100644 (file)
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -519,12 +519,11 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
                 goto out;
         }
  
-       rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs");
+       rc = super_setup_bdi(s);
         if (rc)
                 goto out1;
  
         ecryptfs_set_superblock_private(s, sbi);
-       s->s_bdi = &sbi->bdi;
  
         /* ->kill_sb() will take care of sbi after that point */
         sbi = NULL;
@@ -633,7 +632,6 @@ static void ecryptfs_kill_block_super(struct super_block *sb)
         if (!sb_info)
                 return;
         ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat);
-       bdi_destroy(&sb_info->bdi);
         kmem_cache_free(ecryptfs_sb_info_cache, sb_info);
  }
  
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h

index 2e86086bc9403efe99a25ab0df439db0c714eb4e..5dc392404559b033445a38a0f25bfacfd41a5c9c 100644 (file)
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -64,7 +64,6 @@ struct exofs_dev {
   * our extension to the in-memory superblock
   */
  struct exofs_sb_info {
-       struct backing_dev_info bdi;            /* register our bdi with VFS  */
         struct exofs_sb_stats s_ess;            /* Written often, pre-allocate*/
         int             s_timeout;              /* timeout for OSD operations */
         uint64_t        s_nextid;               /* highest object ID used     */
diff --git a/fs/exofs/super.c b/fs/exofs/super.c

index 1076a4233b3962e34df904f55f9923dbdec2b48f..819624cfc8da4c7bad401ee26611d69b7ff38f98 100644 (file)
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -464,7 +464,6 @@ static void exofs_put_super(struct super_block *sb)
                             sbi->one_comp.obj.partition);
  
         exofs_sysfs_sb_del(sbi);
-       bdi_destroy(&sbi->bdi);
         exofs_free_sbi(sbi);
         sb->s_fs_info = NULL;
  }
@@ -809,8 +808,12 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
         __sbi_read_stats(sbi);
  
         /* set up operation vectors */
-       sbi->bdi.ra_pages = __ra_pages(&sbi->layout);
-       sb->s_bdi = &sbi->bdi;
+       ret = super_setup_bdi(sb);
+       if (ret) {
+               EXOFS_DBGMSG("Failed to super_setup_bdi\n");
+               goto free_sbi;
+       }
+       sb->s_bdi->ra_pages = __ra_pages(&sbi->layout);
         sb->s_fs_info = sbi;
         sb->s_op = &exofs_sops;
         sb->s_export_op = &exofs_export_ops;
@@ -836,14 +839,6 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
                 goto free_sbi;
         }
  
-       ret = bdi_setup_and_register(&sbi->bdi, "exofs");
-       if (ret) {
-               EXOFS_DBGMSG("Failed to bdi_setup_and_register\n");
-               dput(sb->s_root);
-               sb->s_root = NULL;
-               goto free_sbi;
-       }
-
         exofs_sysfs_dbg_print();
         _exofs_print_device("Mounting", opts->dev_name,
                             ore_comp_dev(&sbi->oc, 0),
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h

index f493af66659134dafce0e6078437834968ef2eb7..fb69ee2388dba0b83b4f29c57b37ec5cb0aefb34 100644 (file)
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2466,6 +2466,7 @@ extern int  ext4_setattr(struct dentry *, struct iattr *);
  extern int  ext4_getattr(const struct path *, struct kstat *, u32, unsigned int);
  extern void ext4_evict_inode(struct inode *);
  extern void ext4_clear_inode(struct inode *);
+extern int  ext4_file_getattr(const struct path *, struct kstat *, u32, unsigned int);
  extern int  ext4_sync_inode(handle_t *, struct inode *);
  extern void ext4_dirty_inode(struct inode *, int);
  extern int ext4_change_inode_journal_flag(struct inode *, int);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c

index 8210c1f43556f4358e9b602a93d158e5c0780c44..cefa9835f275d9b062ae9b13765ea743edb53f64 100644 (file)
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -744,7 +744,7 @@ const struct file_operations ext4_file_operations = {
  
  const struct inode_operations ext4_file_inode_operations = {
         .setattr        = ext4_setattr,
-       .getattr        = ext4_getattr,
+       .getattr        = ext4_file_getattr,
         .listxattr      = ext4_listxattr,
         .get_acl        = ext4_get_acl,
         .set_acl        = ext4_set_acl,
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c

index 30a9f210d1e32c8a01635821b2937407d5b21773..375fb1c05d49ce87a287213720ca0ebf5e0deef1 100644 (file)
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1169,10 +1169,9 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
         set_buffer_uptodate(dir_block);
         err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
         if (err)
-               goto out;
+               return err;
         set_buffer_verified(dir_block);
-out:
-       return err;
+       return ext4_mark_inode_dirty(handle, inode);
  }
  
  static int ext4_convert_inline_data_nolock(handle_t *handle,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c

index 7385e6a6b6cb549041d098a565c36c20794f7f14..b9ffa9f4191f4cb3a122c215894ef76689fd9604 100644 (file)
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5390,17 +5390,52 @@ err_out:
  int ext4_getattr(const struct path *path, struct kstat *stat,
                  u32 request_mask, unsigned int query_flags)
  {
-       struct inode *inode;
-       unsigned long long delalloc_blocks;
+       struct inode *inode = d_inode(path->dentry);
+       struct ext4_inode *raw_inode;
+       struct ext4_inode_info *ei = EXT4_I(inode);
+       unsigned int flags;
+
+       if (EXT4_FITS_IN_INODE(raw_inode, ei, i_crtime)) {
+               stat->result_mask |= STATX_BTIME;
+               stat->btime.tv_sec = ei->i_crtime.tv_sec;
+               stat->btime.tv_nsec = ei->i_crtime.tv_nsec;
+       }
+
+       flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
+       if (flags & EXT4_APPEND_FL)
+               stat->attributes |= STATX_ATTR_APPEND;
+       if (flags & EXT4_COMPR_FL)
+               stat->attributes |= STATX_ATTR_COMPRESSED;
+       if (flags & EXT4_ENCRYPT_FL)
+               stat->attributes |= STATX_ATTR_ENCRYPTED;
+       if (flags & EXT4_IMMUTABLE_FL)
+               stat->attributes |= STATX_ATTR_IMMUTABLE;
+       if (flags & EXT4_NODUMP_FL)
+               stat->attributes |= STATX_ATTR_NODUMP;
+
+       stat->attributes_mask |= (STATX_ATTR_APPEND |
+                                 STATX_ATTR_COMPRESSED |
+                                 STATX_ATTR_ENCRYPTED |
+                                 STATX_ATTR_IMMUTABLE |
+                                 STATX_ATTR_NODUMP);
  
-       inode = d_inode(path->dentry);
         generic_fillattr(inode, stat);
+       return 0;
+}
+
+int ext4_file_getattr(const struct path *path, struct kstat *stat,
+                     u32 request_mask, unsigned int query_flags)
+{
+       struct inode *inode = d_inode(path->dentry);
+       u64 delalloc_blocks;
+
+       ext4_getattr(path, stat, request_mask, query_flags);
  
         /*
          * If there is inline data in the inode, the inode will normally not
          * have data blocks allocated (it may have an external xattr block).
          * Report at least one sector for such files, so tools like tar, rsync,
-        * others doen't incorrectly think the file is completely sparse.
+        * others don't incorrectly think the file is completely sparse.
          */
         if (unlikely(ext4_has_inline_data(inode)))
                 stat->blocks += (stat->size + 511) >> 9;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c

index 578f8c33fb44ad34062e978277f5def1d8aeebe1..c992ef2c2f94c0865d14de67e2c5b99857edf71f 100644 (file)
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -511,7 +511,7 @@ mext_check_arguments(struct inode *orig_inode,
         if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) !=
             (donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) {
                 ext4_debug("ext4 move extent: orig and donor's start "
-                       "offset are not alligned [ino:orig %lu, donor %lu]\n",
+                       "offsets are not aligned [ino:orig %lu, donor %lu]\n",
                         orig_inode->i_ino, donor_inode->i_ino);
                 return -EINVAL;
         }
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c

index 6ad612c576fc733f8a6d95540c20d8e75995e1c6..07e5e140577176e13db84089eaa048713ddb4797 100644 (file)
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3912,6 +3912,7 @@ const struct inode_operations ext4_dir_inode_operations = {
         .tmpfile        = ext4_tmpfile,
         .rename         = ext4_rename2,
         .setattr        = ext4_setattr,
+       .getattr        = ext4_getattr,
         .listxattr      = ext4_listxattr,
         .get_acl        = ext4_get_acl,
         .set_acl        = ext4_set_acl,
@@ -3920,6 +3921,7 @@ const struct inode_operations ext4_dir_inode_operations = {
  
  const struct inode_operations ext4_special_inode_operations = {
         .setattr        = ext4_setattr,
+       .getattr        = ext4_getattr,
         .listxattr      = ext4_listxattr,
         .get_acl        = ext4_get_acl,
         .set_acl        = ext4_set_acl,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c

index 2e03a0a88d92f7731346a26cbdc934458549660f..a9448db1cf7e87c2bd41daac6fbab7729bc87ecc 100644 (file)
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1120,17 +1120,16 @@ static int ext4_get_context(struct inode *inode, void *ctx, size_t len)
                                  EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len);
  }
  
-static int ext4_prepare_context(struct inode *inode)
-{
-       return ext4_convert_inline_data(inode);
-}
-
  static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
                                                         void *fs_data)
  {
         handle_t *handle = fs_data;
         int res, res2, retries = 0;
  
+       res = ext4_convert_inline_data(inode);
+       if (res)
+               return res;
+
         /*
          * If a journal handle was specified, then the encryption context is
          * being set on a new inode via inheritance and is part of a larger
@@ -1196,7 +1195,6 @@ static unsigned ext4_max_namelen(struct inode *inode)
  static const struct fscrypt_operations ext4_cryptops = {
         .key_prefix             = "ext4:",
         .get_context            = ext4_get_context,
-       .prepare_context        = ext4_prepare_context,
         .set_context            = ext4_set_context,
         .dummy_context          = ext4_dummy_context,
         .is_encrypted           = ext4_encrypted_inode,
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c

index 73b184d161fc98dc6c870758243b1a03932cb85f..5c8fc53cb0e5a3127366b6324514407668f150b3 100644 (file)
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -85,17 +85,20 @@ errout:
  const struct inode_operations ext4_encrypted_symlink_inode_operations = {
         .get_link       = ext4_encrypted_get_link,
         .setattr        = ext4_setattr,
+       .getattr        = ext4_getattr,
         .listxattr      = ext4_listxattr,
  };
  
  const struct inode_operations ext4_symlink_inode_operations = {
         .get_link       = page_get_link,
         .setattr        = ext4_setattr,
+       .getattr        = ext4_getattr,
         .listxattr      = ext4_listxattr,
  };
  
  const struct inode_operations ext4_fast_symlink_inode_operations = {
         .get_link       = simple_get_link,
         .setattr        = ext4_setattr,
+       .getattr        = ext4_getattr,
         .listxattr      = ext4_listxattr,
  };
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c

index 67636acf762475e211a641f4720e862ba886a40a..996e7900d4c8ea2d16f65f47e3f1082552b2fc66 100644 (file)
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -131,31 +131,26 @@ static __le32 ext4_xattr_block_csum(struct inode *inode,
  }
  
  static int ext4_xattr_block_csum_verify(struct inode *inode,
-                                       sector_t block_nr,
-                                       struct ext4_xattr_header *hdr)
+                                       struct buffer_head *bh)
  {
-       if (ext4_has_metadata_csum(inode->i_sb) &&
-           (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr)))
-               return 0;
-       return 1;
-}
-
-static void ext4_xattr_block_csum_set(struct inode *inode,
-                                     sector_t block_nr,
-                                     struct ext4_xattr_header *hdr)
-{
-       if (!ext4_has_metadata_csum(inode->i_sb))
-               return;
+       struct ext4_xattr_header *hdr = BHDR(bh);
+       int ret = 1;
  
-       hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr);
+       if (ext4_has_metadata_csum(inode->i_sb)) {
+               lock_buffer(bh);
+               ret = (hdr->h_checksum == ext4_xattr_block_csum(inode,
+                                                       bh->b_blocknr, hdr));
+               unlock_buffer(bh);
+       }
+       return ret;
  }
  
-static inline int ext4_handle_dirty_xattr_block(handle_t *handle,
-                                               struct inode *inode,
-                                               struct buffer_head *bh)
+static void ext4_xattr_block_csum_set(struct inode *inode,
+                                     struct buffer_head *bh)
  {
-       ext4_xattr_block_csum_set(inode, bh->b_blocknr, BHDR(bh));
-       return ext4_handle_dirty_metadata(handle, inode, bh);
+       if (ext4_has_metadata_csum(inode->i_sb))
+               BHDR(bh)->h_checksum = ext4_xattr_block_csum(inode,
+                                               bh->b_blocknr, BHDR(bh));
  }
  
  static inline const struct xattr_handler *
@@ -233,7 +228,7 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
         if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
             BHDR(bh)->h_blocks != cpu_to_le32(1))
                 return -EFSCORRUPTED;
-       if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh)))
+       if (!ext4_xattr_block_csum_verify(inode, bh))
                 return -EFSBADCRC;
         error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size,
                                        bh->b_data);
@@ -618,23 +613,22 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
                         }
                 }
  
+               ext4_xattr_block_csum_set(inode, bh);
                 /*
                  * Beware of this ugliness: Releasing of xattr block references
                  * from different inodes can race and so we have to protect
                  * from a race where someone else frees the block (and releases
                  * its journal_head) before we are done dirtying the buffer. In
                  * nojournal mode this race is harmless and we actually cannot
-                * call ext4_handle_dirty_xattr_block() with locked buffer as
+                * call ext4_handle_dirty_metadata() with locked buffer as
                  * that function can call sync_dirty_buffer() so for that case
                  * we handle the dirtying after unlocking the buffer.
                  */
                 if (ext4_handle_valid(handle))
-                       error = ext4_handle_dirty_xattr_block(handle, inode,
-                                                             bh);
+                       error = ext4_handle_dirty_metadata(handle, inode, bh);
                 unlock_buffer(bh);
                 if (!ext4_handle_valid(handle))
-                       error = ext4_handle_dirty_xattr_block(handle, inode,
-                                                             bh);
+                       error = ext4_handle_dirty_metadata(handle, inode, bh);
                 if (IS_SYNC(inode))
                         ext4_handle_sync(handle);
                 dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
@@ -863,13 +857,14 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
                                 ext4_xattr_cache_insert(ext4_mb_cache,
                                         bs->bh);
                         }
+                       ext4_xattr_block_csum_set(inode, bs->bh);
                         unlock_buffer(bs->bh);
                         if (error == -EFSCORRUPTED)
                                 goto bad_block;
                         if (!error)
-                               error = ext4_handle_dirty_xattr_block(handle,
-                                                                     inode,
-                                                                     bs->bh);
+                               error = ext4_handle_dirty_metadata(handle,
+                                                                  inode,
+                                                                  bs->bh);
                         if (error)
                                 goto cleanup;
                         goto inserted;
@@ -967,10 +962,11 @@ inserted:
                                         ce->e_reusable = 0;
                                 ea_bdebug(new_bh, "reusing; refcount now=%d",
                                           ref);
+                               ext4_xattr_block_csum_set(inode, new_bh);
                                 unlock_buffer(new_bh);
-                               error = ext4_handle_dirty_xattr_block(handle,
-                                                                     inode,
-                                                                     new_bh);
+                               error = ext4_handle_dirty_metadata(handle,
+                                                                  inode,
+                                                                  new_bh);
                                 if (error)
                                         goto cleanup_dquot;
                         }
@@ -1020,11 +1016,12 @@ getblk_failed:
                                 goto getblk_failed;
                         }
                         memcpy(new_bh->b_data, s->base, new_bh->b_size);
+                       ext4_xattr_block_csum_set(inode, new_bh);
                         set_buffer_uptodate(new_bh);
                         unlock_buffer(new_bh);
                         ext4_xattr_cache_insert(ext4_mb_cache, new_bh);
-                       error = ext4_handle_dirty_xattr_block(handle,
-                                                             inode, new_bh);
+                       error = ext4_handle_dirty_metadata(handle, inode,
+                                                          new_bh);
                         if (error)
                                 goto cleanup;
                 }
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c

index a77df377e2e8197097912c9248948c7e729ce566..ee2d0a485fc3478fc5f93b5b85c6dad0431e8ea0 100644 (file)
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -196,6 +196,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
         si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
         si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
         si->base_mem += NM_I(sbi)->nat_blocks / 8;
+       si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short);
  
  get_cache:
         si->cache_mem = 0;
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c

index 4650c9b85de77679adaa275406512868671bb1bb..8d5c62b07b283f53e90ded2366c8bb9375409fa2 100644 (file)
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -750,7 +750,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
         dentry_blk = page_address(page);
         bit_pos = dentry - dentry_blk->dentry;
         for (i = 0; i < slots; i++)
-               clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
+               __clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
  
         /* Let's check and deallocate this dentry page */
         bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h

index e849f83d611407b8968bec904c10f1939c40b4f1..0a6e115562f62edca5b60ee4c833e889a904c202 100644 (file)
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -561,6 +561,8 @@ struct f2fs_nm_info {
         struct mutex build_lock;        /* lock for build free nids */
         unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE];
         unsigned char *nat_block_bitmap;
+       unsigned short *free_nid_count; /* free nid count of NAT block */
+       spinlock_t free_nid_lock;       /* protect updating of nid count */
  
         /* for checkpoint */
         char *nat_bitmap;               /* NAT bitmap pointer */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c

index 94967171dee87a381655ede9190ff0f66b3ca4af..481aa8dc79f46f4c156cf67cca665e8160e36e6a 100644 (file)
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -338,9 +338,6 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
                 set_nat_flag(e, IS_CHECKPOINTED, false);
         __set_nat_cache_dirty(nm_i, e);
  
-       if (enabled_nat_bits(sbi, NULL) && new_blkaddr == NEW_ADDR)
-               clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits);
-
         /* update fsync_mark if its inode nat entry is still alive */
         if (ni->nid != ni->ino)
                 e = __lookup_nat_cache(nm_i, ni->ino);
@@ -1823,7 +1820,8 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
                 kmem_cache_free(free_nid_slab, i);
  }
  
-void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set)
+static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
+                       bool set, bool build, bool locked)
  {
         struct f2fs_nm_info *nm_i = NM_I(sbi);
         unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
@@ -1833,9 +1831,18 @@ void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set)
                 return;
  
         if (set)
-               set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+               __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
         else
-               clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+               __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+
+       if (!locked)
+               spin_lock(&nm_i->free_nid_lock);
+       if (set)
+               nm_i->free_nid_count[nat_ofs]++;
+       else if (!build)
+               nm_i->free_nid_count[nat_ofs]--;
+       if (!locked)
+               spin_unlock(&nm_i->free_nid_lock);
  }
  
  static void scan_nat_page(struct f2fs_sb_info *sbi,
@@ -1847,7 +1854,10 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
         unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
         int i;
  
-       set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
+       if (test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
+               return;
+
+       __set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
  
         i = start_nid % NAT_ENTRY_PER_BLOCK;
  
@@ -1861,7 +1871,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
                 f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
                 if (blk_addr == NULL_ADDR)
                         freed = add_free_nid(sbi, start_nid, true);
-               update_free_nid_bitmap(sbi, start_nid, freed);
+               update_free_nid_bitmap(sbi, start_nid, freed, true, false);
         }
  }
  
@@ -1877,6 +1887,8 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
         for (i = 0; i < nm_i->nat_blocks; i++) {
                 if (!test_bit_le(i, nm_i->nat_block_bitmap))
                         continue;
+               if (!nm_i->free_nid_count[i])
+                       continue;
                 for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
                         nid_t nid;
  
@@ -1907,58 +1919,6 @@ out:
         up_read(&nm_i->nat_tree_lock);
  }
  
-static int scan_nat_bits(struct f2fs_sb_info *sbi)
-{
-       struct f2fs_nm_info *nm_i = NM_I(sbi);
-       struct page *page;
-       unsigned int i = 0;
-       nid_t nid;
-
-       if (!enabled_nat_bits(sbi, NULL))
-               return -EAGAIN;
-
-       down_read(&nm_i->nat_tree_lock);
-check_empty:
-       i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
-       if (i >= nm_i->nat_blocks) {
-               i = 0;
-               goto check_partial;
-       }
-
-       for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
-                                                                       nid++) {
-               if (unlikely(nid >= nm_i->max_nid))
-                       break;
-               add_free_nid(sbi, nid, true);
-       }
-
-       if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS)
-               goto out;
-       i++;
-       goto check_empty;
-
-check_partial:
-       i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
-       if (i >= nm_i->nat_blocks) {
-               disable_nat_bits(sbi, true);
-               up_read(&nm_i->nat_tree_lock);
-               return -EINVAL;
-       }
-
-       nid = i * NAT_ENTRY_PER_BLOCK;
-       page = get_current_nat_page(sbi, nid);
-       scan_nat_page(sbi, page, nid);
-       f2fs_put_page(page, 1);
-
-       if (nm_i->nid_cnt[FREE_NID_LIST] < MAX_FREE_NIDS) {
-               i++;
-               goto check_partial;
-       }
-out:
-       up_read(&nm_i->nat_tree_lock);
-       return 0;
-}
-
  static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
  {
         struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -1980,21 +1940,6 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
  
                 if (nm_i->nid_cnt[FREE_NID_LIST])
                         return;
-
-               /* try to find free nids with nat_bits */
-               if (!scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
-                       return;
-       }
-
-       /* find next valid candidate */
-       if (enabled_nat_bits(sbi, NULL)) {
-               int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
-                                       nm_i->nat_blocks, 0);
-
-               if (idx >= nm_i->nat_blocks)
-                       set_sbi_flag(sbi, SBI_NEED_FSCK);
-               else
-                       nid = idx * NAT_ENTRY_PER_BLOCK;
         }
  
         /* readahead nat pages to be scanned */
@@ -2081,7 +2026,7 @@ retry:
                 __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
                 nm_i->available_nids--;
  
-               update_free_nid_bitmap(sbi, *nid, false);
+               update_free_nid_bitmap(sbi, *nid, false, false, false);
  
                 spin_unlock(&nm_i->nid_list_lock);
                 return true;
@@ -2137,7 +2082,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
  
         nm_i->available_nids++;
  
-       update_free_nid_bitmap(sbi, nid, true);
+       update_free_nid_bitmap(sbi, nid, true, false, false);
  
         spin_unlock(&nm_i->nid_list_lock);
  
@@ -2383,7 +2328,7 @@ add_out:
         list_add_tail(&nes->set_list, head);
  }
  
-void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
+static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
                                                 struct page *page)
  {
         struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -2402,16 +2347,16 @@ void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
                         valid++;
         }
         if (valid == 0) {
-               set_bit_le(nat_index, nm_i->empty_nat_bits);
-               clear_bit_le(nat_index, nm_i->full_nat_bits);
+               __set_bit_le(nat_index, nm_i->empty_nat_bits);
+               __clear_bit_le(nat_index, nm_i->full_nat_bits);
                 return;
         }
  
-       clear_bit_le(nat_index, nm_i->empty_nat_bits);
+       __clear_bit_le(nat_index, nm_i->empty_nat_bits);
         if (valid == NAT_ENTRY_PER_BLOCK)
-               set_bit_le(nat_index, nm_i->full_nat_bits);
+               __set_bit_le(nat_index, nm_i->full_nat_bits);
         else
-               clear_bit_le(nat_index, nm_i->full_nat_bits);
+               __clear_bit_le(nat_index, nm_i->full_nat_bits);
  }
  
  static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
@@ -2467,11 +2412,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
                         add_free_nid(sbi, nid, false);
                         spin_lock(&NM_I(sbi)->nid_list_lock);
                         NM_I(sbi)->available_nids++;
-                       update_free_nid_bitmap(sbi, nid, true);
+                       update_free_nid_bitmap(sbi, nid, true, false, false);
                         spin_unlock(&NM_I(sbi)->nid_list_lock);
                 } else {
                         spin_lock(&NM_I(sbi)->nid_list_lock);
-                       update_free_nid_bitmap(sbi, nid, false);
+                       update_free_nid_bitmap(sbi, nid, false, false, false);
                         spin_unlock(&NM_I(sbi)->nid_list_lock);
                 }
         }
@@ -2577,6 +2522,40 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
         return 0;
  }
  
+inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
+{
+       struct f2fs_nm_info *nm_i = NM_I(sbi);
+       unsigned int i = 0;
+       nid_t nid, last_nid;
+
+       if (!enabled_nat_bits(sbi, NULL))
+               return;
+
+       for (i = 0; i < nm_i->nat_blocks; i++) {
+               i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
+               if (i >= nm_i->nat_blocks)
+                       break;
+
+               __set_bit_le(i, nm_i->nat_block_bitmap);
+
+               nid = i * NAT_ENTRY_PER_BLOCK;
+               last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK;
+
+               spin_lock(&nm_i->free_nid_lock);
+               for (; nid < last_nid; nid++)
+                       update_free_nid_bitmap(sbi, nid, true, true, true);
+               spin_unlock(&nm_i->free_nid_lock);
+       }
+
+       for (i = 0; i < nm_i->nat_blocks; i++) {
+               i = find_next_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
+               if (i >= nm_i->nat_blocks)
+                       break;
+
+               __set_bit_le(i, nm_i->nat_block_bitmap);
+       }
+}
+
  static int init_node_manager(struct f2fs_sb_info *sbi)
  {
         struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
@@ -2638,7 +2617,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
         return 0;
  }
  
-int init_free_nid_cache(struct f2fs_sb_info *sbi)
+static int init_free_nid_cache(struct f2fs_sb_info *sbi)
  {
         struct f2fs_nm_info *nm_i = NM_I(sbi);
  
@@ -2651,6 +2630,14 @@ int init_free_nid_cache(struct f2fs_sb_info *sbi)
                                                                 GFP_KERNEL);
         if (!nm_i->nat_block_bitmap)
                 return -ENOMEM;
+
+       nm_i->free_nid_count = f2fs_kvzalloc(nm_i->nat_blocks *
+                                       sizeof(unsigned short), GFP_KERNEL);
+       if (!nm_i->free_nid_count)
+               return -ENOMEM;
+
+       spin_lock_init(&nm_i->free_nid_lock);
+
         return 0;
  }
  
@@ -2670,6 +2657,9 @@ int build_node_manager(struct f2fs_sb_info *sbi)
         if (err)
                 return err;
  
+       /* load free nid status from nat_bits table */
+       load_free_nid_bitmap(sbi);
+
         build_free_nids(sbi, true, true);
         return 0;
  }
@@ -2730,6 +2720,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
  
         kvfree(nm_i->nat_block_bitmap);
         kvfree(nm_i->free_nid_bitmap);
+       kvfree(nm_i->free_nid_count);
  
         kfree(nm_i->nat_bitmap);
         kfree(nm_i->nat_bits);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c

index 4bd7a8b19332d176d78b0a40c24e7bb12bbe2f5e..29ef7088c5582a480b6a1f7965fbbcca4f07e24e 100644 (file)
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1163,6 +1163,12 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
                 if (f2fs_discard_en(sbi) &&
                         !f2fs_test_and_set_bit(offset, se->discard_map))
                         sbi->discard_blks--;
+
+               /* don't overwrite by SSR to keep node chain */
+               if (se->type == CURSEG_WARM_NODE) {
+                       if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
+                               se->ckpt_valid_blocks++;
+               }
         } else {
                 if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) {
  #ifdef CONFIG_F2FS_CHECK_FS
diff --git a/fs/fat/inode.c b/fs/fat/inode.c

index 338d2f73eb29c8f1691a22a162e5929875bbf8cf..a2c05f2ada6dd86576df1dede141c05248126187 100644 (file)
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1359,6 +1359,16 @@ out:
         return 0;
  }
  
+static void fat_dummy_inode_init(struct inode *inode)
+{
+       /* Initialize this dummy inode to work as no-op. */
+       MSDOS_I(inode)->mmu_private = 0;
+       MSDOS_I(inode)->i_start = 0;
+       MSDOS_I(inode)->i_logstart = 0;
+       MSDOS_I(inode)->i_attrs = 0;
+       MSDOS_I(inode)->i_pos = 0;
+}
+
  static int fat_read_root(struct inode *inode)
  {
         struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
@@ -1803,12 +1813,13 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
         fat_inode = new_inode(sb);
         if (!fat_inode)
                 goto out_fail;
-       MSDOS_I(fat_inode)->i_pos = 0;
+       fat_dummy_inode_init(fat_inode);
         sbi->fat_inode = fat_inode;
  
         fsinfo_inode = new_inode(sb);
         if (!fsinfo_inode)
                 goto out_fail;
+       fat_dummy_inode_init(fsinfo_inode);
         fsinfo_inode->i_ino = MSDOS_FSINFO_INO;
         sbi->fsinfo_inode = fsinfo_inode;
         insert_inode_hash(fsinfo_inode);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c

index ef600591d96f9a42be98699025f4cf94ef8e7762..63ee2940775ce9c16daca5c2f7590e0c6e57bc07 100644 (file)
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -173,19 +173,33 @@ static void wb_wakeup(struct bdi_writeback *wb)
         spin_unlock_bh(&wb->work_lock);
  }
  
+static void finish_writeback_work(struct bdi_writeback *wb,
+                                 struct wb_writeback_work *work)
+{
+       struct wb_completion *done = work->done;
+
+       if (work->auto_free)
+               kfree(work);
+       if (done && atomic_dec_and_test(&done->cnt))
+               wake_up_all(&wb->bdi->wb_waitq);
+}
+
  static void wb_queue_work(struct bdi_writeback *wb,
                           struct wb_writeback_work *work)
  {
         trace_writeback_queue(wb, work);
  
-       spin_lock_bh(&wb->work_lock);
-       if (!test_bit(WB_registered, &wb->state))
-               goto out_unlock;
         if (work->done)
                 atomic_inc(&work->done->cnt);
-       list_add_tail(&work->list, &wb->work_list);
-       mod_delayed_work(bdi_wq, &wb->dwork, 0);
-out_unlock:
+
+       spin_lock_bh(&wb->work_lock);
+
+       if (test_bit(WB_registered, &wb->state)) {
+               list_add_tail(&work->list, &wb->work_list);
+               mod_delayed_work(bdi_wq, &wb->dwork, 0);
+       } else
+               finish_writeback_work(wb, work);
+
         spin_unlock_bh(&wb->work_lock);
  }
  
@@ -1873,16 +1887,9 @@ static long wb_do_writeback(struct bdi_writeback *wb)
  
         set_bit(WB_writeback_running, &wb->state);
         while ((work = get_next_work_item(wb)) != NULL) {
-               struct wb_completion *done = work->done;
-
                 trace_writeback_exec(wb, work);
-
                 wrote += wb_writeback(wb, work);
-
-               if (work->auto_free)
-                       kfree(work);
-               if (done && atomic_dec_and_test(&done->cnt))
-                       wake_up_all(&wb->bdi->wb_waitq);
+               finish_writeback_work(wb, work);
         }
  
         /*
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c

index b681b43c766e11daf45814fc694b4b988f218a43..c2d7f3a92679dd371af2a5fe9f9c2dfe1e0f39b2 100644 (file)
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -382,9 +382,9 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
                         wake_up(&fc->blocked_waitq);
  
                 if (fc->num_background == fc->congestion_threshold &&
-                   fc->connected && fc->bdi_initialized) {
-                       clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
-                       clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
+                   fc->connected && fc->sb) {
+                       clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
+                       clear_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
                 }
                 fc->num_background--;
                 fc->active_background--;
@@ -573,10 +573,9 @@ void fuse_request_send_background_locked(struct fuse_conn *fc,
         fc->num_background++;
         if (fc->num_background == fc->max_background)
                 fc->blocked = 1;
-       if (fc->num_background == fc->congestion_threshold &&
-           fc->bdi_initialized) {
-               set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
-               set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
+       if (fc->num_background == fc->congestion_threshold && fc->sb) {
+               set_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
+               set_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
         }
         list_add_tail(&req->list, &fc->bg_queue);
         flush_bg_queue(fc);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h

index 32ac2c9b09c0302c99337c374263a6fa428b5cf4..f33341d9501a02079d1da148add1a23642c68a81 100644 (file)
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -527,9 +527,6 @@ struct fuse_conn {
         /** Filesystem supports NFS exporting.  Only set in INIT */
         unsigned export_support:1;
  
-       /** Set if bdi is valid */
-       unsigned bdi_initialized:1;
-
         /** write-back cache policy (default is write-through) */
         unsigned writeback_cache:1;
  
@@ -631,9 +628,6 @@ struct fuse_conn {
         /** Negotiated minor version */
         unsigned minor;
  
-       /** Backing dev info */
-       struct backing_dev_info bdi;
-
         /** Entry on the fuse_conn_list */
         struct list_head entry;
  
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c

index 6fe6a88ecb4afd9eaaad1b0c75fe961108efc64a..73cf051352521ad400af30befd3afbd2fc595c31 100644 (file)
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -386,12 +386,6 @@ static void fuse_send_destroy(struct fuse_conn *fc)
         }
  }
  
-static void fuse_bdi_destroy(struct fuse_conn *fc)
-{
-       if (fc->bdi_initialized)
-               bdi_destroy(&fc->bdi);
-}
-
  static void fuse_put_super(struct super_block *sb)
  {
         struct fuse_conn *fc = get_fuse_conn_super(sb);
@@ -403,7 +397,6 @@ static void fuse_put_super(struct super_block *sb)
         list_del(&fc->entry);
         fuse_ctl_remove_conn(fc);
         mutex_unlock(&fuse_mutex);
-       fuse_bdi_destroy(fc);
  
         fuse_conn_put(fc);
  }
@@ -928,7 +921,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
                         fc->no_flock = 1;
                 }
  
-               fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages);
+               fc->sb->s_bdi->ra_pages =
+                               min(fc->sb->s_bdi->ra_pages, ra_pages);
                 fc->minor = arg->minor;
                 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
                 fc->max_write = max_t(unsigned, 4096, fc->max_write);
@@ -944,7 +938,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
  
         arg->major = FUSE_KERNEL_VERSION;
         arg->minor = FUSE_KERNEL_MINOR_VERSION;
-       arg->max_readahead = fc->bdi.ra_pages * PAGE_SIZE;
+       arg->max_readahead = fc->sb->s_bdi->ra_pages * PAGE_SIZE;
         arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
                 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
                 FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
@@ -976,27 +970,18 @@ static void fuse_free_conn(struct fuse_conn *fc)
  static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
  {
         int err;
+       char *suffix = "";
  
-       fc->bdi.name = "fuse";
-       fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_SIZE;
-       /* fuse does it's own writeback accounting */
-       fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT;
-
-       err = bdi_init(&fc->bdi);
+       if (sb->s_bdev)
+               suffix = "-fuseblk";
+       err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev),
+                                  MINOR(fc->dev), suffix);
         if (err)
                 return err;
  
-       fc->bdi_initialized = 1;
-
-       if (sb->s_bdev) {
-               err =  bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk",
-                                   MAJOR(fc->dev), MINOR(fc->dev));
-       } else {
-               err = bdi_register_dev(&fc->bdi, fc->dev);
-       }
-
-       if (err)
-               return err;
+       sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_SIZE;
+       /* fuse does it's own writeback accounting */
+       sb->s_bdi->capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT;
  
         /*
          * For a single fuse filesystem use max 1% of dirty +
@@ -1010,7 +995,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
          *
          *    /sys/class/bdi/<bdi>/max_ratio
          */
-       bdi_set_max_ratio(&fc->bdi, 1);
+       bdi_set_max_ratio(sb->s_bdi, 1);
  
         return 0;
  }
@@ -1113,8 +1098,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
         if (err)
                 goto err_dev_free;
  
-       sb->s_bdi = &fc->bdi;
-
         /* Handle umasking inside the fuse code */
         if (sb->s_flags & MS_POSIXACL)
                 fc->dont_mask = 1;
@@ -1182,7 +1165,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
   err_dev_free:
         fuse_dev_free(fud);
   err_put_conn:
-       fuse_bdi_destroy(fc);
         fuse_conn_put(fc);
   err_fput:
         fput(file);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h

index c45084ac642d1929058ea5d903ad796d574a45cc..511e1ed7e2ded7b0a9dc9882cffe0b66c37c96a2 100644 (file)
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -207,7 +207,7 @@ struct lm_lockname {
         struct gfs2_sbd *ln_sbd;
         u64 ln_number;
         unsigned int ln_type;
-};
+} __packed __aligned(sizeof(int));
  
  #define lm_name_equal(name1, name2) \
          (((name1)->ln_number == (name2)->ln_number) && \
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c

index b108e7ba81af73568b71ec0b163bf68fbbde8948..ed67548b286ccc58d84732d00af5ca281772bd39 100644 (file)
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -23,6 +23,7 @@
  #include <linux/quotaops.h>
  #include <linux/lockdep.h>
  #include <linux/module.h>
+#include <linux/backing-dev.h>
  
  #include "gfs2.h"
  #include "incore.h"
@@ -1222,12 +1223,7 @@ static int set_gfs2_super(struct super_block *s, void *data)
  {
         s->s_bdev = data;
         s->s_dev = s->s_bdev->bd_dev;
-
-       /*
-        * We set the bdi here to the queue backing, file systems can
-        * overwrite this in ->fill_super()
-        */
-       s->s_bdi = bdev_get_queue(s->s_bdev)->backing_dev_info;
+       s->s_bdi = bdi_get(s->s_bdev->bd_bdi);
         return 0;
  }
  
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c

index 8f96461236f655c3eef66694d45d9c4381a58210..dde861387a407810b35f73ff37c3ce6389048326 100644 (file)
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -136,17 +136,26 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
         vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
         vma->vm_ops = &hugetlb_vm_ops;
  
+       /*
+        * Offset passed to mmap (before page shift) could have been
+        * negative when represented as a (l)off_t.
+        */
+       if (((loff_t)vma->vm_pgoff << PAGE_SHIFT) < 0)
+               return -EINVAL;
+
         if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
                 return -EINVAL;
  
         vma_len = (loff_t)(vma->vm_end - vma->vm_start);
+       len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
+       /* check for overflow */
+       if (len < vma_len)
+               return -EINVAL;
  
         inode_lock(inode);
         file_accessed(file);
  
         ret = -ENOMEM;
-       len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
-
         if (hugetlb_reserve_pages(inode,
                                 vma->vm_pgoff >> huge_page_order(h),
                                 len >> huge_page_shift(h), vma,
@@ -155,7 +164,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
  
         ret = 0;
         if (vma->vm_flags & VM_WRITE && inode->i_size < len)
-               inode->i_size = len;
+               i_size_write(inode, len);
  out:
         inode_unlock(inode);
  
@@ -695,14 +704,11 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
  
         inode = new_inode(sb);
         if (inode) {
-               struct hugetlbfs_inode_info *info;
                 inode->i_ino = get_next_ino();
                 inode->i_mode = S_IFDIR | config->mode;
                 inode->i_uid = config->uid;
                 inode->i_gid = config->gid;
                 inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
-               info = HUGETLBFS_I(inode);
-               mpol_shared_policy_init(&info->policy, NULL);
                 inode->i_op = &hugetlbfs_dir_inode_operations;
                 inode->i_fop = &simple_dir_operations;
                 /* directory inodes start off with i_nlink == 2 (for "." entry) */
@@ -733,7 +739,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
  
         inode = new_inode(sb);
         if (inode) {
-               struct hugetlbfs_inode_info *info;
                 inode->i_ino = get_next_ino();
                 inode_init_owner(inode, dir, mode);
                 lockdep_set_class(&inode->i_mapping->i_mmap_rwsem,
@@ -741,15 +746,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
                 inode->i_mapping->a_ops = &hugetlbfs_aops;
                 inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
                 inode->i_mapping->private_data = resv_map;
-               info = HUGETLBFS_I(inode);
-               /*
-                * The policy is initialized here even if we are creating a
-                * private inode because initialization simply creates an
-                * an empty rb tree and calls rwlock_init(), later when we
-                * call mpol_free_shared_policy() it will just return because
-                * the rb tree will still be empty.
-                */
-               mpol_shared_policy_init(&info->policy, NULL);
                 switch (mode & S_IFMT) {
                 default:
                         init_special_inode(inode, mode, dev);
@@ -937,6 +933,18 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
                 hugetlbfs_inc_free_inodes(sbinfo);
                 return NULL;
         }
+
+       /*
+        * Any time after allocation, hugetlbfs_destroy_inode can be called
+        * for the inode.  mpol_free_shared_policy is unconditionally called
+        * as part of hugetlbfs_destroy_inode.  So, initialize policy here
+        * in case of a quick call to destroy.
+        *
+        * Note that the policy is initialized even if we are creating a
+        * private inode.  This simplifies hugetlbfs_destroy_inode.
+        */
+       mpol_shared_policy_init(&p->policy, NULL);
+
         return &p->vfs_inode;
  }
  
diff --git a/fs/iomap.c b/fs/iomap.c

index 3ca1a8e44135ed757bc309cd750899d51f093970..141c3cd55a8b2d974f431d7710fbe4de58f78355 100644 (file)
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -846,7 +846,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
         struct address_space *mapping = iocb->ki_filp->f_mapping;
         struct inode *inode = file_inode(iocb->ki_filp);
         size_t count = iov_iter_count(iter);
-       loff_t pos = iocb->ki_pos, end = iocb->ki_pos + count - 1, ret = 0;
+       loff_t pos = iocb->ki_pos, start = pos;
+       loff_t end = iocb->ki_pos + count - 1, ret = 0;
         unsigned int flags = IOMAP_DIRECT;
         struct blk_plug plug;
         struct iomap_dio *dio;
@@ -887,12 +888,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
         }
  
         if (mapping->nrpages) {
-               ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
+               ret = filemap_write_and_wait_range(mapping, start, end);
                 if (ret)
                         goto out_free_dio;
  
                 ret = invalidate_inode_pages2_range(mapping,
-                               iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
+                               start >> PAGE_SHIFT, end >> PAGE_SHIFT);
                 WARN_ON_ONCE(ret);
                 ret = 0;
         }
@@ -941,6 +942,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
                 __set_current_state(TASK_RUNNING);
         }
  
+       ret = iomap_dio_complete(dio);
+
         /*
          * Try again to invalidate clean pages which might have been cached by
          * non-direct readahead, or faulted in by get_user_pages() if the source
@@ -949,12 +952,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
          * this invalidation fails, tough, the write still worked...
          */
         if (iov_iter_rw(iter) == WRITE && mapping->nrpages) {
-               ret = invalidate_inode_pages2_range(mapping,
-                               iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
-               WARN_ON_ONCE(ret);
+               int err = invalidate_inode_pages2_range(mapping,
+                               start >> PAGE_SHIFT, end >> PAGE_SHIFT);
+               WARN_ON_ONCE(err);
         }
  
-       return iomap_dio_complete(dio);
+       return ret;
  
  out_free_dio:
         kfree(dio);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c

index a1a359bfcc9cd4ff84254e464788ab3031dfe90f..5adc2fb62b0fab89899e5d0acba1e8019a73c766 100644 (file)
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1125,10 +1125,8 @@ static journal_t *journal_init_common(struct block_device *bdev,
  
         /* Set up a default-sized revoke table for the new mount. */
         err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH);
-       if (err) {
-               kfree(journal);
-               return NULL;
-       }
+       if (err)
+               goto err_cleanup;
  
         spin_lock_init(&journal->j_history_lock);
  
@@ -1145,23 +1143,25 @@ static journal_t *journal_init_common(struct block_device *bdev,
         journal->j_wbufsize = n;
         journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *),
                                         GFP_KERNEL);
-       if (!journal->j_wbuf) {
-               kfree(journal);
-               return NULL;
-       }
+       if (!journal->j_wbuf)
+               goto err_cleanup;
  
         bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize);
         if (!bh) {
                 pr_err("%s: Cannot get buffer for journal superblock\n",
                         __func__);
-               kfree(journal->j_wbuf);
-               kfree(journal);
-               return NULL;
+               goto err_cleanup;
         }
         journal->j_sb_buffer = bh;
         journal->j_superblock = (journal_superblock_t *)bh->b_data;
  
         return journal;
+
+err_cleanup:
+       kfree(journal->j_wbuf);
+       jbd2_journal_destroy_revoke(journal);
+       kfree(journal);
+       return NULL;
  }
  
  /* jbd2_journal_init_dev and jbd2_journal_init_inode:
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c

index cfc38b5521189f8ff64330ff33aa6ac8c25794ec..f9aefcda585418abcc37e58226eb39f3ac883172 100644 (file)
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -280,6 +280,7 @@ int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
  
  fail1:
         jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
+       journal->j_revoke_table[0] = NULL;
  fail0:
         return -ENOMEM;
  }
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c

index 8e4dc7ab584c2df9bf802c2827dacb03ef534097..ac2dfe0c5a9c8520aa8b40c4bab97b710799cc78 100644 (file)
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -809,7 +809,8 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
                 if (kn->flags & KERNFS_HAS_MMAP)
                         unmap_mapping_range(inode->i_mapping, 0, 0, 1);
  
-               kernfs_release_file(kn, of);
+               if (kn->flags & KERNFS_HAS_RELEASE)
+                       kernfs_release_file(kn, of);
         }
  
         mutex_unlock(&kernfs_open_file_mutex);
diff --git a/fs/namei.c b/fs/namei.c

index d41fab78798b2e2510ca4f8b54925ef304a14c7d..19dcf62133cc95162d364f7ef43c17d280ee6448 100644 (file)
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2145,6 +2145,9 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
         int retval = 0;
         const char *s = nd->name->name;
  
+       if (!*s)
+               flags &= ~LOOKUP_RCU;
+
         nd->last_type = LAST_ROOT; /* if there are only slashes... */
         nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
         nd->depth = 0;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c

index d5606099712a4cb2059c4dac9036aba35e9c3751..6d0f14c8609971378ee75104380873a6da67a4be 100644 (file)
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -554,12 +554,11 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
         sb->s_magic = NCP_SUPER_MAGIC;
         sb->s_op = &ncp_sops;
         sb->s_d_op = &ncp_dentry_operations;
-       sb->s_bdi = &server->bdi;
  
         server = NCP_SBP(sb);
         memset(server, 0, sizeof(*server));
  
-       error = bdi_setup_and_register(&server->bdi, "ncpfs");
+       error = super_setup_bdi(sb);
         if (error)
                 goto out_fput;
  
@@ -568,7 +567,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
         if (data.info_fd != -1) {
                 struct socket *info_sock = sockfd_lookup(data.info_fd, &error);
                 if (!info_sock)
-                       goto out_bdi;
+                       goto out_fput;
                 server->info_sock = info_sock;
                 error = -EBADFD;
                 if (info_sock->type != SOCK_STREAM)
@@ -746,8 +745,6 @@ out_nls:
  out_fput2:
         if (server->info_sock)
                 sockfd_put(server->info_sock);
-out_bdi:
-       bdi_destroy(&server->bdi);
  out_fput:
         sockfd_put(sock);
  out:
@@ -788,7 +785,6 @@ static void ncp_put_super(struct super_block *sb)
         kill_pid(server->m.wdog_pid, SIGTERM, 1);
         put_pid(server->m.wdog_pid);
  
-       bdi_destroy(&server->bdi);
         kfree(server->priv.data);
         kfree(server->auth.object_name);
         vfree(server->rxbuf);
diff --git a/fs/ncpfs/ncp_fs_sb.h b/fs/ncpfs/ncp_fs_sb.h

index 55e26fd8088694308aeee3ceff1d11b91d6b1fb5..366fd63cc506fc3e61be93877349ef6032cf5734 100644 (file)
--- a/fs/ncpfs/ncp_fs_sb.h
+++ b/fs/ncpfs/ncp_fs_sb.h
@@ -143,7 +143,6 @@ struct ncp_server {
                 size_t len;
                 __u8 data[128];
         } unexpected_packet;
-       struct backing_dev_info bdi;
  };
  
  extern void ncp_tcp_rcv_proc(struct work_struct *work);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c

index bb79972dc638ba8bf27beef1930deeb186820af5..773774531aff5fc081610706ea39756b0e5a5c25 100644 (file)
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -232,12 +232,12 @@ static struct svc_serv_ops nfs41_cb_sv_ops = {
         .svo_module             = THIS_MODULE,
  };
  
-struct svc_serv_ops *nfs4_cb_sv_ops[] = {
+static struct svc_serv_ops *nfs4_cb_sv_ops[] = {
         [0] = &nfs40_cb_sv_ops,
         [1] = &nfs41_cb_sv_ops,
  };
  #else
-struct svc_serv_ops *nfs4_cb_sv_ops[] = {
+static struct svc_serv_ops *nfs4_cb_sv_ops[] = {
         [0] = &nfs40_cb_sv_ops,
         [1] = NULL,
  };
diff --git a/fs/nfs/client.c b/fs/nfs/client.c

index 91a8d610ba0fa6db7cc76458ec2514aec9b124db..04d15a0045e37173c124f78771243f8b444f3dce 100644 (file)
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -325,10 +325,33 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
         return NULL;
  }
  
-static bool nfs_client_init_is_complete(const struct nfs_client *clp)
+/*
+ * Return true if @clp is done initializing, false if still working on it.
+ *
+ * Use nfs_client_init_status to check if it was successful.
+ */
+bool nfs_client_init_is_complete(const struct nfs_client *clp)
  {
         return clp->cl_cons_state <= NFS_CS_READY;
  }
+EXPORT_SYMBOL_GPL(nfs_client_init_is_complete);
+
+/*
+ * Return 0 if @clp was successfully initialized, -errno otherwise.
+ *
+ * This must be called *after* nfs_client_init_is_complete() returns true,
+ * otherwise it will pop WARN_ON_ONCE and return -EINVAL
+ */
+int nfs_client_init_status(const struct nfs_client *clp)
+{
+       /* called without checking nfs_client_init_is_complete */
+       if (clp->cl_cons_state > NFS_CS_READY) {
+               WARN_ON_ONCE(1);
+               return -EINVAL;
+       }
+       return clp->cl_cons_state;
+}
+EXPORT_SYMBOL_GPL(nfs_client_init_status);
  
  int nfs_wait_client_init_complete(const struct nfs_client *clp)
  {
@@ -738,9 +761,6 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
                 server->rsize = NFS_MAX_FILE_IO_SIZE;
         server->rpages = (server->rsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
  
-       server->backing_dev_info.name = "nfs";
-       server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
-
         if (server->wsize > max_rpc_payload)
                 server->wsize = max_rpc_payload;
         if (server->wsize > NFS_MAX_FILE_IO_SIZE)
@@ -894,12 +914,6 @@ struct nfs_server *nfs_alloc_server(void)
                 return NULL;
         }
  
-       if (bdi_init(&server->backing_dev_info)) {
-               nfs_free_iostats(server->io_stats);
-               kfree(server);
-               return NULL;
-       }
-
         ida_init(&server->openowner_id);
         ida_init(&server->lockowner_id);
         pnfs_init_server(server);
@@ -930,7 +944,6 @@ void nfs_free_server(struct nfs_server *server)
         ida_destroy(&server->lockowner_id);
         ida_destroy(&server->openowner_id);
         nfs_free_iostats(server->io_stats);
-       bdi_destroy(&server->backing_dev_info);
         kfree(server);
         nfs_release_automount_timer();
         dprintk("<-- nfs_free_server()\n");
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c

index fb499a3f21b58ed341bbe17933bd5e191c850212..f92ba8d6c5569099f6c469eda92446ad0d7e148d 100644 (file)
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2055,7 +2055,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
  {
         struct inode *old_inode = d_inode(old_dentry);
         struct inode *new_inode = d_inode(new_dentry);
-       struct dentry *dentry = NULL, *rehash = NULL;
+       struct dentry *dentry = NULL;
         struct rpc_task *task;
         int error = -EBUSY;
  
@@ -2078,10 +2078,8 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                  * To prevent any new references to the target during the
                  * rename, we unhash the dentry in advance.
                  */
-               if (!d_unhashed(new_dentry)) {
+               if (!d_unhashed(new_dentry))
                         d_drop(new_dentry);
-                       rehash = new_dentry;
-               }
  
                 if (d_count(new_dentry) > 2) {
                         int err;
@@ -2098,7 +2096,6 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                                 goto out;
  
                         new_dentry = dentry;
-                       rehash = NULL;
                         new_inode = NULL;
                 }
         }
@@ -2119,8 +2116,6 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                 error = task->tk_status;
         rpc_put_task(task);
  out:
-       if (rehash)
-               d_rehash(rehash);
         trace_nfs_rename_exit(old_dir, old_dentry,
                         new_dir, new_dentry, error);
         /* new dentry created? */
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c

index aab32fc3d6a84965ea879854c942b12888548411..c1b5fed7c863b2b730e46f0139a7b6f9a5f68fa0 100644 (file)
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -537,7 +537,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
  
         if (put_dreq(dreq))
                 nfs_direct_complete(dreq);
-       return 0;
+       return requested_bytes;
  }
  
  /**
@@ -566,7 +566,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
         struct inode *inode = mapping->host;
         struct nfs_direct_req *dreq;
         struct nfs_lock_context *l_ctx;
-       ssize_t result = -EINVAL;
+       ssize_t result = -EINVAL, requested;
         size_t count = iov_iter_count(iter);
         nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
  
@@ -600,14 +600,19 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
         nfs_start_io_direct(inode);
  
         NFS_I(inode)->read_io += count;
-       result = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos);
+       requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos);
  
         nfs_end_io_direct(inode);
  
-       if (!result) {
+       if (requested > 0) {
                 result = nfs_direct_wait(dreq);
-               if (result > 0)
+               if (result > 0) {
+                       requested -= result;
                         iocb->ki_pos += result;
+               }
+               iov_iter_revert(iter, requested);
+       } else {
+               result = requested;
         }
  
  out_release:
@@ -954,7 +959,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
  
         if (put_dreq(dreq))
                 nfs_direct_write_complete(dreq);
-       return 0;
+       return requested_bytes;
  }
  
  /**
@@ -979,7 +984,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
   */
  ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
  {
-       ssize_t result = -EINVAL;
+       ssize_t result = -EINVAL, requested;
         size_t count;
         struct file *file = iocb->ki_filp;
         struct address_space *mapping = file->f_mapping;
@@ -1022,7 +1027,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
  
         nfs_start_io_direct(inode);
  
-       result = nfs_direct_write_schedule_iovec(dreq, iter, pos);
+       requested = nfs_direct_write_schedule_iovec(dreq, iter, pos);
  
         if (mapping->nrpages) {
                 invalidate_inode_pages2_range(mapping,
@@ -1031,13 +1036,17 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
  
         nfs_end_io_direct(inode);
  
-       if (!result) {
+       if (requested > 0) {
                 result = nfs_direct_wait(dreq);
                 if (result > 0) {
+                       requested -= result;
                         iocb->ki_pos = pos + result;
                         /* XXX: should check the generic_write_sync retval */
                         generic_write_sync(iocb, result);
                 }
+               iov_iter_revert(iter, requested);
+       } else {
+               result = requested;
         }
  out_release:
         nfs_direct_req_release(dreq);
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c

index 44347f4bdc1516f54f030ca9f0d95332ab816116..acd30baca46166c902aa5dfae1663184cc30e235 100644 (file)
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -202,10 +202,10 @@ static int filelayout_async_handle_error(struct rpc_task *task,
                         task->tk_status);
                 nfs4_mark_deviceid_unavailable(devid);
                 pnfs_error_mark_layout_for_return(inode, lseg);
-               pnfs_set_lo_fail(lseg);
                 rpc_wake_up(&tbl->slot_tbl_waitq);
                 /* fall through */
         default:
+               pnfs_set_lo_fail(lseg);
  reset:
                 dprintk("%s Retry through MDS. Error %d\n", __func__,
                         task->tk_status);
@@ -560,6 +560,50 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
         return PNFS_ATTEMPTED;
  }
  
+static int
+filelayout_check_deviceid(struct pnfs_layout_hdr *lo,
+                         struct nfs4_filelayout_segment *fl,
+                         gfp_t gfp_flags)
+{
+       struct nfs4_deviceid_node *d;
+       struct nfs4_file_layout_dsaddr *dsaddr;
+       int status = -EINVAL;
+
+       /* find and reference the deviceid */
+       d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &fl->deviceid,
+                       lo->plh_lc_cred, gfp_flags);
+       if (d == NULL)
+               goto out;
+
+       dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
+       /* Found deviceid is unavailable */
+       if (filelayout_test_devid_unavailable(&dsaddr->id_node))
+               goto out_put;
+
+       fl->dsaddr = dsaddr;
+
+       if (fl->first_stripe_index >= dsaddr->stripe_count) {
+               dprintk("%s Bad first_stripe_index %u\n",
+                               __func__, fl->first_stripe_index);
+               goto out_put;
+       }
+
+       if ((fl->stripe_type == STRIPE_SPARSE &&
+           fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) ||
+           (fl->stripe_type == STRIPE_DENSE &&
+           fl->num_fh != dsaddr->stripe_count)) {
+               dprintk("%s num_fh %u not valid for given packing\n",
+                       __func__, fl->num_fh);
+               goto out_put;
+       }
+       status = 0;
+out:
+       return status;
+out_put:
+       nfs4_fl_put_deviceid(dsaddr);
+       goto out;
+}
+
  /*
   * filelayout_check_layout()
   *
@@ -572,11 +616,8 @@ static int
  filelayout_check_layout(struct pnfs_layout_hdr *lo,
                         struct nfs4_filelayout_segment *fl,
                         struct nfs4_layoutget_res *lgr,
-                       struct nfs4_deviceid *id,
                         gfp_t gfp_flags)
  {
-       struct nfs4_deviceid_node *d;
-       struct nfs4_file_layout_dsaddr *dsaddr;
         int status = -EINVAL;
  
         dprintk("--> %s\n", __func__);
@@ -601,41 +642,10 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
                 goto out;
         }
  
-       /* find and reference the deviceid */
-       d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), id,
-                       lo->plh_lc_cred, gfp_flags);
-       if (d == NULL)
-               goto out;
-
-       dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
-       /* Found deviceid is unavailable */
-       if (filelayout_test_devid_unavailable(&dsaddr->id_node))
-               goto out_put;
-
-       fl->dsaddr = dsaddr;
-
-       if (fl->first_stripe_index >= dsaddr->stripe_count) {
-               dprintk("%s Bad first_stripe_index %u\n",
-                               __func__, fl->first_stripe_index);
-               goto out_put;
-       }
-
-       if ((fl->stripe_type == STRIPE_SPARSE &&
-           fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) ||
-           (fl->stripe_type == STRIPE_DENSE &&
-           fl->num_fh != dsaddr->stripe_count)) {
-               dprintk("%s num_fh %u not valid for given packing\n",
-                       __func__, fl->num_fh);
-               goto out_put;
-       }
-
         status = 0;
  out:
         dprintk("--> %s returns %d\n", __func__, status);
         return status;
-out_put:
-       nfs4_fl_put_deviceid(dsaddr);
-       goto out;
  }
  
  static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
@@ -657,7 +667,6 @@ static int
  filelayout_decode_layout(struct pnfs_layout_hdr *flo,
                          struct nfs4_filelayout_segment *fl,
                          struct nfs4_layoutget_res *lgr,
-                        struct nfs4_deviceid *id,
                          gfp_t gfp_flags)
  {
         struct xdr_stream stream;
@@ -682,9 +691,9 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
         if (unlikely(!p))
                 goto out_err;
  
-       memcpy(id, p, sizeof(*id));
+       memcpy(&fl->deviceid, p, sizeof(fl->deviceid));
         p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
-       nfs4_print_deviceid(id);
+       nfs4_print_deviceid(&fl->deviceid);
  
         nfl_util = be32_to_cpup(p++);
         if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
@@ -831,15 +840,14 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
  {
         struct nfs4_filelayout_segment *fl;
         int rc;
-       struct nfs4_deviceid id;
  
         dprintk("--> %s\n", __func__);
         fl = kzalloc(sizeof(*fl), gfp_flags);
         if (!fl)
                 return NULL;
  
-       rc = filelayout_decode_layout(layoutid, fl, lgr, &id, gfp_flags);
-       if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id, gfp_flags)) {
+       rc = filelayout_decode_layout(layoutid, fl, lgr, gfp_flags);
+       if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, gfp_flags)) {
                 _filelayout_free_lseg(fl);
                 return NULL;
         }
@@ -888,18 +896,51 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
         return min(stripe_unit - (unsigned int)stripe_offset, size);
  }
  
+static struct pnfs_layout_segment *
+fl_pnfs_update_layout(struct inode *ino,
+                     struct nfs_open_context *ctx,
+                     loff_t pos,
+                     u64 count,
+                     enum pnfs_iomode iomode,
+                     bool strict_iomode,
+                     gfp_t gfp_flags)
+{
+       struct pnfs_layout_segment *lseg = NULL;
+       struct pnfs_layout_hdr *lo;
+       struct nfs4_filelayout_segment *fl;
+       int status;
+
+       lseg = pnfs_update_layout(ino, ctx, pos, count, iomode, strict_iomode,
+                                 gfp_flags);
+       if (!lseg)
+               lseg = ERR_PTR(-ENOMEM);
+       if (IS_ERR(lseg))
+               goto out;
+
+       lo = NFS_I(ino)->layout;
+       fl = FILELAYOUT_LSEG(lseg);
+
+       status = filelayout_check_deviceid(lo, fl, gfp_flags);
+       if (status)
+               lseg = ERR_PTR(status);
+out:
+       if (IS_ERR(lseg))
+               pnfs_put_lseg(lseg);
+       return lseg;
+}
+
  static void
  filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
                         struct nfs_page *req)
  {
         if (!pgio->pg_lseg) {
-               pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
-                                          req->wb_context,
-                                          0,
-                                          NFS4_MAX_UINT64,
-                                          IOMODE_READ,
-                                          false,
-                                          GFP_KERNEL);
+               pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
+                                                     req->wb_context,
+                                                     0,
+                                                     NFS4_MAX_UINT64,
+                                                     IOMODE_READ,
+                                                     false,
+                                                     GFP_KERNEL);
                 if (IS_ERR(pgio->pg_lseg)) {
                         pgio->pg_error = PTR_ERR(pgio->pg_lseg);
                         pgio->pg_lseg = NULL;
@@ -919,13 +960,13 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
         int status;
  
         if (!pgio->pg_lseg) {
-               pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
-                                          req->wb_context,
-                                          0,
-                                          NFS4_MAX_UINT64,
-                                          IOMODE_RW,
-                                          false,
-                                          GFP_NOFS);
+               pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
+                                                     req->wb_context,
+                                                     0,
+                                                     NFS4_MAX_UINT64,
+                                                     IOMODE_RW,
+                                                     false,
+                                                     GFP_NOFS);
                 if (IS_ERR(pgio->pg_lseg)) {
                         pgio->pg_error = PTR_ERR(pgio->pg_lseg);
                         pgio->pg_lseg = NULL;
diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h

index 2896cb833a11375e064f926c3a50b5cbfd499f39..79323b5dab0cb38a212318d774273cb7ce88187f 100644 (file)
--- a/fs/nfs/filelayout/filelayout.h
+++ b/fs/nfs/filelayout/filelayout.h
@@ -55,15 +55,16 @@ struct nfs4_file_layout_dsaddr {
  };
  
  struct nfs4_filelayout_segment {
-       struct pnfs_layout_segment generic_hdr;
-       u32 stripe_type;
-       u32 commit_through_mds;
-       u32 stripe_unit;
-       u32 first_stripe_index;
-       u64 pattern_offset;
-       struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
-       unsigned int num_fh;
-       struct nfs_fh **fh_array;
+       struct pnfs_layout_segment      generic_hdr;
+       u32                             stripe_type;
+       u32                             commit_through_mds;
+       u32                             stripe_unit;
+       u32                             first_stripe_index;
+       u64                             pattern_offset;
+       struct nfs4_deviceid            deviceid;
+       struct nfs4_file_layout_dsaddr  *dsaddr; /* Point to GETDEVINFO data */
+       unsigned int                    num_fh;
+       struct nfs_fh                   **fh_array;
  };
  
  struct nfs4_filelayout {
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c

index f956ca20a8a3595e36e6cae0e913dc90a47b1e22..d913e818858f3fee8d7d5c199714d2d79b1bef39 100644 (file)
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -266,6 +266,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
         struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
         struct nfs4_pnfs_ds *ret = ds;
         struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
+       int status;
  
         if (ds == NULL) {
                 printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
@@ -277,9 +278,14 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
         if (ds->ds_clp)
                 goto out_test_devid;
  
-       nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
+       status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
                              dataserver_retrans, 4,
                              s->nfs_client->cl_minorversion);
+       if (status) {
+               nfs4_mark_deviceid_unavailable(devid);
+               ret = NULL;
+               goto out;
+       }
  
  out_test_devid:
         if (ret->ds_clp == NULL ||
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h

index f4f39b0ab09b25170ed1f9f9a9a961ecadb9a5d2..98b34c9b0564b348615a0d560b863c11cd17ad5e 100644 (file)
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -175,7 +175,19 @@ ff_layout_no_read_on_rw(struct pnfs_layout_segment *lseg)
  static inline bool
  ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node)
  {
-       return nfs4_test_deviceid_unavailable(node);
+       /*
+        * Flexfiles should never mark a DS unavailable, but if it does
+        * print a (ratelimited) warning as this can affect performance.
+        */
+       if (nfs4_test_deviceid_unavailable(node)) {
+               u32 *p = (u32 *)node->deviceid.data;
+
+               pr_warn_ratelimited("NFS: flexfiles layout referencing an "
+                               "unavailable device [%x%x%x%x]\n",
+                               p[0], p[1], p[2], p[3]);
+               return true;
+       }
+       return false;
  }
  
  static inline int
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c

index e5a6f248697b369003e89ed526608d7cd2a296eb..457cfeb1d5c162e4177450eb941460a2fe39f3b1 100644 (file)
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -208,6 +208,10 @@ static bool ff_layout_mirror_valid(struct pnfs_layout_segment *lseg,
                 } else
                         goto outerr;
         }
+
+       if (IS_ERR(mirror->mirror_ds))
+               goto outerr;
+
         if (mirror->mirror_ds->ds == NULL) {
                 struct nfs4_deviceid_node *devid;
                 devid = &mirror->mirror_ds->id_node;
@@ -384,6 +388,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
         struct inode *ino = lseg->pls_layout->plh_inode;
         struct nfs_server *s = NFS_SERVER(ino);
         unsigned int max_payload;
+       int status;
  
         if (!ff_layout_mirror_valid(lseg, mirror, true)) {
                 pr_err_ratelimited("NFS: %s: No data server for offset index %d\n",
@@ -404,7 +409,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
         /* FIXME: For now we assume the server sent only one version of NFS
          * to use for the DS.
          */
-       nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
+       status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
                              dataserver_retrans,
                              mirror->mirror_ds->ds_versions[0].version,
                              mirror->mirror_ds->ds_versions[0].minor_version);
@@ -420,11 +425,11 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
                         mirror->mirror_ds->ds_versions[0].wsize = max_payload;
                 goto out;
         }
+out_fail:
         ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
                                  mirror, lseg->pls_range.offset,
                                  lseg->pls_range.length, NFS4ERR_NXIO,
                                  OP_ILLEGAL, GFP_NOIO);
-out_fail:
         if (fail_return || !ff_layout_has_available_ds(lseg))
                 pnfs_error_mark_layout_for_return(ino, lseg);
         ds = NULL;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h

index 09ca5095c04e427c881785170aefe7fdf58e7621..9dc65d7ae7541902e4235abbc5a5cc8285fc7f30 100644 (file)
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -139,7 +139,7 @@ struct nfs_mount_request {
  };
  
  struct nfs_mount_info {
-       void (*fill_super)(struct super_block *, struct nfs_mount_info *);
+       int (*fill_super)(struct super_block *, struct nfs_mount_info *);
         int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *);
         struct nfs_parsed_mount_data *parsed;
         struct nfs_clone_mount *cloned;
@@ -186,6 +186,8 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *,
                                            struct nfs_fh *,
                                            struct nfs_fattr *,
                                            rpc_authflavor_t);
+extern bool nfs_client_init_is_complete(const struct nfs_client *clp);
+extern int nfs_client_init_status(const struct nfs_client *clp);
  extern int nfs_wait_client_init_complete(const struct nfs_client *clp);
  extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
  extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
@@ -405,7 +407,7 @@ struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void *
  struct dentry * nfs_xdev_mount_common(struct file_system_type *, int,
                 const char *, struct nfs_mount_info *);
  void nfs_kill_super(struct super_block *);
-void nfs_fill_super(struct super_block *, struct nfs_mount_info *);
+int nfs_fill_super(struct super_block *, struct nfs_mount_info *);
  
  extern struct rpc_stat nfs_rpcstat;
  
@@ -456,7 +458,7 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
  extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
  
  /* super.c */
-void nfs_clone_super(struct super_block *, struct nfs_mount_info *);
+int nfs_clone_super(struct super_block *, struct nfs_mount_info *);
  void nfs_umount_begin(struct super_block *);
  int  nfs_statfs(struct dentry *, struct kstatfs *);
  int  nfs_show_options(struct seq_file *, struct dentry *);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c

index 5ae9d64ea08bc80c97c7c4c5b71ee73ef1a6ba8b..8346ccbf2d52e518b6fa61d0c8cbb3d033ec1f02 100644 (file)
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -1023,9 +1023,9 @@ static void nfs4_session_set_rwsize(struct nfs_server *server)
         server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead;
         server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead;
  
-       if (server->rsize > server_resp_sz)
+       if (!server->rsize || server->rsize > server_resp_sz)
                 server->rsize = server_resp_sz;
-       if (server->wsize > server_rqst_sz)
+       if (!server->wsize || server->wsize > server_rqst_sz)
                 server->wsize = server_rqst_sz;
  #endif /* CONFIG_NFS_V4_1 */
  }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c

index 1b183686c6d4f06c3b1d4ed044c527bff6ba4a83..201ca3f2c4bac14986220fcdf6a6c37b734ffa96 100644 (file)
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2258,8 +2258,6 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
         if ((mask & ~cache.mask & (MAY_READ | MAY_EXEC)) == 0)
                 return 0;
  
-       /* even though OPEN succeeded, access is denied. Close the file */
-       nfs4_close_state(state, fmode);
         return -EACCES;
  }
  
@@ -2444,17 +2442,14 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state)
         }
  
         nfs4_stateid_copy(&stateid, &delegation->stateid);
-       if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
+       if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) ||
+               !test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED,
+                       &delegation->flags)) {
                 rcu_read_unlock();
                 nfs_finish_clear_delegation_stateid(state, &stateid);
                 return;
         }
  
-       if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags)) {
-               rcu_read_unlock();
-               return;
-       }
-
         cred = get_rpccred(delegation->cred);
         rcu_read_unlock();
         status = nfs41_test_and_free_expired_stateid(server, &stateid, cred);
@@ -7427,11 +7422,11 @@ static void nfs4_exchange_id_release(void *data)
         struct nfs41_exchange_id_data *cdata =
                                         (struct nfs41_exchange_id_data *)data;
  
-       nfs_put_client(cdata->args.client);
         if (cdata->xprt) {
                 xprt_put(cdata->xprt);
                 rpc_clnt_xprt_switch_put(cdata->args.client->cl_rpcclient);
         }
+       nfs_put_client(cdata->args.client);
         kfree(cdata->res.impl_id);
         kfree(cdata->res.server_scope);
         kfree(cdata->res.server_owner);
@@ -7538,10 +7533,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
         task_setup_data.callback_data = calldata;
  
         task = rpc_run_task(&task_setup_data);
-       if (IS_ERR(task)) {
-       status = PTR_ERR(task);
-               goto out_impl_id;
-       }
+       if (IS_ERR(task))
+               return PTR_ERR(task);
  
         if (!xprt) {
                 status = rpc_wait_for_completion_task(task);
@@ -7569,6 +7562,7 @@ out_server_owner:
         kfree(calldata->res.server_owner);
  out_calldata:
         kfree(calldata);
+       nfs_put_client(clp);
         goto out;
  }
  
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c

index f0369e36275341404db0684aebb4e9bdba273205..80ce289eea05326336a7edecbe8a132ee4900d23 100644 (file)
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3942,7 +3942,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
                 if (len <= 0)
                         goto out;
                 dprintk("%s: name=%s\n", __func__, group_name->data);
-               return NFS_ATTR_FATTR_OWNER_NAME;
+               return NFS_ATTR_FATTR_GROUP_NAME;
         } else {
                 len = xdr_stream_decode_opaque_inline(xdr, (void **)&p,
                                 XDR_MAX_NETOBJ);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h

index 63f77b49a586a53a1abbcf7b517aa2a90f3ddb2e..590e1e35781f0b737b5b277d76ab56092f8e3f3b 100644 (file)
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -367,7 +367,7 @@ void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds);
  struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs,
                                       gfp_t gfp_flags);
  void nfs4_pnfs_v3_ds_connect_unload(void);
-void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
+int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
                           struct nfs4_deviceid_node *devid, unsigned int timeo,
                           unsigned int retrans, u32 version, u32 minor_version);
  struct nfs4_pnfs_ds_addr *nfs4_decode_mp_ds_addr(struct net *net,
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c

index 9414b492439fbf0e70d32f9238ac29b8e9cf50be..7250b95549ecc73bd1dbdae9ec909aac64f93a49 100644 (file)
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -745,15 +745,17 @@ out:
  /*
   * Create an rpc connection to the nfs4_pnfs_ds data server.
   * Currently only supports IPv4 and IPv6 addresses.
- * If connection fails, make devid unavailable.
+ * If connection fails, make devid unavailable and return a -errno.
   */
-void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
+int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
                           struct nfs4_deviceid_node *devid, unsigned int timeo,
                           unsigned int retrans, u32 version, u32 minor_version)
  {
-       if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
-               int err = 0;
+       int err;
  
+again:
+       err = 0;
+       if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
                 if (version == 3) {
                         err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo,
                                                        retrans);
@@ -766,12 +768,29 @@ void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
                         err = -EPROTONOSUPPORT;
                 }
  
-               if (err)
-                       nfs4_mark_deviceid_unavailable(devid);
                 nfs4_clear_ds_conn_bit(ds);
         } else {
                 nfs4_wait_ds_connect(ds);
+
+               /* what was waited on didn't connect AND didn't mark unavail */
+               if (!ds->ds_clp && !nfs4_test_deviceid_unavailable(devid))
+                       goto again;
         }
+
+       /*
+        * At this point the ds->ds_clp should be ready, but it might have
+        * hit an error.
+        */
+       if (!err) {
+               if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) {
+                       WARN_ON_ONCE(ds->ds_clp ||
+                               !nfs4_test_deviceid_unavailable(devid));
+                       return -EINVAL;
+               }
+               err = nfs_client_init_status(ds->ds_clp);
+       }
+
+       return err;
  }
  EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect);
  
diff --git a/fs/nfs/super.c b/fs/nfs/super.c

index 54e0f9f2dd94903d2d6d96de46287ebc31fe1b6e..dc69314d455e7091e189362edcb02bfdd2508d63 100644 (file)
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2315,18 +2315,17 @@ inline void nfs_initialise_sb(struct super_block *sb)
                 sb->s_blocksize = nfs_block_bits(server->wsize,
                                                  &sb->s_blocksize_bits);
  
-       sb->s_bdi = &server->backing_dev_info;
-
         nfs_super_set_maxbytes(sb, server->maxfilesize);
  }
  
  /*
   * Finish setting up an NFS2/3 superblock
   */
-void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info)
+int nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info)
  {
         struct nfs_parsed_mount_data *data = mount_info->parsed;
         struct nfs_server *server = NFS_SB(sb);
+       int ret;
  
         sb->s_blocksize_bits = 0;
         sb->s_blocksize = 0;
@@ -2344,13 +2343,21 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info)
         }
  
         nfs_initialise_sb(sb);
+
+       ret = super_setup_bdi_name(sb, "%u:%u", MAJOR(server->s_dev),
+                                  MINOR(server->s_dev));
+       if (ret)
+               return ret;
+       sb->s_bdi->ra_pages = server->rpages * NFS_MAX_READAHEAD;
+       return 0;
+
  }
  EXPORT_SYMBOL_GPL(nfs_fill_super);
  
  /*
   * Finish setting up a cloned NFS2/3/4 superblock
   */
-void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info)
+int nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info)
  {
         const struct super_block *old_sb = mount_info->cloned->sb;
         struct nfs_server *server = NFS_SB(sb);
@@ -2370,6 +2377,10 @@ void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info)
         }
  
         nfs_initialise_sb(sb);
+
+       sb->s_bdi = bdi_get(old_sb->s_bdi);
+
+       return 0;
  }
  
  static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags)
@@ -2522,11 +2533,6 @@ static void nfs_get_cache_cookie(struct super_block *sb,
  }
  #endif
  
-static int nfs_bdi_register(struct nfs_server *server)
-{
-       return bdi_register_dev(&server->backing_dev_info, server->s_dev);
-}
-
  int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot,
                         struct nfs_mount_info *mount_info)
  {
@@ -2594,17 +2600,14 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,
                 nfs_free_server(server);
                 server = NULL;
         } else {
-               error = nfs_bdi_register(server);
-               if (error) {
-                       mntroot = ERR_PTR(error);
-                       goto error_splat_super;
-               }
                 server->super = s;
         }
  
         if (!s->s_root) {
                 /* initial superblock/root creation */
-               mount_info->fill_super(s, mount_info);
+               error = mount_info->fill_super(s, mount_info);
+               if (error)
+                       goto error_splat_super;
                 nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned);
         }
  
diff --git a/fs/nfs/write.c b/fs/nfs/write.c

index e75b056f46f43583b84da4a423cbafedb850c630..cc341fc7fd44212d508022c87e9023ba291cfc7d 100644 (file)
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -263,16 +263,15 @@ int nfs_congestion_kb;
  
  static void nfs_set_page_writeback(struct page *page)
  {
-       struct nfs_server *nfss = NFS_SERVER(page_file_mapping(page)->host);
+       struct inode *inode = page_file_mapping(page)->host;
+       struct nfs_server *nfss = NFS_SERVER(inode);
         int ret = test_set_page_writeback(page);
  
         WARN_ON_ONCE(ret != 0);
  
         if (atomic_long_inc_return(&nfss->writeback) >
-                       NFS_CONGESTION_ON_THRESH) {
-               set_bdi_congested(&nfss->backing_dev_info,
-                                       BLK_RW_ASYNC);
-       }
+                       NFS_CONGESTION_ON_THRESH)
+               set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
  }
  
  static void nfs_end_page_writeback(struct nfs_page *req)
@@ -285,7 +284,7 @@ static void nfs_end_page_writeback(struct nfs_page *req)
  
         end_page_writeback(req->wb_page);
         if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
-               clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
+               clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
  }
  
  
@@ -1784,7 +1783,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
                         (long long)req_offset(req));
                 if (status < 0) {
                         nfs_context_set_write_error(req->wb_context, status);
-                       nfs_inode_remove_request(req);
+                       if (req->wb_page)
+                               nfs_inode_remove_request(req);
                         dprintk_cont(", error = %d\n", status);
                         goto next;
                 }
@@ -1793,7 +1793,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
                  * returned by the server against all stored verfs. */
                 if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) {
                         /* We have a match */
-                       nfs_inode_remove_request(req);
+                       if (req->wb_page)
+                               nfs_inode_remove_request(req);
                         dprintk_cont(" OK\n");
                         goto next;
                 }
@@ -1806,7 +1807,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
         }
         nfss = NFS_SERVER(data->inode);
         if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
-               clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
+               clear_bdi_congested(inode_to_bdi(data->inode), BLK_RW_ASYNC);
  
         nfs_init_cinfo(&cinfo, data->inode, data->dreq);
         nfs_commit_end(cinfo.mds);
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c

index 92b4b41d19d2a2e23b469ce993e3a9f159f5f578..fb5213afc854e2c28edafc238f374a5a93c01d9f 100644 (file)
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -242,10 +242,11 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
         req->cmd[4] = bufflen & 0xff;
         req->cmd_len = COMMAND_SIZE(INQUIRY);
  
-       error = blk_execute_rq(rq->q, NULL, rq, 1);
-       if (error) {
+       blk_execute_rq(rq->q, NULL, rq, 1);
+       if (req->result) {
                 pr_err("pNFS: INQUIRY 0x83 failed with: %x\n",
-                       rq->errors);
+                       req->result);
+               error = -EIO;
                 goto out_put_request;
         }
  
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c

index dba2ff8eaa68e3365deac3d61df3da5641099d9e..452334694a5d1f37cc480e5d1cf2873c4246019d 100644 (file)
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -358,6 +358,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
  {
         unsigned int len, v, hdr, dlen;
         u32 max_blocksize = svc_max_payload(rqstp);
+       struct kvec *head = rqstp->rq_arg.head;
+       struct kvec *tail = rqstp->rq_arg.tail;
  
         p = decode_fh(p, &args->fh);
         if (!p)
@@ -367,6 +369,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
         args->count = ntohl(*p++);
         args->stable = ntohl(*p++);
         len = args->len = ntohl(*p++);
+       if ((void *)p > head->iov_base + head->iov_len)
+               return 0;
         /*
          * The count must equal the amount of data passed.
          */
@@ -377,9 +381,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
          * Check to make sure that we got the right number of
          * bytes.
          */
-       hdr = (void*)p - rqstp->rq_arg.head[0].iov_base;
-       dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len
-               + rqstp->rq_arg.tail[0].iov_len - hdr;
+       hdr = (void*)p - head->iov_base;
+       dlen = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len - hdr;
         /*
          * Round the length of the data which was specified up to
          * the next multiple of XDR units and then compare that
@@ -396,7 +399,7 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
                 len = args->len = max_blocksize;
         }
         rqstp->rq_vec[0].iov_base = (void*)p;
-       rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr;
+       rqstp->rq_vec[0].iov_len = head->iov_len - hdr;
         v = 0;
         while (len > rqstp->rq_vec[v].iov_len) {
                 len -= rqstp->rq_vec[v].iov_len;
@@ -471,6 +474,8 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
         /* first copy and check from the first page */
         old = (char*)p;
         vec = &rqstp->rq_arg.head[0];
+       if ((void *)old > vec->iov_base + vec->iov_len)
+               return 0;
         avail = vec->iov_len - (old - (char*)vec->iov_base);
         while (len && avail && *old) {
                 *new++ = *old++;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c

index cbeeda1e94a2fbbba61e2adeeb4f9ba89287eaf9..d86031b6ad79301c8ca0ceec9c8b991dd5db70f1 100644 (file)
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2489,7 +2489,7 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
  
  int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op)
  {
-       if (op->opnum == OP_ILLEGAL)
+       if (op->opnum == OP_ILLEGAL || op->status == nfserr_notsupp)
                 return op_encode_hdr_size * sizeof(__be32);
  
         BUG_ON(OPDESC(op)->op_rsize_bop == NULL);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c

index 73e75ac905258c17bdc107c0c071e8d14df739f0..8bf8f667a8cf2fe8359f74b41497bc9436ca0efb 100644 (file)
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -538,13 +538,21 @@ out_free:
  
  static ssize_t
  nfsd_print_version_support(char *buf, int remaining, const char *sep,
-               unsigned vers, unsigned minor)
+               unsigned vers, int minor)
  {
-       const char *format = (minor == 0) ? "%s%c%u" : "%s%c%u.%u";
+       const char *format = minor < 0 ? "%s%c%u" : "%s%c%u.%u";
         bool supported = !!nfsd_vers(vers, NFSD_TEST);
  
-       if (vers == 4 && !nfsd_minorversion(minor, NFSD_TEST))
+       if (vers == 4 && minor >= 0 &&
+           !nfsd_minorversion(minor, NFSD_TEST))
                 supported = false;
+       if (minor == 0 && supported)
+               /*
+                * special case for backward compatibility.
+                * +4.0 is never reported, it is implied by
+                * +4, unless -4.0 is present.
+                */
+               return 0;
         return snprintf(buf, remaining, format, sep,
                         supported ? '+' : '-', vers, minor);
  }
@@ -554,7 +562,6 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
         char *mesg = buf;
         char *vers, *minorp, sign;
         int len, num, remaining;
-       unsigned minor;
         ssize_t tlen = 0;
         char *sep;
         struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id);
@@ -575,6 +582,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
                 if (len <= 0) return -EINVAL;
                 do {
                         enum vers_op cmd;
+                       unsigned minor;
                         sign = *vers;
                         if (sign == '+' || sign == '-')
                                 num = simple_strtol((vers+1), &minorp, 0);
@@ -585,8 +593,8 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
                                         return -EINVAL;
                                 if (kstrtouint(minorp+1, 0, &minor) < 0)
                                         return -EINVAL;
-                       } else
-                               minor = 0;
+                       }
+
                         cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET;
                         switch(num) {
                         case 2:
@@ -594,8 +602,20 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
                                 nfsd_vers(num, cmd);
                                 break;
                         case 4:
-                               if (nfsd_minorversion(minor, cmd) >= 0)
-                                       break;
+                               if (*minorp == '.') {
+                                       if (nfsd_minorversion(minor, cmd) < 0)
+                                               return -EINVAL;
+                               } else if ((cmd == NFSD_SET) != nfsd_vers(num, NFSD_TEST)) {
+                                       /*
+                                        * Either we have +4 and no minors are enabled,
+                                        * or we have -4 and at least one minor is enabled.
+                                        * In either case, propagate 'cmd' to all minors.
+                                        */
+                                       minor = 0;
+                                       while (nfsd_minorversion(minor, cmd) >= 0)
+                                               minor++;
+                               }
+                               break;
                         default:
                                 return -EINVAL;
                         }
@@ -612,9 +632,11 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
         sep = "";
         remaining = SIMPLE_TRANSACTION_LIMIT;
         for (num=2 ; num <= 4 ; num++) {
+               int minor;
                 if (!nfsd_vers(num, NFSD_AVAIL))
                         continue;
-               minor = 0;
+
+               minor = -1;
                 do {
                         len = nfsd_print_version_support(buf, remaining,
                                         sep, num, minor);
@@ -624,7 +646,8 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
                         buf += len;
                         tlen += len;
                         minor++;
-                       sep = " ";
+                       if (len)
+                               sep = " ";
                 } while (num == 4 && minor <= NFSD_SUPPORTED_MINOR_VERSION);
         }
  out:
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c

index fa82b7707e8531f9b7e8065391c3f54387c2740d..03a7e9da4da0225e58fd53c4589ed682e0732141 100644 (file)
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -786,6 +786,7 @@ nfserrno (int errno)
                 { nfserr_serverfault, -ESERVERFAULT },
                 { nfserr_serverfault, -ENFILE },
                 { nfserr_io, -EUCLEAN },
+               { nfserr_perm, -ENOKEY },
         };
         int     i;
  
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c

index 786a4a2cb2d7a96cbde629c17b6ac58ab82bc84e..59979f0bbd4bf255f5ea6f8fbd33cb9b2a5aa073 100644 (file)
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -167,7 +167,8 @@ nfsd_adjust_nfsd_versions4(void)
  
  int nfsd_minorversion(u32 minorversion, enum vers_op change)
  {
-       if (minorversion > NFSD_SUPPORTED_MINOR_VERSION)
+       if (minorversion > NFSD_SUPPORTED_MINOR_VERSION &&
+           change != NFSD_AVAIL)
                 return -1;
         switch(change) {
         case NFSD_SET:
@@ -415,23 +416,20 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
  
  void nfsd_reset_versions(void)
  {
-       int found_one = 0;
         int i;
  
-       for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) {
-               if (nfsd_program.pg_vers[i])
-                       found_one = 1;
-       }
+       for (i = 0; i < NFSD_NRVERS; i++)
+               if (nfsd_vers(i, NFSD_TEST))
+                       return;
  
-       if (!found_one) {
-               for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++)
-                       nfsd_program.pg_vers[i] = nfsd_version[i];
-#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
-               for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++)
-                       nfsd_acl_program.pg_vers[i] =
-                               nfsd_acl_version[i];
-#endif
-       }
+       for (i = 0; i < NFSD_NRVERS; i++)
+               if (i != 4)
+                       nfsd_vers(i, NFSD_SET);
+               else {
+                       int minor = 0;
+                       while (nfsd_minorversion(minor, NFSD_SET) >= 0)
+                               minor++;
+               }
  }
  
  /*
@@ -749,6 +747,37 @@ static __be32 map_new_errors(u32 vers, __be32 nfserr)
         return nfserr;
  }
  
+/*
+ * A write procedure can have a large argument, and a read procedure can
+ * have a large reply, but no NFSv2 or NFSv3 procedure has argument and
+ * reply that can both be larger than a page.  The xdr code has taken
+ * advantage of this assumption to be sloppy about bounds checking in
+ * some cases.  Pending a rewrite of the NFSv2/v3 xdr code to fix that
+ * problem, we enforce these assumptions here:
+ */
+static bool nfs_request_too_big(struct svc_rqst *rqstp,
+                               struct svc_procedure *proc)
+{
+       /*
+        * The ACL code has more careful bounds-checking and is not
+        * susceptible to this problem:
+        */
+       if (rqstp->rq_prog != NFS_PROGRAM)
+               return false;
+       /*
+        * Ditto NFSv4 (which can in theory have argument and reply both
+        * more than a page):
+        */
+       if (rqstp->rq_vers >= 4)
+               return false;
+       /* The reply will be small, we're OK: */
+       if (proc->pc_xdrressize > 0 &&
+           proc->pc_xdrressize < XDR_QUADLEN(PAGE_SIZE))
+               return false;
+
+       return rqstp->rq_arg.len > PAGE_SIZE;
+}
+
  int
  nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
  {
@@ -761,6 +790,11 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
                                 rqstp->rq_vers, rqstp->rq_proc);
         proc = rqstp->rq_procinfo;
  
+       if (nfs_request_too_big(rqstp, proc)) {
+               dprintk("nfsd: NFSv%d argument too large\n", rqstp->rq_vers);
+               *statp = rpc_garbage_args;
+               return 1;
+       }
         /*
          * Give the xdr decoder a chance to change this if it wants
          * (necessary in the NFSv4.0 compound case)
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c

index 41b468a6a90f807fe3f3d2e4ceeaa9f8c7ae0f8c..de07ff625777820fefc98bfa56adea81962e8135 100644 (file)
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -280,6 +280,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
                                         struct nfsd_writeargs *args)
  {
         unsigned int len, hdr, dlen;
+       struct kvec *head = rqstp->rq_arg.head;
         int v;
  
         p = decode_fh(p, &args->fh);
@@ -300,9 +301,10 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
          * Check to make sure that we got the right number of
          * bytes.
          */
-       hdr = (void*)p - rqstp->rq_arg.head[0].iov_base;
-       dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len
-               - hdr;
+       hdr = (void*)p - head->iov_base;
+       if (hdr > head->iov_len)
+               return 0;
+       dlen = head->iov_len + rqstp->rq_arg.page_len - hdr;
  
         /*
          * Round the length of the data which was specified up to
@@ -316,7 +318,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
                 return 0;
  
         rqstp->rq_vec[0].iov_base = (void*)p;
-       rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr;
+       rqstp->rq_vec[0].iov_len = head->iov_len - hdr;
         v = 0;
         while (len > rqstp->rq_vec[v].iov_len) {
                 len -= rqstp->rq_vec[v].iov_len;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c

index e1872f36147f590765cf1d7548cce2e0d5243752..926682981d61f0bd1671e9f49c1a0a8b80e72f03 100644 (file)
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1068,7 +1068,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
         sb->s_time_gran = 1;
         sb->s_max_links = NILFS_LINK_MAX;
  
-       sb->s_bdi = bdev_get_queue(sb->s_bdev)->backing_dev_info;
+       sb->s_bdi = bdi_get(sb->s_bdev->bd_bdi);
  
         err = load_nilfs(nilfs, sb);
         if (err)
diff --git a/fs/nsfs.c b/fs/nsfs.c

index 1656843e87d2bef47b69d65b23c23946e8936f33..323f492e0822dd3286365d5cdbe56c59ec2d5463 100644 (file)
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -91,6 +91,7 @@ slow:
                 return ERR_PTR(-ENOMEM);
         }
         d_instantiate(dentry, inode);
+       dentry->d_flags |= DCACHE_RCUACCESS;
         dentry->d_fsdata = (void *)ns->ops;
         d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry);
         if (d) {
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c

index 4348027384f5edf06a66dd417214b9bbd3dd05cd..d0ab7e56d0b41a7a97f3640eb0ab4801f747dffc 100644 (file)
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1863,7 +1863,7 @@ static int o2net_accept_one(struct socket *sock, int *more)
  
         new_sock->type = sock->type;
         new_sock->ops = sock->ops;
-       ret = sock->ops->accept(sock, new_sock, O_NONBLOCK);
+       ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, false);
         if (ret < 0)
                 goto out;
  
diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c

index c4ab6fdf17a01426db5d6e2bb9638130147bee61..e1534c9bab16ce69e30eefd6614ca2872595fb04 100644 (file)
--- a/fs/orangefs/devorangefs-req.c
+++ b/fs/orangefs/devorangefs-req.c
@@ -208,14 +208,19 @@ restart:
                                 continue;
                         /*
                          * Skip ops whose filesystem we don't know about unless
-                        * it is being mounted.
+                        * it is being mounted or unmounted.  It is possible for
+                        * a filesystem we don't know about to be unmounted if
+                        * it fails to mount in the kernel after userspace has
+                        * been sent the mount request.
                          */
                         /* XXX: is there a better way to detect this? */
                         } else if (ret == -1 &&
                                    !(op->upcall.type ==
                                         ORANGEFS_VFS_OP_FS_MOUNT ||
                                      op->upcall.type ==
-                                       ORANGEFS_VFS_OP_GETATTR)) {
+                                       ORANGEFS_VFS_OP_GETATTR ||
+                                    op->upcall.type ==
+                                       ORANGEFS_VFS_OP_FS_UMOUNT)) {
                                 gossip_debug(GOSSIP_DEV_DEBUG,
                                     "orangefs: skipping op tag %llu %s\n",
                                     llu(op->tag), get_opname_string(op));
diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c

index 6333cbbdfef7ae652c1a4e6c4d2818ae1cd188d7..83b506020718980a69e5285a8c86496c14a2d050 100644 (file)
--- a/fs/orangefs/orangefs-bufmap.c
+++ b/fs/orangefs/orangefs-bufmap.c
@@ -521,13 +521,11 @@ int orangefs_bufmap_copy_from_iovec(struct iov_iter *iter,
                 size_t n = size;
                 if (n > PAGE_SIZE)
                         n = PAGE_SIZE;
-               n = copy_page_from_iter(page, 0, n, iter);
-               if (!n)
+               if (copy_page_from_iter(page, 0, n, iter) != n)
                         return -EFAULT;
                 size -= n;
         }
         return 0;
-
  }
  
  /*
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h

index 5e48a0be976194f466b654fc1aa11dc670cdd084..8afac46fcc87a1e1d3ea8c658e0ae60c336c3d5d 100644 (file)
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -249,6 +249,7 @@ struct orangefs_sb_info_s {
         char devname[ORANGEFS_MAX_SERVER_ADDR_LEN];
         struct super_block *sb;
         int mount_pending;
+       int no_list;
         struct list_head list;
  };
  
diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c

index 67c24351a67f8d38e7e4eb1b95d94b9b80cf12a2..629d8c917fa679886715360fcfe8f6cee1b5a37f 100644 (file)
--- a/fs/orangefs/super.c
+++ b/fs/orangefs/super.c
@@ -263,8 +263,13 @@ int orangefs_remount(struct orangefs_sb_info_s *orangefs_sb)
                 if (!new_op)
                         return -ENOMEM;
                 new_op->upcall.req.features.features = 0;
-               ret = service_operation(new_op, "orangefs_features", 0);
-               orangefs_features = new_op->downcall.resp.features.features;
+               ret = service_operation(new_op, "orangefs_features",
+                   ORANGEFS_OP_PRIORITY | ORANGEFS_OP_NO_MUTEX);
+               if (!ret)
+                       orangefs_features =
+                           new_op->downcall.resp.features.features;
+               else
+                       orangefs_features = 0;
                 op_release(new_op);
         } else {
                 orangefs_features = 0;
@@ -488,7 +493,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
  
         if (ret) {
                 d = ERR_PTR(ret);
-               goto free_op;
+               goto free_sb_and_op;
         }
  
         /*
@@ -514,6 +519,9 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
         spin_unlock(&orangefs_superblocks_lock);
         op_release(new_op);
  
+       /* Must be removed from the list now. */
+       ORANGEFS_SB(sb)->no_list = 0;
+
         if (orangefs_userspace_version >= 20906) {
                 new_op = op_alloc(ORANGEFS_VFS_OP_FEATURES);
                 if (!new_op)
@@ -528,6 +536,10 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
  
         return dget(sb->s_root);
  
+free_sb_and_op:
+       /* Will call orangefs_kill_sb with sb not in list. */
+       ORANGEFS_SB(sb)->no_list = 1;
+       deactivate_locked_super(sb);
  free_op:
         gossip_err("orangefs_mount: mount request failed with %d\n", ret);
         if (ret == -EINVAL) {
@@ -553,12 +565,14 @@ void orangefs_kill_sb(struct super_block *sb)
          */
          orangefs_unmount_sb(sb);
  
-       /* remove the sb from our list of orangefs specific sb's */
-
-       spin_lock(&orangefs_superblocks_lock);
-       __list_del_entry(&ORANGEFS_SB(sb)->list);       /* not list_del_init */
-       ORANGEFS_SB(sb)->list.prev = NULL;
-       spin_unlock(&orangefs_superblocks_lock);
+       if (!ORANGEFS_SB(sb)->no_list) {
+               /* remove the sb from our list of orangefs specific sb's */
+               spin_lock(&orangefs_superblocks_lock);
+               /* not list_del_init */
+               __list_del_entry(&ORANGEFS_SB(sb)->list);
+               ORANGEFS_SB(sb)->list.prev = NULL;
+               spin_unlock(&orangefs_superblocks_lock);
+       }
  
         /*
          * make sure that ORANGEFS_DEV_REMOUNT_ALL loop that might've seen us
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c

index 1953986ee6bc221f555f4c53f8129f9c865f91e5..6e610a205e1556477ba80e512f1243629c193141 100644 (file)
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -12,7 +12,6 @@
  #include <linux/slab.h>
  #include <linux/cred.h>
  #include <linux/xattr.h>
-#include <linux/sched/signal.h>
  #include "overlayfs.h"
  #include "ovl_entry.h"
  
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c

index 8f91ec66baa3261299430c47fce487bf5164246c..d04ea43499096e4fc02eec0f363683e3713f65ba 100644 (file)
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1074,6 +1074,7 @@ static int sysctl_check_table(const char *path, struct ctl_table *table)
  
                 if ((table->proc_handler == proc_dostring) ||
                     (table->proc_handler == proc_dointvec) ||
+                   (table->proc_handler == proc_douintvec) ||
                     (table->proc_handler == proc_dointvec_minmax) ||
                     (table->proc_handler == proc_dointvec_jiffies) ||
                     (table->proc_handler == proc_dointvec_userhz_jiffies) ||
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c

index f08bd31c1081cc0536602db9d865f4bf491440c9..312578089544dbd652aac303d0cbabee8fbcb968 100644 (file)
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -900,7 +900,14 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
  static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
                 unsigned long addr, pmd_t *pmdp)
  {
-       pmd_t pmd = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp);
+       pmd_t pmd = *pmdp;
+
+       /* See comment in change_huge_pmd() */
+       pmdp_invalidate(vma, addr, pmdp);
+       if (pmd_dirty(*pmdp))
+               pmd = pmd_mkdirty(pmd);
+       if (pmd_young(*pmdp))
+               pmd = pmd_mkyoung(pmd);
  
         pmd = pmd_wrprotect(pmd);
         pmd = pmd_clear_soft_dirty(pmd);
diff --git a/fs/stat.c b/fs/stat.c

index fa0be59340cc9136eb8c508434e10289acc8fc97..a257b872a53d1105a797c75b14f13c31a442a12b 100644 (file)
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -130,9 +130,13 @@ EXPORT_SYMBOL(vfs_getattr);
  int vfs_statx_fd(unsigned int fd, struct kstat *stat,
                  u32 request_mask, unsigned int query_flags)
  {
-       struct fd f = fdget_raw(fd);
+       struct fd f;
         int error = -EBADF;
  
+       if (query_flags & ~KSTAT_QUERY_FLAGS)
+               return -EINVAL;
+
+       f = fdget_raw(fd);
         if (f.file) {
                 error = vfs_getattr(&f.file->f_path, stat,
                                     request_mask, query_flags);
@@ -155,9 +159,6 @@ EXPORT_SYMBOL(vfs_statx_fd);
   * Additionally, the use of AT_SYMLINK_NOFOLLOW in flags will prevent a symlink
   * at the given name from being referenced.
   *
- * The caller must have preset stat->request_mask as for vfs_getattr().  The
- * flags are also used to load up stat->query_flags.
- *
   * 0 will be returned on success, and a -ve error code if unsuccessful.
   */
  int vfs_statx(int dfd, const char __user *filename, int flags,
@@ -509,58 +510,50 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename,
  }
  #endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */
  
-static inline int __put_timestamp(struct timespec *kts,
-                                 struct statx_timestamp __user *uts)
-{
-       return (__put_user(kts->tv_sec,         &uts->tv_sec            ) ||
-               __put_user(kts->tv_nsec,        &uts->tv_nsec           ) ||
-               __put_user(0,                   &uts->__reserved        ));
-}
-
-/*
- * Set the statx results.
- */
-static long statx_set_result(struct kstat *stat, struct statx __user *buffer)
+static noinline_for_stack int
+cp_statx(const struct kstat *stat, struct statx __user *buffer)
  {
-       uid_t uid = from_kuid_munged(current_user_ns(), stat->uid);
-       gid_t gid = from_kgid_munged(current_user_ns(), stat->gid);
-
-       if (__put_user(stat->result_mask,       &buffer->stx_mask       ) ||
-           __put_user(stat->mode,              &buffer->stx_mode       ) ||
-           __clear_user(&buffer->__spare0, sizeof(buffer->__spare0))     ||
-           __put_user(stat->nlink,             &buffer->stx_nlink      ) ||
-           __put_user(uid,                     &buffer->stx_uid        ) ||
-           __put_user(gid,                     &buffer->stx_gid        ) ||
-           __put_user(stat->attributes,        &buffer->stx_attributes ) ||
-           __put_user(stat->blksize,           &buffer->stx_blksize    ) ||
-           __put_user(MAJOR(stat->rdev),       &buffer->stx_rdev_major ) ||
-           __put_user(MINOR(stat->rdev),       &buffer->stx_rdev_minor ) ||
-           __put_user(MAJOR(stat->dev),        &buffer->stx_dev_major  ) ||
-           __put_user(MINOR(stat->dev),        &buffer->stx_dev_minor  ) ||
-           __put_timestamp(&stat->atime,       &buffer->stx_atime      ) ||
-           __put_timestamp(&stat->btime,       &buffer->stx_btime      ) ||
-           __put_timestamp(&stat->ctime,       &buffer->stx_ctime      ) ||
-           __put_timestamp(&stat->mtime,       &buffer->stx_mtime      ) ||
-           __put_user(stat->ino,               &buffer->stx_ino        ) ||
-           __put_user(stat->size,              &buffer->stx_size       ) ||
-           __put_user(stat->blocks,            &buffer->stx_blocks     ) ||
-           __clear_user(&buffer->__spare1, sizeof(buffer->__spare1))     ||
-           __clear_user(&buffer->__spare2, sizeof(buffer->__spare2)))
-               return -EFAULT;
-
-       return 0;
+       struct statx tmp;
+
+       memset(&tmp, 0, sizeof(tmp));
+
+       tmp.stx_mask = stat->result_mask;
+       tmp.stx_blksize = stat->blksize;
+       tmp.stx_attributes = stat->attributes;
+       tmp.stx_nlink = stat->nlink;
+       tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid);
+       tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid);
+       tmp.stx_mode = stat->mode;
+       tmp.stx_ino = stat->ino;
+       tmp.stx_size = stat->size;
+       tmp.stx_blocks = stat->blocks;
+       tmp.stx_attributes_mask = stat->attributes_mask;
+       tmp.stx_atime.tv_sec = stat->atime.tv_sec;
+       tmp.stx_atime.tv_nsec = stat->atime.tv_nsec;
+       tmp.stx_btime.tv_sec = stat->btime.tv_sec;
+       tmp.stx_btime.tv_nsec = stat->btime.tv_nsec;
+       tmp.stx_ctime.tv_sec = stat->ctime.tv_sec;
+       tmp.stx_ctime.tv_nsec = stat->ctime.tv_nsec;
+       tmp.stx_mtime.tv_sec = stat->mtime.tv_sec;
+       tmp.stx_mtime.tv_nsec = stat->mtime.tv_nsec;
+       tmp.stx_rdev_major = MAJOR(stat->rdev);
+       tmp.stx_rdev_minor = MINOR(stat->rdev);
+       tmp.stx_dev_major = MAJOR(stat->dev);
+       tmp.stx_dev_minor = MINOR(stat->dev);
+
+       return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
  }
  
  /**
   * sys_statx - System call to get enhanced stats
   * @dfd: Base directory to pathwalk from *or* fd to stat.
- * @filename: File to stat *or* NULL.
+ * @filename: File to stat or "" with AT_EMPTY_PATH
   * @flags: AT_* flags to control pathwalk.
   * @mask: Parts of statx struct actually required.
   * @buffer: Result buffer.
   *
- * Note that if filename is NULL, then it does the equivalent of fstat() using
- * dfd to indicate the file of interest.
+ * Note that fstat() can be emulated by setting dfd to the fd of interest,
+ * supplying "" as the filename and setting AT_EMPTY_PATH in the flags.
   */
  SYSCALL_DEFINE5(statx,
                 int, dfd, const char __user *, filename, unsigned, flags,
@@ -570,18 +563,16 @@ SYSCALL_DEFINE5(statx,
         struct kstat stat;
         int error;
  
+       if (mask & STATX__RESERVED)
+               return -EINVAL;
         if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE)
                 return -EINVAL;
-       if (!access_ok(VERIFY_WRITE, buffer, sizeof(*buffer)))
-               return -EFAULT;
  
-       if (filename)
-               error = vfs_statx(dfd, filename, flags, &stat, mask);
-       else
-               error = vfs_statx_fd(dfd, &stat, mask, flags);
+       error = vfs_statx(dfd, filename, flags, &stat, mask);
         if (error)
                 return error;
-       return statx_set_result(&stat, buffer);
+
+       return cp_statx(&stat, buffer);
  }
  
  /* Caller is here responsible for sufficient locking (ie. inode->i_lock) */
diff --git a/fs/super.c b/fs/super.c

index b8b6a086c03b9a5478dc6a67e0b5054127032f47..adb0c0de428c2c88c0322ae739cbe108f01ae085 100644 (file)
--- a/fs/super.c
+++ b/fs/super.c
@@ -446,6 +446,10 @@ void generic_shutdown_super(struct super_block *sb)
         hlist_del_init(&sb->s_instances);
         spin_unlock(&sb_lock);
         up_write(&sb->s_umount);
+       if (sb->s_bdi != &noop_backing_dev_info) {
+               bdi_put(sb->s_bdi);
+               sb->s_bdi = &noop_backing_dev_info;
+       }
  }
  
  EXPORT_SYMBOL(generic_shutdown_super);
@@ -1049,12 +1053,8 @@ static int set_bdev_super(struct super_block *s, void *data)
  {
         s->s_bdev = data;
         s->s_dev = s->s_bdev->bd_dev;
+       s->s_bdi = bdi_get(s->s_bdev->bd_bdi);
  
-       /*
-        * We set the bdi here to the queue backing, file systems can
-        * overwrite this in ->fill_super()
-        */
-       s->s_bdi = bdev_get_queue(s->s_bdev)->backing_dev_info;
         return 0;
  }
  
@@ -1255,6 +1255,49 @@ out:
         return ERR_PTR(error);
  }
  
+/*
+ * Setup private BDI for given superblock. It gets automatically cleaned up
+ * in generic_shutdown_super().
+ */
+int super_setup_bdi_name(struct super_block *sb, char *fmt, ...)
+{
+       struct backing_dev_info *bdi;
+       int err;
+       va_list args;
+
+       bdi = bdi_alloc(GFP_KERNEL);
+       if (!bdi)
+               return -ENOMEM;
+
+       bdi->name = sb->s_type->name;
+
+       va_start(args, fmt);
+       err = bdi_register_va(bdi, fmt, args);
+       va_end(args);
+       if (err) {
+               bdi_put(bdi);
+               return err;
+       }
+       WARN_ON(sb->s_bdi != &noop_backing_dev_info);
+       sb->s_bdi = bdi;
+
+       return 0;
+}
+EXPORT_SYMBOL(super_setup_bdi_name);
+
+/*
+ * Setup private BDI for given superblock. It gets automatically cleaned up
+ * in generic_shutdown_super().
+ */
+int super_setup_bdi(struct super_block *sb)
+{
+       static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
+
+       return super_setup_bdi_name(sb, "%.28s-%ld", sb->s_type->name,
+                                   atomic_long_inc_return(&bdi_seq));
+}
+EXPORT_SYMBOL(super_setup_bdi);
+
  /*
   * This is an internal function, please use sb_end_{write,pagefault,intwrite}
   * instead.
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c

index b803213d1307e9137c3bfe5e04ee4ac5bd396cce..39c75a86c67f1d24dc14bc79ce0d0f9d7dccef2b 100644 (file)
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -108,7 +108,7 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf,
  {
         const struct sysfs_ops *ops = sysfs_file_ops(of->kn);
         struct kobject *kobj = of->kn->parent->priv;
-       size_t len;
+       ssize_t len;
  
         /*
          * If buf != of->prealloc_buf, we don't know how
@@ -117,13 +117,15 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf,
         if (WARN_ON_ONCE(buf != of->prealloc_buf))
                 return 0;
         len = ops->show(kobj, of->kn->priv, buf);
+       if (len < 0)
+               return len;
         if (pos) {
                 if (len <= pos)
                         return 0;
                 len -= pos;
                 memmove(buf, buf + pos, len);
         }
-       return min(count, len);
+       return min_t(ssize_t, count, len);
  }
  
  /* kernfs write callback for regular sysfs files */
diff --git a/fs/timerfd.c b/fs/timerfd.c

index 384fa759a563341b309df47537f9c94da0ee9ebb..c543cdb5f8ed9b803eea973dbb5ed68cfe395e1f 100644 (file)
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -400,9 +400,9 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
              clockid != CLOCK_BOOTTIME_ALARM))
                 return -EINVAL;
  
-       if (!capable(CAP_WAKE_ALARM) &&
-           (clockid == CLOCK_REALTIME_ALARM ||
-            clockid == CLOCK_BOOTTIME_ALARM))
+       if ((clockid == CLOCK_REALTIME_ALARM ||
+            clockid == CLOCK_BOOTTIME_ALARM) &&
+           !capable(CAP_WAKE_ALARM))
                 return -EPERM;
  
         ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -449,7 +449,7 @@ static int do_timerfd_settime(int ufd, int flags,
                 return ret;
         ctx = f.file->private_data;
  
-       if (!capable(CAP_WAKE_ALARM) && isalarm(ctx)) {
+       if (isalarm(ctx) && !capable(CAP_WAKE_ALARM)) {
                 fdput(f);
                 return -EPERM;
         }
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c

index 1e712a36468064a75910e8ba9c0e1cfb2bd3ab28..718b749fa11aa8901544a1e6925a7486bbb357f5 100644 (file)
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -32,6 +32,7 @@
  #include <linux/math64.h>
  #include <linux/uaccess.h>
  #include <linux/random.h>
+#include <linux/ctype.h>
  #include "ubifs.h"
  
  static DEFINE_SPINLOCK(dbg_lock);
@@ -286,8 +287,10 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode)
                         break;
                 }
  
-               pr_err("\t%d: %s (%s)\n",
-                      count++, dent->name, get_dent_type(dent->type));
+               pr_err("\t%d: inode %llu, type %s, len %d\n",
+                      count++, (unsigned long long) le64_to_cpu(dent->inum),
+                      get_dent_type(dent->type),
+                      le16_to_cpu(dent->nlen));
  
                 fname_name(&nm) = dent->name;
                 fname_len(&nm) = le16_to_cpu(dent->nlen);
@@ -464,7 +467,8 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
                         pr_err("(bad name length, not printing, bad or corrupted node)");
                 else {
                         for (i = 0; i < nlen && dent->name[i]; i++)
-                               pr_cont("%c", dent->name[i]);
+                               pr_cont("%c", isprint(dent->name[i]) ?
+                                       dent->name[i] : '?');
                 }
                 pr_cont("\n");
  
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c

index 30825d882aa94a4c2486d47581f33d1dfca1a409..b777bddaa1dda9f2768952dc562e06df6b068d32 100644 (file)
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -606,8 +606,8 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
         }
  
         while (1) {
-               dbg_gen("feed '%s', ino %llu, new f_pos %#x",
-                       dent->name, (unsigned long long)le64_to_cpu(dent->inum),
+               dbg_gen("ino %llu, new f_pos %#x",
+                       (unsigned long long)le64_to_cpu(dent->inum),
                         key_hash_flash(c, &dent->key));
                 ubifs_assert(le64_to_cpu(dent->ch.sqnum) >
                              ubifs_inode(dir)->creat_sqnum);
@@ -748,6 +748,11 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
                 goto out_fname;
  
         lock_2_inodes(dir, inode);
+
+       /* Handle O_TMPFILE corner case, it is allowed to link an O_TMPFILE. */
+       if (inode->i_nlink == 0)
+               ubifs_delete_orphan(c, inode->i_ino);
+
         inc_nlink(inode);
         ihold(inode);
         inode->i_ctime = ubifs_current_time(inode);
@@ -768,6 +773,8 @@ out_cancel:
         dir->i_size -= sz_change;
         dir_ui->ui_size = dir->i_size;
         drop_nlink(inode);
+       if (inode->i_nlink == 0)
+               ubifs_add_orphan(c, inode->i_ino);
         unlock_2_inodes(dir, inode);
         ubifs_release_budget(c, &req);
         iput(inode);
@@ -1068,8 +1075,10 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
         }
  
         err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm);
-       if (err)
+       if (err) {
+               kfree(dev);
                 goto out_budg;
+       }
  
         sz_change = CALC_DENT_SIZE(fname_len(&nm));
  
@@ -1316,9 +1325,6 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
         unsigned int uninitialized_var(saved_nlink);
         struct fscrypt_name old_nm, new_nm;
  
-       if (flags & ~RENAME_NOREPLACE)
-               return -EINVAL;
-
         /*
          * Budget request settings: deletion direntry, new direntry, removing
          * the old inode, and changing old and new parent directory inodes.
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c

index b73811bd7676d6221243f21d52b06d9efed7cf4a..cf4cc99b75b55531789a03b065d86f521489f67e 100644 (file)
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1827,7 +1827,6 @@ static void ubifs_put_super(struct super_block *sb)
         }
  
         ubifs_umount(c);
-       bdi_destroy(&c->bdi);
         ubi_close_volume(c->ubi);
         mutex_unlock(&c->umount_mutex);
  }
@@ -2019,29 +2018,25 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
                 goto out;
         }
  
+       err = ubifs_parse_options(c, data, 0);
+       if (err)
+               goto out_close;
+
         /*
          * UBIFS provides 'backing_dev_info' in order to disable read-ahead. For
          * UBIFS, I/O is not deferred, it is done immediately in readpage,
          * which means the user would have to wait not just for their own I/O
          * but the read-ahead I/O as well i.e. completely pointless.
          *
-        * Read-ahead will be disabled because @c->bdi.ra_pages is 0.
+        * Read-ahead will be disabled because @sb->s_bdi->ra_pages is 0. Also
+        * @sb->s_bdi->capabilities are initialized to 0 so there won't be any
+        * writeback happening.
          */
-       c->bdi.name = "ubifs",
-       c->bdi.capabilities = 0;
-       err  = bdi_init(&c->bdi);
+       err = super_setup_bdi_name(sb, "ubifs_%d_%d", c->vi.ubi_num,
+                                  c->vi.vol_id);
         if (err)
                 goto out_close;
-       err = bdi_register(&c->bdi, NULL, "ubifs_%d_%d",
-                          c->vi.ubi_num, c->vi.vol_id);
-       if (err)
-               goto out_bdi;
-
-       err = ubifs_parse_options(c, data, 0);
-       if (err)
-               goto out_bdi;
  
-       sb->s_bdi = &c->bdi;
         sb->s_fs_info = c;
         sb->s_magic = UBIFS_SUPER_MAGIC;
         sb->s_blocksize = UBIFS_BLOCK_SIZE;
@@ -2080,8 +2075,6 @@ out_umount:
         ubifs_umount(c);
  out_unlock:
         mutex_unlock(&c->umount_mutex);
-out_bdi:
-       bdi_destroy(&c->bdi);
  out_close:
         ubi_close_volume(c->ubi);
  out:
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h

index 4d57e488038e342f3b5ed84f5554862a86e440f3..4da10a6d702a0330cbc223b535df7a2be2be3d52 100644 (file)
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -972,7 +972,6 @@ struct ubifs_debug_info;
   * struct ubifs_info - UBIFS file-system description data structure
   * (per-superblock).
   * @vfs_sb: VFS @struct super_block object
- * @bdi: backing device info object to make VFS happy and disable read-ahead
   *
   * @highest_inum: highest used inode number
   * @max_sqnum: current global sequence number
@@ -1220,7 +1219,6 @@ struct ubifs_debug_info;
   */
  struct ubifs_info {
         struct super_block *vfs_sb;
-       struct backing_dev_info bdi;
  
         ino_t highest_inum;
         unsigned long long max_sqnum;
@@ -1461,7 +1459,6 @@ extern const struct inode_operations ubifs_file_inode_operations;
  extern const struct file_operations ubifs_dir_operations;
  extern const struct inode_operations ubifs_dir_inode_operations;
  extern const struct inode_operations ubifs_symlink_inode_operations;
-extern struct backing_dev_info ubifs_backing_dev_info;
  extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
  
  /* io.c */
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c

index 973607df579db324e64da1c3c26999d60853fa80..f7555fc25877435e13b65cbe597ae9bdb11c6528 100644 (file)
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -138,8 +138,6 @@ out:
   * userfaultfd_ctx_get - Acquires a reference to the internal userfaultfd
   * context.
   * @ctx: [in] Pointer to the userfaultfd context.
- *
- * Returns: In case of success, returns not zero.
   */
  static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx)
  {
@@ -267,6 +265,7 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
  {
         struct mm_struct *mm = ctx->mm;
         pgd_t *pgd;
+       p4d_t *p4d;
         pud_t *pud;
         pmd_t *pmd, _pmd;
         pte_t *pte;
@@ -277,7 +276,10 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
         pgd = pgd_offset(mm, address);
         if (!pgd_present(*pgd))
                 goto out;
-       pud = pud_offset(pgd, address);
+       p4d = p4d_offset(pgd, address);
+       if (!p4d_present(*p4d))
+               goto out;
+       pud = pud_offset(p4d, address);
         if (!pud_present(*pud))
                 goto out;
         pmd = pmd_offset(pud, address);
@@ -490,7 +492,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
                          * in such case.
                          */
                         down_read(&mm->mmap_sem);
-                       ret = 0;
+                       ret = VM_FAULT_NOPAGE;
                 }
         }
  
@@ -527,10 +529,11 @@ out:
         return ret;
  }
  
-static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
-                                            struct userfaultfd_wait_queue *ewq)
+static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
+                                             struct userfaultfd_wait_queue *ewq)
  {
-       int ret = 0;
+       if (WARN_ON_ONCE(current->flags & PF_EXITING))
+               goto out;
  
         ewq->ctx = ctx;
         init_waitqueue_entry(&ewq->wq, current);
@@ -547,8 +550,16 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
                         break;
                 if (ACCESS_ONCE(ctx->released) ||
                     fatal_signal_pending(current)) {
-                       ret = -1;
                         __remove_wait_queue(&ctx->event_wqh, &ewq->wq);
+                       if (ewq->msg.event == UFFD_EVENT_FORK) {
+                               struct userfaultfd_ctx *new;
+
+                               new = (struct userfaultfd_ctx *)
+                                       (unsigned long)
+                                       ewq->msg.arg.reserved.reserved1;
+
+                               userfaultfd_ctx_put(new);
+                       }
                         break;
                 }
  
@@ -566,9 +577,8 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
          * ctx may go away after this if the userfault pseudo fd is
          * already released.
          */
-
+out:
         userfaultfd_ctx_put(ctx);
-       return ret;
  }
  
  static void userfaultfd_event_complete(struct userfaultfd_ctx *ctx,
@@ -626,7 +636,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
         return 0;
  }
  
-static int dup_fctx(struct userfaultfd_fork_ctx *fctx)
+static void dup_fctx(struct userfaultfd_fork_ctx *fctx)
  {
         struct userfaultfd_ctx *ctx = fctx->orig;
         struct userfaultfd_wait_queue ewq;
@@ -636,17 +646,15 @@ static int dup_fctx(struct userfaultfd_fork_ctx *fctx)
         ewq.msg.event = UFFD_EVENT_FORK;
         ewq.msg.arg.reserved.reserved1 = (unsigned long)fctx->new;
  
-       return userfaultfd_event_wait_completion(ctx, &ewq);
+       userfaultfd_event_wait_completion(ctx, &ewq);
  }
  
  void dup_userfaultfd_complete(struct list_head *fcs)
  {
-       int ret = 0;
         struct userfaultfd_fork_ctx *fctx, *n;
  
         list_for_each_entry_safe(fctx, n, fcs, list) {
-               if (!ret)
-                       ret = dup_fctx(fctx);
+               dup_fctx(fctx);
                 list_del(&fctx->list);
                 kfree(fctx);
         }
@@ -689,8 +697,7 @@ void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx,
         userfaultfd_event_wait_completion(ctx, &ewq);
  }
  
-void userfaultfd_remove(struct vm_area_struct *vma,
-                       struct vm_area_struct **prev,
+bool userfaultfd_remove(struct vm_area_struct *vma,
                         unsigned long start, unsigned long end)
  {
         struct mm_struct *mm = vma->vm_mm;
@@ -699,13 +706,11 @@ void userfaultfd_remove(struct vm_area_struct *vma,
  
         ctx = vma->vm_userfaultfd_ctx.ctx;
         if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE))
-               return;
+               return true;
  
         userfaultfd_ctx_get(ctx);
         up_read(&mm->mmap_sem);
  
-       *prev = NULL; /* We wait for ACK w/o the mmap semaphore */
-
         msg_init(&ewq.msg);
  
         ewq.msg.event = UFFD_EVENT_REMOVE;
@@ -714,7 +719,7 @@ void userfaultfd_remove(struct vm_area_struct *vma,
  
         userfaultfd_event_wait_completion(ctx, &ewq);
  
-       down_read(&mm->mmap_sem);
+       return false;
  }
  
  static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps,
@@ -775,34 +780,6 @@ void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf)
         }
  }
  
-void userfaultfd_exit(struct mm_struct *mm)
-{
-       struct vm_area_struct *vma = mm->mmap;
-
-       /*
-        * We can do the vma walk without locking because the caller
-        * (exit_mm) knows it now has exclusive access
-        */
-       while (vma) {
-               struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
-
-               if (ctx && (ctx->features & UFFD_FEATURE_EVENT_EXIT)) {
-                       struct userfaultfd_wait_queue ewq;
-
-                       userfaultfd_ctx_get(ctx);
-
-                       msg_init(&ewq.msg);
-                       ewq.msg.event = UFFD_EVENT_EXIT;
-
-                       userfaultfd_event_wait_completion(ctx, &ewq);
-
-                       ctx->features &= ~UFFD_FEATURE_EVENT_EXIT;
-               }
-
-               vma = vma->vm_next;
-       }
-}
-
  static int userfaultfd_release(struct inode *inode, struct file *file)
  {
         struct userfaultfd_ctx *ctx = file->private_data;
@@ -1779,7 +1756,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
          *      protocols: aa:... bb:...
          */
         seq_printf(m, "pending:\t%lu\ntotal:\t%lu\nAPI:\t%Lx:%x:%Lx\n",
-                  pending, total, UFFD_API, UFFD_API_FEATURES,
+                  pending, total, UFFD_API, ctx->features,
                    UFFD_API_IOCTLS|UFFD_API_RANGE_IOCTLS);
  }
  #endif
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c

index 2dfdc62f795e63177e3f2f58306840656644492d..70a5b55e0870a0523c0dd8ce629debf2fccebe25 100644 (file)
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -25,24 +25,6 @@
  #include "kmem.h"
  #include "xfs_message.h"
  
-/*
- * Greedy allocation.  May fail and may return vmalloced memory.
- */
-void *
-kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
-{
-       void            *ptr;
-       size_t          kmsize = maxsize;
-
-       while (!(ptr = vzalloc(kmsize))) {
-               if ((kmsize >>= 1) <= minsize)
-                       kmsize = minsize;
-       }
-       if (ptr)
-               *size = kmsize;
-       return ptr;
-}
-
  void *
  kmem_alloc(size_t size, xfs_km_flags_t flags)
  {
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h

index 689f746224e7ab8a0fbf3d2f9acb4f1dd68a9a16..f0fc84fcaac2553283f90bc3f157b924bd03d932 100644 (file)
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -69,8 +69,6 @@ static inline void  kmem_free(const void *ptr)
  }
  
  
-extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
-
  static inline void *
  kmem_zalloc(size_t size, xfs_km_flags_t flags)
  {
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c

index a9c66d47757a757324e5fbf4224883e1d369588a..9bd104f32908962046af6d2dd4437a045fecdb36 100644 (file)
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -763,8 +763,8 @@ xfs_bmap_extents_to_btree(
                 args.type = XFS_ALLOCTYPE_START_BNO;
                 args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
         } else if (dfops->dop_low) {
-try_another_ag:
                 args.type = XFS_ALLOCTYPE_START_BNO;
+try_another_ag:
                 args.fsbno = *firstblock;
         } else {
                 args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -790,13 +790,17 @@ try_another_ag:
         if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
             args.fsbno == NULLFSBLOCK &&
             args.type == XFS_ALLOCTYPE_NEAR_BNO) {
-               dfops->dop_low = true;
+               args.type = XFS_ALLOCTYPE_FIRST_AG;
                 goto try_another_ag;
         }
+       if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
+               xfs_iroot_realloc(ip, -1, whichfork);
+               xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+               return -ENOSPC;
+       }
         /*
          * Allocation can't fail, the space was reserved.
          */
-       ASSERT(args.fsbno != NULLFSBLOCK);
         ASSERT(*firstblock == NULLFSBLOCK ||
                args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock));
         *firstblock = cur->bc_private.b.firstblock = args.fsbno;
@@ -4150,6 +4154,19 @@ xfs_bmapi_read(
         return 0;
  }
  
+/*
+ * Add a delayed allocation extent to an inode. Blocks are reserved from the
+ * global pool and the extent inserted into the inode in-core extent tree.
+ *
+ * On entry, got refers to the first extent beyond the offset of the extent to
+ * allocate or eof is specified if no such extent exists. On return, got refers
+ * to the extent record that was inserted to the inode fork.
+ *
+ * Note that the allocated extent may have been merged with contiguous extents
+ * during insertion into the inode fork. Thus, got does not reflect the current
+ * state of the inode fork on return. If necessary, the caller can use lastx to
+ * look up the updated record in the inode fork.
+ */
  int
  xfs_bmapi_reserve_delalloc(
         struct xfs_inode        *ip,
@@ -4236,13 +4253,8 @@ xfs_bmapi_reserve_delalloc(
         got->br_startblock = nullstartblock(indlen);
         got->br_blockcount = alen;
         got->br_state = XFS_EXT_NORM;
-       xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
  
-       /*
-        * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
-        * might have merged it into one of the neighbouring ones.
-        */
-       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
+       xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
  
         /*
          * Tag the inode if blocks were preallocated. Note that COW fork
@@ -4254,10 +4266,6 @@ xfs_bmapi_reserve_delalloc(
         if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
                 xfs_inode_set_cowblocks_tag(ip);
  
-       ASSERT(got->br_startoff <= aoff);
-       ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
-       ASSERT(isnullstartblock(got->br_startblock));
-       ASSERT(got->br_state == XFS_EXT_NORM);
         return 0;
  
  out_unreserve_blocks:
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c

index f93072b58a58323ae952d55d568a9e53384f88d3..fd55db47938562868d25d4998407ce7650c4da4f 100644 (file)
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -447,8 +447,8 @@ xfs_bmbt_alloc_block(
  
         if (args.fsbno == NULLFSBLOCK) {
                 args.fsbno = be64_to_cpu(start->l);
-try_another_ag:
                 args.type = XFS_ALLOCTYPE_START_BNO;
+try_another_ag:
                 /*
                  * Make sure there is sufficient room left in the AG to
                  * complete a full tree split for an extent insert.  If
@@ -488,8 +488,8 @@ try_another_ag:
         if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
             args.fsbno == NULLFSBLOCK &&
             args.type == XFS_ALLOCTYPE_NEAR_BNO) {
-               cur->bc_private.b.dfops->dop_low = true;
                 args.fsbno = cur->bc_private.b.firstblock;
+               args.type = XFS_ALLOCTYPE_FIRST_AG;
                 goto try_another_ag;
         }
  
@@ -506,7 +506,7 @@ try_another_ag:
                         goto error0;
                 cur->bc_private.b.dfops->dop_low = true;
         }
-       if (args.fsbno == NULLFSBLOCK) {
+       if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
                 *stat = 0;
                 return 0;
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h

index d04547fcf274af0eaee18096c94b22652551b9f7..39f8604f764e13147fd576e95f5ad30f98dc4d1e 100644 (file)
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -125,6 +125,7 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
  extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
  extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
  extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
+extern int xfs_dir2_sf_verify(struct xfs_inode *ip);
  
  /* xfs_dir2_readdir.c */
  extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx,
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c

index c6809ff41197d934c068e84b19eb77986bc7dccf..e84af093b2ab99e5d7a75467bc6d7a490682611a 100644 (file)
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -629,6 +629,112 @@ xfs_dir2_sf_check(
  }
  #endif /* DEBUG */
  
+/* Verify the consistency of an inline directory. */
+int
+xfs_dir2_sf_verify(
+       struct xfs_inode                *ip)
+{
+       struct xfs_mount                *mp = ip->i_mount;
+       struct xfs_dir2_sf_hdr          *sfp;
+       struct xfs_dir2_sf_entry        *sfep;
+       struct xfs_dir2_sf_entry        *next_sfep;
+       char                            *endp;
+       const struct xfs_dir_ops        *dops;
+       struct xfs_ifork                *ifp;
+       xfs_ino_t                       ino;
+       int                             i;
+       int                             i8count;
+       int                             offset;
+       int                             size;
+       int                             error;
+       __uint8_t                       filetype;
+
+       ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
+       /*
+        * xfs_iread calls us before xfs_setup_inode sets up ip->d_ops,
+        * so we can only trust the mountpoint to have the right pointer.
+        */
+       dops = xfs_dir_get_ops(mp, NULL);
+
+       ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+       sfp = (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data;
+       size = ifp->if_bytes;
+
+       /*
+        * Give up if the directory is way too short.
+        */
+       if (size <= offsetof(struct xfs_dir2_sf_hdr, parent) ||
+           size < xfs_dir2_sf_hdr_size(sfp->i8count))
+               return -EFSCORRUPTED;
+
+       endp = (char *)sfp + size;
+
+       /* Check .. entry */
+       ino = dops->sf_get_parent_ino(sfp);
+       i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
+       error = xfs_dir_ino_validate(mp, ino);
+       if (error)
+               return error;
+       offset = dops->data_first_offset;
+
+       /* Check all reported entries */
+       sfep = xfs_dir2_sf_firstentry(sfp);
+       for (i = 0; i < sfp->count; i++) {
+               /*
+                * struct xfs_dir2_sf_entry has a variable length.
+                * Check the fixed-offset parts of the structure are
+                * within the data buffer.
+                */
+               if (((char *)sfep + sizeof(*sfep)) >= endp)
+                       return -EFSCORRUPTED;
+
+               /* Don't allow names with known bad length. */
+               if (sfep->namelen == 0)
+                       return -EFSCORRUPTED;
+
+               /*
+                * Check that the variable-length part of the structure is
+                * within the data buffer.  The next entry starts after the
+                * name component, so nextentry is an acceptable test.
+                */
+               next_sfep = dops->sf_nextentry(sfp, sfep);
+               if (endp < (char *)next_sfep)
+                       return -EFSCORRUPTED;
+
+               /* Check that the offsets always increase. */
+               if (xfs_dir2_sf_get_offset(sfep) < offset)
+                       return -EFSCORRUPTED;
+
+               /* Check the inode number. */
+               ino = dops->sf_get_ino(sfp, sfep);
+               i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
+               error = xfs_dir_ino_validate(mp, ino);
+               if (error)
+                       return error;
+
+               /* Check the file type. */
+               filetype = dops->sf_get_ftype(sfep);
+               if (filetype >= XFS_DIR3_FT_MAX)
+                       return -EFSCORRUPTED;
+
+               offset = xfs_dir2_sf_get_offset(sfep) +
+                               dops->data_entsize(sfep->namelen);
+
+               sfep = next_sfep;
+       }
+       if (i8count != sfp->i8count)
+               return -EFSCORRUPTED;
+       if ((void *)sfep != (void *)endp)
+               return -EFSCORRUPTED;
+
+       /* Make sure this whole thing ought to be in local format. */
+       if (offset + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+           (uint)sizeof(xfs_dir2_block_tail_t) > mp->m_dir_geo->blksize)
+               return -EFSCORRUPTED;
+
+       return 0;
+}
+
  /*
   * Create a new (shortform) directory.
   */
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c

index 25c1e078aef6a5925c12f2cc91b0d18b8b38711b..8a37efe04de3235d72357914647e5f480cbbd512 100644 (file)
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -33,6 +33,8 @@
  #include "xfs_trace.h"
  #include "xfs_attr_sf.h"
  #include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_priv.h"
  
  kmem_zone_t *xfs_ifork_zone;
  
@@ -210,6 +212,16 @@ xfs_iformat_fork(
         if (error)
                 return error;
  
+       /* Check inline dir contents. */
+       if (S_ISDIR(VFS_I(ip)->i_mode) &&
+           dip->di_format == XFS_DINODE_FMT_LOCAL) {
+               error = xfs_dir2_sf_verify(ip);
+               if (error) {
+                       xfs_idestroy_fork(ip, XFS_DATA_FORK);
+                       return error;
+               }
+       }
+
         if (xfs_is_reflink_inode(ip)) {
                 ASSERT(ip->i_cowfp == NULL);
                 xfs_ifork_init_cow(ip);
@@ -320,7 +332,6 @@ xfs_iformat_local(
         int             whichfork,
         int             size)
  {
-
         /*
          * If the size is unreasonable, then something
          * is wrong and we just bail out rather than crash in
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c

index bf65a9ea864293d48e5326178336680c2eb29758..61494295d92fe1acb7d343bc3a4e1594f09027ab 100644 (file)
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -274,54 +274,49 @@ xfs_end_io(
         struct xfs_ioend        *ioend =
                 container_of(work, struct xfs_ioend, io_work);
         struct xfs_inode        *ip = XFS_I(ioend->io_inode);
+       xfs_off_t               offset = ioend->io_offset;
+       size_t                  size = ioend->io_size;
         int                     error = ioend->io_bio->bi_error;
  
         /*
-        * Set an error if the mount has shut down and proceed with end I/O
-        * processing so it can perform whatever cleanups are necessary.
+        * Just clean up the in-memory structures if the fs has been shut down.
          */
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
                 error = -EIO;
+               goto done;
+       }
  
         /*
-        * For a CoW extent, we need to move the mapping from the CoW fork
-        * to the data fork.  If instead an error happened, just dump the
-        * new blocks.
+        * Clean up any COW blocks on an I/O error.
          */
-       if (ioend->io_type == XFS_IO_COW) {
-               if (error)
-                       goto done;
-               if (ioend->io_bio->bi_error) {
-                       error = xfs_reflink_cancel_cow_range(ip,
-                                       ioend->io_offset, ioend->io_size);
-                       goto done;
+       if (unlikely(error)) {
+               switch (ioend->io_type) {
+               case XFS_IO_COW:
+                       xfs_reflink_cancel_cow_range(ip, offset, size, true);
+                       break;
                 }
-               error = xfs_reflink_end_cow(ip, ioend->io_offset,
-                               ioend->io_size);
-               if (error)
-                       goto done;
+
+               goto done;
         }
  
         /*
-        * For unwritten extents we need to issue transactions to convert a
-        * range to normal written extens after the data I/O has finished.
-        * Detecting and handling completion IO errors is done individually
-        * for each case as different cleanup operations need to be performed
-        * on error.
+        * Success:  commit the COW or unwritten blocks if needed.
          */
-       if (ioend->io_type == XFS_IO_UNWRITTEN) {
-               if (error)
-                       goto done;
-               error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
-                                                 ioend->io_size);
-       } else if (ioend->io_append_trans) {
-               error = xfs_setfilesize_ioend(ioend, error);
-       } else {
-               ASSERT(!xfs_ioend_is_append(ioend) ||
-                      ioend->io_type == XFS_IO_COW);
+       switch (ioend->io_type) {
+       case XFS_IO_COW:
+               error = xfs_reflink_end_cow(ip, offset, size);
+               break;
+       case XFS_IO_UNWRITTEN:
+               error = xfs_iomap_write_unwritten(ip, offset, size);
+               break;
+       default:
+               ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
+               break;
         }
  
  done:
+       if (ioend->io_append_trans)
+               error = xfs_setfilesize_ioend(ioend, error);
         xfs_destroy_ioend(ioend, error);
  }
  
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c

index 8b75dcea596680f1332412870546181d2f0a579c..8795e9cd867cdafba1e84c3313e33a41532b6132 100644 (file)
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -81,7 +81,7 @@ xfs_zero_extent(
         return blkdev_issue_zeroout(xfs_find_bdev_for_inode(VFS_I(ip)),
                 block << (mp->m_super->s_blocksize_bits - 9),
                 count_fsb << (mp->m_super->s_blocksize_bits - 9),
-               GFP_NOFS, true);
+               GFP_NOFS, 0);
  }
  
  int
@@ -1311,8 +1311,16 @@ xfs_free_file_space(
         /*
          * Now that we've unmap all full blocks we'll have to zero out any
          * partial block at the beginning and/or end.  xfs_zero_range is
-        * smart enough to skip any holes, including those we just created.
+        * smart enough to skip any holes, including those we just created,
+        * but we must take care not to zero beyond EOF and enlarge i_size.
          */
+
+       if (offset >= XFS_ISIZE(ip))
+               return 0;
+
+       if (offset + len > XFS_ISIZE(ip))
+               len = XFS_ISIZE(ip) - offset;
+
         return xfs_zero_range(ip, offset, len, NULL);
  }
  
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c

index 003a99b83bd8845e22d6311be1d474679521242d..ad9396e516f6e389b88bca5dc2dc41d3372ed714 100644 (file)
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -71,22 +71,11 @@ xfs_dir2_sf_getdents(
         struct xfs_da_geometry  *geo = args->geo;
  
         ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
-       /*
-        * Give up if the directory is way too short.
-        */
-       if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
-               ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
-               return -EIO;
-       }
-
         ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
         ASSERT(dp->i_df.if_u1.if_data != NULL);
  
         sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
  
-       if (dp->i_d.di_size < xfs_dir2_sf_hdr_size(sfp->i8count))
-               return -EFSCORRUPTED;
-
         /*
          * If the block number in the offset is out of range, we're done.
          */
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c

index 7234b9748c36e048b15b376e4408ef3626422cf4..3531f8f72fa5e10b83f0fa8bd37afc560b2dbf0a 100644 (file)
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1608,7 +1608,7 @@ xfs_inode_free_cowblocks(
         xfs_ilock(ip, XFS_IOLOCK_EXCL);
         xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
  
-       ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF);
+       ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
  
         xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
         xfs_iunlock(ip, XFS_IOLOCK_EXCL);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c

index edfa6a55b0646d0d444ea0b2c12e46a62a2c4474..7605d83965963566e1d6acef679591cf9787d161 100644 (file)
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -50,6 +50,7 @@
  #include "xfs_log.h"
  #include "xfs_bmap_btree.h"
  #include "xfs_reflink.h"
+#include "xfs_dir2_priv.h"
  
  kmem_zone_t *xfs_inode_zone;
  
@@ -1615,7 +1616,7 @@ xfs_itruncate_extents(
  
         /* Remove all pending CoW reservations. */
         error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block,
-                       last_block);
+                       last_block, true);
         if (error)
                 goto out;
  
@@ -3546,6 +3547,12 @@ xfs_iflush_int(
         if (ip->i_d.di_version < 3)
                 ip->i_d.di_flushiter++;
  
+       /* Check the inline directory data. */
+       if (S_ISDIR(VFS_I(ip)->i_mode) &&
+           ip->i_d.di_format == XFS_DINODE_FMT_LOCAL &&
+           xfs_dir2_sf_verify(ip))
+               goto corrupt_out;
+
         /*
          * Copy the dirty parts of the inode into the on-disk inode.  We always
          * copy out the core of the inode, because if the inode is dirty at all
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c

index 41662fb14e87d8b1546c42d6a65508db5c5a76bf..288ee5b840d738116b8981e9618fac36fb24614f 100644 (file)
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -630,6 +630,11 @@ retry:
                 goto out_unlock;
         }
  
+       /*
+        * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
+        * them out if the write happens to fail.
+        */
+       iomap->flags = IOMAP_F_NEW;
         trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
  done:
         if (isnullstartblock(got.br_startblock))
@@ -1071,16 +1076,22 @@ xfs_file_iomap_end_delalloc(
         struct xfs_inode        *ip,
         loff_t                  offset,
         loff_t                  length,
-       ssize_t                 written)
+       ssize_t                 written,
+       struct iomap            *iomap)
  {
         struct xfs_mount        *mp = ip->i_mount;
         xfs_fileoff_t           start_fsb;
         xfs_fileoff_t           end_fsb;
         int                     error = 0;
  
-       /* behave as if the write failed if drop writes is enabled */
-       if (xfs_mp_drop_writes(mp))
+       /*
+        * Behave as if the write failed if drop writes is enabled. Set the NEW
+        * flag to force delalloc cleanup.
+        */
+       if (xfs_mp_drop_writes(mp)) {
+               iomap->flags |= IOMAP_F_NEW;
                 written = 0;
+       }
  
         /*
          * start_fsb refers to the first unused block after a short write. If
@@ -1094,14 +1105,14 @@ xfs_file_iomap_end_delalloc(
         end_fsb = XFS_B_TO_FSB(mp, offset + length);
  
         /*
-        * Trim back delalloc blocks if we didn't manage to write the whole
-        * range reserved.
+        * Trim delalloc blocks if they were allocated by this write and we
+        * didn't manage to write the whole range.
          *
          * We don't need to care about racing delalloc as we hold i_mutex
          * across the reserve/allocate/unreserve calls. If there are delalloc
          * blocks in the range, they are ours.
          */
-       if (start_fsb < end_fsb) {
+       if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) {
                 truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb),
                                          XFS_FSB_TO_B(mp, end_fsb) - 1);
  
@@ -1131,7 +1142,7 @@ xfs_file_iomap_end(
  {
         if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
                 return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
-                               length, written);
+                               length, written, iomap);
         return 0;
  }
  
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c

index 229cc6a6d8ef03dc599b5195c5052796e2e19ba7..ebfc13350f9ae8b43b0229948588192560b51683 100644 (file)
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -516,6 +516,20 @@ xfs_vn_getattr(
         stat->blocks =
                 XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
  
+       if (ip->i_d.di_version == 3) {
+               if (request_mask & STATX_BTIME) {
+                       stat->result_mask |= STATX_BTIME;
+                       stat->btime.tv_sec = ip->i_d.di_crtime.t_sec;
+                       stat->btime.tv_nsec = ip->i_d.di_crtime.t_nsec;
+               }
+       }
+
+       if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
+               stat->attributes |= STATX_ATTR_IMMUTABLE;
+       if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
+               stat->attributes |= STATX_ATTR_APPEND;
+       if (ip->i_d.di_flags & XFS_DIFLAG_NODUMP)
+               stat->attributes |= STATX_ATTR_NODUMP;
  
         switch (inode->i_mode & S_IFMT) {
         case S_IFBLK:
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c

index 66e881790c17109496e21bd5e2d7d21f5ecc7fe5..26d67ce3c18d901a85e94836b9e823a5806ae807 100644 (file)
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -361,7 +361,6 @@ xfs_bulkstat(
         xfs_agino_t             agino;  /* inode # in allocation group */
         xfs_agnumber_t          agno;   /* allocation group number */
         xfs_btree_cur_t         *cur;   /* btree cursor for ialloc btree */
-       size_t                  irbsize; /* size of irec buffer in bytes */
         xfs_inobt_rec_incore_t  *irbuf; /* start of irec buffer */
         int                     nirbuf; /* size of irbuf */
         int                     ubcount; /* size of user's buffer */
@@ -388,11 +387,10 @@ xfs_bulkstat(
         *ubcountp = 0;
         *done = 0;
  
-       irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
+       irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP);
         if (!irbuf)
                 return -ENOMEM;
-
-       nirbuf = irbsize / sizeof(*irbuf);
+       nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf);
  
         /*
          * Loop over the allocation groups, starting from the last
@@ -585,7 +583,7 @@ xfs_inumbers(
                 return error;
  
         bcount = MIN(left, (int)(PAGE_SIZE / sizeof(*buffer)));
-       buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP);
+       buffer = kmem_zalloc(bcount * sizeof(*buffer), KM_SLEEP);
         do {
                 struct xfs_inobt_rec_incore     r;
                 int                             stat;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c

index 450bde68bb7528d70a47e0b38275ca75c7e757a1..688ebff1f66384a309cca74539cbe4d27172b177 100644 (file)
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -513,8 +513,7 @@ STATIC void
  xfs_set_inoalignment(xfs_mount_t *mp)
  {
         if (xfs_sb_version_hasalign(&mp->m_sb) &&
-           mp->m_sb.sb_inoalignmt >=
-           XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
+               mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
                 mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
         else
                 mp->m_inoalign_mask = 0;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c

index da6d08fb359c8efdf42a53e283bb030780b5b064..4a84c5ea266d8f8fcec61aa55776fec339d27aaf 100644 (file)
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -548,14 +548,18 @@ xfs_reflink_trim_irec_to_next_cow(
  }
  
  /*
- * Cancel all pending CoW reservations for some block range of an inode.
+ * Cancel CoW reservations for some block range of an inode.
+ *
+ * If cancel_real is true this function cancels all COW fork extents for the
+ * inode; if cancel_real is false, real extents are not cleared.
   */
  int
  xfs_reflink_cancel_cow_blocks(
         struct xfs_inode                *ip,
         struct xfs_trans                **tpp,
         xfs_fileoff_t                   offset_fsb,
-       xfs_fileoff_t                   end_fsb)
+       xfs_fileoff_t                   end_fsb,
+       bool                            cancel_real)
  {
         struct xfs_ifork                *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
         struct xfs_bmbt_irec            got, del;
@@ -579,7 +583,7 @@ xfs_reflink_cancel_cow_blocks(
                                         &idx, &got, &del);
                         if (error)
                                 break;
-               } else {
+               } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
                         xfs_trans_ijoin(*tpp, ip, 0);
                         xfs_defer_init(&dfops, &firstfsb);
  
@@ -621,13 +625,17 @@ xfs_reflink_cancel_cow_blocks(
  }
  
  /*
- * Cancel all pending CoW reservations for some byte range of an inode.
+ * Cancel CoW reservations for some byte range of an inode.
+ *
+ * If cancel_real is true this function cancels all COW fork extents for the
+ * inode; if cancel_real is false, real extents are not cleared.
   */
  int
  xfs_reflink_cancel_cow_range(
         struct xfs_inode        *ip,
         xfs_off_t               offset,
-       xfs_off_t               count)
+       xfs_off_t               count,
+       bool                    cancel_real)
  {
         struct xfs_trans        *tp;
         xfs_fileoff_t           offset_fsb;
@@ -653,7 +661,8 @@ xfs_reflink_cancel_cow_range(
         xfs_trans_ijoin(tp, ip, 0);
  
         /* Scrape out the old CoW reservations */
-       error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb);
+       error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb,
+                       cancel_real);
         if (error)
                 goto out_cancel;
  
@@ -1450,7 +1459,7 @@ next:
          * We didn't find any shared blocks so turn off the reflink flag.
          * First, get rid of any leftover CoW mappings.
          */
-       error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF);
+       error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true);
         if (error)
                 return error;
  
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h

index 33ac9b8db68380185ad80073b1890cc70e4e3b09..d29a7967f0290ecb8b4ca7c4d4077723262c8ba2 100644 (file)
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -39,9 +39,9 @@ extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
  
  extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip,
                 struct xfs_trans **tpp, xfs_fileoff_t offset_fsb,
-               xfs_fileoff_t end_fsb);
+               xfs_fileoff_t end_fsb, bool cancel_real);
  extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
-               xfs_off_t count);
+               xfs_off_t count, bool cancel_real);
  extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
                 xfs_off_t count);
  extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c

index 890862f2447c193f374b4de64c58940521b203fb..685c042a120f16a8a9a8dad69d8ee7ce6f9274a0 100644 (file)
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -953,7 +953,7 @@ xfs_fs_destroy_inode(
         XFS_STATS_INC(ip->i_mount, vn_remove);
  
         if (xfs_is_reflink_inode(ip)) {
-               error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF);
+               error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
                 if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount))
                         xfs_warn(ip->i_mount,
  "Error %d while evicting CoW blocks for inode %llu.",
diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h

index 5bdab6bffd23cfc708ad80d6a04b5ad3690bd74d..928fd66b12712241d100c5f4c9d3a857b9eabf1b 100644 (file)
--- a/include/asm-generic/4level-fixup.h
+++ b/include/asm-generic/4level-fixup.h
@@ -15,7 +15,6 @@
         ((unlikely(pgd_none(*(pud))) && __pmd_alloc(mm, pud, address))? \
                 NULL: pmd_offset(pud, address))
  
-#define pud_alloc(mm, pgd, address)    (pgd)
  #define pud_offset(pgd, start)         (pgd)
  #define pud_none(pud)                  0
  #define pud_bad(pud)                   0
@@ -35,4 +34,6 @@
  #undef  pud_addr_end
  #define pud_addr_end(addr, end)                (end)
  
+#include <asm-generic/5level-fixup.h>
+
  #endif
diff --git a/include/asm-generic/5level-fixup.h b/include/asm-generic/5level-fixup.h

new file mode 100644 (file)

index 0000000..b5ca82d
--- /dev/null
+++ b/include/asm-generic/5level-fixup.h
@@ -0,0 +1,41 @@
+#ifndef _5LEVEL_FIXUP_H
+#define _5LEVEL_FIXUP_H
+
+#define __ARCH_HAS_5LEVEL_HACK
+#define __PAGETABLE_P4D_FOLDED
+
+#define P4D_SHIFT                      PGDIR_SHIFT
+#define P4D_SIZE                       PGDIR_SIZE
+#define P4D_MASK                       PGDIR_MASK
+#define PTRS_PER_P4D                   1
+
+#define p4d_t                          pgd_t
+
+#define pud_alloc(mm, p4d, address) \
+       ((unlikely(pgd_none(*(p4d))) && __pud_alloc(mm, p4d, address)) ? \
+               NULL : pud_offset(p4d, address))
+
+#define p4d_alloc(mm, pgd, address)    (pgd)
+#define p4d_offset(pgd, start)         (pgd)
+#define p4d_none(p4d)                  0
+#define p4d_bad(p4d)                   0
+#define p4d_present(p4d)               1
+#define p4d_ERROR(p4d)                 do { } while (0)
+#define p4d_clear(p4d)                 pgd_clear(p4d)
+#define p4d_val(p4d)                   pgd_val(p4d)
+#define p4d_populate(mm, p4d, pud)     pgd_populate(mm, p4d, pud)
+#define p4d_page(p4d)                  pgd_page(p4d)
+#define p4d_page_vaddr(p4d)            pgd_page_vaddr(p4d)
+
+#define __p4d(x)                       __pgd(x)
+#define set_p4d(p4dp, p4d)             set_pgd(p4dp, p4d)
+
+#undef p4d_free_tlb
+#define p4d_free_tlb(tlb, x, addr)     do { } while (0)
+#define p4d_free(mm, x)                        do { } while (0)
+#define __p4d_free_tlb(tlb, x, addr)   do { } while (0)
+
+#undef  p4d_addr_end
+#define p4d_addr_end(addr, end)                (end)
+
+#endif
diff --git a/include/asm-generic/pgtable-nop4d-hack.h b/include/asm-generic/pgtable-nop4d-hack.h

new file mode 100644 (file)

index 0000000..752fb75
--- /dev/null
+++ b/include/asm-generic/pgtable-nop4d-hack.h
@@ -0,0 +1,62 @@
+#ifndef _PGTABLE_NOP4D_HACK_H
+#define _PGTABLE_NOP4D_HACK_H
+
+#ifndef __ASSEMBLY__
+#include <asm-generic/5level-fixup.h>
+
+#define __PAGETABLE_PUD_FOLDED
+
+/*
+ * Having the pud type consist of a pgd gets the size right, and allows
+ * us to conceptually access the pgd entry that this pud is folded into
+ * without casting.
+ */
+typedef struct { pgd_t pgd; } pud_t;
+
+#define PUD_SHIFT      PGDIR_SHIFT
+#define PTRS_PER_PUD   1
+#define PUD_SIZE       (1UL << PUD_SHIFT)
+#define PUD_MASK       (~(PUD_SIZE-1))
+
+/*
+ * The "pgd_xxx()" functions here are trivial for a folded two-level
+ * setup: the pud is never bad, and a pud always exists (as it's folded
+ * into the pgd entry)
+ */
+static inline int pgd_none(pgd_t pgd)          { return 0; }
+static inline int pgd_bad(pgd_t pgd)           { return 0; }
+static inline int pgd_present(pgd_t pgd)       { return 1; }
+static inline void pgd_clear(pgd_t *pgd)       { }
+#define pud_ERROR(pud)                         (pgd_ERROR((pud).pgd))
+
+#define pgd_populate(mm, pgd, pud)             do { } while (0)
+/*
+ * (puds are folded into pgds so this doesn't get actually called,
+ * but the define is needed for a generic inline function.)
+ */
+#define set_pgd(pgdptr, pgdval)        set_pud((pud_t *)(pgdptr), (pud_t) { pgdval })
+
+static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
+{
+       return (pud_t *)pgd;
+}
+
+#define pud_val(x)                             (pgd_val((x).pgd))
+#define __pud(x)                               ((pud_t) { __pgd(x) })
+
+#define pgd_page(pgd)                          (pud_page((pud_t){ pgd }))
+#define pgd_page_vaddr(pgd)                    (pud_page_vaddr((pud_t){ pgd }))
+
+/*
+ * allocating and freeing a pud is trivial: the 1-entry pud is
+ * inside the pgd, so has no extra memory associated with it.
+ */
+#define pud_alloc_one(mm, address)             NULL
+#define pud_free(mm, x)                                do { } while (0)
+#define __pud_free_tlb(tlb, x, a)              do { } while (0)
+
+#undef  pud_addr_end
+#define pud_addr_end(addr, end)                        (end)
+
+#endif /* __ASSEMBLY__ */
+#endif /* _PGTABLE_NOP4D_HACK_H */
diff --git a/include/asm-generic/pgtable-nop4d.h b/include/asm-generic/pgtable-nop4d.h

new file mode 100644 (file)

index 0000000..de364ec
--- /dev/null
+++ b/include/asm-generic/pgtable-nop4d.h
@@ -0,0 +1,56 @@
+#ifndef _PGTABLE_NOP4D_H
+#define _PGTABLE_NOP4D_H
+
+#ifndef __ASSEMBLY__
+
+#define __PAGETABLE_P4D_FOLDED
+
+typedef struct { pgd_t pgd; } p4d_t;
+
+#define P4D_SHIFT      PGDIR_SHIFT
+#define PTRS_PER_P4D   1
+#define P4D_SIZE       (1UL << P4D_SHIFT)
+#define P4D_MASK       (~(P4D_SIZE-1))
+
+/*
+ * The "pgd_xxx()" functions here are trivial for a folded two-level
+ * setup: the p4d is never bad, and a p4d always exists (as it's folded
+ * into the pgd entry)
+ */
+static inline int pgd_none(pgd_t pgd)          { return 0; }
+static inline int pgd_bad(pgd_t pgd)           { return 0; }
+static inline int pgd_present(pgd_t pgd)       { return 1; }
+static inline void pgd_clear(pgd_t *pgd)       { }
+#define p4d_ERROR(p4d)                         (pgd_ERROR((p4d).pgd))
+
+#define pgd_populate(mm, pgd, p4d)             do { } while (0)
+/*
+ * (p4ds are folded into pgds so this doesn't get actually called,
+ * but the define is needed for a generic inline function.)
+ */
+#define set_pgd(pgdptr, pgdval)        set_p4d((p4d_t *)(pgdptr), (p4d_t) { pgdval })
+
+static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
+{
+       return (p4d_t *)pgd;
+}
+
+#define p4d_val(x)                             (pgd_val((x).pgd))
+#define __p4d(x)                               ((p4d_t) { __pgd(x) })
+
+#define pgd_page(pgd)                          (p4d_page((p4d_t){ pgd }))
+#define pgd_page_vaddr(pgd)                    (p4d_page_vaddr((p4d_t){ pgd }))
+
+/*
+ * allocating and freeing a p4d is trivial: the 1-entry p4d is
+ * inside the pgd, so has no extra memory associated with it.
+ */
+#define p4d_alloc_one(mm, address)             NULL
+#define p4d_free(mm, x)                                do { } while (0)
+#define __p4d_free_tlb(tlb, x, a)              do { } while (0)
+
+#undef  p4d_addr_end
+#define p4d_addr_end(addr, end)                        (end)
+
+#endif /* __ASSEMBLY__ */
+#endif /* _PGTABLE_NOP4D_H */
diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h

index 810431d8351b16c14c3d1954ddc2890866c41658..c2b9b96d6268f4e85de4e4e15a27bf4a283e9312 100644 (file)
--- a/include/asm-generic/pgtable-nopud.h
+++ b/include/asm-generic/pgtable-nopud.h
@@ -3,52 +3,57 @@
  
  #ifndef __ASSEMBLY__
  
+#ifdef __ARCH_USE_5LEVEL_HACK
+#include <asm-generic/pgtable-nop4d-hack.h>
+#else
+#include <asm-generic/pgtable-nop4d.h>
+
  #define __PAGETABLE_PUD_FOLDED
  
  /*
- * Having the pud type consist of a pgd gets the size right, and allows
- * us to conceptually access the pgd entry that this pud is folded into
+ * Having the pud type consist of a p4d gets the size right, and allows
+ * us to conceptually access the p4d entry that this pud is folded into
   * without casting.
   */
-typedef struct { pgd_t pgd; } pud_t;
+typedef struct { p4d_t p4d; } pud_t;
  
-#define PUD_SHIFT      PGDIR_SHIFT
+#define PUD_SHIFT      P4D_SHIFT
  #define PTRS_PER_PUD   1
  #define PUD_SIZE       (1UL << PUD_SHIFT)
  #define PUD_MASK       (~(PUD_SIZE-1))
  
  /*
- * The "pgd_xxx()" functions here are trivial for a folded two-level
+ * The "p4d_xxx()" functions here are trivial for a folded two-level
   * setup: the pud is never bad, and a pud always exists (as it's folded
- * into the pgd entry)
+ * into the p4d entry)
   */
-static inline int pgd_none(pgd_t pgd)          { return 0; }
-static inline int pgd_bad(pgd_t pgd)           { return 0; }
-static inline int pgd_present(pgd_t pgd)       { return 1; }
-static inline void pgd_clear(pgd_t *pgd)       { }
-#define pud_ERROR(pud)                         (pgd_ERROR((pud).pgd))
+static inline int p4d_none(p4d_t p4d)          { return 0; }
+static inline int p4d_bad(p4d_t p4d)           { return 0; }
+static inline int p4d_present(p4d_t p4d)       { return 1; }
+static inline void p4d_clear(p4d_t *p4d)       { }
+#define pud_ERROR(pud)                         (p4d_ERROR((pud).p4d))
  
-#define pgd_populate(mm, pgd, pud)             do { } while (0)
+#define p4d_populate(mm, p4d, pud)             do { } while (0)
  /*
- * (puds are folded into pgds so this doesn't get actually called,
+ * (puds are folded into p4ds so this doesn't get actually called,
   * but the define is needed for a generic inline function.)
   */
-#define set_pgd(pgdptr, pgdval)                        set_pud((pud_t *)(pgdptr), (pud_t) { pgdval })
+#define set_p4d(p4dptr, p4dval)        set_pud((pud_t *)(p4dptr), (pud_t) { p4dval })
  
-static inline pud_t * pud_offset(pgd_t * pgd, unsigned long address)
+static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
  {
-       return (pud_t *)pgd;
+       return (pud_t *)p4d;
  }
  
-#define pud_val(x)                             (pgd_val((x).pgd))
-#define __pud(x)                               ((pud_t) { __pgd(x) } )
+#define pud_val(x)                             (p4d_val((x).p4d))
+#define __pud(x)                               ((pud_t) { __p4d(x) })
  
-#define pgd_page(pgd)                          (pud_page((pud_t){ pgd }))
-#define pgd_page_vaddr(pgd)                    (pud_page_vaddr((pud_t){ pgd }))
+#define p4d_page(p4d)                          (pud_page((pud_t){ p4d }))
+#define p4d_page_vaddr(p4d)                    (pud_page_vaddr((pud_t){ p4d }))
  
  /*
   * allocating and freeing a pud is trivial: the 1-entry pud is
- * inside the pgd, so has no extra memory associated with it.
+ * inside the p4d, so has no extra memory associated with it.
   */
  #define pud_alloc_one(mm, address)             NULL
  #define pud_free(mm, x)                                do { } while (0)
@@ -58,4 +63,5 @@ static inline pud_t * pud_offset(pgd_t * pgd, unsigned long address)
  #define pud_addr_end(addr, end)                        (end)
  
  #endif /* __ASSEMBLY__ */
+#endif /* !__ARCH_USE_5LEVEL_HACK */
  #endif /* _PGTABLE_NOPUD_H */
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h

index f4ca23b158b3b7aace85c4899385e980fd4c42b8..1fad160f35de8e89953af075173a2ad219c9693b 100644 (file)
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -10,9 +10,9 @@
  #include <linux/bug.h>
  #include <linux/errno.h>
  
-#if 4 - defined(__PAGETABLE_PUD_FOLDED) - defined(__PAGETABLE_PMD_FOLDED) != \
-       CONFIG_PGTABLE_LEVELS
-#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{PUD,PMD}_FOLDED
+#if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \
+       defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS
+#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED
  #endif
  
  /*
@@ -424,6 +424,13 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
         (__boundary - 1 < (end) - 1)? __boundary: (end);                \
  })
  
+#ifndef p4d_addr_end
+#define p4d_addr_end(addr, end)                                                \
+({     unsigned long __boundary = ((addr) + P4D_SIZE) & P4D_MASK;      \
+       (__boundary - 1 < (end) - 1)? __boundary: (end);                \
+})
+#endif
+
  #ifndef pud_addr_end
  #define pud_addr_end(addr, end)                                                \
  ({     unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;      \
@@ -444,6 +451,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
   * Do the tests inline, but report and clear the bad entry in mm/memory.c.
   */
  void pgd_clear_bad(pgd_t *);
+void p4d_clear_bad(p4d_t *);
  void pud_clear_bad(pud_t *);
  void pmd_clear_bad(pmd_t *);
  
@@ -458,6 +466,17 @@ static inline int pgd_none_or_clear_bad(pgd_t *pgd)
         return 0;
  }
  
+static inline int p4d_none_or_clear_bad(p4d_t *p4d)
+{
+       if (p4d_none(*p4d))
+               return 1;
+       if (unlikely(p4d_bad(*p4d))) {
+               p4d_clear_bad(p4d);
+               return 1;
+       }
+       return 0;
+}
+
  static inline int pud_none_or_clear_bad(pud_t *pud)
  {
         if (pud_none(*pud))
@@ -844,11 +863,30 @@ static inline int pmd_protnone(pmd_t pmd)
  #endif /* CONFIG_MMU */
  
  #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+
+#ifndef __PAGETABLE_P4D_FOLDED
+int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot);
+int p4d_clear_huge(p4d_t *p4d);
+#else
+static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
+{
+       return 0;
+}
+static inline int p4d_clear_huge(p4d_t *p4d)
+{
+       return 0;
+}
+#endif /* !__PAGETABLE_P4D_FOLDED */
+
  int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
  int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
  int pud_clear_huge(pud_t *pud);
  int pmd_clear_huge(pmd_t *pmd);
  #else  /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
+static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
+{
+       return 0;
+}
  static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
  {
         return 0;
@@ -857,6 +895,10 @@ static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
  {
         return 0;
  }
+static inline int p4d_clear_huge(p4d_t *p4d)
+{
+       return 0;
+}
  static inline int pud_clear_huge(pud_t *pud)
  {
         return 0;
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h

index 4df64a1fc09e7aab7f88cd4afe73928228930147..532372c6cf15c8084f4910072e96fd0360bad2cd 100644 (file)
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -14,8 +14,8 @@
   * [_sdata, _edata]: contains .data.* sections, may also contain .rodata.*
   *                   and/or .init.* sections.
   * [__start_rodata, __end_rodata]: contains .rodata.* sections
- * [__start_data_ro_after_init, __end_data_ro_after_init]:
- *                  contains data.ro_after_init section
+ * [__start_ro_after_init, __end_ro_after_init]:
+ *                  contains .data..ro_after_init section
   * [__init_begin, __init_end]: contains .init.* sections, but .init.text.*
   *                   may be out of this range on some architectures.
   * [_sinittext, _einittext]: contains .init.text.* sections
@@ -33,7 +33,7 @@ extern char _data[], _sdata[], _edata[];
  extern char __bss_start[], __bss_stop[];
  extern char __init_begin[], __init_end[];
  extern char _sinittext[], _einittext[];
-extern char __start_data_ro_after_init[], __end_data_ro_after_init[];
+extern char __start_ro_after_init[], __end_ro_after_init[];
  extern char _end[];
  extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
  extern char __kprobes_text_start[], __kprobes_text_end[];
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h

index 4329bc6ef04b7b555337dc2f558ff7d7321668c4..8afa4335e5b2bfd0c42c00e1b1506d4e1f7377ac 100644 (file)
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -270,6 +270,12 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
                 __pte_free_tlb(tlb, ptep, address);             \
         } while (0)
  
+#define pmd_free_tlb(tlb, pmdp, address)                       \
+       do {                                                    \
+               __tlb_adjust_range(tlb, address, PAGE_SIZE);            \
+               __pmd_free_tlb(tlb, pmdp, address);             \
+       } while (0)
+
  #ifndef __ARCH_HAS_4LEVEL_HACK
  #define pud_free_tlb(tlb, pudp, address)                       \
         do {                                                    \
@@ -278,11 +284,13 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
         } while (0)
  #endif
  
-#define pmd_free_tlb(tlb, pmdp, address)                       \
+#ifndef __ARCH_HAS_5LEVEL_HACK
+#define p4d_free_tlb(tlb, pudp, address)                       \
         do {                                                    \
-               __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
-               __pmd_free_tlb(tlb, pmdp, address);             \
+               __tlb_adjust_range(tlb, address, PAGE_SIZE);            \
+               __p4d_free_tlb(tlb, pudp, address);             \
         } while (0)
+#endif
  
  #define tlb_migrate_finish(mm) do {} while (0)
  
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h

index 0968d13b388591ae02b37f9dda0fe7a498cc7475..143db9c523e25f38488bd43302b82f316e3124a9 100644 (file)
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -173,6 +173,7 @@
         KEEP(*(__##name##_of_table_end))
  
  #define CLKSRC_OF_TABLES()     OF_TABLE(CONFIG_CLKSRC_OF, clksrc)
+#define CLKEVT_OF_TABLES()     OF_TABLE(CONFIG_CLKEVT_OF, clkevt)
  #define IRQCHIP_OF_MATCH_TABLE() OF_TABLE(CONFIG_IRQCHIP, irqchip)
  #define CLK_OF_TABLES()                OF_TABLE(CONFIG_COMMON_CLK, clk)
  #define IOMMU_OF_TABLES()      OF_TABLE(CONFIG_OF_IOMMU, iommu)
@@ -260,9 +261,9 @@
   */
  #ifndef RO_AFTER_INIT_DATA
  #define RO_AFTER_INIT_DATA                                             \
-       __start_data_ro_after_init = .;                                 \
+       VMLINUX_SYMBOL(__start_ro_after_init) = .;                      \
         *(.data..ro_after_init)                                         \
-       __end_data_ro_after_init = .;
+       VMLINUX_SYMBOL(__end_ro_after_init) = .;
  #endif
  
  /*
@@ -559,6 +560,7 @@
         CLK_OF_TABLES()                                                 \
         RESERVEDMEM_OF_TABLES()                                         \
         CLKSRC_OF_TABLES()                                              \
+       CLKEVT_OF_TABLES()                                              \
         IOMMU_OF_TABLES()                                               \
         CPU_METHOD_OF_TABLES()                                          \
         CPUIDLE_METHOD_OF_TABLES()                                      \
diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h

index a2bfd7843f18f6e79d8bc7e5743b031345153053..e2b9c6fe271496e45dca5abb94d814dd8a9c3c04 100644 (file)
--- a/include/crypto/if_alg.h
+++ b/include/crypto/if_alg.h
@@ -73,7 +73,7 @@ int af_alg_unregister_type(const struct af_alg_type *type);
  
  int af_alg_release(struct socket *sock);
  void af_alg_release_parent(struct sock *sk);
-int af_alg_accept(struct sock *sk, struct socket *newsock);
+int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern);
  
  int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len);
  void af_alg_free_sg(struct af_alg_sgl *sgl);
diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h

index 1d4f365d8f03a439ab0e20caf27ae34ad6e13e74..f6d9af3efa45a6cc8eb448a71f5d43d72d8fcbd1 100644 (file)
--- a/include/crypto/internal/hash.h
+++ b/include/crypto/internal/hash.h
@@ -166,6 +166,16 @@ static inline struct ahash_instance *ahash_alloc_instance(
         return crypto_alloc_instance2(name, alg, ahash_instance_headroom());
  }
  
+static inline void ahash_request_complete(struct ahash_request *req, int err)
+{
+       req->base.complete(&req->base, err);
+}
+
+static inline u32 ahash_request_flags(struct ahash_request *req)
+{
+       return req->base.flags;
+}
+
  static inline struct crypto_ahash *crypto_spawn_ahash(
         struct crypto_ahash_spawn *spawn)
  {
diff --git a/include/drm/ttm/ttm_object.h b/include/drm/ttm/ttm_object.h

index ed953f98f0e1446ce01630c005e406f941282347..1487011fe057ba4b35271bc85e52c6b90291c2c3 100644 (file)
--- a/include/drm/ttm/ttm_object.h
+++ b/include/drm/ttm/ttm_object.h
@@ -229,6 +229,8 @@ extern void ttm_base_object_unref(struct ttm_base_object **p_base);
   * @ref_type: The type of reference.
   * @existed: Upon completion, indicates that an identical reference object
   * already existed, and the refcount was upped on that object instead.
+ * @require_existed: Fail with -EPERM if an identical ref object didn't
+ * already exist.
   *
   * Checks that the base object is shareable and adds a ref object to it.
   *
@@ -243,7 +245,8 @@ extern void ttm_base_object_unref(struct ttm_base_object **p_base);
   */
  extern int ttm_ref_object_add(struct ttm_object_file *tfile,
                               struct ttm_base_object *base,
-                             enum ttm_ref_type ref_type, bool *existed);
+                             enum ttm_ref_type ref_type, bool *existed,
+                             bool require_existed);
  
  extern bool ttm_ref_object_exists(struct ttm_object_file *tfile,
                                   struct ttm_base_object *base);
diff --git a/include/dt-bindings/sound/cs42l42.h b/include/dt-bindings/sound/cs42l42.h

index 399a123aed5815f1f6d2b2784e0148794790bc43..db69d84ed7d14152626565529e0bc981c1976a33 100644 (file)
--- a/include/dt-bindings/sound/cs42l42.h
+++ b/include/dt-bindings/sound/cs42l42.h
@@ -20,7 +20,7 @@
  #define CS42L42_HPOUT_LOAD_1NF         0
  #define CS42L42_HPOUT_LOAD_10NF                1
  
-/* HPOUT Clamp to GND Overide */
+/* HPOUT Clamp to GND Override */
  #define CS42L42_HPOUT_CLAMP_EN         0
  #define CS42L42_HPOUT_CLAMP_DIS                1
  
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h

index b72dd2ad5f440cfcbf86407cad5aed8e98835510..c0b3d999c266f2271d61bd58b042352580faf5d2 100644 (file)
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -295,6 +295,7 @@ void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu);
  void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
  int kvm_vgic_map_resources(struct kvm *kvm);
  int kvm_vgic_hyp_init(void);
+void kvm_vgic_init_cpu_hardware(void);
  
  int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
                         bool level);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h

index 673acda012af44efe4fb5a7fc5279d08e416cc86..9b05886f9773cde8439a0c3e21b39ad29460c440 100644 (file)
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -287,18 +287,15 @@ static inline bool invalid_phys_cpuid(phys_cpuid_t phys_id)
  }
  
  /* Validate the processor object's proc_id */
-bool acpi_processor_validate_proc_id(int proc_id);
+bool acpi_duplicate_processor_id(int proc_id);
  
  #ifdef CONFIG_ACPI_HOTPLUG_CPU
  /* Arch dependent functions for cpu hotplug support */
  int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id,
                  int *pcpu);
  int acpi_unmap_cpu(int cpu);
-int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid);
  #endif /* CONFIG_ACPI_HOTPLUG_CPU */
  
-void acpi_set_processor_mapping(void);
-
  #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
  int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr);
  #endif
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h

index ad955817916d00c35b6b41d204ec61013c88a10f..866c433e7d3220b29170ed98ccd1b70803a43964 100644 (file)
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -21,6 +21,7 @@ struct dentry;
   */
  enum wb_state {
         WB_registered,          /* bdi_register() was done */
+       WB_shutting_down,       /* wb_shutdown() in progress */
         WB_writeback_running,   /* Writeback is in progress */
         WB_has_dirty_io,        /* Dirty inodes on ->b_{dirty|io|more_io} */
  };
@@ -54,7 +55,9 @@ struct bdi_writeback_congested {
         atomic_t refcnt;                /* nr of attached wb's and blkg */
  
  #ifdef CONFIG_CGROUP_WRITEBACK
-       struct backing_dev_info *bdi;   /* the associated bdi */
+       struct backing_dev_info *__bdi; /* the associated bdi, set to NULL
+                                        * on bdi unregistration. For memcg-wb
+                                        * internal use only! */
         int blkcg_id;                   /* ID of the associated blkcg */
         struct rb_node rb_node;         /* on bdi->cgwb_congestion_tree */
  #endif
@@ -143,7 +146,7 @@ struct backing_dev_info {
         congested_fn *congested_fn; /* Function pointer if device is md/dm */
         void *congested_data;   /* Pointer to aux data for congested func */
  
-       char *name;
+       const char *name;
  
         struct kref refcnt;     /* Reference counter for the structure */
         unsigned int capabilities; /* Device capabilities */
@@ -161,7 +164,6 @@ struct backing_dev_info {
  #ifdef CONFIG_CGROUP_WRITEBACK
         struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */
         struct rb_root cgwb_congested_tree; /* their congested states */
-       atomic_t usage_cnt; /* counts both cgwbs and cgwb_contested's */
  #else
         struct bdi_writeback_congested *wb_congested;
  #endif
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h

index c52a48cb9a66379971be13008bd8cdfba087807e..557d84063934c65c2aa96c2b6141425af8a567bf 100644 (file)
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -17,8 +17,6 @@
  #include <linux/backing-dev-defs.h>
  #include <linux/slab.h>
  
-int __must_check bdi_init(struct backing_dev_info *bdi);
-
  static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi)
  {
         kref_get(&bdi->refcnt);
@@ -27,16 +25,18 @@ static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi)
  
  void bdi_put(struct backing_dev_info *bdi);
  
-__printf(3, 4)
-int bdi_register(struct backing_dev_info *bdi, struct device *parent,
-               const char *fmt, ...);
-int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
+__printf(2, 3)
+int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...);
+int bdi_register_va(struct backing_dev_info *bdi, const char *fmt,
+                   va_list args);
  int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner);
  void bdi_unregister(struct backing_dev_info *bdi);
  
-int __must_check bdi_setup_and_register(struct backing_dev_info *, char *);
-void bdi_destroy(struct backing_dev_info *bdi);
  struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id);
+static inline struct backing_dev_info *bdi_alloc(gfp_t gfp_mask)
+{
+       return bdi_alloc_node(gfp_mask, NUMA_NO_NODE);
+}
  
  void wb_start_writeback(struct bdi_writeback *wb, long nr_pages,
                         bool range_cyclic, enum wb_reason reason);
diff --git a/include/linux/bio.h b/include/linux/bio.h

index 8e521194f6fc4ad32138a51c962a365c74debaed..4931756d86d99a0194114229ec04001c223a3963 100644 (file)
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -383,7 +383,7 @@ extern struct bio_set *bioset_create_nobvec(unsigned int, unsigned int);
  extern void bioset_free(struct bio_set *);
  extern mempool_t *biovec_create_pool(int pool_entries);
  
-extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *);
+extern struct bio *bio_alloc_bioset(gfp_t, unsigned int, struct bio_set *);
  extern void bio_put(struct bio *);
  
  extern void __bio_clone_fast(struct bio *, struct bio *);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h

index b296a900611790e25639293fd51b0fa7a9beab41..f3e5e1de1bdb95480b9199a120b5b46c370646b7 100644 (file)
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -15,7 +15,7 @@ struct blk_mq_hw_ctx {
                 unsigned long           state;          /* BLK_MQ_S_* flags */
         } ____cacheline_aligned_in_smp;
  
-       struct work_struct      run_work;
+       struct delayed_work     run_work;
         cpumask_var_t           cpumask;
         int                     next_cpu;
         int                     next_cpu_batch;
@@ -51,8 +51,6 @@ struct blk_mq_hw_ctx {
  
         atomic_t                nr_active;
  
-       struct delayed_work     delay_work;
-
         struct hlist_node       cpuhp_dead;
         struct kobject          kobj;
  
@@ -81,7 +79,6 @@ struct blk_mq_tag_set {
  
  struct blk_mq_queue_data {
         struct request *rq;
-       struct list_head *list;
         bool last;
  };
  
@@ -142,6 +139,14 @@ struct blk_mq_ops {
         reinit_request_fn       *reinit_request;
  
         map_queues_fn           *map_queues;
+
+#ifdef CONFIG_BLK_DEBUG_FS
+       /*
+        * Used by the debugfs implementation to show driver-specific
+        * information about a request.
+        */
+       void (*show_rq)(struct seq_file *m, struct request *rq);
+#endif
  };
  
  enum {
@@ -152,7 +157,6 @@ enum {
         BLK_MQ_F_SHOULD_MERGE   = 1 << 0,
         BLK_MQ_F_TAG_SHARED     = 1 << 1,
         BLK_MQ_F_SG_MERGE       = 1 << 2,
-       BLK_MQ_F_DEFER_ISSUE    = 1 << 4,
         BLK_MQ_F_BLOCKING       = 1 << 5,
         BLK_MQ_F_NO_SCHED       = 1 << 6,
         BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
@@ -162,6 +166,7 @@ enum {
         BLK_MQ_S_TAG_ACTIVE     = 1,
         BLK_MQ_S_SCHED_RESTART  = 2,
         BLK_MQ_S_TAG_WAITING    = 3,
+       BLK_MQ_S_START_ON_RUN   = 4,
  
         BLK_MQ_MAX_DEPTH        = 10240,
  
@@ -229,7 +234,7 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
  void blk_mq_kick_requeue_list(struct request_queue *q);
  void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
  void blk_mq_abort_requeue_list(struct request_queue *q);
-void blk_mq_complete_request(struct request *rq, int error);
+void blk_mq_complete_request(struct request *rq);
  
  bool blk_mq_queue_stopped(struct request_queue *q);
  void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
@@ -238,13 +243,15 @@ void blk_mq_stop_hw_queues(struct request_queue *q);
  void blk_mq_start_hw_queues(struct request_queue *q);
  void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
  void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
+void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
+void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
  void blk_mq_run_hw_queues(struct request_queue *q, bool async);
  void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
  void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
                 busy_tag_iter_fn *fn, void *priv);
  void blk_mq_freeze_queue(struct request_queue *q);
  void blk_mq_unfreeze_queue(struct request_queue *q);
-void blk_mq_freeze_queue_start(struct request_queue *q);
+void blk_freeze_queue_start(struct request_queue *q);
  void blk_mq_freeze_queue_wait(struct request_queue *q);
  int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
                                      unsigned long timeout);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h

index d703acb55d0f0d196296ef4d82f4e97a5efd9c3a..61339bc444006a477bf3e84b32a1d8199a9e5f01 100644 (file)
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -17,6 +17,10 @@ struct io_context;
  struct cgroup_subsys_state;
  typedef void (bio_end_io_t) (struct bio *);
  
+struct blk_issue_stat {
+       u64 stat;
+};
+
  /*
   * main unit of I/O for the block layer and lower layers (ie drivers and
   * stacking drivers)
@@ -29,7 +33,7 @@ struct bio {
                                                  * top bits REQ_OP. Use
                                                  * accessors.
                                                  */
-       unsigned short          bi_flags;       /* status, command, etc */
+       unsigned short          bi_flags;       /* status, etc and bvec pool number */
         unsigned short          bi_ioprio;
  
         struct bvec_iter        bi_iter;
@@ -58,6 +62,10 @@ struct bio {
          */
         struct io_context       *bi_ioc;
         struct cgroup_subsys_state *bi_css;
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+       void                    *bi_cg_private;
+       struct blk_issue_stat   bi_issue_stat;
+#endif
  #endif
         union {
  #if defined(CONFIG_BLK_DEV_INTEGRITY)
@@ -102,12 +110,9 @@ struct bio {
  #define BIO_REFFED     8       /* bio has elevated ->bi_cnt */
  #define BIO_THROTTLED  9       /* This bio has already been subjected to
                                  * throttling rules. Don't do it again. */
-
-/*
- * Flags starting here get preserved by bio_reset() - this includes
- * BVEC_POOL_IDX()
- */
-#define BIO_RESET_BITS 10
+#define BIO_TRACE_COMPLETION 10        /* bio_endio() should trace the final completion
+                                * of this bio. */
+/* See BVEC_POOL_OFFSET below before adding new flags */
  
  /*
   * We support 6 different bvec pools, the last one is magic in that it
@@ -117,13 +122,22 @@ struct bio {
  #define BVEC_POOL_MAX          (BVEC_POOL_NR - 1)
  
  /*
- * Top 4 bits of bio flags indicate the pool the bvecs came from.  We add
+ * Top 3 bits of bio flags indicate the pool the bvecs came from.  We add
   * 1 to the actual index so that 0 indicates that there are no bvecs to be
   * freed.
   */
-#define BVEC_POOL_BITS         (4)
+#define BVEC_POOL_BITS         (3)
  #define BVEC_POOL_OFFSET       (16 - BVEC_POOL_BITS)
  #define BVEC_POOL_IDX(bio)     ((bio)->bi_flags >> BVEC_POOL_OFFSET)
+#if (1<< BVEC_POOL_BITS) < (BVEC_POOL_NR+1)
+# error "BVEC_POOL_BITS is too small"
+#endif
+
+/*
+ * Flags starting here get preserved by bio_reset() - this includes
+ * only BVEC_POOL_IDX()
+ */
+#define BIO_RESET_BITS BVEC_POOL_OFFSET
  
  /*
   * Operations and flags common to the bio and request structures.
@@ -160,7 +174,7 @@ enum req_opf {
         /* write the same sector many times */
         REQ_OP_WRITE_SAME       = 7,
         /* write the zero filled sector many times */
-       REQ_OP_WRITE_ZEROES     = 8,
+       REQ_OP_WRITE_ZEROES     = 9,
  
         /* SCSI passthrough using struct scsi_request */
         REQ_OP_SCSI_IN          = 32,
@@ -187,6 +201,10 @@ enum req_flag_bits {
         __REQ_PREFLUSH,         /* request for cache flush */
         __REQ_RAHEAD,           /* read ahead, can fail anytime */
         __REQ_BACKGROUND,       /* background IO */
+
+       /* command specific flags for REQ_OP_WRITE_ZEROES: */
+       __REQ_NOUNMAP,          /* do not free blocks when zeroing */
+
         __REQ_NR_BITS,          /* stops here */
  };
  
@@ -204,6 +222,8 @@ enum req_flag_bits {
  #define REQ_RAHEAD             (1ULL << __REQ_RAHEAD)
  #define REQ_BACKGROUND         (1ULL << __REQ_BACKGROUND)
  
+#define REQ_NOUNMAP            (1ULL << __REQ_NOUNMAP)
+
  #define REQ_FAILFAST_MASK \
         (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
  
@@ -283,12 +303,6 @@ static inline bool blk_qc_t_is_internal(blk_qc_t cookie)
         return (cookie & BLK_QC_T_INTERNAL) != 0;
  }
  
-struct blk_issue_stat {
-       u64 time;
-};
-
-#define BLK_RQ_STAT_BATCH      64
-
  struct blk_rq_stat {
         s64 mean;
         u64 min;
@@ -296,7 +310,6 @@ struct blk_rq_stat {
         s32 nr_samples;
         s32 nr_batch;
         u64 batch;
-       s64 time;
  };
  
  #endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h

index 796016e63c1da7b64c59f8d9a1b4979d8059027b..83d28623645f33f0b54596116905012bcc800826 100644 (file)
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -40,15 +40,20 @@ struct blkcg_gq;
  struct blk_flush_queue;
  struct pr_ops;
  struct rq_wb;
+struct blk_queue_stats;
+struct blk_stat_callback;
  
  #define BLKDEV_MIN_RQ  4
  #define BLKDEV_MAX_RQ  128     /* Default maximum */
  
+/* Must be consisitent with blk_mq_poll_stats_bkt() */
+#define BLK_MQ_POLL_STATS_BKTS 16
+
  /*
   * Maximum number of blkcg policies allowed to be registered concurrently.
   * Defined here to simplify include dependency.
   */
-#define BLKCG_MAX_POLS         2
+#define BLKCG_MAX_POLS         3
  
  typedef void (rq_end_io_fn)(struct request *, int);
  
@@ -173,6 +178,7 @@ struct request {
                 struct rb_node rb_node; /* sort/lookup */
                 struct bio_vec special_vec;
                 void *completion_data;
+               int error_count; /* for legacy drivers, don't use */
         };
  
         /*
@@ -213,16 +219,14 @@ struct request {
  
         unsigned short ioprio;
  
-       void *special;          /* opaque pointer available for LLD use */
+       unsigned int timeout;
  
-       int errors;
+       void *special;          /* opaque pointer available for LLD use */
  
         unsigned int extra_len; /* length of alignment and padding */
  
         unsigned long deadline;
         struct list_head timeout_list;
-       unsigned int timeout;
-       int retries;
  
         /*
          * completion callback.
@@ -337,7 +341,6 @@ struct queue_limits {
         unsigned char           misaligned;
         unsigned char           discard_misaligned;
         unsigned char           cluster;
-       unsigned char           discard_zeroes_data;
         unsigned char           raid_partial_stripes_expensive;
         enum blk_zoned_model    zoned;
  };
@@ -388,6 +391,7 @@ struct request_queue {
         int                     nr_rqs[2];      /* # allocated [a]sync rqs */
         int                     nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */
  
+       struct blk_queue_stats  *stats;
         struct rq_wb            *rq_wb;
  
         /*
@@ -435,7 +439,6 @@ struct request_queue {
         struct delayed_work     delay_work;
  
         struct backing_dev_info *backing_dev_info;
-       struct disk_devt        *disk_devt;
  
         /*
          * The queue owner gets to use this for whatever they like.
@@ -506,8 +509,6 @@ struct request_queue {
         unsigned int            nr_sorted;
         unsigned int            in_flight[2];
  
-       struct blk_rq_stat      rq_stats[2];
-
         /*
          * Number of active block driver functions for which blk_drain_queue()
          * must wait. Must be incremented around functions that unlock the
@@ -517,6 +518,10 @@ struct request_queue {
  
         unsigned int            rq_timeout;
         int                     poll_nsec;
+
+       struct blk_stat_callback        *poll_cb;
+       struct blk_rq_stat      poll_stat[BLK_MQ_POLL_STATS_BKTS];
+
         struct timer_list       timeout;
         struct work_struct      timeout_work;
         struct list_head        timeout_list;
@@ -611,7 +616,8 @@ struct request_queue {
  #define QUEUE_FLAG_FLUSH_NQ    25      /* flush not queueuable */
  #define QUEUE_FLAG_DAX         26      /* device supports DAX */
  #define QUEUE_FLAG_STATS       27      /* track rq completion times */
-#define QUEUE_FLAG_RESTART     28      /* queue needs restart at completion */
+#define QUEUE_FLAG_POLL_STATS  28      /* collecting stats for hybrid polling */
+#define QUEUE_FLAG_REGISTERED  29      /* queue has been registered to a disk */
  
  #define QUEUE_FLAG_DEFAULT     ((1 << QUEUE_FLAG_IO_STAT) |            \
                                  (1 << QUEUE_FLAG_STACKABLE)    |       \
@@ -920,6 +926,7 @@ extern int blk_register_queue(struct gendisk *disk);
  extern void blk_unregister_queue(struct gendisk *disk);
  extern blk_qc_t generic_make_request(struct bio *bio);
  extern void blk_rq_init(struct request_queue *q, struct request *rq);
+extern void blk_init_request_from_bio(struct request *req, struct bio *bio);
  extern void blk_put_request(struct request *);
  extern void __blk_put_request(struct request_queue *, struct request *);
  extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
@@ -965,7 +972,7 @@ extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, uns
  extern int blk_rq_map_user_iov(struct request_queue *, struct request *,
                                struct rq_map_data *, const struct iov_iter *,
                                gfp_t);
-extern int blk_execute_rq(struct request_queue *, struct gendisk *,
+extern void blk_execute_rq(struct request_queue *, struct gendisk *,
                           struct request *, int);
  extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
                                   struct request *, int, rq_end_io_fn *);
@@ -1082,20 +1089,6 @@ static inline unsigned int blk_rq_count_bios(struct request *rq)
         return nr_bios;
  }
  
-/*
- * blk_rq_set_prio - associate a request with prio from ioc
- * @rq: request of interest
- * @ioc: target iocontext
- *
- * Assocate request prio with ioc prio so request based drivers
- * can leverage priority information.
- */
-static inline void blk_rq_set_prio(struct request *rq, struct io_context *ioc)
-{
-       if (ioc)
-               rq->ioprio = ioc->ioprio;
-}
-
  /*
   * Request issue related functions.
   */
@@ -1122,13 +1115,10 @@ extern void blk_finish_request(struct request *rq, int error);
  extern bool blk_end_request(struct request *rq, int error,
                             unsigned int nr_bytes);
  extern void blk_end_request_all(struct request *rq, int error);
-extern bool blk_end_request_cur(struct request *rq, int error);
-extern bool blk_end_request_err(struct request *rq, int error);
  extern bool __blk_end_request(struct request *rq, int error,
                               unsigned int nr_bytes);
  extern void __blk_end_request_all(struct request *rq, int error);
  extern bool __blk_end_request_cur(struct request *rq, int error);
-extern bool __blk_end_request_err(struct request *rq, int error);
  
  extern void blk_complete_request(struct request *);
  extern void __blk_complete_request(struct request *);
@@ -1331,23 +1321,27 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
         return bqt->tag_index[tag];
  }
  
+extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
+extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
+               sector_t nr_sects, gfp_t gfp_mask, struct page *page);
  
  #define BLKDEV_DISCARD_SECURE  (1 << 0)        /* issue a secure erase */
-#define BLKDEV_DISCARD_ZERO    (1 << 1)        /* must reliably zero data */
  
-extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
  extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
                 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
  extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
                 sector_t nr_sects, gfp_t gfp_mask, int flags,
                 struct bio **biop);
-extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
-               sector_t nr_sects, gfp_t gfp_mask, struct page *page);
+
+#define BLKDEV_ZERO_NOUNMAP    (1 << 0)  /* do not free blocks */
+#define BLKDEV_ZERO_NOFALLBACK (1 << 1)  /* don't write explicit zeroes */
+
  extern int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
                 sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
-               bool discard);
+               unsigned flags);
  extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
-               sector_t nr_sects, gfp_t gfp_mask, bool discard);
+               sector_t nr_sects, gfp_t gfp_mask, unsigned flags);
+
  static inline int sb_issue_discard(struct super_block *sb, sector_t block,
                 sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags)
  {
@@ -1361,7 +1355,7 @@ static inline int sb_issue_zeroout(struct super_block *sb, sector_t block,
         return blkdev_issue_zeroout(sb->s_bdev,
                                     block << (sb->s_blocksize_bits - 9),
                                     nr_blocks << (sb->s_blocksize_bits - 9),
-                                   gfp_mask, true);
+                                   gfp_mask, 0);
  }
  
  extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
@@ -1531,19 +1525,6 @@ static inline int bdev_discard_alignment(struct block_device *bdev)
         return q->limits.discard_alignment;
  }
  
-static inline unsigned int queue_discard_zeroes_data(struct request_queue *q)
-{
-       if (q->limits.max_discard_sectors && q->limits.discard_zeroes_data == 1)
-               return 1;
-
-       return 0;
-}
-
-static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev)
-{
-       return queue_discard_zeroes_data(bdev_get_queue(bdev));
-}
-
  static inline unsigned int bdev_write_same(struct block_device *bdev)
  {
         struct request_queue *q = bdev_get_queue(bdev);
@@ -1674,12 +1655,36 @@ static inline bool bios_segs_mergeable(struct request_queue *q,
         return true;
  }
  
-static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
-                        struct bio *next)
+static inline bool bio_will_gap(struct request_queue *q,
+                               struct request *prev_rq,
+                               struct bio *prev,
+                               struct bio *next)
  {
         if (bio_has_data(prev) && queue_virt_boundary(q)) {
                 struct bio_vec pb, nb;
  
+               /*
+                * don't merge if the 1st bio starts with non-zero
+                * offset, otherwise it is quite difficult to respect
+                * sg gap limit. We work hard to merge a huge number of small
+                * single bios in case of mkfs.
+                */
+               if (prev_rq)
+                       bio_get_first_bvec(prev_rq->bio, &pb);
+               else
+                       bio_get_first_bvec(prev, &pb);
+               if (pb.bv_offset)
+                       return true;
+
+               /*
+                * We don't need to worry about the situation that the
+                * merged segment ends in unaligned virt boundary:
+                *
+                * - if 'pb' ends aligned, the merged segment ends aligned
+                * - if 'pb' ends unaligned, the next bio must include
+                *   one single bvec of 'nb', otherwise the 'nb' can't
+                *   merge with 'pb'
+                */
                 bio_get_last_bvec(prev, &pb);
                 bio_get_first_bvec(next, &nb);
  
@@ -1692,18 +1697,19 @@ static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
  
  static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
  {
-       return bio_will_gap(req->q, req->biotail, bio);
+       return bio_will_gap(req->q, req, req->biotail, bio);
  }
  
  static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
  {
-       return bio_will_gap(req->q, bio, req->bio);
+       return bio_will_gap(req->q, NULL, bio, req->bio);
  }
  
  int kblockd_schedule_work(struct work_struct *work);
  int kblockd_schedule_work_on(int cpu, struct work_struct *work);
  int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay);
  int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
+int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
  
  #ifdef CONFIG_BLK_CGROUP
  /*
diff --git a/include/linux/ccp.h b/include/linux/ccp.h

index c71dd8fa57640eab059ca21b2847609340b6b506..c41b8d99dd0e7f352bc8e57e4e2ffc4dd08c63b7 100644 (file)
--- a/include/linux/ccp.h
+++ b/include/linux/ccp.h
@@ -556,7 +556,7 @@ enum ccp_engine {
   * struct ccp_cmd - CCP operation request
   * @entry: list element (ccp driver use only)
   * @work: work element used for callbacks (ccp driver use only)
- * @ccp: CCP device to be run on (ccp driver use only)
+ * @ccp: CCP device to be run on
   * @ret: operation return code (ccp driver use only)
   * @flags: cmd processing flags
   * @engine: CCP operation to perform
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h

index 1816c5e26581716b24d6d3fd0f036b9b739c467c..88cd5dc8e238a2fa7e8c66a034a8e5b0ae248f21 100644 (file)
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -48,6 +48,7 @@ struct ceph_options {
         unsigned long mount_timeout;            /* jiffies */
         unsigned long osd_idle_ttl;             /* jiffies */
         unsigned long osd_keepalive_timeout;    /* jiffies */
+       unsigned long osd_request_timeout;      /* jiffies */
  
         /*
          * any type that can't be simply compared or doesn't need need
@@ -68,6 +69,7 @@ struct ceph_options {
  #define CEPH_MOUNT_TIMEOUT_DEFAULT     msecs_to_jiffies(60 * 1000)
  #define CEPH_OSD_KEEPALIVE_DEFAULT     msecs_to_jiffies(5 * 1000)
  #define CEPH_OSD_IDLE_TTL_DEFAULT      msecs_to_jiffies(60 * 1000)
+#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT 0  /* no timeout */
  
  #define CEPH_MONC_HUNT_INTERVAL                msecs_to_jiffies(3 * 1000)
  #define CEPH_MONC_PING_INTERVAL                msecs_to_jiffies(10 * 1000)
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h

index 2ea0c282f3dc9326f7b3c4b7a3883758831ed251..c125b5d9e13ceddacd921286f19133607835dee1 100644 (file)
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -189,6 +189,7 @@ struct ceph_osd_request {
  
         /* internal */
         unsigned long r_stamp;                /* jiffies, send or check time */
+       unsigned long r_start_stamp;          /* jiffies */
         int r_attempts;
         struct ceph_eversion r_replay_version; /* aka reassert_version */
         u32 r_last_force_resend;
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h

index f6b43fbb141c9ad03c1e05880e8c2228743d076e..af9c86e958bdad3be90cfb5f19aad99ede457339 100644 (file)
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -570,6 +570,25 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
         pr_cont_kernfs_path(cgrp->kn);
  }
  
+static inline void cgroup_init_kthreadd(void)
+{
+       /*
+        * kthreadd is inherited by all kthreads, keep it in the root so
+        * that the new kthreads are guaranteed to stay in the root until
+        * initialization is finished.
+        */
+       current->no_cgroup_migration = 1;
+}
+
+static inline void cgroup_kthread_ready(void)
+{
+       /*
+        * This kthread finished initialization.  The creator should have
+        * set PF_NO_SETAFFINITY if this kthread should stay in the root.
+        */
+       current->no_cgroup_migration = 0;
+}
+
  #else /* !CONFIG_CGROUPS */
  
  struct cgroup_subsys_state;
@@ -590,6 +609,8 @@ static inline void cgroup_free(struct task_struct *p) {}
  
  static inline int cgroup_init_early(void) { return 0; }
  static inline int cgroup_init(void) { return 0; }
+static inline void cgroup_init_kthreadd(void) {}
+static inline void cgroup_kthread_ready(void) {}
  
  static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
                                                struct cgroup *ancestor)
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h

index 5d3053c34fb3d5c365ad1f5a44e39a03fb6926c8..6d7edc3082f98466566cd1ae67640d7e0bd976d3 100644 (file)
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -229,7 +229,7 @@ static inline void tick_setup_hrtimer_broadcast(void) { }
  
  #ifdef CONFIG_CLKEVT_PROBE
  extern int clockevent_probe(void);
-#els
+#else
  static inline int clockevent_probe(void) { return 0; }
  #endif
  
diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h

index 5b8721efa948ec7d93fa7caeb79a535c51e8d69b..31e4e1f1547cc1698514d854898110446ba4a4f7 100644 (file)
--- a/include/linux/coda_psdev.h
+++ b/include/linux/coda_psdev.h
@@ -15,7 +15,6 @@ struct venus_comm {
         struct list_head    vc_processing;
         int                 vc_inuse;
         struct super_block *vc_sb;
-       struct backing_dev_info bdi;
         struct mutex        vc_mutex;
  };
  
diff --git a/include/linux/dccp.h b/include/linux/dccp.h

index 61d042bbbf607253033d9948b291cab2322814ba..68449293c4b6233c1a1d4133b1819376a9310225 100644 (file)
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -163,6 +163,7 @@ struct dccp_request_sock {
         __u64                    dreq_isr;
         __u64                    dreq_gsr;
         __be32                   dreq_service;
+       spinlock_t               dreq_lock;
         struct list_head         dreq_featneg;
         __u32                    dreq_timestamp_echo;
         __u32                    dreq_timestamp_time;
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h

index a7e6903866fdc98689bb5189cffd4b8cb7a715fa..c7ea33e38fb9e705baf2921f6cefe167574f9483 100644 (file)
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -254,6 +254,12 @@ struct dm_target {
          */
         unsigned num_write_same_bios;
  
+       /*
+        * The number of WRITE ZEROES bios that will be submitted to the target.
+        * The bio number can be accessed with dm_bio_get_target_bio_nr.
+        */
+       unsigned num_write_zeroes_bios;
+
         /*
          * The minimum number of extra bytes allocated in each io for the
          * target to use.
@@ -290,11 +296,6 @@ struct dm_target {
          * on max_io_len boundary.
          */
         bool split_discard_bios:1;
-
-       /*
-        * Set if this target does not return zeroes on discarded blocks.
-        */
-       bool discard_zeroes_data_unsupported:1;
  };
  
  /* Each target can link one of these into the table */
diff --git a/include/linux/device.h b/include/linux/device.h

index 30c4570e928dfe871bc84382f14eb49b5cac018e..9ef518af5515a01e202dee3cf4c27ffcd8c56441 100644 (file)
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1140,7 +1140,6 @@ static inline bool device_supports_offline(struct device *dev)
  extern void lock_device_hotplug(void);
  extern void unlock_device_hotplug(void);
  extern int lock_device_hotplug_sysfs(void);
-void assert_held_device_hotplug(void);
  extern int device_offline(struct device *dev);
  extern int device_online(struct device *dev);
  extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode);
diff --git a/include/linux/edac.h b/include/linux/edac.h

index 5b6adf964248dd478dd6d266ccc59468d26951ce..8ae0f45fafd650f57b10b9db50cef9d91578506c 100644 (file)
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -28,12 +28,10 @@ struct device;
  #define EDAC_OPSTATE_INT       2
  
  extern int edac_op_state;
-extern int edac_err_assert;
-extern atomic_t edac_handlers;
  
-extern int edac_handler_set(void);
-extern void edac_atomic_assert_error(void);
-extern struct bus_type *edac_get_sysfs_subsys(void);
+struct bus_type *edac_get_sysfs_subsys(void);
+int edac_get_report_status(void);
+void edac_set_report_status(int new);
  
  enum {
         EDAC_REPORTING_ENABLED,
@@ -41,28 +39,6 @@ enum {
         EDAC_REPORTING_FORCE
  };
  
-extern int edac_report_status;
-#ifdef CONFIG_EDAC
-static inline int get_edac_report_status(void)
-{
-       return edac_report_status;
-}
-
-static inline void set_edac_report_status(int new)
-{
-       edac_report_status = new;
-}
-#else
-static inline int get_edac_report_status(void)
-{
-       return EDAC_REPORTING_DISABLED;
-}
-
-static inline void set_edac_report_status(int new)
-{
-}
-#endif
-
  static inline void opstate_init(void)
  {
         switch (edac_op_state) {
diff --git a/include/linux/elevator.h b/include/linux/elevator.h

index aebecc4ed088f45c189162e560095363ac0e6d2a..3a216318ae739a05daa9e65a9f4c872bebc01c14 100644 (file)
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -93,6 +93,8 @@ struct blk_mq_hw_ctx;
  struct elevator_mq_ops {
         int (*init_sched)(struct request_queue *, struct elevator_type *);
         void (*exit_sched)(struct elevator_queue *);
+       int (*init_hctx)(struct blk_mq_hw_ctx *, unsigned int);
+       void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int);
  
         bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
         bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *);
@@ -104,7 +106,7 @@ struct elevator_mq_ops {
         void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
         struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
         bool (*has_work)(struct blk_mq_hw_ctx *);
-       void (*completed_request)(struct blk_mq_hw_ctx *, struct request *);
+       void (*completed_request)(struct request *);
         void (*started_request)(struct request *);
         void (*requeue_request)(struct request *);
         struct request *(*former_request)(struct request_queue *, struct request *);
@@ -211,7 +213,7 @@ extern ssize_t elv_iosched_show(struct request_queue *, char *);
  extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);
  
  extern int elevator_init(struct request_queue *, char *);
-extern void elevator_exit(struct elevator_queue *);
+extern void elevator_exit(struct request_queue *, struct elevator_queue *);
  extern int elevator_change(struct request_queue *, const char *);
  extern bool elv_bio_merge_ok(struct request *, struct bio *);
  extern struct elevator_queue *elevator_alloc(struct request_queue *,
diff --git a/include/linux/errqueue.h b/include/linux/errqueue.h

index 9ca23fcfb5d73131b564ad81d887929abc0e954b..6fdfc884fdeb3d3cf81dcbd40c52c0b8c8d203b1 100644 (file)
--- a/include/linux/errqueue.h
+++ b/include/linux/errqueue.h
@@ -20,6 +20,8 @@ struct sock_exterr_skb {
         struct sock_extended_err        ee;
         u16                             addr_offset;
         __be16                          port;
+       u8                              opt_stats:1,
+                                       unused:7;
  };
  
  #endif
diff --git a/include/linux/extcon.h b/include/linux/extcon.h

index 7010fb01a81a342e20abd9ac917d5c3f7aa3de14..7e206a9f88db81d2feb152ef4a98a9f0119af09e 100644 (file)
--- a/include/linux/extcon.h
+++ b/include/linux/extcon.h
@@ -236,11 +236,11 @@ extern int extcon_set_property_capability(struct extcon_dev *edev,
                                 unsigned int id, unsigned int prop);
  
  /*
- * Following APIs are to monitor every action of a notifier.
- * Registrar gets notified for every external port of a connection device.
- * Probably this could be used to debug an action of notifier; however,
- * we do not recommend to use this for normal 'notifiee' device drivers who
- * want to be notified by a specific external port of the notifier.
+ * Following APIs are to monitor the status change of the external connectors.
+ * extcon_register_notifier(*edev, id, *nb) : Register a notifier block
+ *                     for specific external connector of the extcon.
+ * extcon_register_notifier_all(*edev, *nb) : Register a notifier block
+ *                     for all supported external connectors of the extcon.
   */
  extern int extcon_register_notifier(struct extcon_dev *edev, unsigned int id,
                                     struct notifier_block *nb);
@@ -253,6 +253,17 @@ extern void devm_extcon_unregister_notifier(struct device *dev,
                                 struct extcon_dev *edev, unsigned int id,
                                 struct notifier_block *nb);
  
+extern int extcon_register_notifier_all(struct extcon_dev *edev,
+                               struct notifier_block *nb);
+extern int extcon_unregister_notifier_all(struct extcon_dev *edev,
+                               struct notifier_block *nb);
+extern int devm_extcon_register_notifier_all(struct device *dev,
+                               struct extcon_dev *edev,
+                               struct notifier_block *nb);
+extern void devm_extcon_unregister_notifier_all(struct device *dev,
+                               struct extcon_dev *edev,
+                               struct notifier_block *nb);
+
  /*
   * Following API get the extcon device from devicetree.
   * This function use phandle of devicetree to get extcon device directly.
diff --git a/include/linux/filter.h b/include/linux/filter.h

index 0c167fdee5f7d126ed4de7e1201d514d1402a5ca..fbf7b39e81035506b73ddc860e3029119104b255 100644 (file)
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -409,6 +409,7 @@ struct bpf_prog {
         u16                     pages;          /* Number of allocated pages */
         kmemcheck_bitfield_begin(meta);
         u16                     jited:1,        /* Is our filter JIT'ed? */
+                               locked:1,       /* Program image locked? */
                                 gpl_compatible:1, /* Is filter GPL compatible? */
                                 cb_access:1,    /* Is control block accessed? */
                                 dst_needed:1,   /* Do we need dst entry? */
@@ -554,22 +555,29 @@ static inline bool bpf_prog_was_classic(const struct bpf_prog *prog)
  #ifdef CONFIG_ARCH_HAS_SET_MEMORY
  static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
  {
-       set_memory_ro((unsigned long)fp, fp->pages);
+       fp->locked = 1;
+       WARN_ON_ONCE(set_memory_ro((unsigned long)fp, fp->pages));
  }
  
  static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
  {
-       set_memory_rw((unsigned long)fp, fp->pages);
+       if (fp->locked) {
+               WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages));
+               /* In case set_memory_rw() fails, we want to be the first
+                * to crash here instead of some random place later on.
+                */
+               fp->locked = 0;
+       }
  }
  
  static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
  {
-       set_memory_ro((unsigned long)hdr, hdr->pages);
+       WARN_ON_ONCE(set_memory_ro((unsigned long)hdr, hdr->pages));
  }
  
  static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
  {
-       set_memory_rw((unsigned long)hdr, hdr->pages);
+       WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
  }
  #else
  static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
diff --git a/include/linux/fs.h b/include/linux/fs.h

index aad3fd0ff5f8314975c93af81d94b3ce88ddbdda..30e5c14bd743364efd77b554fbb97a8649a0f2d1 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2121,6 +2121,9 @@ extern int vfs_ustat(dev_t, struct kstatfs *);
  extern int freeze_super(struct super_block *super);
  extern int thaw_super(struct super_block *super);
  extern bool our_mnt(struct vfsmount *mnt);
+extern __printf(2, 3)
+int super_setup_bdi_name(struct super_block *sb, char *fmt, ...);
+extern int super_setup_bdi(struct super_block *sb);
  
  extern int current_umask(void);
  
@@ -2678,7 +2681,7 @@ static const char * const kernel_read_file_str[] = {
  
  static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id)
  {
-       if (id < 0 || id >= READING_MAX_ID)
+       if ((unsigned)id >= READING_MAX_ID)
                 return kernel_read_file_str[READING_UNKNOWN];
  
         return kernel_read_file_str[id];
diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h

index 547f81592ba134bce5d222a66405c782a91b80d8..10c1abfbac6c45d1049fdf9f1b1f133371521461 100644 (file)
--- a/include/linux/fscrypt_common.h
+++ b/include/linux/fscrypt_common.h
@@ -87,7 +87,6 @@ struct fscrypt_operations {
         unsigned int flags;
         const char *key_prefix;
         int (*get_context)(struct inode *, void *, size_t);
-       int (*prepare_context)(struct inode *);
         int (*set_context)(struct inode *, const void *, size_t, void *);
         int (*dummy_context)(struct inode *);
         bool (*is_encrypted)(struct inode *);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h

index a999d281a2f1e41ce6cb7613dc5ecd8e0d4797c8..acff9437e5c3776e48a9357af40291165719416d 100644 (file)
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -159,21 +159,14 @@ struct badblocks;
  #if defined(CONFIG_BLK_DEV_INTEGRITY)
  
  struct blk_integrity {
-       struct blk_integrity_profile    *profile;
-       unsigned char                   flags;
-       unsigned char                   tuple_size;
-       unsigned char                   interval_exp;
-       unsigned char                   tag_size;
+       const struct blk_integrity_profile      *profile;
+       unsigned char                           flags;
+       unsigned char                           tuple_size;
+       unsigned char                           interval_exp;
+       unsigned char                           tag_size;
  };
  
  #endif /* CONFIG_BLK_DEV_INTEGRITY */
-struct disk_devt {
-       atomic_t count;
-       void (*release)(struct disk_devt *disk_devt);
-};
-
-void put_disk_devt(struct disk_devt *disk_devt);
-void get_disk_devt(struct disk_devt *disk_devt);
  
  struct gendisk {
         /* major, first_minor and minors are input parameters only,
@@ -183,7 +176,6 @@ struct gendisk {
         int first_minor;
         int minors;                     /* maximum number of minors, =1 for
                                           * disks that can't be partitioned. */
-       struct disk_devt *disk_devt;
  
         char disk_name[DISK_NAME_LEN];  /* name of major driver */
         char *(*devnode)(struct gendisk *gd, umode_t *mode);
@@ -730,11 +722,9 @@ static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
  #if defined(CONFIG_BLK_DEV_INTEGRITY)
  extern void blk_integrity_add(struct gendisk *);
  extern void blk_integrity_del(struct gendisk *);
-extern void blk_integrity_revalidate(struct gendisk *);
  #else  /* CONFIG_BLK_DEV_INTEGRITY */
  static inline void blk_integrity_add(struct gendisk *disk) { }
  static inline void blk_integrity_del(struct gendisk *disk) { }
-static inline void blk_integrity_revalidate(struct gendisk *disk) { }
  #endif /* CONFIG_BLK_DEV_INTEGRITY */
  
  #else /* CONFIG_BLOCK */
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h

index 2484b2fcc6eb58d0139605359fe97b285df8e5f5..933d936566055de430f9db64ae152eb31785b7ff 100644 (file)
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -143,15 +143,6 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev,
                                                 struct fwnode_handle *child,
                                                 enum gpiod_flags flags,
                                                 const char *label);
-/* FIXME: delete this helper when users are switched over */
-static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev,
-                         const char *con_id, struct fwnode_handle *child)
-{
-       return devm_fwnode_get_index_gpiod_from_child(dev, con_id,
-                                                     0, child,
-                                                     GPIOD_ASIS,
-                                                     "?");
-}
  
  #else /* CONFIG_GPIOLIB */
  
@@ -444,13 +435,6 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev,
         return ERR_PTR(-ENOSYS);
  }
  
-/* FIXME: delete this when all users are switched over */
-static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev,
-                         const char *con_id, struct fwnode_handle *child)
-{
-       return ERR_PTR(-ENOSYS);
-}
-
  #endif /* CONFIG_GPIOLIB */
  
  static inline
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h

index 503099d8aada5351b2e30b04cf79e651d57a23d7..b857fc8cc2ecaef504a12e1a88d6e87fa38045ad 100644 (file)
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -122,7 +122,7 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
  struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
                                 pud_t *pud, int flags);
  int pmd_huge(pmd_t pmd);
-int pud_huge(pud_t pmd);
+int pud_huge(pud_t pud);
  unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
                 unsigned long address, unsigned long end, pgprot_t newprot);
  
@@ -197,6 +197,9 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
  #ifndef pgd_huge
  #define pgd_huge(x)    0
  #endif
+#ifndef p4d_huge
+#define p4d_huge(x)    0
+#endif
  
  #ifndef pgd_write
  static inline int pgd_write(pgd_t pgd)
diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h

index 78d59dba563e33e96f6658bfb764268e97f99573..ceb751987c401e4015fc0c70ffb13d8d5b3eef27 100644 (file)
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -88,6 +88,7 @@ enum hwmon_temp_attributes {
  #define HWMON_T_CRIT_HYST      BIT(hwmon_temp_crit_hyst)
  #define HWMON_T_EMERGENCY      BIT(hwmon_temp_emergency)
  #define HWMON_T_EMERGENCY_HYST BIT(hwmon_temp_emergency_hyst)
+#define HWMON_T_ALARM          BIT(hwmon_temp_alarm)
  #define HWMON_T_MIN_ALARM      BIT(hwmon_temp_min_alarm)
  #define HWMON_T_MAX_ALARM      BIT(hwmon_temp_max_alarm)
  #define HWMON_T_CRIT_ALARM     BIT(hwmon_temp_crit_alarm)
@@ -336,7 +337,7 @@ struct hwmon_ops {
         int (*read)(struct device *dev, enum hwmon_sensor_types type,
                     u32 attr, int channel, long *val);
         int (*read_string)(struct device *dev, enum hwmon_sensor_types type,
-                   u32 attr, int channel, char **str);
+                   u32 attr, int channel, const char **str);
         int (*write)(struct device *dev, enum hwmon_sensor_types type,
                      u32 attr, int channel, long val);
  };
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h

index 62bbf3c1aa4a04409fac1a001b03a87cf0162fd1..970771a5f7390268abb5c43ba1e1c8df48f2caa4 100644 (file)
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -845,6 +845,13 @@ struct vmbus_channel {
          * link up channels based on their CPU affinity.
          */
         struct list_head percpu_list;
+
+       /*
+        * Defer freeing the channel until all CPUs have
+        * gone through a grace period.
+        */
+       struct rcu_head rcu;
+
         /*
          * For performance critical channels (storage, networking
          * etc,), Hyper-V has a mechanism to enhance the throughput
@@ -1430,9 +1437,6 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf,
                                 const int *srv_version, int srv_vercnt,
                                 int *nego_fw_version, int *nego_srv_version);
  
-void hv_event_tasklet_disable(struct vmbus_channel *channel);
-void hv_event_tasklet_enable(struct vmbus_channel *channel);
-
  void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid);
  
  void vmbus_setevent(struct vmbus_channel *channel);
diff --git a/include/linux/ide.h b/include/linux/ide.h

index 2f51c1724b5af647423770dfadafaa7a8837b600..6980ca322074b9cd80f69517195a02ea837c3f34 100644 (file)
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -88,7 +88,7 @@ static inline bool ata_pm_request(struct request *rq)
                  ide_req(rq)->type == ATA_PRIV_PM_RESUME);
  }
  
-/* Error codes returned in rq->errors to the higher part of the driver. */
+/* Error codes returned in result to the higher part of the driver. */
  enum {
         IDE_DRV_ERROR_GENERAL   = 101,
         IDE_DRV_ERROR_FILEMARK  = 102,
diff --git a/include/linux/iio/sw_device.h b/include/linux/iio/sw_device.h

index 23ca41515527965239a56e92b909d53e75055cd4..fa7931933067982b2bac57f4c73aac039db8f692 100644 (file)
--- a/include/linux/iio/sw_device.h
+++ b/include/linux/iio/sw_device.h
@@ -62,7 +62,7 @@ void iio_swd_group_init_type_name(struct iio_sw_device *d,
                                   const char *name,
                                   struct config_item_type *type)
  {
-#ifdef CONFIG_CONFIGFS_FS
+#if IS_ENABLED(CONFIG_CONFIGFS_FS)
         config_group_init_type_name(&d->group, name, type);
  #endif
  }
diff --git a/include/linux/inet.h b/include/linux/inet.h

index 4cca05c9678e760a07c0c8dcd88f73cc90ddfdc1..636ebe87e6f886a2edbb6e3e8bd01b360998127a 100644 (file)
--- a/include/linux/inet.h
+++ b/include/linux/inet.h
@@ -43,6 +43,8 @@
  #define _LINUX_INET_H
  
  #include <linux/types.h>
+#include <net/net_namespace.h>
+#include <linux/socket.h>
  
  /*
   * These mimic similar macros defined in user-space for inet_ntop(3).
@@ -54,4 +56,8 @@
  extern __be32 in_aton(const char *str);
  extern int in4_pton(const char *src, int srclen, u8 *dst, int delim, const char **end);
  extern int in6_pton(const char *src, int srclen, u8 *dst, int delim, const char **end);
+
+extern int inet_pton_with_scope(struct net *net, unsigned short af,
+               const char *src, const char *port, struct sockaddr_storage *addr);
+
  #endif /* _LINUX_INET_H */
diff --git a/include/linux/iommu.h b/include/linux/iommu.h

index 6a6de187ddc0ff1e0e737f94261211b7ea4408e9..2e4de0deee531adbd7c1cd8ff1d25e0d0eb98d47 100644 (file)
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -125,9 +125,16 @@ enum iommu_attr {
  };
  
  /* These are the possible reserved region types */
-#define IOMMU_RESV_DIRECT      (1 << 0)
-#define IOMMU_RESV_RESERVED    (1 << 1)
-#define IOMMU_RESV_MSI         (1 << 2)
+enum iommu_resv_type {
+       /* Memory regions which must be mapped 1:1 at all times */
+       IOMMU_RESV_DIRECT,
+       /* Arbitrary "never map this or give it to a device" address ranges */
+       IOMMU_RESV_RESERVED,
+       /* Hardware MSI region (untranslated) */
+       IOMMU_RESV_MSI,
+       /* Software-managed MSI translation window */
+       IOMMU_RESV_SW_MSI,
+};
  
  /**
   * struct iommu_resv_region - descriptor for a reserved memory region
@@ -142,7 +149,7 @@ struct iommu_resv_region {
         phys_addr_t             start;
         size_t                  length;
         int                     prot;
-       int                     type;
+       enum iommu_resv_type    type;
  };
  
  #ifdef CONFIG_IOMMU_API
@@ -288,7 +295,8 @@ extern void iommu_get_resv_regions(struct device *dev, struct list_head *list);
  extern void iommu_put_resv_regions(struct device *dev, struct list_head *list);
  extern int iommu_request_dm_for_dev(struct device *dev);
  extern struct iommu_resv_region *
-iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, int type);
+iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot,
+                       enum iommu_resv_type type);
  extern int iommu_get_group_resv_regions(struct iommu_group *group,
                                         struct list_head *head);
  
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h

index 672cfef72fc85da6a30579b00bf7e1349758a3ac..97cbca19430d82aa2b4c835db1edf2d6620517ba 100644 (file)
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -373,6 +373,8 @@
  #define ICC_IGRPEN0_EL1_MASK           (1 << ICC_IGRPEN0_EL1_SHIFT)
  #define ICC_IGRPEN1_EL1_SHIFT          0
  #define ICC_IGRPEN1_EL1_MASK           (1 << ICC_IGRPEN1_EL1_SHIFT)
+#define ICC_SRE_EL1_DIB                        (1U << 2)
+#define ICC_SRE_EL1_DFB                        (1U << 1)
  #define ICC_SRE_EL1_SRE                        (1U << 0)
  
  /*
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h

index eafc965b3eb8487b67ae0db870e74f0471199fc2..dc30f3d057eb0801e9ae8d22fc3ce11943d188d1 100644 (file)
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -96,6 +96,9 @@
  #define GICH_MISR_EOI                  (1 << 0)
  #define GICH_MISR_U                    (1 << 1)
  
+#define GICV_PMR_PRIORITY_SHIFT                3
+#define GICV_PMR_PRIORITY_MASK         (0x1f << GICV_PMR_PRIORITY_SHIFT)
+
  #ifndef __ASSEMBLY__
  
  #include <linux/irqdomain.h>
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h

index 188eced6813eddb9c313fdb59016b972835e7674..9f3616085423cfca654264a4f5b9fed022431997 100644 (file)
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -524,6 +524,10 @@ static inline struct irq_domain *irq_find_matching_fwnode(
  {
         return NULL;
  }
+static inline bool irq_domain_check_msi_remap(void)
+{
+       return false;
+}
  #endif /* !CONFIG_IRQ_DOMAIN */
  
  #endif /* _LINUX_IRQDOMAIN_H */
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h

index 8e06d758ee48a2d92da7b9cfba79175334423b48..2afd74b9d844095375be39342e78b5a6749e2a52 100644 (file)
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -90,6 +90,13 @@ extern bool static_key_initialized;
  struct static_key {
         atomic_t enabled;
  /*
+ * Note:
+ *   To make anonymous unions work with old compilers, their static
+ *   initialization must be wrapped in braces. The braced initializer is
+ *   positional, so it depends on the order of the struct's members. If
+ *   any fields are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE
+ *   may need to be modified.
+ *
   * bit 0 => 1 if key is initially true
   *         0 if initially false
   * bit 1 => 1 if points to struct static_key_mod
@@ -166,10 +173,10 @@ extern void static_key_disable(struct static_key *key);
   */
  #define STATIC_KEY_INIT_TRUE                                   \
         { .enabled = { 1 },                                     \
-         .entries = (void *)JUMP_TYPE_TRUE }
+         { .entries = (void *)JUMP_TYPE_TRUE } }
  #define STATIC_KEY_INIT_FALSE                                  \
         { .enabled = { 0 },                                     \
-         .entries = (void *)JUMP_TYPE_FALSE }
+         { .entries = (void *)JUMP_TYPE_FALSE } }
  
  #else  /* !HAVE_JUMP_LABEL */
  
diff --git a/include/linux/kasan.h b/include/linux/kasan.h

index ceb3fe78a0d39d6b1268c0b92485b8ee110e0b4a..a5c7046f26b4b93f9199c2f4800b7a1da77791da 100644 (file)
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -6,6 +6,7 @@
  struct kmem_cache;
  struct page;
  struct vm_struct;
+struct task_struct;
  
  #ifdef CONFIG_KASAN
  
@@ -18,6 +19,7 @@ extern unsigned char kasan_zero_page[PAGE_SIZE];
  extern pte_t kasan_zero_pte[PTRS_PER_PTE];
  extern pmd_t kasan_zero_pmd[PTRS_PER_PMD];
  extern pud_t kasan_zero_pud[PTRS_PER_PUD];
+extern p4d_t kasan_zero_p4d[PTRS_PER_P4D];
  
  void kasan_populate_zero_shadow(const void *shadow_start,
                                 const void *shadow_end);
@@ -74,6 +76,9 @@ size_t ksize(const void *);
  static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); }
  size_t kasan_metadata_size(struct kmem_cache *cache);
  
+bool kasan_save_enable_multi_shot(void);
+void kasan_restore_multi_shot(bool enabled);
+
  #else /* CONFIG_KASAN */
  
  static inline void kasan_unpoison_shadow(const void *address, size_t size) {}
diff --git a/include/linux/kobject.h b/include/linux/kobject.h

index e6284591599ec5361b9abcffb2f731512b76eec9..ca85cb80e99a60e33df79e2ed0d8815953d1d21d 100644 (file)
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -108,6 +108,8 @@ extern int __must_check kobject_rename(struct kobject *, const char *new_name);
  extern int __must_check kobject_move(struct kobject *, struct kobject *);
  
  extern struct kobject *kobject_get(struct kobject *kobj);
+extern struct kobject * __must_check kobject_get_unless_zero(
+                                               struct kobject *kobj);
  extern void kobject_put(struct kobject *kobj);
  
  extern const void *kobject_namespace(struct kobject *kobj);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h

index 2c14ad9809da94bde727f3ebc744fabd47673f98..d0250744507a284138d0e7e702bb64e1d205e769 100644 (file)
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -162,8 +162,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
                     int len, void *val);
  int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
                             int len, struct kvm_io_device *dev);
-int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
-                             struct kvm_io_device *dev);
+void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+                              struct kvm_io_device *dev);
  struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
                                          gpa_t addr);
  
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h

index ca45e4a088a91eb929a208de5ffb061b33c9ff2c..7dfa56ebbc6d0f1bd4e215c93d5e642b3231334b 100644 (file)
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -56,7 +56,6 @@ typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32,
  typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *);
  typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int);
  typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
-typedef int (nvm_erase_blk_fn)(struct nvm_dev *, struct nvm_rq *);
  typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *);
  typedef void (nvm_destroy_dma_pool_fn)(void *);
  typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
@@ -70,7 +69,6 @@ struct nvm_dev_ops {
         nvm_op_set_bb_fn        *set_bb_tbl;
  
         nvm_submit_io_fn        *submit_io;
-       nvm_erase_blk_fn        *erase_block;
  
         nvm_create_dma_pool_fn  *create_dma_pool;
         nvm_destroy_dma_pool_fn *destroy_dma_pool;
@@ -125,7 +123,7 @@ enum {
         /* NAND Access Modes */
         NVM_IO_SUSPEND          = 0x80,
         NVM_IO_SLC_MODE         = 0x100,
-       NVM_IO_SCRAMBLE_DISABLE = 0x200,
+       NVM_IO_SCRAMBLE_ENABLE  = 0x200,
  
         /* Block Types */
         NVM_BLK_T_FREE          = 0x0,
@@ -438,7 +436,8 @@ static inline int ppa_cmp_blk(struct ppa_addr ppa1, struct ppa_addr ppa2)
  
  typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
  typedef sector_t (nvm_tgt_capacity_fn)(void *);
-typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *);
+typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *,
+                               int flags);
  typedef void (nvm_tgt_exit_fn)(void *);
  typedef int (nvm_tgt_sysfs_init_fn)(struct gendisk *);
  typedef void (nvm_tgt_sysfs_exit_fn)(struct gendisk *);
@@ -479,10 +478,10 @@ extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *,
                               int, int);
  extern int nvm_max_phys_sects(struct nvm_tgt_dev *);
  extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *);
-extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *,
+extern int nvm_erase_sync(struct nvm_tgt_dev *, struct ppa_addr *, int);
+extern int nvm_set_rqd_ppalist(struct nvm_tgt_dev *, struct nvm_rq *,
                                         const struct ppa_addr *, int, int);
-extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *);
-extern int nvm_erase_blk(struct nvm_tgt_dev *, struct ppa_addr *, int);
+extern void nvm_free_rqd_ppalist(struct nvm_tgt_dev *, struct nvm_rq *);
  extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *,
                            void *);
  extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t);
diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h

index b01fe100908430708df0df5162594b497ffdad62..87ff4f58a2f0182ec0586c0dee923bc30e004149 100644 (file)
--- a/include/linux/list_nulls.h
+++ b/include/linux/list_nulls.h
@@ -29,6 +29,11 @@ struct hlist_nulls_node {
         ((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls))
  
  #define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member)
+
+#define hlist_nulls_entry_safe(ptr, type, member) \
+       ({ typeof(ptr) ____ptr = (ptr); \
+          !is_a_nulls(____ptr) ? hlist_nulls_entry(____ptr, type, member) : NULL; \
+       })
  /**
   * ptr_is_a_nulls - Test if a ptr is a nulls
   * @ptr: ptr to be tested
diff --git a/include/linux/mailbox/brcm-message.h b/include/linux/mailbox/brcm-message.h

index 6b55c938b4015f23ae2f3deee224817902a958c8..c20b4843fc2d60ab9a0249ff7d08799a6bcab1d8 100644 (file)
--- a/include/linux/mailbox/brcm-message.h
+++ b/include/linux/mailbox/brcm-message.h
@@ -16,6 +16,7 @@
  
  enum brcm_message_type {
         BRCM_MESSAGE_UNKNOWN = 0,
+       BRCM_MESSAGE_BATCH,
         BRCM_MESSAGE_SPU,
         BRCM_MESSAGE_SBA,
         BRCM_MESSAGE_MAX,
@@ -23,23 +24,28 @@ enum brcm_message_type {
  
  struct brcm_sba_command {
         u64 cmd;
+       u64 *cmd_dma;
+       dma_addr_t cmd_dma_addr;
  #define BRCM_SBA_CMD_TYPE_A            BIT(0)
  #define BRCM_SBA_CMD_TYPE_B            BIT(1)
  #define BRCM_SBA_CMD_TYPE_C            BIT(2)
  #define BRCM_SBA_CMD_HAS_RESP          BIT(3)
  #define BRCM_SBA_CMD_HAS_OUTPUT                BIT(4)
         u64 flags;
-       dma_addr_t input;
-       size_t input_len;
         dma_addr_t resp;
         size_t resp_len;
-       dma_addr_t output;
-       size_t output_len;
+       dma_addr_t data;
+       size_t data_len;
  };
  
  struct brcm_message {
         enum brcm_message_type type;
         union {
+               struct {
+                       struct brcm_message *msgs;
+                       unsigned int msgs_queued;
+                       unsigned int msgs_count;
+               } batch;
                 struct {
                         struct scatterlist *src;
                         struct scatterlist *dst;
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h

index 5af37730388074ff65d4e42143c40c2880b5aff7..bb7250c45cb8356b03df2d4b9f1325dc6e30069b 100644 (file)
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -740,6 +740,12 @@ static inline bool mem_cgroup_oom_synchronize(bool wait)
         return false;
  }
  
+static inline void mem_cgroup_update_page_stat(struct page *page,
+                                              enum mem_cgroup_stat_index idx,
+                                              int nr)
+{
+}
+
  static inline void mem_cgroup_inc_page_stat(struct page *page,
                                             enum mem_cgroup_stat_index idx)
  {
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h

index 7a01c94496f14eac3c1ed13b37a9811cc1b6f853..3eef9fb9968ae730716a79bc9e3aef8be4e2e650 100644 (file)
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -35,10 +35,11 @@
   * Max bus-specific overhead incurred by request/responses.
   * I2C requires 1 additional byte for requests.
   * I2C requires 2 additional bytes for responses.
+ * SPI requires up to 32 additional bytes for responses.
   * */
  #define EC_PROTO_VERSION_UNKNOWN       0
  #define EC_MAX_REQUEST_OVERHEAD                1
-#define EC_MAX_RESPONSE_OVERHEAD       2
+#define EC_MAX_RESPONSE_OVERHEAD       32
  
  /*
   * Command interface between EC and AP, for LPC, I2C and SPI interfaces.
diff --git a/include/linux/mg_disk.h b/include/linux/mg_disk.h

deleted file mode 100644 (file)

index e11f4d9..0000000
--- a/include/linux/mg_disk.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- *  include/linux/mg_disk.c
- *
- *  Private data for mflash platform driver
- *
- * (c) 2008 mGine Co.,LTD
- * (c) 2008 unsik Kim <donari75@gmail.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- */
-
-#ifndef __MG_DISK_H__
-#define __MG_DISK_H__
-
-/* name for platform device */
-#define MG_DEV_NAME "mg_disk"
-
-/* names of GPIO resource */
-#define MG_RST_PIN     "mg_rst"
-/* except MG_BOOT_DEV, reset-out pin should be assigned */
-#define MG_RSTOUT_PIN  "mg_rstout"
-
-/* device attribution */
-/* use mflash as boot device */
-#define MG_BOOT_DEV            (1 << 0)
-/* use mflash as storage device */
-#define MG_STORAGE_DEV         (1 << 1)
-/* same as MG_STORAGE_DEV, but bootloader already done reset sequence */
-#define MG_STORAGE_DEV_SKIP_RST        (1 << 2)
-
-/* private driver data */
-struct mg_drv_data {
-       /* disk resource */
-       u32 use_polling;
-
-       /* device attribution */
-       u32 dev_attr;
-
-       /* internally used */
-       void *host;
-};
-
-#endif
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h

index 7e66e4f62858f395cd000226e9580785b03a4cf1..1beb1ec2fbdf339b34affc69508a5f5462b409b0 100644 (file)
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -476,6 +476,7 @@ enum {
  enum {
         MLX4_INTERFACE_STATE_UP         = 1 << 0,
         MLX4_INTERFACE_STATE_DELETION   = 1 << 1,
+       MLX4_INTERFACE_STATE_NOWAIT     = 1 << 2,
  };
  
  #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \
diff --git a/include/linux/mm.h b/include/linux/mm.h

index 0d65dd72c0f49e230613ac268d29c7b377962836..00a8fa7e366a0320210941ca39dd53fed97d4e2c 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -32,6 +32,8 @@ struct user_struct;
  struct writeback_control;
  struct bdi_writeback;
  
+void init_mm_internals(void);
+
  #ifndef CONFIG_NEED_MULTIPLE_NODES     /* Don't use mapnrs, do it properly */
  extern unsigned long max_mapnr;
  
@@ -1560,14 +1562,24 @@ static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
         return ptep;
  }
  
+#ifdef __PAGETABLE_P4D_FOLDED
+static inline int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
+                                               unsigned long address)
+{
+       return 0;
+}
+#else
+int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
+#endif
+
  #ifdef __PAGETABLE_PUD_FOLDED
-static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd,
+static inline int __pud_alloc(struct mm_struct *mm, p4d_t *p4d,
                                                 unsigned long address)
  {
         return 0;
  }
  #else
-int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
+int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address);
  #endif
  
  #if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU)
@@ -1619,11 +1631,22 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
   * Remove it when 4level-fixup.h has been removed.
   */
  #if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)
-static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+
+#ifndef __ARCH_HAS_5LEVEL_HACK
+static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
+               unsigned long address)
+{
+       return (unlikely(pgd_none(*pgd)) && __p4d_alloc(mm, pgd, address)) ?
+               NULL : p4d_offset(pgd, address);
+}
+
+static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d,
+               unsigned long address)
  {
-       return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))?
-               NULL: pud_offset(pgd, address);
+       return (unlikely(p4d_none(*p4d)) && __pud_alloc(mm, p4d, address)) ?
+               NULL : pud_offset(p4d, address);
  }
+#endif /* !__ARCH_HAS_5LEVEL_HACK */
  
  static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
  {
@@ -2385,7 +2408,8 @@ void sparse_mem_maps_populate_node(struct page **map_map,
  
  struct page *sparse_mem_map_populate(unsigned long pnum, int nid);
  pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
-pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node);
+p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
+pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
  pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
  pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
  void *vmemmap_alloc_block(unsigned long size, int node);
diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h

index aab032a6ae6124de63b7934f49d0dc564b0d2dc3..97ca105347a6c5e608297ae1a3562925dc6f6834 100644 (file)
--- a/include/linux/mmc/sdio_func.h
+++ b/include/linux/mmc/sdio_func.h
@@ -53,7 +53,7 @@ struct sdio_func {
         unsigned int            state;          /* function state */
  #define SDIO_STATE_PRESENT     (1<<0)          /* present in sysfs */
  
-       u8                      tmpbuf[4];      /* DMA:able scratch buffer */
+       u8                      *tmpbuf;        /* DMA:able scratch buffer */
  
         unsigned                num_info;       /* number of info strings */
         const char              **info;         /* info strings */
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h

index 51891fb0d3ce075e9343495e54de6d8d03211ac9..c91b3bcd158f8fa8b2dbc3d4738ade5fdcc600cb 100644 (file)
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -394,18 +394,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
         ___pud;                                                         \
  })
  
-#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd)           \
-({                                                                     \
-       unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;              \
-       pmd_t ___pmd;                                                   \
-                                                                       \
-       ___pmd = pmdp_huge_get_and_clear(__mm, __haddr, __pmd);         \
-       mmu_notifier_invalidate_range(__mm, ___haddr,                   \
-                                     ___haddr + HPAGE_PMD_SIZE);       \
-                                                                       \
-       ___pmd;                                                         \
-})
-
  /*
   * set_pte_at_notify() sets the pte _after_ running the notifier.
   * This is safe to start by updating the secondary MMUs, because the primary MMU
@@ -489,7 +477,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
  #define        ptep_clear_flush_notify ptep_clear_flush
  #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
  #define pudp_huge_clear_flush_notify pudp_huge_clear_flush
-#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear
  #define set_pte_at_notify set_pte_at
  
  #endif /* CONFIG_MMU_NOTIFIER */
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h

index eebdc63cf6af94a5ea9a5c4703c416f87e0079a8..79b176eca04a18c0c49c98a693e6ebe35e3eeee3 100644 (file)
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -334,11 +334,6 @@ struct mtd_info {
         int (*_get_device) (struct mtd_info *mtd);
         void (*_put_device) (struct mtd_info *mtd);
  
-       /* Backing device capabilities for this device
-        * - provides mmap capabilities
-        */
-       struct backing_dev_info *backing_dev_info;
-
         struct notifier_block reboot_notifier;  /* default mode before reboot */
  
         /* ECC status information */
diff --git a/include/linux/net.h b/include/linux/net.h

index cd0c8bd0a1dec0d2047509177ee8d33de1f185d4..0620f5e18c96b706b70fb71005b29008a11fffb1 100644 (file)
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -146,7 +146,7 @@ struct proto_ops {
         int             (*socketpair)(struct socket *sock1,
                                       struct socket *sock2);
         int             (*accept)    (struct socket *sock,
-                                     struct socket *newsock, int flags);
+                                     struct socket *newsock, int flags, bool kern);
         int             (*getname)   (struct socket *sock,
                                       struct sockaddr *addr,
                                       int *sockaddr_len, int peer);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h

index b34097c6784864193e67053f8fa8e5bbbc092071..e1502c55741ef805eb2fcc2c2dadfc07d2b0d0dc 100644 (file)
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -133,7 +133,6 @@ struct nfs_server {
         struct rpc_clnt *       client_acl;     /* ACL RPC client handle */
         struct nlm_host         *nlm_host;      /* NLM client handle */
         struct nfs_iostats __percpu *io_stats;  /* I/O statistics */
-       struct backing_dev_info backing_dev_info;
         atomic_long_t           writeback;      /* number of writeback pages */
         int                     flags;          /* various flags */
         unsigned int            caps;           /* server capabilities */
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h

index f21471f7ee407a80534047004ecd784ebdf347d5..0db37158a61d4e4e5af0d16d14c07510ef660330 100644 (file)
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -137,9 +137,9 @@ enum nvmefc_fcp_datadir {
   *             transferred. Should equal payload_length on success.
   * @rcv_rsplen: length, in bytes, of the FCP RSP IU received.
   * @status:    Completion status of the FCP operation. must be 0 upon success,
- *             NVME_SC_FC_xxx value upon failure. Note: this is NOT a
- *             reflection of the NVME CQE completion status. Only the status
- *             of the FCP operation at the NVME-FC level.
+ *             negative errno value upon failure (ex: -EIO). Note: this is
+ *             NOT a reflection of the NVME CQE completion status. Only the
+ *             status of the FCP operation at the NVME-FC level.
   */
  struct nvmefc_fcp_req {
         void                    *cmdaddr;
@@ -533,9 +533,6 @@ enum {
                                          * rsp as well
                                          */
         NVMET_FCOP_RSP          = 4,    /* send rsp frame */
-       NVMET_FCOP_ABORT        = 5,    /* abort exchange via ABTS */
-       NVMET_FCOP_BA_ACC       = 6,    /* send BA_ACC */
-       NVMET_FCOP_BA_RJT       = 7,    /* send BA_RJT */
  };
  
  /**
@@ -572,8 +569,6 @@ enum {
   *     upon compeletion of the operation.  The nvmet-fc layer will also set a
   *     private pointer for its own use in the done routine.
   *
- * Note: the LLDD must never fail a NVMET_FCOP_ABORT request !!
- *
   * Values set by the NVMET-FC layer prior to calling the LLDD fcp_op
   * entrypoint.
   * @op:       Indicates the FCP IU operation to perform (see NVMET_FCOP_xxx)
@@ -655,6 +650,22 @@ enum {
                  * on. The transport should pick a cpu to schedule the work
                  * on.
                  */
+       NVMET_FCTGTFEAT_CMD_IN_ISR = (1 << 2),
+               /* Bit 2: When 0, the LLDD is calling the cmd rcv handler
+                * in a non-isr context, allowing the transport to finish
+                * op completion in the calling context. When 1, the LLDD
+                * is calling the cmd rcv handler in an ISR context,
+                * requiring the transport to transition to a workqueue
+                * for op completion.
+                */
+       NVMET_FCTGTFEAT_OPDONE_IN_ISR = (1 << 3),
+               /* Bit 3: When 0, the LLDD is calling the op done handler
+                * in a non-isr context, allowing the transport to finish
+                * op completion in the calling context. When 1, the LLDD
+                * is calling the op done handler in an ISR context,
+                * requiring the transport to transition to a workqueue
+                * for op completion.
+                */
  };
  
  
@@ -725,12 +736,12 @@ struct nvmet_fc_target_port {
   *       be freed/released.
   *       Entrypoint is Mandatory.
   *
- * @fcp_op:  Called to perform a data transfer, transmit a response, or
- *       abort an FCP opertion. The nvmefc_tgt_fcp_req structure is the same
- *       LLDD-supplied exchange structure specified in the
- *       nvmet_fc_rcv_fcp_req() call made when the FCP CMD IU was received.
- *       The op field in the structure shall indicate the operation for
- *       the LLDD to perform relative to the io.
+ * @fcp_op:  Called to perform a data transfer or transmit a response.
+ *       The nvmefc_tgt_fcp_req structure is the same LLDD-supplied
+ *       exchange structure specified in the nvmet_fc_rcv_fcp_req() call
+ *       made when the FCP CMD IU was received. The op field in the
+ *       structure shall indicate the operation for the LLDD to perform
+ *       relative to the io.
   *         NVMET_FCOP_READDATA operation: the LLDD is to send the
   *           payload data (described by sglist) to the host in 1 or
   *           more FC sequences (preferrably 1).  Note: the fc-nvme layer
@@ -752,29 +763,31 @@ struct nvmet_fc_target_port {
   *           successfully, the LLDD is to update the nvmefc_tgt_fcp_req
   *           transferred_length field and may subsequently transmit the
   *           FCP_RSP iu payload (described by rspbuf, rspdma, rsplen).
- *           The LLDD is to await FCP_CONF reception to confirm the RSP
- *           reception by the host. The LLDD may retramsit the FCP_RSP iu
- *           if necessary per FC-NVME. Upon reception of FCP_CONF, or upon
- *           FCP_CONF failure, the LLDD is to set the nvmefc_tgt_fcp_req
- *           fcp_error field and consider the operation complete..
+ *           If FCP_CONF is supported, the LLDD is to await FCP_CONF
+ *           reception to confirm the RSP reception by the host. The LLDD
+ *           may retransmit the FCP_RSP iu if necessary per FC-NVME. Upon
+ *           transmission of the FCP_RSP iu if FCP_CONF is not supported,
+ *           or upon success/failure of FCP_CONF if it is supported, the
+ *           LLDD is to set the nvmefc_tgt_fcp_req fcp_error field and
+ *           consider the operation complete.
   *         NVMET_FCOP_RSP: the LLDD is to transmit the FCP_RSP iu payload
- *           (described by rspbuf, rspdma, rsplen).  The LLDD is to await
- *           FCP_CONF reception to confirm the RSP reception by the host.
- *           The LLDD may retramsit the FCP_RSP iu if necessary per FC-NVME.
- *           Upon reception of FCP_CONF, or upon FCP_CONF failure, the
+ *           (described by rspbuf, rspdma, rsplen). If FCP_CONF is
+ *           supported, the LLDD is to await FCP_CONF reception to confirm
+ *           the RSP reception by the host. The LLDD may retransmit the
+ *           FCP_RSP iu if FCP_CONF is not received per FC-NVME. Upon
+ *           transmission of the FCP_RSP iu if FCP_CONF is not supported,
+ *           or upon success/failure of FCP_CONF if it is supported, the
   *           LLDD is to set the nvmefc_tgt_fcp_req fcp_error field and
- *           consider the operation complete..
- *         NVMET_FCOP_ABORT: the LLDD is to terminate the exchange
- *           corresponding to the fcp operation. The LLDD shall send
- *           ABTS and follow FC exchange abort-multi rules, including
- *           ABTS retries and possible logout.
+ *           consider the operation complete.
   *       Upon completing the indicated operation, the LLDD is to set the
   *       status fields for the operation (tranferred_length and fcp_error
- *       status) in the request, then all the "done" routine
- *       indicated in the fcp request.  Upon return from the "done"
- *       routine for either a NVMET_FCOP_RSP or NVMET_FCOP_ABORT operation
- *       the fc-nvme layer will not longer reference the fcp request,
- *       allowing the LLDD to free/release the fcp request.
+ *       status) in the request, then call the "done" routine
+ *       indicated in the fcp request. After the operation completes,
+ *       regardless of whether the FCP_RSP iu was successfully transmitted,
+ *       the LLDD-supplied exchange structure must remain valid until the
+ *       transport calls the fcp_req_release() callback to return ownership
+ *       of the exchange structure back to the LLDD so that it may be used
+ *       for another fcp command.
   *       Note: when calling the done routine for READDATA or WRITEDATA
   *       operations, the fc-nvme layer may immediate convert, in the same
   *       thread and before returning to the LLDD, the fcp operation to
@@ -786,6 +799,22 @@ struct nvmet_fc_target_port {
   *       Returns 0 on success, -<errno> on failure (Ex: -EIO)
   *       Entrypoint is Mandatory.
   *
+ * @fcp_abort:  Called by the transport to abort an active command.
+ *       The command may be in-between operations (nothing active in LLDD)
+ *       or may have an active WRITEDATA operation pending. The LLDD is to
+ *       initiate the ABTS process for the command and return from the
+ *       callback. The ABTS does not need to be complete on the command.
+ *       The fcp_abort callback inherently cannot fail. After the
+ *       fcp_abort() callback completes, the transport will wait for any
+ *       outstanding operation (if there was one) to complete, then will
+ *       call the fcp_req_release() callback to return the command's
+ *       exchange context back to the LLDD.
+ *
+ * @fcp_req_release:  Called by the transport to return a nvmefc_tgt_fcp_req
+ *       to the LLDD after all operations on the fcp operation are complete.
+ *       This may be due to the command completing or upon completion of
+ *       abort cleanup.
+ *
   * @max_hw_queues:  indicates the maximum number of hw queues the LLDD
   *       supports for cpu affinitization.
   *       Value is Mandatory. Must be at least 1.
@@ -820,7 +849,11 @@ struct nvmet_fc_target_template {
         int (*xmt_ls_rsp)(struct nvmet_fc_target_port *tgtport,
                                 struct nvmefc_tgt_ls_req *tls_req);
         int (*fcp_op)(struct nvmet_fc_target_port *tgtport,
-                               struct nvmefc_tgt_fcp_req *);
+                               struct nvmefc_tgt_fcp_req *fcpreq);
+       void (*fcp_abort)(struct nvmet_fc_target_port *tgtport,
+                               struct nvmefc_tgt_fcp_req *fcpreq);
+       void (*fcp_req_release)(struct nvmet_fc_target_port *tgtport,
+                               struct nvmefc_tgt_fcp_req *fcpreq);
  
         u32     max_hw_queues;
         u16     max_sgl_segments;
@@ -848,4 +881,7 @@ int nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *tgtport,
                         struct nvmefc_tgt_fcp_req *fcpreq,
                         void *cmdiubuf, u32 cmdiubuf_len);
  
+void nvmet_fc_rcv_fcp_abort(struct nvmet_fc_target_port *tgtport,
+                       struct nvmefc_tgt_fcp_req *fcpreq);
+
  #endif /* _NVME_FC_DRIVER_H */
diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h

index 4b45226bd604c5630fe73015e3a9876ced1cef2e..e997c4a49a8884e3b1167a830e7fdf0431365a3d 100644 (file)
--- a/include/linux/nvme-fc.h
+++ b/include/linux/nvme-fc.h
@@ -16,8 +16,7 @@
   */
  
  /*
- * This file contains definitions relative to FC-NVME r1.11 and a few
- * newer items
+ * This file contains definitions relative to FC-NVME r1.14 (16-020vB).
   */
  
  #ifndef _NVME_FC_H
@@ -47,8 +46,15 @@ struct nvme_fc_cmd_iu {
  
  #define NVME_FC_SIZEOF_ZEROS_RSP       12
  
+enum {
+       FCNVME_SC_SUCCESS               = 0,
+       FCNVME_SC_INVALID_FIELD         = 1,
+       FCNVME_SC_INVALID_CONNID        = 2,
+};
+
  struct nvme_fc_ersp_iu {
-       __u8                    rsvd0[2];
+       __u8                    status_code;
+       __u8                    rsvd1;
         __be16                  iu_len;
         __be32                  rsn;
         __be32                  xfrd_len;
@@ -58,7 +64,7 @@ struct nvme_fc_ersp_iu {
  };
  
  
-/* FC-NVME r1.03/16-119v0 NVME Link Services */
+/* FC-NVME Link Services */
  enum {
         FCNVME_LS_RSVD                  = 0,
         FCNVME_LS_RJT                   = 1,
@@ -68,7 +74,7 @@ enum {
         FCNVME_LS_DISCONNECT            = 5,
  };
  
-/* FC-NVME r1.03/16-119v0 NVME Link Service Descriptors */
+/* FC-NVME Link Service Descriptors */
  enum {
         FCNVME_LSDESC_RSVD              = 0x0,
         FCNVME_LSDESC_RQST              = 0x1,
@@ -92,7 +98,6 @@ static inline __be32 fcnvme_lsdesc_len(size_t sz)
         return cpu_to_be32(sz - (2 * sizeof(u32)));
  }
  
-
  struct fcnvme_ls_rqst_w0 {
         u8      ls_cmd;                 /* FCNVME_LS_xxx */
         u8      zeros[3];
@@ -106,8 +111,53 @@ struct fcnvme_lsdesc_rqst {
         __be32  rsvd12;
  };
  
+/* FC-NVME LS RJT reason_code values */
+enum fcnvme_ls_rjt_reason {
+       FCNVME_RJT_RC_NONE              = 0,
+       /* no reason - not to be sent */
+
+       FCNVME_RJT_RC_INVAL             = 0x01,
+       /* invalid NVMe_LS command code */
+
+       FCNVME_RJT_RC_LOGIC             = 0x03,
+       /* logical error */
+
+       FCNVME_RJT_RC_UNAB              = 0x09,
+       /* unable to perform command request */
+
+       FCNVME_RJT_RC_UNSUP             = 0x0b,
+       /* command not supported */
+
+       FCNVME_RJT_RC_INPROG            = 0x0e,
+       /* command already in progress */
  
+       FCNVME_RJT_RC_INV_ASSOC         = 0x40,
+       /* Invalid Association ID */
  
+       FCNVME_RJT_RC_INV_CONN          = 0x41,
+       /* Invalid Connection ID */
+
+       FCNVME_RJT_RC_VENDOR            = 0xff,
+       /* vendor specific error */
+};
+
+/* FC-NVME LS RJT reason_explanation values */
+enum fcnvme_ls_rjt_explan {
+       FCNVME_RJT_EXP_NONE             = 0x00,
+       /* No additional explanation */
+
+       FCNVME_RJT_EXP_OXID_RXID        = 0x17,
+       /* invalid OX_ID-RX_ID combination */
+
+       FCNVME_RJT_EXP_INSUF_RES        = 0x29,
+       /* insufficient resources */
+
+       FCNVME_RJT_EXP_UNAB_DATA        = 0x2a,
+       /* unable to supply requested data */
+
+       FCNVME_RJT_EXP_INV_LEN          = 0x2d,
+       /* Invalid payload length */
+};
  
  /* FCNVME_LSDESC_RJT */
  struct fcnvme_lsdesc_rjt {
@@ -119,15 +169,15 @@ struct fcnvme_lsdesc_rjt {
          * Reject reason and explanaction codes are generic
          * to ELs's from LS-3.
          */
-       u8      reason_code;
-       u8      reason_explanation;
+       u8      reason_code;            /* fcnvme_ls_rjt_reason */
+       u8      reason_explanation;     /* fcnvme_ls_rjt_explan */
  
         u8      vendor;
         __be32  rsvd12;
  };
  
  
-#define FCNVME_ASSOC_HOSTID_LEN                64
+#define FCNVME_ASSOC_HOSTID_LEN                16
  #define FCNVME_ASSOC_HOSTNQN_LEN       256
  #define FCNVME_ASSOC_SUBNQN_LEN                256
  
diff --git a/include/linux/nvme.h b/include/linux/nvme.h

index c43d435d422552d029bd569157d9aa352348747f..b625bacf37efaabd84e8735b2b304401cdb195fd 100644 (file)
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -64,26 +64,26 @@ enum {
   * RDMA_QPTYPE field
   */
  enum {
-       NVMF_RDMA_QPTYPE_CONNECTED      = 0, /* Reliable Connected */
-       NVMF_RDMA_QPTYPE_DATAGRAM       = 1, /* Reliable Datagram */
+       NVMF_RDMA_QPTYPE_CONNECTED      = 1, /* Reliable Connected */
+       NVMF_RDMA_QPTYPE_DATAGRAM       = 2, /* Reliable Datagram */
  };
  
  /* RDMA QP Service Type codes for Discovery Log Page entry TSAS
   * RDMA_QPTYPE field
   */
  enum {
-       NVMF_RDMA_PRTYPE_NOT_SPECIFIED  = 0, /* No Provider Specified */
-       NVMF_RDMA_PRTYPE_IB             = 1, /* InfiniBand */
-       NVMF_RDMA_PRTYPE_ROCE           = 2, /* InfiniBand RoCE */
-       NVMF_RDMA_PRTYPE_ROCEV2         = 3, /* InfiniBand RoCEV2 */
-       NVMF_RDMA_PRTYPE_IWARP          = 4, /* IWARP */
+       NVMF_RDMA_PRTYPE_NOT_SPECIFIED  = 1, /* No Provider Specified */
+       NVMF_RDMA_PRTYPE_IB             = 2, /* InfiniBand */
+       NVMF_RDMA_PRTYPE_ROCE           = 3, /* InfiniBand RoCE */
+       NVMF_RDMA_PRTYPE_ROCEV2         = 4, /* InfiniBand RoCEV2 */
+       NVMF_RDMA_PRTYPE_IWARP          = 5, /* IWARP */
  };
  
  /* RDMA Connection Management Service Type codes for Discovery Log Page
   * entry TSAS RDMA_CMS field
   */
  enum {
-       NVMF_RDMA_CMS_RDMA_CM   = 0, /* Sockets based enpoint addressing */
+       NVMF_RDMA_CMS_RDMA_CM   = 1, /* Sockets based endpoint addressing */
  };
  
  #define NVMF_AQ_DEPTH          32
@@ -245,6 +245,7 @@ enum {
         NVME_CTRL_ONCS_WRITE_ZEROES             = 1 << 3,
         NVME_CTRL_VWC_PRESENT                   = 1 << 0,
         NVME_CTRL_OACS_SEC_SUPP                 = 1 << 0,
+       NVME_CTRL_OACS_DBBUF_SUPP               = 1 << 7,
  };
  
  struct nvme_lbaf {
@@ -603,6 +604,7 @@ enum nvme_admin_opcode {
         nvme_admin_download_fw          = 0x11,
         nvme_admin_ns_attach            = 0x15,
         nvme_admin_keep_alive           = 0x18,
+       nvme_admin_dbbuf                = 0x7C,
         nvme_admin_format_nvm           = 0x80,
         nvme_admin_security_send        = 0x81,
         nvme_admin_security_recv        = 0x82,
@@ -874,6 +876,16 @@ struct nvmf_property_get_command {
         __u8            resv4[16];
  };
  
+struct nvme_dbbuf {
+       __u8                    opcode;
+       __u8                    flags;
+       __u16                   command_id;
+       __u32                   rsvd1[5];
+       __le64                  prp1;
+       __le64                  prp2;
+       __u32                   rsvd12[6];
+};
+
  struct nvme_command {
         union {
                 struct nvme_common_command common;
@@ -893,6 +905,7 @@ struct nvme_command {
                 struct nvmf_connect_command connect;
                 struct nvmf_property_set_command prop_set;
                 struct nvmf_property_get_command prop_get;
+               struct nvme_dbbuf dbbuf;
         };
  };
  
diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h

index 35d0fd7a4948e6e49eb7e75512461d935cccd161..fd0de00c0d777ed72cfab3fdbeb54e885c14be77 100644 (file)
--- a/include/linux/omap-gpmc.h
+++ b/include/linux/omap-gpmc.h
@@ -76,22 +76,12 @@ struct gpmc_timings;
  struct omap_nand_platform_data;
  struct omap_onenand_platform_data;
  
-#if IS_ENABLED(CONFIG_MTD_NAND_OMAP2)
-extern int gpmc_nand_init(struct omap_nand_platform_data *d,
-                         struct gpmc_timings *gpmc_t);
-#else
-static inline int gpmc_nand_init(struct omap_nand_platform_data *d,
-                                struct gpmc_timings *gpmc_t)
-{
-       return 0;
-}
-#endif
-
  #if IS_ENABLED(CONFIG_MTD_ONENAND_OMAP2)
-extern void gpmc_onenand_init(struct omap_onenand_platform_data *d);
+extern int gpmc_onenand_init(struct omap_onenand_platform_data *d);
  #else
  #define board_onenand_data     NULL
-static inline void gpmc_onenand_init(struct omap_onenand_platform_data *d)
+static inline int gpmc_onenand_init(struct omap_onenand_platform_data *d)
  {
+       return 0;
  }
  #endif
diff --git a/include/linux/phy.h b/include/linux/phy.h

index 772476028a6507f356fe3946372a024588679c3f..fb38573371512338a7876652d95c7f811be1bd3f 100644 (file)
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -837,6 +837,10 @@ int genphy_read_status(struct phy_device *phydev);
  int genphy_suspend(struct phy_device *phydev);
  int genphy_resume(struct phy_device *phydev);
  int genphy_soft_reset(struct phy_device *phydev);
+static inline int genphy_no_soft_reset(struct phy_device *phydev)
+{
+       return 0;
+}
  void phy_driver_unregister(struct phy_driver *drv);
  void phy_drivers_unregister(struct phy_driver *drv, int n);
  int phy_driver_register(struct phy_driver *new_driver, struct module *owner);
@@ -848,6 +852,7 @@ void phy_change_work(struct work_struct *work);
  void phy_mac_interrupt(struct phy_device *phydev, int new_link);
  void phy_start_machine(struct phy_device *phydev);
  void phy_stop_machine(struct phy_device *phydev);
+void phy_trigger_machine(struct phy_device *phydev, bool sync);
  int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd);
  int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd);
  int phy_ethtool_ksettings_get(struct phy_device *phydev,
diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h

index 8ce2d87a238b84d432abca342f9920e42a8d0c42..5e45385c5bdc7af10cb9c75d53d933a16d6ba107 100644 (file)
--- a/include/linux/pinctrl/pinctrl.h
+++ b/include/linux/pinctrl/pinctrl.h
@@ -145,8 +145,9 @@ struct pinctrl_desc {
  extern int pinctrl_register_and_init(struct pinctrl_desc *pctldesc,
                                      struct device *dev, void *driver_data,
                                      struct pinctrl_dev **pctldev);
+extern int pinctrl_enable(struct pinctrl_dev *pctldev);
  
-/* Please use pinctrl_register_and_init() instead */
+/* Please use pinctrl_register_and_init() and pinctrl_enable() instead */
  extern struct pinctrl_dev *pinctrl_register(struct pinctrl_desc *pctldesc,
                                 struct device *dev, void *driver_data);
  
diff --git a/include/linux/power/bq24190_charger.h b/include/linux/power/bq24190_charger.h

deleted file mode 100644 (file)

index 9f02837..0000000
--- a/include/linux/power/bq24190_charger.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- * Platform data for the TI bq24190 battery charger driver.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _BQ24190_CHARGER_H_
-#define _BQ24190_CHARGER_H_
-
-struct bq24190_platform_data {
-       unsigned int    gpio_int;       /* GPIO pin that's connected to INT# */
-};
-
-#endif
diff --git a/include/linux/purgatory.h b/include/linux/purgatory.h

new file mode 100644 (file)

index 0000000..d60d4e2
--- /dev/null
+++ b/include/linux/purgatory.h
@@ -0,0 +1,23 @@
+#ifndef _LINUX_PURGATORY_H
+#define _LINUX_PURGATORY_H
+
+#include <linux/types.h>
+#include <crypto/sha.h>
+#include <uapi/linux/kexec.h>
+
+struct kexec_sha_region {
+       unsigned long start;
+       unsigned long len;
+};
+
+/*
+ * These forward declarations serve two purposes:
+ *
+ * 1) Make sparse happy when checking arch/purgatory
+ * 2) Document that these are required to be global so the symbol
+ *    lookup in kexec works
+ */
+extern struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX];
+extern u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE];
+
+#endif
diff --git a/include/linux/random.h b/include/linux/random.h

index 7bd2403e4fef1ad7fb0a5f03b4e104e96234d26b..ed5c3838780de5ba9509071bef56e8d521dc5782 100644 (file)
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -37,14 +37,26 @@ extern void get_random_bytes(void *buf, int nbytes);
  extern int add_random_ready_callback(struct random_ready_callback *rdy);
  extern void del_random_ready_callback(struct random_ready_callback *rdy);
  extern void get_random_bytes_arch(void *buf, int nbytes);
-extern int random_int_secret_init(void);
  
  #ifndef MODULE
  extern const struct file_operations random_fops, urandom_fops;
  #endif
  
-unsigned int get_random_int(void);
-unsigned long get_random_long(void);
+u32 get_random_u32(void);
+u64 get_random_u64(void);
+static inline unsigned int get_random_int(void)
+{
+       return get_random_u32();
+}
+static inline unsigned long get_random_long(void)
+{
+#if BITS_PER_LONG == 64
+       return get_random_u64();
+#else
+       return get_random_u32();
+#endif
+}
+
  unsigned long randomize_page(unsigned long start, unsigned long range);
  
  u32 prandom_u32(void);
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h

index 4ae95f7e8597b0b43575d04aaf524cf252761e6e..a23a3315318048eec1fc18678e17967f03eaa89b 100644 (file)
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -156,5 +156,19 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
                 ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
                 pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)))
  
+/**
+ * hlist_nulls_for_each_entry_safe -
+ *   iterate over list of given type safe against removal of list entry
+ * @tpos:      the type * to use as a loop cursor.
+ * @pos:       the &struct hlist_nulls_node to use as a loop cursor.
+ * @head:      the head for your list.
+ * @member:    the name of the hlist_nulls_node within the struct.
+ */
+#define hlist_nulls_for_each_entry_safe(tpos, pos, head, member)               \
+       for (({barrier();}),                                                    \
+            pos = rcu_dereference_raw(hlist_nulls_first_rcu(head));            \
+               (!is_a_nulls(pos)) &&                                           \
+               ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member);        \
+                  pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)); 1; });)
  #endif
  #endif
diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h

index ad3e5158e586dc841e9cd37492ec7104d60e7a81..c9f795e9a2ee26aaf562e9a97a2fe2f963a2f054 100644 (file)
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -65,7 +65,7 @@ struct regulator_state {
         int uV; /* suspend voltage */
         unsigned int mode; /* suspend regulator operating mode */
         int enabled; /* is regulator enabled in this suspend state */
-       int disabled; /* is the regulator disbled in this suspend state */
+       int disabled; /* is the regulator disabled in this suspend state */
  };
  
  /**
diff --git a/include/linux/reset.h b/include/linux/reset.h

index 86b4ed75359e85345afb839483e5910bcf5fe6cb..13d8681210d545ab2dcedb472fa127b408446eae 100644 (file)
--- a/include/linux/reset.h
+++ b/include/linux/reset.h
@@ -15,6 +15,9 @@ int reset_control_status(struct reset_control *rstc);
  struct reset_control *__of_reset_control_get(struct device_node *node,
                                      const char *id, int index, bool shared,
                                      bool optional);
+struct reset_control *__reset_control_get(struct device *dev, const char *id,
+                                         int index, bool shared,
+                                         bool optional);
  void reset_control_put(struct reset_control *rstc);
  struct reset_control *__devm_reset_control_get(struct device *dev,
                                      const char *id, int index, bool shared,
@@ -31,31 +34,26 @@ static inline int device_reset_optional(struct device *dev)
  
  static inline int reset_control_reset(struct reset_control *rstc)
  {
-       WARN_ON(1);
         return 0;
  }
  
  static inline int reset_control_assert(struct reset_control *rstc)
  {
-       WARN_ON(1);
         return 0;
  }
  
  static inline int reset_control_deassert(struct reset_control *rstc)
  {
-       WARN_ON(1);
         return 0;
  }
  
  static inline int reset_control_status(struct reset_control *rstc)
  {
-       WARN_ON(1);
         return 0;
  }
  
  static inline void reset_control_put(struct reset_control *rstc)
  {
-       WARN_ON(1);
  }
  
  static inline int __must_check device_reset(struct device *dev)
@@ -74,14 +72,21 @@ static inline struct reset_control *__of_reset_control_get(
                                         const char *id, int index, bool shared,
                                         bool optional)
  {
-       return ERR_PTR(-ENOTSUPP);
+       return optional ? NULL : ERR_PTR(-ENOTSUPP);
+}
+
+static inline struct reset_control *__reset_control_get(
+                                       struct device *dev, const char *id,
+                                       int index, bool shared, bool optional)
+{
+       return optional ? NULL : ERR_PTR(-ENOTSUPP);
  }
  
  static inline struct reset_control *__devm_reset_control_get(
                                         struct device *dev, const char *id,
                                         int index, bool shared, bool optional)
  {
-       return ERR_PTR(-ENOTSUPP);
+       return optional ? NULL : ERR_PTR(-ENOTSUPP);
  }
  
  #endif /* CONFIG_RESET_CONTROLLER */
@@ -107,8 +112,7 @@ __must_check reset_control_get_exclusive(struct device *dev, const char *id)
  #ifndef CONFIG_RESET_CONTROLLER
         WARN_ON(1);
  #endif
-       return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, false,
-                                                                       false);
+       return __reset_control_get(dev, id, 0, false, false);
  }
  
  /**
@@ -136,22 +140,19 @@ __must_check reset_control_get_exclusive(struct device *dev, const char *id)
  static inline struct reset_control *reset_control_get_shared(
                                         struct device *dev, const char *id)
  {
-       return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, true,
-                                                                       false);
+       return __reset_control_get(dev, id, 0, true, false);
  }
  
  static inline struct reset_control *reset_control_get_optional_exclusive(
                                         struct device *dev, const char *id)
  {
-       return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, false,
-                                                                       true);
+       return __reset_control_get(dev, id, 0, false, true);
  }
  
  static inline struct reset_control *reset_control_get_optional_shared(
                                         struct device *dev, const char *id)
  {
-       return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, true,
-                                                                       true);
+       return __reset_control_get(dev, id, 0, true, true);
  }
  
  /**
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h

index d4e0a204c118c7244e7e5d167cc3d0429832de01..a1904aadbc45004ba18c8db06f184a164908cd46 100644 (file)
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -175,6 +175,25 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth);
   */
  int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin);
  
+/**
+ * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap,
+ * limiting the depth used from each word.
+ * @sb: Bitmap to allocate from.
+ * @alloc_hint: Hint for where to start searching for a free bit.
+ * @shallow_depth: The maximum number of bits to allocate from a single word.
+ *
+ * This rather specific operation allows for having multiple users with
+ * different allocation limits. E.g., there can be a high-priority class that
+ * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow()
+ * with a @shallow_depth of (1 << (@sb->shift - 1)). Then, the low-priority
+ * class can only allocate half of the total bits in the bitmap, preventing it
+ * from starving out the high-priority class.
+ *
+ * Return: Non-negative allocated bit number if successful, -1 otherwise.
+ */
+int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint,
+                       unsigned long shallow_depth);
+
  /**
   * sbitmap_any_bit_set() - Check for a set bit in a &struct sbitmap.
   * @sb: Bitmap to check.
@@ -325,6 +344,19 @@ void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth);
   */
  int __sbitmap_queue_get(struct sbitmap_queue *sbq);
  
+/**
+ * __sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct
+ * sbitmap_queue, limiting the depth used from each word, with preemption
+ * already disabled.
+ * @sbq: Bitmap queue to allocate from.
+ * @shallow_depth: The maximum number of bits to allocate from a single word.
+ * See sbitmap_get_shallow().
+ *
+ * Return: Non-negative allocated bit number if successful, -1 otherwise.
+ */
+int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
+                               unsigned int shallow_depth);
+
  /**
   * sbitmap_queue_get() - Try to allocate a free bit from a &struct
   * sbitmap_queue.
@@ -345,6 +377,29 @@ static inline int sbitmap_queue_get(struct sbitmap_queue *sbq,
         return nr;
  }
  
+/**
+ * sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct
+ * sbitmap_queue, limiting the depth used from each word.
+ * @sbq: Bitmap queue to allocate from.
+ * @cpu: Output parameter; will contain the CPU we ran on (e.g., to be passed to
+ *       sbitmap_queue_clear()).
+ * @shallow_depth: The maximum number of bits to allocate from a single word.
+ * See sbitmap_get_shallow().
+ *
+ * Return: Non-negative allocated bit number if successful, -1 otherwise.
+ */
+static inline int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
+                                           unsigned int *cpu,
+                                           unsigned int shallow_depth)
+{
+       int nr;
+
+       *cpu = get_cpu();
+       nr = __sbitmap_queue_get_shallow(sbq, shallow_depth);
+       put_cpu();
+       return nr;
+}
+
  /**
   * sbitmap_queue_clear() - Free an allocated bit and wake up waiters on a
   * &struct sbitmap_queue.
diff --git a/include/linux/sched.h b/include/linux/sched.h

index d67eee84fd430f3c44b77d4ba007ec5d2dcabb2b..4cf9a59a4d08ed181f30d7cefa56db92f48408d2 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -604,6 +604,10 @@ struct task_struct {
  #ifdef CONFIG_COMPAT_BRK
         unsigned                        brk_randomized:1;
  #endif
+#ifdef CONFIG_CGROUPS
+       /* disallow userland-initiated cgroup migration */
+       unsigned                        no_cgroup_migration:1;
+#endif
  
         unsigned long                   atomic_flags; /* Flags requiring atomic access. */
  
diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h

index 4a68c67912078ec765e25ad59b88da70feacb0b6..34fe92ce1ebd7c6e9dfa0dac94ff4512deb6f95f 100644 (file)
--- a/include/linux/sched/clock.h
+++ b/include/linux/sched/clock.h
@@ -54,15 +54,16 @@ static inline u64 local_clock(void)
  }
  #else
  extern void sched_clock_init_late(void);
-/*
- * Architectures can set this to 1 if they have specified
- * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
- * but then during bootup it turns out that sched_clock()
- * is reliable after all:
- */
  extern int sched_clock_stable(void);
  extern void clear_sched_clock_stable(void);
  
+/*
+ * When sched_clock_stable(), __sched_clock_offset provides the offset
+ * between local_clock() and sched_clock().
+ */
+extern u64 __sched_clock_offset;
+
+
  extern void sched_clock_tick(void);
  extern void sched_clock_idle_sleep_event(void);
  extern void sched_clock_idle_wakeup_event(u64 delta_ns);
diff --git a/include/linux/stat.h b/include/linux/stat.h

index c76e524fb34b6af362c82103662454750826ac9e..64b6b3aece21aee52a2b7c9246862afb41d606f1 100644 (file)
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -26,6 +26,7 @@ struct kstat {
         unsigned int    nlink;
         uint32_t        blksize;        /* Preferred I/O size */
         u64             attributes;
+       u64             attributes_mask;
  #define KSTAT_ATTR_FS_IOC_FLAGS                                \
         (STATX_ATTR_COMPRESSED |                        \
          STATX_ATTR_IMMUTABLE |                         \
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h

index 9fba9dd33544087dc5dfc55733ec3e4cdb9a317f..9375d23a24e7aba97504d743e77d3b78f4d72493 100644 (file)
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -34,9 +34,9 @@ struct t10_pi_tuple {
  };
  
  
-extern struct blk_integrity_profile t10_pi_type1_crc;
-extern struct blk_integrity_profile t10_pi_type1_ip;
-extern struct blk_integrity_profile t10_pi_type3_crc;
-extern struct blk_integrity_profile t10_pi_type3_ip;
+extern const struct blk_integrity_profile t10_pi_type1_crc;
+extern const struct blk_integrity_profile t10_pi_type1_ip;
+extern const struct blk_integrity_profile t10_pi_type3_crc;
+extern const struct blk_integrity_profile t10_pi_type3_ip;
  
  #endif
diff --git a/include/linux/uio.h b/include/linux/uio.h

index 804e34c6f981de7402ba6a39bd0375b3a3dd0efe..f2d36a3d30052db827fab68e9278fc1b693068ca 100644 (file)
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -39,7 +39,10 @@ struct iov_iter {
         };
         union {
                 unsigned long nr_segs;
-               int idx;
+               struct {
+                       int idx;
+                       int start_idx;
+               };
         };
  };
  
@@ -81,6 +84,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to);
  size_t iov_iter_copy_from_user_atomic(struct page *page,
                 struct iov_iter *i, unsigned long offset, size_t bytes);
  void iov_iter_advance(struct iov_iter *i, size_t bytes);
+void iov_iter_revert(struct iov_iter *i, size_t bytes);
  int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
  size_t iov_iter_single_seg_count(const struct iov_iter *i);
  size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h

index 1d0043dc34e427403a1c82458249bed83e87d3a0..de2a722fe3cf7c457352eb02548f0351a669ddf5 100644 (file)
--- a/include/linux/usb/quirks.h
+++ b/include/linux/usb/quirks.h
@@ -50,4 +50,10 @@
  /* device can't handle Link Power Management */
  #define USB_QUIRK_NO_LPM                       BIT(10)
  
+/*
+ * Device reports its bInterval as linear frames instead of the
+ * USB 2.0 calculation.
+ */
+#define USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL  BIT(11)
+
  #endif /* __LINUX_USB_QUIRKS_H */
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h

index be765234c0a2b999f0f2bfcdb9f36b2cc16fa0c6..32354b4b4b2ba5ae72034d00c3b1d43fa8c2a15c 100644 (file)
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -72,7 +72,7 @@ struct ucounts {
         struct hlist_node node;
         struct user_namespace *ns;
         kuid_t uid;
-       atomic_t count;
+       int count;
         atomic_t ucount[UCOUNT_COUNTS];
  };
  
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h

index 0468548acebfef5431ea7bfd6f565cfdfb73f348..48a3483dccb12360e288ffdd97a9bf8d9d9080a4 100644 (file)
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -61,8 +61,7 @@ extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *,
                                         unsigned long from, unsigned long to,
                                         unsigned long len);
  
-extern void userfaultfd_remove(struct vm_area_struct *vma,
-                              struct vm_area_struct **prev,
+extern bool userfaultfd_remove(struct vm_area_struct *vma,
                                unsigned long start,
                                unsigned long end);
  
@@ -72,8 +71,6 @@ extern int userfaultfd_unmap_prep(struct vm_area_struct *vma,
  extern void userfaultfd_unmap_complete(struct mm_struct *mm,
                                        struct list_head *uf);
  
-extern void userfaultfd_exit(struct mm_struct *mm);
-
  #else /* CONFIG_USERFAULTFD */
  
  /* mm helpers */
@@ -120,11 +117,11 @@ static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx,
  {
  }
  
-static inline void userfaultfd_remove(struct vm_area_struct *vma,
-                                     struct vm_area_struct **prev,
+static inline bool userfaultfd_remove(struct vm_area_struct *vma,
                                       unsigned long start,
                                       unsigned long end)
  {
+       return true;
  }
  
  static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
@@ -139,10 +136,6 @@ static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
  {
  }
  
-static inline void userfaultfd_exit(struct mm_struct *mm)
-{
-}
-
  #endif /* CONFIG_USERFAULTFD */
  
  #endif /* _LINUX_USERFAULTFD_K_H */
diff --git a/include/linux/virtio.h b/include/linux/virtio.h

index 04b0d3f95043c66856c6a6f4cab078c4791d35e8..7edfbdb55a995d436bf9e999ce202d0ca0bf2550 100644 (file)
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -167,6 +167,7 @@ struct virtio_driver {
         unsigned int feature_table_size;
         const unsigned int *feature_table_legacy;
         unsigned int feature_table_size_legacy;
+       int (*validate)(struct virtio_device *dev);
         int (*probe)(struct virtio_device *dev);
         void (*scan)(struct virtio_device *dev);
         void (*remove)(struct virtio_device *dev);
diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h

index 9638bfeb0d1f639ae310d1586b4e2fca567ba2f7..584f9a647ad4acca191ff6116a47c14da1385fa3 100644 (file)
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -48,6 +48,8 @@ struct virtio_vsock_pkt {
         struct virtio_vsock_hdr hdr;
         struct work_struct work;
         struct list_head list;
+       /* socket refcnt not held, only use for cancellation */
+       struct vsock_sock *vsk;
         void *buf;
         u32 len;
         u32 off;
@@ -56,6 +58,7 @@ struct virtio_vsock_pkt {
  
  struct virtio_vsock_pkt_info {
         u32 remote_cid, remote_port;
+       struct vsock_sock *vsk;
         struct msghdr *msg;
         u32 pkt_len;
         u16 type;
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h

index 6aa1b6cb58285d92ccd4a53d8de660669f518a6b..a80b7b59cf33418811217faca1b9c6b041dad814 100644 (file)
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -79,6 +79,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
                 THP_SPLIT_PAGE_FAILED,
                 THP_DEFERRED_SPLIT_PAGE,
                 THP_SPLIT_PMD,
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+               THP_SPLIT_PUD,
+#endif
                 THP_ZERO_PAGE_ALLOC,
                 THP_ZERO_PAGE_ALLOC_FAILED,
  #endif
diff --git a/include/linux/wait.h b/include/linux/wait.h

index aacb1282d19a38d7b633bf7aedcc40fa258584f8..db076ca7f11da03f474be67f792e1189b96425eb 100644 (file)
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -620,30 +620,19 @@ do {                                                                      \
         __ret;                                                          \
  })
  
+extern int do_wait_intr(wait_queue_head_t *, wait_queue_t *);
+extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *);
  
-#define __wait_event_interruptible_locked(wq, condition, exclusive, irq) \
+#define __wait_event_interruptible_locked(wq, condition, exclusive, fn) \
  ({                                                                     \
-       int __ret = 0;                                                  \
+       int __ret;                                                      \
         DEFINE_WAIT(__wait);                                            \
         if (exclusive)                                                  \
                 __wait.flags |= WQ_FLAG_EXCLUSIVE;                      \
         do {                                                            \
-               if (likely(list_empty(&__wait.task_list)))              \
-                       __add_wait_queue_tail(&(wq), &__wait);          \
-               set_current_state(TASK_INTERRUPTIBLE);                  \
-               if (signal_pending(current)) {                          \
-                       __ret = -ERESTARTSYS;                           \
+               __ret = fn(&(wq), &__wait);                             \
+               if (__ret)                                              \
                         break;                                          \
-               }                                                       \
-               if (irq)                                                \
-                       spin_unlock_irq(&(wq).lock);                    \
-               else                                                    \
-                       spin_unlock(&(wq).lock);                        \
-               schedule();                                             \
-               if (irq)                                                \
-                       spin_lock_irq(&(wq).lock);                      \
-               else                                                    \
-                       spin_lock(&(wq).lock);                          \
         } while (!(condition));                                         \
         __remove_wait_queue(&(wq), &__wait);                            \
         __set_current_state(TASK_RUNNING);                              \
@@ -676,7 +665,7 @@ do {                                                                        \
   */
  #define wait_event_interruptible_locked(wq, condition)                 \
         ((condition)                                                    \
-        ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 0))
+        ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr))
  
  /**
   * wait_event_interruptible_locked_irq - sleep until a condition gets true
@@ -703,7 +692,7 @@ do {                                                                        \
   */
  #define wait_event_interruptible_locked_irq(wq, condition)             \
         ((condition)                                                    \
-        ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 1))
+        ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr_irq))
  
  /**
   * wait_event_interruptible_exclusive_locked - sleep exclusively until a condition gets true
@@ -734,7 +723,7 @@ do {                                                                        \
   */
  #define wait_event_interruptible_exclusive_locked(wq, condition)       \
         ((condition)                                                    \
-        ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 0))
+        ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr))
  
  /**
   * wait_event_interruptible_exclusive_locked_irq - sleep until a condition gets true
@@ -765,7 +754,7 @@ do {                                                                        \
   */
  #define wait_event_interruptible_exclusive_locked_irq(wq, condition)   \
         ((condition)                                                    \
-        ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 1))
+        ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr_irq))
  
  
  #define __wait_event_killable(wq, condition)                           \
diff --git a/include/linux/writeback.h b/include/linux/writeback.h

index a3c0cbd7c88824a297cd725e36c931c223c3e106..d5815794416c9b8430c7ca9153e3be3271a76899 100644 (file)
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -237,6 +237,7 @@ static inline void inode_attach_wb(struct inode *inode, struct page *page)
  static inline void inode_detach_wb(struct inode *inode)
  {
         if (inode->i_wb) {
+               WARN_ON_ONCE(!(inode->i_state & I_CLEAR));
                 wb_put(inode->i_wb);
                 inode->i_wb = NULL;
         }
diff --git a/include/media/vsp1.h b/include/media/vsp1.h

index 458b400373d44daf6d2fee8b2a971eb0e326d2c6..38aac554dbbab6384f1a16bd7d914b632b6a6d56 100644 (file)
--- a/include/media/vsp1.h
+++ b/include/media/vsp1.h
@@ -20,8 +20,17 @@ struct device;
  
  int vsp1_du_init(struct device *dev);
  
-int vsp1_du_setup_lif(struct device *dev, unsigned int width,
-                     unsigned int height);
+/**
+ * struct vsp1_du_lif_config - VSP LIF configuration
+ * @width: output frame width
+ * @height: output frame height
+ */
+struct vsp1_du_lif_config {
+       unsigned int width;
+       unsigned int height;
+};
+
+int vsp1_du_setup_lif(struct device *dev, const struct vsp1_du_lif_config *cfg);
  
  struct vsp1_du_atomic_config {
         u32 pixelformat;
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h

index f2758964ce6f890e3b11df5ba5bf2eefe636abd1..f32ed9ac181a47c00757596fc3b8c5733426c468 100644 (file)
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -100,6 +100,9 @@ struct vsock_transport {
         void (*destruct)(struct vsock_sock *);
         void (*release)(struct vsock_sock *);
  
+       /* Cancel all pending packets sent on vsock. */
+       int (*cancel_pkt)(struct vsock_sock *vsk);
+
         /* Connections. */
         int (*connect)(struct vsock_sock *);
  
diff --git a/include/net/inet_common.h b/include/net/inet_common.h

index b7952d55b9c00039a9eca46544997c10722682b6..f39ae697347f6590459ee4178de84160b43841e2 100644 (file)
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -20,7 +20,8 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                           int addr_len, int flags, int is_sendmsg);
  int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
                        int addr_len, int flags);
-int inet_accept(struct socket *sock, struct socket *newsock, int flags);
+int inet_accept(struct socket *sock, struct socket *newsock, int flags,
+               bool kern);
  int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size);
  ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
                       size_t size, int flags);
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h

index 826f198374f809a4b7ca23ada4a46433b972ef35..c7a577976bec0887218a969bc8197dc1c8eb13f0 100644 (file)
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -258,7 +258,7 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk,
          return (unsigned long)min_t(u64, when, max_when);
  }
  
-struct sock *inet_csk_accept(struct sock *sk, int flags, int *err);
+struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern);
  
  int inet_csk_get_port(struct sock *sk, unsigned short snum);
  
diff --git a/include/net/irda/timer.h b/include/net/irda/timer.h

index cb2615ccf761d68123406d3600646d147a6e2f47..d784f242cf7b4dc114e355c52b319cb11d6faca7 100644 (file)
--- a/include/net/irda/timer.h
+++ b/include/net/irda/timer.h
@@ -59,7 +59,7 @@ struct lap_cb;
   *  Slot timer must never exceed 85 ms, and must always be at least 25 ms, 
   *  suggested to  75-85 msec by IrDA lite. This doesn't work with a lot of
   *  devices, and other stackes uses a lot more, so it's best we do it as well
- *  (Note : this is the default value and sysctl overides it - Jean II)
+ *  (Note : this is the default value and sysctl overrides it - Jean II)
   */
  #define SLOT_TIMEOUT            (90*HZ/1000)
  
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h

index f540f9ad2af4f673a204875864ce73f423204a74..19605878da4739d04f0642b20f8641ed8601d2eb 100644 (file)
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -244,7 +244,7 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct,
                                u32 seq);
  
  /* Fake conntrack entry for untracked connections */
-DECLARE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
+DECLARE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked);
  static inline struct nf_conn *nf_ct_untracked_get(void)
  {
         return raw_cpu_ptr(&nf_conntrack_untracked);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h

index 2aa8a9d80fbe8263a4b0e1c65f44e1ee2d9295d4..0136028652bdb8b3c20813b01b2fa8cfb16ba012 100644 (file)
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -103,6 +103,35 @@ struct nft_regs {
         };
  };
  
+/* Store/load an u16 or u8 integer to/from the u32 data register.
+ *
+ * Note, when using concatenations, register allocation happens at 32-bit
+ * level. So for store instruction, pad the rest part with zero to avoid
+ * garbage values.
+ */
+
+static inline void nft_reg_store16(u32 *dreg, u16 val)
+{
+       *dreg = 0;
+       *(u16 *)dreg = val;
+}
+
+static inline void nft_reg_store8(u32 *dreg, u8 val)
+{
+       *dreg = 0;
+       *(u8 *)dreg = val;
+}
+
+static inline u16 nft_reg_load16(u32 *sreg)
+{
+       return *(u16 *)sreg;
+}
+
+static inline u8 nft_reg_load8(u32 *sreg)
+{
+       return *(u8 *)sreg;
+}
+
  static inline void nft_data_copy(u32 *dst, const struct nft_data *src,
                                  unsigned int len)
  {
@@ -203,7 +232,6 @@ struct nft_set_elem {
  struct nft_set;
  struct nft_set_iter {
         u8              genmask;
-       bool            flush;
         unsigned int    count;
         unsigned int    skip;
         int             err;
diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h

index d150b50662017378644c8f3ccf0218ecceaa2331..97983d1c05e4d327147110440f4a3925b13f6951 100644 (file)
--- a/include/net/netfilter/nf_tables_ipv6.h
+++ b/include/net/netfilter/nf_tables_ipv6.h
@@ -9,12 +9,13 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
                      struct sk_buff *skb,
                      const struct nf_hook_state *state)
  {
+       unsigned int flags = IP6_FH_F_AUTH;
         int protohdr, thoff = 0;
         unsigned short frag_off;
  
         nft_set_pktinfo(pkt, skb, state);
  
-       protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL);
+       protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags);
         if (protohdr < 0) {
                 nft_set_pktinfo_proto_unspec(pkt, skb);
                 return;
@@ -32,6 +33,7 @@ __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
                                 const struct nf_hook_state *state)
  {
  #if IS_ENABLED(CONFIG_IPV6)
+       unsigned int flags = IP6_FH_F_AUTH;
         struct ipv6hdr *ip6h, _ip6h;
         unsigned int thoff = 0;
         unsigned short frag_off;
@@ -50,7 +52,7 @@ __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
         if (pkt_len + sizeof(*ip6h) > skb->len)
                 return -1;
  
-       protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL);
+       protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags);
         if (protohdr < 0)
                 return -1;
  
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h

index 1f71ee5ab518410ecd2fe0cab6064681025b43b3..069582ee5d7fd5b0e92edea68cb2406fbbe6db00 100644 (file)
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -448,10 +448,9 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu)
         return frag;
  }
  
-static inline void sctp_assoc_pending_pmtu(struct sock *sk, struct sctp_association *asoc)
+static inline void sctp_assoc_pending_pmtu(struct sctp_association *asoc)
  {
-
-       sctp_assoc_sync_pmtu(sk, asoc);
+       sctp_assoc_sync_pmtu(asoc);
         asoc->pmtu_pending = 0;
  }
  
@@ -596,12 +595,23 @@ static inline void sctp_v4_map_v6(union sctp_addr *addr)
   */
  static inline struct dst_entry *sctp_transport_dst_check(struct sctp_transport *t)
  {
-       if (t->dst && (!dst_check(t->dst, t->dst_cookie) ||
-                      t->pathmtu != max_t(size_t, SCTP_TRUNC4(dst_mtu(t->dst)),
-                                          SCTP_DEFAULT_MINSEGMENT)))
+       if (t->dst && !dst_check(t->dst, t->dst_cookie))
                 sctp_transport_dst_release(t);
  
         return t->dst;
  }
  
+static inline bool sctp_transport_pmtu_check(struct sctp_transport *t)
+{
+       __u32 pmtu = max_t(size_t, SCTP_TRUNC4(dst_mtu(t->dst)),
+                          SCTP_DEFAULT_MINSEGMENT);
+
+       if (t->pathmtu == pmtu)
+               return true;
+
+       t->pathmtu = pmtu;
+
+       return false;
+}
+
  #endif /* __net_sctp_h__ */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h

index a244db5e5ff7fa65bcf0a4124cbbe5f4682e6d9d..138f8615acf0993d8015f6c1d2eee32966dccad0 100644 (file)
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -83,6 +83,7 @@ struct sctp_bind_addr;
  struct sctp_ulpq;
  struct sctp_ep_common;
  struct crypto_shash;
+struct sctp_stream;
  
  
  #include <net/sctp/tsnmap.h>
@@ -376,7 +377,8 @@ typedef struct sctp_sender_hb_info {
         __u64 hb_nonce;
  } sctp_sender_hb_info_t;
  
-struct sctp_stream *sctp_stream_new(__u16 incnt, __u16 outcnt, gfp_t gfp);
+int sctp_stream_new(struct sctp_association *asoc, gfp_t gfp);
+int sctp_stream_init(struct sctp_association *asoc, gfp_t gfp);
  void sctp_stream_free(struct sctp_stream *stream);
  void sctp_stream_clear(struct sctp_stream *stream);
  
@@ -476,7 +478,8 @@ struct sctp_pf {
         int  (*send_verify) (struct sctp_sock *, union sctp_addr *);
         int  (*supported_addrs)(const struct sctp_sock *, __be16 *);
         struct sock *(*create_accept_sk) (struct sock *sk,
-                                         struct sctp_association *asoc);
+                                         struct sctp_association *asoc,
+                                         bool kern);
         int (*addr_to_user)(struct sctp_sock *sk, union sctp_addr *addr);
         void (*to_sk_saddr)(union sctp_addr *, struct sock *sk);
         void (*to_sk_daddr)(union sctp_addr *, struct sock *sk);
@@ -497,7 +500,6 @@ struct sctp_datamsg {
         /* Did the messenge fail to send? */
         int send_error;
         u8 send_failed:1,
-          force_delay:1,
            can_delay;       /* should this message be Nagle delayed */
  };
  
@@ -752,6 +754,8 @@ struct sctp_transport {
                 /* Is the Path MTU update pending on this tranport */
                 pmtu_pending:1,
  
+               dst_pending_confirm:1,  /* need to confirm neighbour */
+
                 /* Has this transport moved the ctsn since we last sacked */
                 sack_generation:1;
         u32 dst_cookie;
@@ -805,8 +809,6 @@ struct sctp_transport {
  
         __u32 burst_limited;    /* Holds old cwnd when max.burst is applied */
  
-       __u32 dst_pending_confirm;      /* need to confirm neighbour */
-
         /* Destination */
         struct dst_entry *dst;
         /* Source address. */
@@ -950,8 +952,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *, sctp_lower_cwnd_t);
  void sctp_transport_burst_limited(struct sctp_transport *);
  void sctp_transport_burst_reset(struct sctp_transport *);
  unsigned long sctp_transport_timeout(struct sctp_transport *);
-void sctp_transport_reset(struct sctp_transport *);
-void sctp_transport_update_pmtu(struct sock *, struct sctp_transport *, u32);
+void sctp_transport_reset(struct sctp_transport *t);
+void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
  void sctp_transport_immediate_rtx(struct sctp_transport *);
  void sctp_transport_dst_release(struct sctp_transport *t);
  void sctp_transport_dst_confirm(struct sctp_transport *t);
@@ -1876,6 +1878,7 @@ struct sctp_association {
  
         __u8 need_ecne:1,       /* Need to send an ECNE Chunk? */
              temp:1,            /* Is it a temporary association? */
+            force_delay:1,
              prsctp_enable:1,
              reconf_enable:1;
  
@@ -1951,7 +1954,7 @@ void sctp_assoc_update(struct sctp_association *old,
  
  __u32 sctp_association_get_next_tsn(struct sctp_association *);
  
-void sctp_assoc_sync_pmtu(struct sock *, struct sctp_association *);
+void sctp_assoc_sync_pmtu(struct sctp_association *asoc);
  void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned int);
  void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned int);
  void sctp_assoc_set_primary(struct sctp_association *,
diff --git a/include/net/sock.h b/include/net/sock.h

index 5e5997654db6454f82179cc35c4bc22e89d0c06f..03252d53975de7ad0da66d35802738830b0e3367 100644 (file)
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -236,6 +236,7 @@ struct sock_common {
    *    @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN
    *    @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
    *    @sk_lock:       synchronizer
+  *    @sk_kern_sock: True if sock is using kernel lock classes
    *    @sk_rcvbuf: size of receive buffer in bytes
    *    @sk_wq: sock wait queue and async head
    *    @sk_rx_dst: receive input route used by early demux
@@ -430,7 +431,8 @@ struct sock {
  #endif
  
         kmemcheck_bitfield_begin(flags);
-       unsigned int            sk_padding : 2,
+       unsigned int            sk_padding : 1,
+                               sk_kern_sock : 1,
                                 sk_no_check_tx : 1,
                                 sk_no_check_rx : 1,
                                 sk_userlocks : 4,
@@ -1015,7 +1017,8 @@ struct proto {
                                         int addr_len);
         int                     (*disconnect)(struct sock *sk, int flags);
  
-       struct sock *           (*accept)(struct sock *sk, int flags, int *err);
+       struct sock *           (*accept)(struct sock *sk, int flags, int *err,
+                                         bool kern);
  
         int                     (*ioctl)(struct sock *sk, int cmd,
                                          unsigned long arg);
@@ -1573,7 +1576,7 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
  int sock_no_bind(struct socket *, struct sockaddr *, int);
  int sock_no_connect(struct socket *, struct sockaddr *, int, int);
  int sock_no_socketpair(struct socket *, struct socket *);
-int sock_no_accept(struct socket *, struct socket *, int);
+int sock_no_accept(struct socket *, struct socket *, int, bool);
  int sock_no_getname(struct socket *, struct sockaddr *, int *, int);
  unsigned int sock_no_poll(struct file *, struct socket *,
                           struct poll_table_struct *);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h

index 0f1813c1368795994e012d00c607499879c130aa..99e4423eb2b80b142024bed892ddc4a84ac5e576 100644 (file)
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1863,6 +1863,9 @@ struct ib_port_immutable {
  };
  
  struct ib_device {
+       /* Do not access @dma_device directly from ULP nor from HW drivers. */
+       struct device                *dma_device;
+
         char                          name[IB_DEVICE_NAME_MAX];
  
         struct list_head              event_handler_list;
@@ -3007,7 +3010,7 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
   */
  static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
  {
-       return dma_mapping_error(&dev->dev, dma_addr);
+       return dma_mapping_error(dev->dma_device, dma_addr);
  }
  
  /**
@@ -3021,7 +3024,7 @@ static inline u64 ib_dma_map_single(struct ib_device *dev,
                                     void *cpu_addr, size_t size,
                                     enum dma_data_direction direction)
  {
-       return dma_map_single(&dev->dev, cpu_addr, size, direction);
+       return dma_map_single(dev->dma_device, cpu_addr, size, direction);
  }
  
  /**
@@ -3035,7 +3038,7 @@ static inline void ib_dma_unmap_single(struct ib_device *dev,
                                        u64 addr, size_t size,
                                        enum dma_data_direction direction)
  {
-       dma_unmap_single(&dev->dev, addr, size, direction);
+       dma_unmap_single(dev->dma_device, addr, size, direction);
  }
  
  /**
@@ -3052,7 +3055,7 @@ static inline u64 ib_dma_map_page(struct ib_device *dev,
                                   size_t size,
                                          enum dma_data_direction direction)
  {
-       return dma_map_page(&dev->dev, page, offset, size, direction);
+       return dma_map_page(dev->dma_device, page, offset, size, direction);
  }
  
  /**
@@ -3066,7 +3069,7 @@ static inline void ib_dma_unmap_page(struct ib_device *dev,
                                      u64 addr, size_t size,
                                      enum dma_data_direction direction)
  {
-       dma_unmap_page(&dev->dev, addr, size, direction);
+       dma_unmap_page(dev->dma_device, addr, size, direction);
  }
  
  /**
@@ -3080,7 +3083,7 @@ static inline int ib_dma_map_sg(struct ib_device *dev,
                                 struct scatterlist *sg, int nents,
                                 enum dma_data_direction direction)
  {
-       return dma_map_sg(&dev->dev, sg, nents, direction);
+       return dma_map_sg(dev->dma_device, sg, nents, direction);
  }
  
  /**
@@ -3094,7 +3097,7 @@ static inline void ib_dma_unmap_sg(struct ib_device *dev,
                                    struct scatterlist *sg, int nents,
                                    enum dma_data_direction direction)
  {
-       dma_unmap_sg(&dev->dev, sg, nents, direction);
+       dma_unmap_sg(dev->dma_device, sg, nents, direction);
  }
  
  static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
@@ -3102,7 +3105,8 @@ static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
                                       enum dma_data_direction direction,
                                       unsigned long dma_attrs)
  {
-       return dma_map_sg_attrs(&dev->dev, sg, nents, direction, dma_attrs);
+       return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
+                               dma_attrs);
  }
  
  static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
@@ -3110,7 +3114,7 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
                                          enum dma_data_direction direction,
                                          unsigned long dma_attrs)
  {
-       dma_unmap_sg_attrs(&dev->dev, sg, nents, direction, dma_attrs);
+       dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs);
  }
  /**
   * ib_sg_dma_address - Return the DMA address from a scatter/gather entry
@@ -3152,7 +3156,7 @@ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev,
                                               size_t size,
                                               enum dma_data_direction dir)
  {
-       dma_sync_single_for_cpu(&dev->dev, addr, size, dir);
+       dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
  }
  
  /**
@@ -3167,7 +3171,7 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev,
                                                  size_t size,
                                                  enum dma_data_direction dir)
  {
-       dma_sync_single_for_device(&dev->dev, addr, size, dir);
+       dma_sync_single_for_device(dev->dma_device, addr, size, dir);
  }
  
  /**
@@ -3182,7 +3186,7 @@ static inline void *ib_dma_alloc_coherent(struct ib_device *dev,
                                            dma_addr_t *dma_handle,
                                            gfp_t flag)
  {
-       return dma_alloc_coherent(&dev->dev, size, dma_handle, flag);
+       return dma_alloc_coherent(dev->dma_device, size, dma_handle, flag);
  }
  
  /**
@@ -3196,7 +3200,7 @@ static inline void ib_dma_free_coherent(struct ib_device *dev,
                                         size_t size, void *cpu_addr,
                                         dma_addr_t dma_handle)
  {
-       dma_free_coherent(&dev->dev, size, cpu_addr, dma_handle);
+       dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle);
  }
  
  /**
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h

index b0e275de6dec0d2be9adf09810e889c89d7ad06c..583875ea136ab228ea14a727581c384f50527211 100644 (file)
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -196,6 +196,7 @@ struct iscsi_conn {
         struct iscsi_task       *task;          /* xmit task in progress */
  
         /* xmit */
+       spinlock_t              taskqueuelock;  /* protects the next three lists */
         struct list_head        mgmtqueue;      /* mgmt (control) xmit queue */
         struct list_head        cmdqueue;       /* data-path cmd queue */
         struct list_head        requeue;        /* tasks needing another run */
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h

index 6f22b39f1b0c3bc8bb1812a631a638ef1987a784..080c7ce9bae8892a43838043d986282a1385283a 100644 (file)
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -472,6 +472,10 @@ static inline int scsi_device_created(struct scsi_device *sdev)
                 sdev->sdev_state == SDEV_CREATED_BLOCK;
  }
  
+int scsi_internal_device_block(struct scsi_device *sdev, bool wait);
+int scsi_internal_device_unblock(struct scsi_device *sdev,
+                                enum scsi_device_state new_state);
+
  /* accessor functions for the SCSI parameters */
  static inline int scsi_device_sync(struct scsi_device *sdev)
  {
diff --git a/include/scsi/scsi_request.h b/include/scsi/scsi_request.h

index ba0aeb980f7e7cb654171697316f6089b9db01d0..f0c76f9dc28547301d4f67ac76e580560d081989 100644 (file)
--- a/include/scsi/scsi_request.h
+++ b/include/scsi/scsi_request.h
@@ -9,8 +9,10 @@ struct scsi_request {
         unsigned char   __cmd[BLK_MAX_CDB];
         unsigned char   *cmd;
         unsigned short  cmd_len;
+       int             result;
         unsigned int    sense_len;
         unsigned int    resid_len;      /* residual count */
+       int             retries;
         void            *sense;
  };
  
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h

index b54b98dc2d4a77681dd3ecf883d75e062589ee8c..1b0f447ce850f015e64dd27e47751fe945cbb2ec 100644 (file)
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -4,7 +4,12 @@
  #include <linux/types.h>
  #include <target/target_core_base.h>
  
-#define TRANSPORT_FLAG_PASSTHROUGH             1
+#define TRANSPORT_FLAG_PASSTHROUGH             0x1
+/*
+ * ALUA commands, state checks and setup operations are handled by the
+ * backend module.
+ */
+#define TRANSPORT_FLAG_PASSTHROUGH_ALUA                0x2
  
  struct request_queue;
  struct scatterlist;
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h

index 37c274e61acceee74d792a240b8f3695f0d78085..ccfad0e9c2cdbd68f13c809c7ed6414b2c0c97c1 100644 (file)
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -117,6 +117,7 @@ enum transport_state_table {
         TRANSPORT_ISTATE_PROCESSING = 11,
         TRANSPORT_COMPLETE_QF_WP = 18,
         TRANSPORT_COMPLETE_QF_OK = 19,
+       TRANSPORT_COMPLETE_QF_ERR = 20,
  };
  
  /* Used for struct se_cmd->se_cmd_flags */
@@ -279,8 +280,6 @@ struct t10_alua_tg_pt_gp {
         u16     tg_pt_gp_id;
         int     tg_pt_gp_valid_id;
         int     tg_pt_gp_alua_supported_states;
-       int     tg_pt_gp_alua_pending_state;
-       int     tg_pt_gp_alua_previous_state;
         int     tg_pt_gp_alua_access_status;
         int     tg_pt_gp_alua_access_type;
         int     tg_pt_gp_nonop_delay_msecs;
@@ -289,18 +288,16 @@ struct t10_alua_tg_pt_gp {
         int     tg_pt_gp_pref;
         int     tg_pt_gp_write_metadata;
         u32     tg_pt_gp_members;
-       atomic_t tg_pt_gp_alua_access_state;
+       int     tg_pt_gp_alua_access_state;
         atomic_t tg_pt_gp_ref_cnt;
         spinlock_t tg_pt_gp_lock;
-       struct mutex tg_pt_gp_md_mutex;
+       struct mutex tg_pt_gp_transition_mutex;
         struct se_device *tg_pt_gp_dev;
         struct config_group tg_pt_gp_group;
         struct list_head tg_pt_gp_list;
         struct list_head tg_pt_gp_lun_list;
         struct se_lun *tg_pt_gp_alua_lun;
         struct se_node_acl *tg_pt_gp_alua_nacl;
-       struct delayed_work tg_pt_gp_transition_work;
-       struct completion *tg_pt_gp_transition_complete;
  };
  
  struct t10_vpd {
@@ -705,6 +702,7 @@ struct se_lun {
         u64                     unpacked_lun;
  #define SE_LUN_LINK_MAGIC                      0xffff7771
         u32                     lun_link_magic;
+       bool                    lun_shutdown;
         bool                    lun_access_ro;
         u32                     lun_index;
  
diff --git a/include/trace/events/block.h b/include/trace/events/block.h

index a88ed13446ff88e200ed642db63badf4c9a4b680..d0dbe60d8a6dd5ccbb89029796fcabd23d970ceb 100644 (file)
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -61,7 +61,16 @@ DEFINE_EVENT(block_buffer, block_dirty_buffer,
         TP_ARGS(bh)
  );
  
-DECLARE_EVENT_CLASS(block_rq_with_error,
+/**
+ * block_rq_requeue - place block IO request back on a queue
+ * @q: queue holding operation
+ * @rq: block IO operation request
+ *
+ * The block operation request @rq is being placed back into queue
+ * @q.  For some reason the request was not completed and needs to be
+ * put back in the queue.
+ */
+TRACE_EVENT(block_rq_requeue,
  
         TP_PROTO(struct request_queue *q, struct request *rq),
  
@@ -71,7 +80,6 @@ DECLARE_EVENT_CLASS(block_rq_with_error,
                 __field(  dev_t,        dev                     )
                 __field(  sector_t,     sector                  )
                 __field(  unsigned int, nr_sector               )
-               __field(  int,          errors                  )
                 __array(  char,         rwbs,   RWBS_LEN        )
                 __dynamic_array( char,  cmd,    1               )
         ),
@@ -80,7 +88,6 @@ DECLARE_EVENT_CLASS(block_rq_with_error,
                 __entry->dev       = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
                 __entry->sector    = blk_rq_trace_sector(rq);
                 __entry->nr_sector = blk_rq_trace_nr_sectors(rq);
-               __entry->errors    = rq->errors;
  
                 blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
                 __get_str(cmd)[0] = '\0';
@@ -90,46 +97,13 @@ DECLARE_EVENT_CLASS(block_rq_with_error,
                   MAJOR(__entry->dev), MINOR(__entry->dev),
                   __entry->rwbs, __get_str(cmd),
                   (unsigned long long)__entry->sector,
-                 __entry->nr_sector, __entry->errors)
-);
-
-/**
- * block_rq_abort - abort block operation request
- * @q: queue containing the block operation request
- * @rq: block IO operation request
- *
- * Called immediately after pending block IO operation request @rq in
- * queue @q is aborted. The fields in the operation request @rq
- * can be examined to determine which device and sectors the pending
- * operation would access.
- */
-DEFINE_EVENT(block_rq_with_error, block_rq_abort,
-
-       TP_PROTO(struct request_queue *q, struct request *rq),
-
-       TP_ARGS(q, rq)
-);
-
-/**
- * block_rq_requeue - place block IO request back on a queue
- * @q: queue holding operation
- * @rq: block IO operation request
- *
- * The block operation request @rq is being placed back into queue
- * @q.  For some reason the request was not completed and needs to be
- * put back in the queue.
- */
-DEFINE_EVENT(block_rq_with_error, block_rq_requeue,
-
-       TP_PROTO(struct request_queue *q, struct request *rq),
-
-       TP_ARGS(q, rq)
+                 __entry->nr_sector, 0)
  );
  
  /**
   * block_rq_complete - block IO operation completed by device driver
- * @q: queue containing the block operation request
   * @rq: block operations request
+ * @error: status code
   * @nr_bytes: number of completed bytes
   *
   * The block_rq_complete tracepoint event indicates that some portion
@@ -140,16 +114,15 @@ DEFINE_EVENT(block_rq_with_error, block_rq_requeue,
   */
  TRACE_EVENT(block_rq_complete,
  
-       TP_PROTO(struct request_queue *q, struct request *rq,
-                unsigned int nr_bytes),
+       TP_PROTO(struct request *rq, int error, unsigned int nr_bytes),
  
-       TP_ARGS(q, rq, nr_bytes),
+       TP_ARGS(rq, error, nr_bytes),
  
         TP_STRUCT__entry(
                 __field(  dev_t,        dev                     )
                 __field(  sector_t,     sector                  )
                 __field(  unsigned int, nr_sector               )
-               __field(  int,          errors                  )
+               __field(  int,          error                   )
                 __array(  char,         rwbs,   RWBS_LEN        )
                 __dynamic_array( char,  cmd,    1               )
         ),
@@ -158,7 +131,7 @@ TRACE_EVENT(block_rq_complete,
                 __entry->dev       = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
                 __entry->sector    = blk_rq_pos(rq);
                 __entry->nr_sector = nr_bytes >> 9;
-               __entry->errors    = rq->errors;
+               __entry->error     = error;
  
                 blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, nr_bytes);
                 __get_str(cmd)[0] = '\0';
@@ -168,7 +141,7 @@ TRACE_EVENT(block_rq_complete,
                   MAJOR(__entry->dev), MINOR(__entry->dev),
                   __entry->rwbs, __get_str(cmd),
                   (unsigned long long)__entry->sector,
-                 __entry->nr_sector, __entry->errors)
+                 __entry->nr_sector, __entry->error)
  );
  
  DECLARE_EVENT_CLASS(block_rq,
diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h

index 14e49c7981359ccdac1e2d9d87a3c284e7c682f3..b35533b9427719c3ddcd2c776a20f52d5465aea0 100644 (file)
--- a/include/trace/events/syscalls.h
+++ b/include/trace/events/syscalls.h
@@ -1,5 +1,6 @@
  #undef TRACE_SYSTEM
  #define TRACE_SYSTEM raw_syscalls
+#undef TRACE_INCLUDE_FILE
  #define TRACE_INCLUDE_FILE syscalls
  
  #if !defined(_TRACE_EVENTS_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ)
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h

index 9b1462e38b821a762b284b44a20a96de9f0930d9..a076cf1a3a23be2fbee73dab483e051b37b2370c 100644 (file)
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -730,9 +730,11 @@ __SYSCALL(__NR_pkey_mprotect, sys_pkey_mprotect)
  __SYSCALL(__NR_pkey_alloc,    sys_pkey_alloc)
  #define __NR_pkey_free 290
  __SYSCALL(__NR_pkey_free,     sys_pkey_free)
+#define __NR_statx 291
+__SYSCALL(__NR_statx,     sys_statx)
  
  #undef __NR_syscalls
-#define __NR_syscalls 291
+#define __NR_syscalls 292
  
  /*
   * All syscalls below here should go away really,
diff --git a/include/uapi/drm/omap_drm.h b/include/uapi/drm/omap_drm.h

index 407cb55df6ac178e11620fd9554cc913e30b401c..7fb97863c94577d7b9f583abe8a41fe14f54b734 100644 (file)
--- a/include/uapi/drm/omap_drm.h
+++ b/include/uapi/drm/omap_drm.h
@@ -33,8 +33,8 @@ extern "C" {
  #define OMAP_PARAM_CHIPSET_ID  1       /* ie. 0x3430, 0x4430, etc */
  
  struct drm_omap_param {
-       uint64_t param;                 /* in */
-       uint64_t value;                 /* in (set_param), out (get_param) */
+       __u64 param;                    /* in */
+       __u64 value;                    /* in (set_param), out (get_param) */
  };
  
  #define OMAP_BO_SCANOUT                0x00000001      /* scanout capable (phys contiguous) */
@@ -53,18 +53,18 @@ struct drm_omap_param {
  #define OMAP_BO_TILED          (OMAP_BO_TILED_8 | OMAP_BO_TILED_16 | OMAP_BO_TILED_32)
  
  union omap_gem_size {
-       uint32_t bytes;         /* (for non-tiled formats) */
+       __u32 bytes;            /* (for non-tiled formats) */
         struct {
-               uint16_t width;
-               uint16_t height;
+               __u16 width;
+               __u16 height;
         } tiled;                /* (for tiled formats) */
  };
  
  struct drm_omap_gem_new {
         union omap_gem_size size;       /* in */
-       uint32_t flags;                 /* in */
-       uint32_t handle;                /* out */
-       uint32_t __pad;
+       __u32 flags;                    /* in */
+       __u32 handle;                   /* out */
+       __u32 __pad;
  };
  
  /* mask of operations: */
@@ -74,33 +74,33 @@ enum omap_gem_op {
  };
  
  struct drm_omap_gem_cpu_prep {
-       uint32_t handle;                /* buffer handle (in) */
-       uint32_t op;                    /* mask of omap_gem_op (in) */
+       __u32 handle;                   /* buffer handle (in) */
+       __u32 op;                       /* mask of omap_gem_op (in) */
  };
  
  struct drm_omap_gem_cpu_fini {
-       uint32_t handle;                /* buffer handle (in) */
-       uint32_t op;                    /* mask of omap_gem_op (in) */
+       __u32 handle;                   /* buffer handle (in) */
+       __u32 op;                       /* mask of omap_gem_op (in) */
         /* TODO maybe here we pass down info about what regions are touched
          * by sw so we can be clever about cache ops?  For now a placeholder,
          * set to zero and we just do full buffer flush..
          */
-       uint32_t nregions;
-       uint32_t __pad;
+       __u32 nregions;
+       __u32 __pad;
  };
  
  struct drm_omap_gem_info {
-       uint32_t handle;                /* buffer handle (in) */
-       uint32_t pad;
-       uint64_t offset;                /* mmap offset (out) */
+       __u32 handle;                   /* buffer handle (in) */
+       __u32 pad;
+       __u64 offset;                   /* mmap offset (out) */
         /* note: in case of tiled buffers, the user virtual size can be
          * different from the physical size (ie. how many pages are needed
          * to back the object) which is returned in DRM_IOCTL_GEM_OPEN..
          * This size here is the one that should be used if you want to
          * mmap() the buffer:
          */
-       uint32_t size;                  /* virtual size for mmap'ing (out) */
-       uint32_t __pad;
+       __u32 size;                     /* virtual size for mmap'ing (out) */
+       __u32 __pad;
  };
  
  #define DRM_OMAP_GET_PARAM             0x00
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild

index dd9820b1c7796b87986443124ad18907b8b719c3..f8d9fed17ba99418d858ceb57a864b31a00d078a 100644 (file)
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -445,6 +445,7 @@ header-y += unistd.h
  header-y += unix_diag.h
  header-y += usbdevice_fs.h
  header-y += usbip.h
+header-y += userio.h
  header-y += utime.h
  header-y += utsname.h
  header-y += uuid.h
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h

index db4c253f8011b2f483ddd1ffc09f4f04a93fdc0a..dcfc3a5a9cb1d20f29bbac00c6ef315006e9d208 100644 (file)
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -713,33 +713,6 @@ enum btrfs_err_code {
         BTRFS_ERROR_DEV_ONLY_WRITABLE,
         BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS
  };
-/* An error code to error string mapping for the kernel
-*  error codes
-*/
-static inline char *btrfs_err_str(enum btrfs_err_code err_code)
-{
-       switch (err_code) {
-               case BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET:
-                       return "unable to go below two devices on raid1";
-               case BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET:
-                       return "unable to go below four devices on raid10";
-               case BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET:
-                       return "unable to go below two devices on raid5";
-               case BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET:
-                       return "unable to go below three devices on raid6";
-               case BTRFS_ERROR_DEV_TGT_REPLACE:
-                       return "unable to remove the dev_replace target dev";
-               case BTRFS_ERROR_DEV_MISSING_NOT_FOUND:
-                       return "no missing devices found to remove";
-               case BTRFS_ERROR_DEV_ONLY_WRITABLE:
-                       return "unable to remove the only writeable device";
-               case BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS:
-                       return "add/delete/balance/replace/resize operation "\
-                               "in progress";
-               default:
-                       return NULL;
-       }
-}
  
  #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
                                    struct btrfs_ioctl_vol_args)
diff --git a/include/uapi/linux/ipv6_route.h b/include/uapi/linux/ipv6_route.h

index 85bbb1799df3f93cb1eacbfec497278c2c20c098..d496c02e14bc44327fd3ab6c3faad0c1d7ac1e12 100644 (file)
--- a/include/uapi/linux/ipv6_route.h
+++ b/include/uapi/linux/ipv6_route.h
@@ -35,7 +35,7 @@
  #define RTF_PREF(pref) ((pref) << 27)
  #define RTF_PREF_MASK  0x18000000
  
-#define RTF_PCPU       0x40000000
+#define RTF_PCPU       0x40000000      /* read-only: can not be set by user */
  #define RTF_LOCAL      0x80000000
  
  
diff --git a/include/uapi/linux/lightnvm.h b/include/uapi/linux/lightnvm.h

index fd19f36b3129278343f37ee5e59dfa280713eb9c..c8aec4b9e73b8c85189eb9b62fa98cf1fb6e7bfa 100644 (file)
--- a/include/uapi/linux/lightnvm.h
+++ b/include/uapi/linux/lightnvm.h
@@ -85,6 +85,10 @@ struct nvm_ioctl_create_conf {
         };
  };
  
+enum {
+       NVM_TARGET_FACTORY = 1 << 0,    /* Init target in factory mode */
+};
+
  struct nvm_ioctl_create {
         char dev[DISK_NAME_LEN];                /* open-channel SSD device */
         char tgttype[NVM_TTYPE_NAME_MAX];       /* target type name */
diff --git a/include/uapi/linux/nbd-netlink.h b/include/uapi/linux/nbd-netlink.h

new file mode 100644 (file)

index 0000000..6f7ca3d
--- /dev/null
+++ b/include/uapi/linux/nbd-netlink.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2017 Facebook.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+#ifndef _UAPILINUX_NBD_NETLINK_H
+#define _UAPILINUX_NBD_NETLINK_H
+
+#define NBD_GENL_FAMILY_NAME           "nbd"
+#define NBD_GENL_VERSION               0x1
+#define NBD_GENL_MCAST_GROUP_NAME      "nbd_mc_group"
+
+/* Configuration policy attributes, used for CONNECT */
+enum {
+       NBD_ATTR_UNSPEC,
+       NBD_ATTR_INDEX,
+       NBD_ATTR_SIZE_BYTES,
+       NBD_ATTR_BLOCK_SIZE_BYTES,
+       NBD_ATTR_TIMEOUT,
+       NBD_ATTR_SERVER_FLAGS,
+       NBD_ATTR_CLIENT_FLAGS,
+       NBD_ATTR_SOCKETS,
+       NBD_ATTR_DEAD_CONN_TIMEOUT,
+       NBD_ATTR_DEVICE_LIST,
+       __NBD_ATTR_MAX,
+};
+#define NBD_ATTR_MAX (__NBD_ATTR_MAX - 1)
+
+/*
+ * This is the format for multiple devices with NBD_ATTR_DEVICE_LIST
+ *
+ * [NBD_ATTR_DEVICE_LIST]
+ *   [NBD_DEVICE_ITEM]
+ *     [NBD_DEVICE_INDEX]
+ *     [NBD_DEVICE_CONNECTED]
+ */
+enum {
+       NBD_DEVICE_ITEM_UNSPEC,
+       NBD_DEVICE_ITEM,
+       __NBD_DEVICE_ITEM_MAX,
+};
+#define NBD_DEVICE_ITEM_MAX (__NBD_DEVICE_ITEM_MAX - 1)
+
+enum {
+       NBD_DEVICE_UNSPEC,
+       NBD_DEVICE_INDEX,
+       NBD_DEVICE_CONNECTED,
+       __NBD_DEVICE_MAX,
+};
+#define NBD_DEVICE_ATTR_MAX (__NBD_DEVICE_MAX - 1)
+
+/*
+ * This is the format for multiple sockets with NBD_ATTR_SOCKETS
+ *
+ * [NBD_ATTR_SOCKETS]
+ *   [NBD_SOCK_ITEM]
+ *     [NBD_SOCK_FD]
+ *   [NBD_SOCK_ITEM]
+ *     [NBD_SOCK_FD]
+ */
+enum {
+       NBD_SOCK_ITEM_UNSPEC,
+       NBD_SOCK_ITEM,
+       __NBD_SOCK_ITEM_MAX,
+};
+#define NBD_SOCK_ITEM_MAX (__NBD_SOCK_ITEM_MAX - 1)
+
+enum {
+       NBD_SOCK_UNSPEC,
+       NBD_SOCK_FD,
+       __NBD_SOCK_MAX,
+};
+#define NBD_SOCK_MAX (__NBD_SOCK_MAX - 1)
+
+enum {
+       NBD_CMD_UNSPEC,
+       NBD_CMD_CONNECT,
+       NBD_CMD_DISCONNECT,
+       NBD_CMD_RECONFIGURE,
+       NBD_CMD_LINK_DEAD,
+       NBD_CMD_STATUS,
+       __NBD_CMD_MAX,
+};
+#define NBD_CMD_MAX    (__NBD_CMD_MAX - 1)
+
+#endif /* _UAPILINUX_NBD_NETLINK_H */
diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h

index c91c642ea9003ed3ec5ba1be70b9899af3847948..155e33f819134a3aecdccf3c139974ef0c54c68c 100644 (file)
--- a/include/uapi/linux/nbd.h
+++ b/include/uapi/linux/nbd.h
@@ -37,7 +37,7 @@ enum {
         NBD_CMD_TRIM = 4
  };
  
-/* values for flags field */
+/* values for flags field, these are server interaction specific. */
  #define NBD_FLAG_HAS_FLAGS     (1 << 0) /* nbd-server supports flags */
  #define NBD_FLAG_READ_ONLY     (1 << 1) /* device is read-only */
  #define NBD_FLAG_SEND_FLUSH    (1 << 2) /* can flush writeback cache */
@@ -45,6 +45,10 @@ enum {
  #define NBD_FLAG_SEND_TRIM     (1 << 5) /* send trim/discard */
  #define NBD_FLAG_CAN_MULTI_CONN        (1 << 8)        /* Server supports multiple connections per export. */
  
+/* These are client behavior specific flags. */
+#define NBD_CFLAG_DESTROY_ON_DISCONNECT        (1 << 0) /* delete the nbd device on
+                                                   disconnect. */
+
  /* userspace doesn't need the nbd_device structure */
  
  /* These are sent over the network in the request/reply magic fields */
diff --git a/include/uapi/linux/packet_diag.h b/include/uapi/linux/packet_diag.h

index d08c63f3dd6ff47c7cf090927e91f27cfc0d767f..0c5d5dd61b6ab1d2039686d25683e6dffa1f634e 100644 (file)
--- a/include/uapi/linux/packet_diag.h
+++ b/include/uapi/linux/packet_diag.h
@@ -64,7 +64,7 @@ struct packet_diag_mclist {
         __u32   pdmc_count;
         __u16   pdmc_type;
         __u16   pdmc_alen;
-       __u8    pdmc_addr[MAX_ADDR_LEN];
+       __u8    pdmc_addr[32]; /* MAX_ADDR_LEN */
  };
  
  struct packet_diag_ring {
diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h

index 51a6b86e370043f7b37f128c4896f5f588710643..17b10304c393355da9ed2da743107a5c59748290 100644 (file)
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -48,17 +48,13 @@
   * tv_sec holds the number of seconds before (negative) or after (positive)
   * 00:00:00 1st January 1970 UTC.
   *
- * tv_nsec holds a number of nanoseconds before (0..-999,999,999 if tv_sec is
- * negative) or after (0..999,999,999 if tv_sec is positive) the tv_sec time.
- *
- * Note that if both tv_sec and tv_nsec are non-zero, then the two values must
- * either be both positive or both negative.
+ * tv_nsec holds a number of nanoseconds (0..999,999,999) after the tv_sec time.
   *
   * __reserved is held in case we need a yet finer resolution.
   */
  struct statx_timestamp {
         __s64   tv_sec;
-       __s32   tv_nsec;
+       __u32   tv_nsec;
         __s32   __reserved;
  };
  
@@ -114,7 +110,7 @@ struct statx {
         __u64   stx_ino;        /* Inode number */
         __u64   stx_size;       /* File size */
         __u64   stx_blocks;     /* Number of 512-byte blocks allocated */
-       __u64   __spare1[1];
+       __u64   stx_attributes_mask; /* Mask to show what's supported in stx_attributes */
         /* 0x40 */
         struct statx_timestamp  stx_atime;      /* Last access time */
         struct statx_timestamp  stx_btime;      /* File creation time */
@@ -152,9 +148,10 @@ struct statx {
  #define STATX_BASIC_STATS      0x000007ffU     /* The stuff in the normal stat struct */
  #define STATX_BTIME            0x00000800U     /* Want/got stx_btime */
  #define STATX_ALL              0x00000fffU     /* All currently supported flags */
+#define STATX__RESERVED                0x80000000U     /* Reserved for future struct statx expansion */
  
  /*
- * Attributes to be found in stx_attributes
+ * Attributes to be found in stx_attributes and masked in stx_attributes_mask.
   *
   * These give information about the features or the state of a file that might
   * be of use to ordinary userspace programs such as GUIs or ls rather than
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h

index c055947c5c989fa7e399a7b0dcaba8640014b548..3b059530dac95fa6e5dcf736e95a84fe80eb5f35 100644 (file)
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -18,8 +18,7 @@
   * means the userland is reading).
   */
  #define UFFD_API ((__u64)0xAA)
-#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_EXIT |           \
-                          UFFD_FEATURE_EVENT_FORK |            \
+#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK |           \
                            UFFD_FEATURE_EVENT_REMAP |           \
                            UFFD_FEATURE_EVENT_REMOVE |  \
                            UFFD_FEATURE_EVENT_UNMAP |           \
@@ -113,7 +112,6 @@ struct uffd_msg {
  #define UFFD_EVENT_REMAP       0x14
  #define UFFD_EVENT_REMOVE      0x15
  #define UFFD_EVENT_UNMAP       0x16
-#define UFFD_EVENT_EXIT                0x17
  
  /* flags for UFFD_EVENT_PAGEFAULT */
  #define UFFD_PAGEFAULT_FLAG_WRITE      (1<<0)  /* If this was a write fault */
@@ -163,7 +161,6 @@ struct uffdio_api {
  #define UFFD_FEATURE_MISSING_HUGETLBFS         (1<<4)
  #define UFFD_FEATURE_MISSING_SHMEM             (1<<5)
  #define UFFD_FEATURE_EVENT_UNMAP               (1<<6)
-#define UFFD_FEATURE_EVENT_EXIT                        (1<<7)
         __u64 features;
  
         __u64 ioctls;
diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h

index 15b4385a2be169e9b99221f31a444820feadae3d..90007a1abcab144ac3d6ac7d6e6f4001d58abb14 100644 (file)
--- a/include/uapi/linux/virtio_pci.h
+++ b/include/uapi/linux/virtio_pci.h
@@ -79,7 +79,7 @@
   * configuration space */
  #define VIRTIO_PCI_CONFIG_OFF(msix_enabled)    ((msix_enabled) ? 24 : 20)
  /* Deprecated: please use VIRTIO_PCI_CONFIG_OFF instead */
-#define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->pci_dev->msix_enabled)
+#define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->msix_enabled)
  
  /* Virtio ABI version, this must match exactly */
  #define VIRTIO_PCI_ABI_VERSION         0
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h

index da7cd62bace746879e154829b5ecad5f6cd87c00..0b3d30837a9f6456aabe028f5f9931b3593d778c 100644 (file)
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -34,6 +34,7 @@
  #define MLX5_ABI_USER_H
  
  #include <linux/types.h>
+#include <linux/if_ether.h>    /* For ETH_ALEN. */
  
  enum {
         MLX5_QP_FLAG_SIGNATURE          = 1 << 0,
@@ -66,7 +67,7 @@ struct mlx5_ib_alloc_ucontext_req {
  };
  
  enum mlx5_lib_caps {
-       MLX5_LIB_CAP_4K_UAR     = (u64)1 << 0,
+       MLX5_LIB_CAP_4K_UAR     = (__u64)1 << 0,
  };
  
  struct mlx5_ib_alloc_ucontext_req_v2 {
diff --git a/include/video/exynos5433_decon.h b/include/video/exynos5433_decon.h

index ef8e2a8ad0afc85da51bc3fca00a26fa3768ff16..6b083d327e982c0c5deb7895ce6e7c6b1094c8a6 100644 (file)
--- a/include/video/exynos5433_decon.h
+++ b/include/video/exynos5433_decon.h
@@ -46,6 +46,7 @@
  #define DECON_FRAMEFIFO_STATUS         0x0524
  #define DECON_CMU                      0x1404
  #define DECON_UPDATE                   0x1410
+#define DECON_CRFMID                   0x1414
  #define DECON_UPDATE_SCHEME            0x1438
  #define DECON_VIDCON1                  0x2000
  #define DECON_VIDCON2                  0x2004
@@ -126,6 +127,10 @@
  
  /* VIDINTCON0 */
  #define VIDINTCON0_FRAMEDONE           (1 << 17)
+#define VIDINTCON0_FRAMESEL_BP         (0 << 15)
+#define VIDINTCON0_FRAMESEL_VS         (1 << 15)
+#define VIDINTCON0_FRAMESEL_AC         (2 << 15)
+#define VIDINTCON0_FRAMESEL_FP         (3 << 15)
  #define VIDINTCON0_INTFRMEN            (1 << 12)
  #define VIDINTCON0_INTEN               (1 << 0)
  
@@ -142,6 +147,13 @@
  #define STANDALONE_UPDATE_F            (1 << 0)
  
  /* DECON_VIDCON1 */
+#define VIDCON1_LINECNT_MASK           (0x0fff << 16)
+#define VIDCON1_I80_ACTIVE             (1 << 15)
+#define VIDCON1_VSTATUS_MASK           (0x3 << 13)
+#define VIDCON1_VSTATUS_VS             (0 << 13)
+#define VIDCON1_VSTATUS_BP             (1 << 13)
+#define VIDCON1_VSTATUS_AC             (2 << 13)
+#define VIDCON1_VSTATUS_FP             (3 << 13)
  #define VIDCON1_VCLK_MASK              (0x3 << 9)
  #define VIDCON1_VCLK_RUN_VDEN_DISABLE  (0x3 << 9)
  #define VIDCON1_VCLK_HOLD              (0x0 << 9)
diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h

index a0083be5d5295157bc11b82a343b58f798488bb8..1f6d78f044b671bca827b15c5b6b8388c09e4fa6 100644 (file)
--- a/include/xen/swiotlb-xen.h
+++ b/include/xen/swiotlb-xen.h
@@ -2,6 +2,7 @@
  #define __LINUX_SWIOTLB_XEN_H
  
  #include <linux/dma-direction.h>
+#include <linux/scatterlist.h>
  #include <linux/swiotlb.h>
  
  extern int xen_swiotlb_init(int verbose, bool early);
@@ -55,4 +56,14 @@ xen_swiotlb_dma_supported(struct device *hwdev, u64 mask);
  
  extern int
  xen_swiotlb_set_dma_mask(struct device *dev, u64 dma_mask);
+
+extern int
+xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+                    void *cpu_addr, dma_addr_t dma_addr, size_t size,
+                    unsigned long attrs);
+
+extern int
+xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
+                       void *cpu_addr, dma_addr_t handle, size_t size,
+                       unsigned long attrs);
  #endif /* __LINUX_SWIOTLB_XEN_H */
diff --git a/init/main.c b/init/main.c

index eae2f15657c62c31f66353f4bd3c4af1e03451cf..b0c11cbf5ddf8a55a3c832e4acbd72653d9c38f1 100644 (file)
--- a/init/main.c
+++ b/init/main.c
@@ -882,7 +882,6 @@ static void __init do_basic_setup(void)
         do_ctors();
         usermodehelper_enable();
         do_initcalls();
-       random_int_secret_init();
  }
  
  static void __init do_pre_smp_initcalls(void)
@@ -1023,6 +1022,8 @@ static noinline void __init kernel_init_freeable(void)
  
         workqueue_init();
  
+       init_mm_internals();
+
         do_pre_smp_initcalls();
         lockup_detector_init();
  
diff --git a/kernel/audit.c b/kernel/audit.c

index e794544f5e63334afccadf6cc70f5fb2541e1e2e..a871bf80fde1adc79040936475b7045971e72d76 100644 (file)
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -54,6 +54,10 @@
  #include <linux/kthread.h>
  #include <linux/kernel.h>
  #include <linux/syscalls.h>
+#include <linux/spinlock.h>
+#include <linux/rcupdate.h>
+#include <linux/mutex.h>
+#include <linux/gfp.h>
  
  #include <linux/audit.h>
  
@@ -90,13 +94,34 @@ static u32  audit_default;
  /* If auditing cannot proceed, audit_failure selects what happens. */
  static u32     audit_failure = AUDIT_FAIL_PRINTK;
  
-/*
- * If audit records are to be written to the netlink socket, audit_pid
- * contains the pid of the auditd process and audit_nlk_portid contains
- * the portid to use to send netlink messages to that process.
+/* private audit network namespace index */
+static unsigned int audit_net_id;
+
+/**
+ * struct audit_net - audit private network namespace data
+ * @sk: communication socket
   */
-int            audit_pid;
-static __u32   audit_nlk_portid;
+struct audit_net {
+       struct sock *sk;
+};
+
+/**
+ * struct auditd_connection - kernel/auditd connection state
+ * @pid: auditd PID
+ * @portid: netlink portid
+ * @net: the associated network namespace
+ * @lock: spinlock to protect write access
+ *
+ * Description:
+ * This struct is RCU protected; you must either hold the RCU lock for reading
+ * or the included spinlock for writing.
+ */
+static struct auditd_connection {
+       int pid;
+       u32 portid;
+       struct net *net;
+       spinlock_t lock;
+} auditd_conn;
  
  /* If audit_rate_limit is non-zero, limit the rate of sending audit records
   * to that number per second.  This prevents DoS attacks, but results in
@@ -123,10 +148,6 @@ u32                audit_sig_sid = 0;
  */
  static atomic_t        audit_lost = ATOMIC_INIT(0);
  
-/* The netlink socket. */
-static struct sock *audit_sock;
-static unsigned int audit_net_id;
-
  /* Hash for inode-based rules */
  struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
  
@@ -192,6 +213,43 @@ struct audit_reply {
         struct sk_buff *skb;
  };
  
+/**
+ * auditd_test_task - Check to see if a given task is an audit daemon
+ * @task: the task to check
+ *
+ * Description:
+ * Return 1 if the task is a registered audit daemon, 0 otherwise.
+ */
+int auditd_test_task(const struct task_struct *task)
+{
+       int rc;
+
+       rcu_read_lock();
+       rc = (auditd_conn.pid && task->tgid == auditd_conn.pid ? 1 : 0);
+       rcu_read_unlock();
+
+       return rc;
+}
+
+/**
+ * audit_get_sk - Return the audit socket for the given network namespace
+ * @net: the destination network namespace
+ *
+ * Description:
+ * Returns the sock pointer if valid, NULL otherwise.  The caller must ensure
+ * that a reference is held for the network namespace while the sock is in use.
+ */
+static struct sock *audit_get_sk(const struct net *net)
+{
+       struct audit_net *aunet;
+
+       if (!net)
+               return NULL;
+
+       aunet = net_generic(net, audit_net_id);
+       return aunet->sk;
+}
+
  static void audit_set_portid(struct audit_buffer *ab, __u32 portid)
  {
         if (ab) {
@@ -210,9 +268,7 @@ void audit_panic(const char *message)
                         pr_err("%s\n", message);
                 break;
         case AUDIT_FAIL_PANIC:
-               /* test audit_pid since printk is always losey, why bother? */
-               if (audit_pid)
-                       panic("audit: %s\n", message);
+               panic("audit: %s\n", message);
                 break;
         }
  }
@@ -370,21 +426,60 @@ static int audit_set_failure(u32 state)
         return audit_do_config_change("audit_failure", &audit_failure, state);
  }
  
-/*
- * For one reason or another this nlh isn't getting delivered to the userspace
- * audit daemon, just send it to printk.
+/**
+ * auditd_set - Set/Reset the auditd connection state
+ * @pid: auditd PID
+ * @portid: auditd netlink portid
+ * @net: auditd network namespace pointer
+ *
+ * Description:
+ * This function will obtain and drop network namespace references as
+ * necessary.
+ */
+static void auditd_set(int pid, u32 portid, struct net *net)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&auditd_conn.lock, flags);
+       auditd_conn.pid = pid;
+       auditd_conn.portid = portid;
+       if (auditd_conn.net)
+               put_net(auditd_conn.net);
+       if (net)
+               auditd_conn.net = get_net(net);
+       else
+               auditd_conn.net = NULL;
+       spin_unlock_irqrestore(&auditd_conn.lock, flags);
+}
+
+/**
+ * kauditd_printk_skb - Print the audit record to the ring buffer
+ * @skb: audit record
+ *
+ * Whatever the reason, this packet may not make it to the auditd connection
+ * so write it via printk so the information isn't completely lost.
   */
  static void kauditd_printk_skb(struct sk_buff *skb)
  {
         struct nlmsghdr *nlh = nlmsg_hdr(skb);
         char *data = nlmsg_data(nlh);
  
-       if (nlh->nlmsg_type != AUDIT_EOE) {
-               if (printk_ratelimit())
-                       pr_notice("type=%d %s\n", nlh->nlmsg_type, data);
-               else
-                       audit_log_lost("printk limit exceeded");
-       }
+       if (nlh->nlmsg_type != AUDIT_EOE && printk_ratelimit())
+               pr_notice("type=%d %s\n", nlh->nlmsg_type, data);
+}
+
+/**
+ * kauditd_rehold_skb - Handle an audit record send failure in the hold queue
+ * @skb: audit record
+ *
+ * Description:
+ * This should only be used by the kauditd_thread when it fails to flush the
+ * hold queue.
+ */
+static void kauditd_rehold_skb(struct sk_buff *skb)
+{
+       /* put the record back in the queue at the same place */
+       skb_queue_head(&audit_hold_queue, skb);
  }
  
  /**
@@ -444,65 +539,163 @@ static void kauditd_retry_skb(struct sk_buff *skb)
   * auditd_reset - Disconnect the auditd connection
   *
   * Description:
- * Break the auditd/kauditd connection and move all the records in the retry
- * queue into the hold queue in case auditd reconnects.  The audit_cmd_mutex
- * must be held when calling this function.
+ * Break the auditd/kauditd connection and move all the queued records into the
+ * hold queue in case auditd reconnects.
   */
  static void auditd_reset(void)
  {
         struct sk_buff *skb;
  
-       /* break the connection */
-       if (audit_sock) {
-               sock_put(audit_sock);
-               audit_sock = NULL;
-       }
-       audit_pid = 0;
-       audit_nlk_portid = 0;
+       /* if it isn't already broken, break the connection */
+       rcu_read_lock();
+       if (auditd_conn.pid)
+               auditd_set(0, 0, NULL);
+       rcu_read_unlock();
  
-       /* flush all of the retry queue to the hold queue */
+       /* flush all of the main and retry queues to the hold queue */
         while ((skb = skb_dequeue(&audit_retry_queue)))
                 kauditd_hold_skb(skb);
+       while ((skb = skb_dequeue(&audit_queue)))
+               kauditd_hold_skb(skb);
  }
  
  /**
- * kauditd_send_unicast_skb - Send a record via unicast to auditd
+ * auditd_send_unicast_skb - Send a record via unicast to auditd
   * @skb: audit record
+ *
+ * Description:
+ * Send a skb to the audit daemon, returns positive/zero values on success and
+ * negative values on failure; in all cases the skb will be consumed by this
+ * function.  If the send results in -ECONNREFUSED the connection with auditd
+ * will be reset.  This function may sleep so callers should not hold any locks
+ * where this would cause a problem.
   */
-static int kauditd_send_unicast_skb(struct sk_buff *skb)
+static int auditd_send_unicast_skb(struct sk_buff *skb)
  {
         int rc;
+       u32 portid;
+       struct net *net;
+       struct sock *sk;
+
+       /* NOTE: we can't call netlink_unicast while in the RCU section so
+        *       take a reference to the network namespace and grab local
+        *       copies of the namespace, the sock, and the portid; the
+        *       namespace and sock aren't going to go away while we hold a
+        *       reference and if the portid does become invalid after the RCU
+        *       section netlink_unicast() should safely return an error */
+
+       rcu_read_lock();
+       if (!auditd_conn.pid) {
+               rcu_read_unlock();
+               rc = -ECONNREFUSED;
+               goto err;
+       }
+       net = auditd_conn.net;
+       get_net(net);
+       sk = audit_get_sk(net);
+       portid = auditd_conn.portid;
+       rcu_read_unlock();
  
-       /* if we know nothing is connected, don't even try the netlink call */
-       if (!audit_pid)
-               return -ECONNREFUSED;
+       rc = netlink_unicast(sk, skb, portid, 0);
+       put_net(net);
+       if (rc < 0)
+               goto err;
  
-       /* get an extra skb reference in case we fail to send */
-       skb_get(skb);
-       rc = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0);
-       if (rc >= 0) {
-               consume_skb(skb);
-               rc = 0;
-       }
+       return rc;
  
+err:
+       if (rc == -ECONNREFUSED)
+               auditd_reset();
         return rc;
  }
  
+/**
+ * kauditd_send_queue - Helper for kauditd_thread to flush skb queues
+ * @sk: the sending sock
+ * @portid: the netlink destination
+ * @queue: the skb queue to process
+ * @retry_limit: limit on number of netlink unicast failures
+ * @skb_hook: per-skb hook for additional processing
+ * @err_hook: hook called if the skb fails the netlink unicast send
+ *
+ * Description:
+ * Run through the given queue and attempt to send the audit records to auditd,
+ * returns zero on success, negative values on failure.  It is up to the caller
+ * to ensure that the @sk is valid for the duration of this function.
+ *
+ */
+static int kauditd_send_queue(struct sock *sk, u32 portid,
+                             struct sk_buff_head *queue,
+                             unsigned int retry_limit,
+                             void (*skb_hook)(struct sk_buff *skb),
+                             void (*err_hook)(struct sk_buff *skb))
+{
+       int rc = 0;
+       struct sk_buff *skb;
+       static unsigned int failed = 0;
+
+       /* NOTE: kauditd_thread takes care of all our locking, we just use
+        *       the netlink info passed to us (e.g. sk and portid) */
+
+       while ((skb = skb_dequeue(queue))) {
+               /* call the skb_hook for each skb we touch */
+               if (skb_hook)
+                       (*skb_hook)(skb);
+
+               /* can we send to anyone via unicast? */
+               if (!sk) {
+                       if (err_hook)
+                               (*err_hook)(skb);
+                       continue;
+               }
+
+               /* grab an extra skb reference in case of error */
+               skb_get(skb);
+               rc = netlink_unicast(sk, skb, portid, 0);
+               if (rc < 0) {
+                       /* fatal failure for our queue flush attempt? */
+                       if (++failed >= retry_limit ||
+                           rc == -ECONNREFUSED || rc == -EPERM) {
+                               /* yes - error processing for the queue */
+                               sk = NULL;
+                               if (err_hook)
+                                       (*err_hook)(skb);
+                               if (!skb_hook)
+                                       goto out;
+                               /* keep processing with the skb_hook */
+                               continue;
+                       } else
+                               /* no - requeue to preserve ordering */
+                               skb_queue_head(queue, skb);
+               } else {
+                       /* it worked - drop the extra reference and continue */
+                       consume_skb(skb);
+                       failed = 0;
+               }
+       }
+
+out:
+       return (rc >= 0 ? 0 : rc);
+}
+
  /*
   * kauditd_send_multicast_skb - Send a record to any multicast listeners
   * @skb: audit record
   *
   * Description:
- * This function doesn't consume an skb as might be expected since it has to
- * copy it anyways.
+ * Write a multicast message to anyone listening in the initial network
+ * namespace.  This function doesn't consume an skb as might be expected since
+ * it has to copy it anyways.
   */
  static void kauditd_send_multicast_skb(struct sk_buff *skb)
  {
         struct sk_buff *copy;
-       struct audit_net *aunet = net_generic(&init_net, audit_net_id);
-       struct sock *sock = aunet->nlsk;
+       struct sock *sock = audit_get_sk(&init_net);
         struct nlmsghdr *nlh;
  
+       /* NOTE: we are not taking an additional reference for init_net since
+        *       we don't have to worry about it going away */
+
         if (!netlink_has_listeners(sock, AUDIT_NLGRP_READLOG))
                 return;
  
@@ -526,149 +719,79 @@ static void kauditd_send_multicast_skb(struct sk_buff *skb)
  }
  
  /**
- * kauditd_wake_condition - Return true when it is time to wake kauditd_thread
- *
- * Description:
- * This function is for use by the wait_event_freezable() call in
- * kauditd_thread().
+ * kauditd_thread - Worker thread to send audit records to userspace
+ * @dummy: unused
   */
-static int kauditd_wake_condition(void)
-{
-       static int pid_last = 0;
-       int rc;
-       int pid = audit_pid;
-
-       /* wake on new messages or a change in the connected auditd */
-       rc = skb_queue_len(&audit_queue) || (pid && pid != pid_last);
-       if (rc)
-               pid_last = pid;
-
-       return rc;
-}
-
  static int kauditd_thread(void *dummy)
  {
         int rc;
-       int auditd = 0;
-       int reschedule = 0;
-       struct sk_buff *skb;
-       struct nlmsghdr *nlh;
+       u32 portid = 0;
+       struct net *net = NULL;
+       struct sock *sk = NULL;
  
  #define UNICAST_RETRIES 5
-#define AUDITD_BAD(x,y) \
-       ((x) == -ECONNREFUSED || (x) == -EPERM || ++(y) >= UNICAST_RETRIES)
-
-       /* NOTE: we do invalidate the auditd connection flag on any sending
-        * errors, but we only "restore" the connection flag at specific places
-        * in the loop in order to help ensure proper ordering of audit
-        * records */
  
         set_freezable();
         while (!kthread_should_stop()) {
-               /* NOTE: possible area for future improvement is to look at
-                *       the hold and retry queues, since only this thread
-                *       has access to these queues we might be able to do
-                *       our own queuing and skip some/all of the locking */
-
-               /* NOTE: it might be a fun experiment to split the hold and
-                *       retry queue handling to another thread, but the
-                *       synchronization issues and other overhead might kill
-                *       any performance gains */
+               /* NOTE: see the lock comments in auditd_send_unicast_skb() */
+               rcu_read_lock();
+               if (!auditd_conn.pid) {
+                       rcu_read_unlock();
+                       goto main_queue;
+               }
+               net = auditd_conn.net;
+               get_net(net);
+               sk = audit_get_sk(net);
+               portid = auditd_conn.portid;
+               rcu_read_unlock();
  
                 /* attempt to flush the hold queue */
-               while (auditd && (skb = skb_dequeue(&audit_hold_queue))) {
-                       rc = kauditd_send_unicast_skb(skb);
-                       if (rc) {
-                               /* requeue to the same spot */
-                               skb_queue_head(&audit_hold_queue, skb);
-
-                               auditd = 0;
-                               if (AUDITD_BAD(rc, reschedule)) {
-                                       mutex_lock(&audit_cmd_mutex);
-                                       auditd_reset();
-                                       mutex_unlock(&audit_cmd_mutex);
-                                       reschedule = 0;
-                               }
-                       } else
-                               /* we were able to send successfully */
-                               reschedule = 0;
+               rc = kauditd_send_queue(sk, portid,
+                                       &audit_hold_queue, UNICAST_RETRIES,
+                                       NULL, kauditd_rehold_skb);
+               if (rc < 0) {
+                       sk = NULL;
+                       auditd_reset();
+                       goto main_queue;
                 }
  
                 /* attempt to flush the retry queue */
-               while (auditd && (skb = skb_dequeue(&audit_retry_queue))) {
-                       rc = kauditd_send_unicast_skb(skb);
-                       if (rc) {
-                               auditd = 0;
-                               if (AUDITD_BAD(rc, reschedule)) {
-                                       kauditd_hold_skb(skb);
-                                       mutex_lock(&audit_cmd_mutex);
-                                       auditd_reset();
-                                       mutex_unlock(&audit_cmd_mutex);
-                                       reschedule = 0;
-                               } else
-                                       /* temporary problem (we hope), queue
-                                        * to the same spot and retry */
-                                       skb_queue_head(&audit_retry_queue, skb);
-                       } else
-                               /* we were able to send successfully */
-                               reschedule = 0;
+               rc = kauditd_send_queue(sk, portid,
+                                       &audit_retry_queue, UNICAST_RETRIES,
+                                       NULL, kauditd_hold_skb);
+               if (rc < 0) {
+                       sk = NULL;
+                       auditd_reset();
+                       goto main_queue;
                 }
  
-               /* standard queue processing, try to be as quick as possible */
-quick_loop:
-               skb = skb_dequeue(&audit_queue);
-               if (skb) {
-                       /* setup the netlink header, see the comments in
-                        * kauditd_send_multicast_skb() for length quirks */
-                       nlh = nlmsg_hdr(skb);
-                       nlh->nlmsg_len = skb->len - NLMSG_HDRLEN;
-
-                       /* attempt to send to any multicast listeners */
-                       kauditd_send_multicast_skb(skb);
-
-                       /* attempt to send to auditd, queue on failure */
-                       if (auditd) {
-                               rc = kauditd_send_unicast_skb(skb);
-                               if (rc) {
-                                       auditd = 0;
-                                       if (AUDITD_BAD(rc, reschedule)) {
-                                               mutex_lock(&audit_cmd_mutex);
-                                               auditd_reset();
-                                               mutex_unlock(&audit_cmd_mutex);
-                                               reschedule = 0;
-                                       }
-
-                                       /* move to the retry queue */
-                                       kauditd_retry_skb(skb);
-                               } else
-                                       /* everything is working so go fast! */
-                                       goto quick_loop;
-                       } else if (reschedule)
-                               /* we are currently having problems, move to
-                                * the retry queue */
-                               kauditd_retry_skb(skb);
-                       else
-                               /* dump the message via printk and hold it */
-                               kauditd_hold_skb(skb);
-               } else {
-                       /* we have flushed the backlog so wake everyone */
-                       wake_up(&audit_backlog_wait);
-
-                       /* if everything is okay with auditd (if present), go
-                        * to sleep until there is something new in the queue
-                        * or we have a change in the connected auditd;
-                        * otherwise simply reschedule to give things a chance
-                        * to recover */
-                       if (reschedule) {
-                               set_current_state(TASK_INTERRUPTIBLE);
-                               schedule();
-                       } else
-                               wait_event_freezable(kauditd_wait,
-                                                    kauditd_wake_condition());
-
-                       /* update the auditd connection status */
-                       auditd = (audit_pid ? 1 : 0);
+main_queue:
+               /* process the main queue - do the multicast send and attempt
+                * unicast, dump failed record sends to the retry queue; if
+                * sk == NULL due to previous failures we will just do the
+                * multicast send and move the record to the retry queue */
+               rc = kauditd_send_queue(sk, portid, &audit_queue, 1,
+                                       kauditd_send_multicast_skb,
+                                       kauditd_retry_skb);
+               if (sk == NULL || rc < 0)
+                       auditd_reset();
+               sk = NULL;
+
+               /* drop our netns reference, no auditd sends past this line */
+               if (net) {
+                       put_net(net);
+                       net = NULL;
                 }
+
+               /* we have processed all the queues so wake everyone */
+               wake_up(&audit_backlog_wait);
+
+               /* NOTE: we want to wake up if there is anything on the queue,
+                *       regardless of if an auditd is connected, as we need to
+                *       do the multicast send and rotate records from the
+                *       main queue to the retry/hold queues */
+               wait_event_freezable(kauditd_wait,
+                                    (skb_queue_len(&audit_queue) ? 1 : 0));
         }
  
         return 0;
@@ -678,17 +801,16 @@ int audit_send_list(void *_dest)
  {
         struct audit_netlink_list *dest = _dest;
         struct sk_buff *skb;
-       struct net *net = dest->net;
-       struct audit_net *aunet = net_generic(net, audit_net_id);
+       struct sock *sk = audit_get_sk(dest->net);
  
         /* wait for parent to finish and send an ACK */
         mutex_lock(&audit_cmd_mutex);
         mutex_unlock(&audit_cmd_mutex);
  
         while ((skb = __skb_dequeue(&dest->q)) != NULL)
-               netlink_unicast(aunet->nlsk, skb, dest->portid, 0);
+               netlink_unicast(sk, skb, dest->portid, 0);
  
-       put_net(net);
+       put_net(dest->net);
         kfree(dest);
  
         return 0;
@@ -722,16 +844,15 @@ out_kfree_skb:
  static int audit_send_reply_thread(void *arg)
  {
         struct audit_reply *reply = (struct audit_reply *)arg;
-       struct net *net = reply->net;
-       struct audit_net *aunet = net_generic(net, audit_net_id);
+       struct sock *sk = audit_get_sk(reply->net);
  
         mutex_lock(&audit_cmd_mutex);
         mutex_unlock(&audit_cmd_mutex);
  
         /* Ignore failure. It'll only happen if the sender goes away,
            because our timeout is set to infinite. */
-       netlink_unicast(aunet->nlsk , reply->skb, reply->portid, 0);
-       put_net(net);
+       netlink_unicast(sk, reply->skb, reply->portid, 0);
+       put_net(reply->net);
         kfree(reply);
         return 0;
  }
@@ -949,12 +1070,12 @@ static int audit_set_feature(struct sk_buff *skb)
  
  static int audit_replace(pid_t pid)
  {
-       struct sk_buff *skb = audit_make_reply(0, 0, AUDIT_REPLACE, 0, 0,
-                                              &pid, sizeof(pid));
+       struct sk_buff *skb;
  
+       skb = audit_make_reply(0, 0, AUDIT_REPLACE, 0, 0, &pid, sizeof(pid));
         if (!skb)
                 return -ENOMEM;
-       return netlink_unicast(audit_sock, skb, audit_nlk_portid, 0);
+       return auditd_send_unicast_skb(skb);
  }
  
  static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
@@ -981,7 +1102,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                 memset(&s, 0, sizeof(s));
                 s.enabled               = audit_enabled;
                 s.failure               = audit_failure;
-               s.pid                   = audit_pid;
+               rcu_read_lock();
+               s.pid                   = auditd_conn.pid;
+               rcu_read_unlock();
                 s.rate_limit            = audit_rate_limit;
                 s.backlog_limit         = audit_backlog_limit;
                 s.lost                  = atomic_read(&audit_lost);
@@ -1014,30 +1137,44 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                          *       from the initial pid namespace, but something
                          *       to keep in mind if this changes */
                         int new_pid = s.pid;
+                       pid_t auditd_pid;
                         pid_t requesting_pid = task_tgid_vnr(current);
  
-                       if ((!new_pid) && (requesting_pid != audit_pid)) {
-                               audit_log_config_change("audit_pid", new_pid, audit_pid, 0);
+                       /* test the auditd connection */
+                       audit_replace(requesting_pid);
+
+                       rcu_read_lock();
+                       auditd_pid = auditd_conn.pid;
+                       /* only the current auditd can unregister itself */
+                       if ((!new_pid) && (requesting_pid != auditd_pid)) {
+                               rcu_read_unlock();
+                               audit_log_config_change("audit_pid", new_pid,
+                                                       auditd_pid, 0);
                                 return -EACCES;
                         }
-                       if (audit_pid && new_pid &&
-                           audit_replace(requesting_pid) != -ECONNREFUSED) {
-                               audit_log_config_change("audit_pid", new_pid, audit_pid, 0);
+                       /* replacing a healthy auditd is not allowed */
+                       if (auditd_pid && new_pid) {
+                               rcu_read_unlock();
+                               audit_log_config_change("audit_pid", new_pid,
+                                                       auditd_pid, 0);
                                 return -EEXIST;
                         }
+                       rcu_read_unlock();
+
                         if (audit_enabled != AUDIT_OFF)
-                               audit_log_config_change("audit_pid", new_pid, audit_pid, 1);
+                               audit_log_config_change("audit_pid", new_pid,
+                                                       auditd_pid, 1);
+
                         if (new_pid) {
-                               if (audit_sock)
-                                       sock_put(audit_sock);
-                               audit_pid = new_pid;
-                               audit_nlk_portid = NETLINK_CB(skb).portid;
-                               sock_hold(skb->sk);
-                               audit_sock = skb->sk;
-                       } else {
+                               /* register a new auditd connection */
+                               auditd_set(new_pid,
+                                          NETLINK_CB(skb).portid,
+                                          sock_net(NETLINK_CB(skb).sk));
+                               /* try to process any backlog */
+                               wake_up_interruptible(&kauditd_wait);
+                       } else
+                               /* unregister the auditd connection */
                                 auditd_reset();
-                       }
-                       wake_up_interruptible(&kauditd_wait);
                 }
                 if (s.mask & AUDIT_STATUS_RATE_LIMIT) {
                         err = audit_set_rate_limit(s.rate_limit);
@@ -1090,7 +1227,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                                 if (err)
                                         break;
                         }
-                       mutex_unlock(&audit_cmd_mutex);
                         audit_log_common_recv_msg(&ab, msg_type);
                         if (msg_type != AUDIT_USER_TTY)
                                 audit_log_format(ab, " msg='%.*s'",
@@ -1108,7 +1244,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                         }
                         audit_set_portid(ab, NETLINK_CB(skb).portid);
                         audit_log_end(ab);
-                       mutex_lock(&audit_cmd_mutex);
                 }
                 break;
         case AUDIT_ADD_RULE:
@@ -1298,26 +1433,26 @@ static int __net_init audit_net_init(struct net *net)
  
         struct audit_net *aunet = net_generic(net, audit_net_id);
  
-       aunet->nlsk = netlink_kernel_create(net, NETLINK_AUDIT, &cfg);
-       if (aunet->nlsk == NULL) {
+       aunet->sk = netlink_kernel_create(net, NETLINK_AUDIT, &cfg);
+       if (aunet->sk == NULL) {
                 audit_panic("cannot initialize netlink socket in namespace");
                 return -ENOMEM;
         }
-       aunet->nlsk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+       aunet->sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+
         return 0;
  }
  
  static void __net_exit audit_net_exit(struct net *net)
  {
         struct audit_net *aunet = net_generic(net, audit_net_id);
-       struct sock *sock = aunet->nlsk;
-       mutex_lock(&audit_cmd_mutex);
-       if (sock == audit_sock)
+
+       rcu_read_lock();
+       if (net == auditd_conn.net)
                 auditd_reset();
-       mutex_unlock(&audit_cmd_mutex);
+       rcu_read_unlock();
  
-       netlink_kernel_release(sock);
-       aunet->nlsk = NULL;
+       netlink_kernel_release(aunet->sk);
  }
  
  static struct pernet_operations audit_net_ops __net_initdata = {
@@ -1335,20 +1470,24 @@ static int __init audit_init(void)
         if (audit_initialized == AUDIT_DISABLED)
                 return 0;
  
-       pr_info("initializing netlink subsys (%s)\n",
-               audit_default ? "enabled" : "disabled");
-       register_pernet_subsys(&audit_net_ops);
+       memset(&auditd_conn, 0, sizeof(auditd_conn));
+       spin_lock_init(&auditd_conn.lock);
  
         skb_queue_head_init(&audit_queue);
         skb_queue_head_init(&audit_retry_queue);
         skb_queue_head_init(&audit_hold_queue);
-       audit_initialized = AUDIT_INITIALIZED;
-       audit_enabled = audit_default;
-       audit_ever_enabled |= !!audit_default;
  
         for (i = 0; i < AUDIT_INODE_BUCKETS; i++)
                 INIT_LIST_HEAD(&audit_inode_hash[i]);
  
+       pr_info("initializing netlink subsys (%s)\n",
+               audit_default ? "enabled" : "disabled");
+       register_pernet_subsys(&audit_net_ops);
+
+       audit_initialized = AUDIT_INITIALIZED;
+       audit_enabled = audit_default;
+       audit_ever_enabled |= !!audit_default;
+
         kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd");
         if (IS_ERR(kauditd_task)) {
                 int err = PTR_ERR(kauditd_task);
@@ -1519,20 +1658,16 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
         if (unlikely(!audit_filter(type, AUDIT_FILTER_TYPE)))
                 return NULL;
  
-       /* don't ever fail/sleep on these two conditions:
+       /* NOTE: don't ever fail/sleep on these two conditions:
          * 1. auditd generated record - since we need auditd to drain the
          *    queue; also, when we are checking for auditd, compare PIDs using
          *    task_tgid_vnr() since auditd_pid is set in audit_receive_msg()
          *    using a PID anchored in the caller's namespace
-        * 2. audit command message - record types 1000 through 1099 inclusive
-        *    are command messages/records used to manage the kernel subsystem
-        *    and the audit userspace, blocking on these messages could cause
-        *    problems under load so don't do it (note: not all of these
-        *    command types are valid as record types, but it is quicker to
-        *    just check two ints than a series of ints in a if/switch stmt) */
-       if (!((audit_pid && audit_pid == task_tgid_vnr(current)) ||
-             (type >= 1000 && type <= 1099))) {
-               long sleep_time = audit_backlog_wait_time;
+        * 2. generator holding the audit_cmd_mutex - we don't want to block
+        *    while holding the mutex */
+       if (!(auditd_test_task(current) ||
+             (current == __mutex_owner(&audit_cmd_mutex)))) {
+               long stime = audit_backlog_wait_time;
  
                 while (audit_backlog_limit &&
                        (skb_queue_len(&audit_queue) > audit_backlog_limit)) {
@@ -1541,14 +1676,13 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
  
                         /* sleep if we are allowed and we haven't exhausted our
                          * backlog wait limit */
-                       if ((gfp_mask & __GFP_DIRECT_RECLAIM) &&
-                           (sleep_time > 0)) {
+                       if (gfpflags_allow_blocking(gfp_mask) && (stime > 0)) {
                                 DECLARE_WAITQUEUE(wait, current);
  
                                 add_wait_queue_exclusive(&audit_backlog_wait,
                                                          &wait);
                                 set_current_state(TASK_UNINTERRUPTIBLE);
-                               sleep_time = schedule_timeout(sleep_time);
+                               stime = schedule_timeout(stime);
                                 remove_wait_queue(&audit_backlog_wait, &wait);
                         } else {
                                 if (audit_rate_check() && printk_ratelimit())
@@ -2127,15 +2261,27 @@ out:
   */
  void audit_log_end(struct audit_buffer *ab)
  {
+       struct sk_buff *skb;
+       struct nlmsghdr *nlh;
+
         if (!ab)
                 return;
-       if (!audit_rate_check()) {
-               audit_log_lost("rate limit exceeded");
-       } else {
-               skb_queue_tail(&audit_queue, ab->skb);
-               wake_up_interruptible(&kauditd_wait);
+
+       if (audit_rate_check()) {
+               skb = ab->skb;
                 ab->skb = NULL;
-       }
+
+               /* setup the netlink header, see the comments in
+                * kauditd_send_multicast_skb() for length quirks */
+               nlh = nlmsg_hdr(skb);
+               nlh->nlmsg_len = skb->len - NLMSG_HDRLEN;
+
+               /* queue the netlink packet and poke the kauditd thread */
+               skb_queue_tail(&audit_queue, skb);
+               wake_up_interruptible(&kauditd_wait);
+       } else
+               audit_log_lost("rate limit exceeded");
+
         audit_buffer_free(ab);
  }
  
diff --git a/kernel/audit.h b/kernel/audit.h

index ca579880303ab475b2c81839a4948bdb128e92f8..0d87f8ab8778579dde21754e4ed647286f34e3ef 100644 (file)
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -218,7 +218,7 @@ extern void audit_log_name(struct audit_context *context,
                            struct audit_names *n, const struct path *path,
                            int record_num, int *call_panic);
  
-extern int audit_pid;
+extern int auditd_test_task(const struct task_struct *task);
  
  #define AUDIT_INODE_BUCKETS    32
  extern struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
@@ -250,10 +250,6 @@ struct audit_netlink_list {
  
  int audit_send_list(void *);
  
-struct audit_net {
-       struct sock *nlsk;
-};
-
  extern int selinux_audit_rule_update(void);
  
  extern struct mutex audit_filter_mutex;
@@ -337,14 +333,7 @@ extern u32 audit_sig_sid;
  extern int audit_filter(int msgtype, unsigned int listtype);
  
  #ifdef CONFIG_AUDITSYSCALL
-extern int __audit_signal_info(int sig, struct task_struct *t);
-static inline int audit_signal_info(int sig, struct task_struct *t)
-{
-       if (unlikely((audit_pid && t->tgid == audit_pid) ||
-                    (audit_signals && !audit_dummy_context())))
-               return __audit_signal_info(sig, t);
-       return 0;
-}
+extern int audit_signal_info(int sig, struct task_struct *t);
  extern void audit_filter_inodes(struct task_struct *, struct audit_context *);
  extern struct list_head *audit_killed_trees(void);
  #else
diff --git a/kernel/auditsc.c b/kernel/auditsc.c

index d6a8de5f8fa3d0ba33c14b20e6341e32d62dab2a..1c2333155893fac54138a8b3f7311c2ca520992c 100644 (file)
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -762,7 +762,7 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
         struct audit_entry *e;
         enum audit_state state;
  
-       if (audit_pid && tsk->tgid == audit_pid)
+       if (auditd_test_task(tsk))
                 return AUDIT_DISABLED;
  
         rcu_read_lock();
@@ -816,7 +816,7 @@ void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx)
  {
         struct audit_names *n;
  
-       if (audit_pid && tsk->tgid == audit_pid)
+       if (auditd_test_task(tsk))
                 return;
  
         rcu_read_lock();
@@ -2249,26 +2249,27 @@ void __audit_ptrace(struct task_struct *t)
   * If the audit subsystem is being terminated, record the task (pid)
   * and uid that is doing that.
   */
-int __audit_signal_info(int sig, struct task_struct *t)
+int audit_signal_info(int sig, struct task_struct *t)
  {
         struct audit_aux_data_pids *axp;
         struct task_struct *tsk = current;
         struct audit_context *ctx = tsk->audit_context;
         kuid_t uid = current_uid(), t_uid = task_uid(t);
  
-       if (audit_pid && t->tgid == audit_pid) {
-               if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) {
-                       audit_sig_pid = task_tgid_nr(tsk);
-                       if (uid_valid(tsk->loginuid))
-                               audit_sig_uid = tsk->loginuid;
-                       else
-                               audit_sig_uid = uid;
-                       security_task_getsecid(tsk, &audit_sig_sid);
-               }
-               if (!audit_signals || audit_dummy_context())
-                       return 0;
+       if (auditd_test_task(t) &&
+           (sig == SIGTERM || sig == SIGHUP ||
+            sig == SIGUSR1 || sig == SIGUSR2)) {
+               audit_sig_pid = task_tgid_nr(tsk);
+               if (uid_valid(tsk->loginuid))
+                       audit_sig_uid = tsk->loginuid;
+               else
+                       audit_sig_uid = uid;
+               security_task_getsecid(tsk, &audit_sig_sid);
         }
  
+       if (!audit_signals || audit_dummy_context())
+               return 0;
+
         /* optimize the common case by putting first signal recipient directly
          * in audit_context */
         if (!ctx->target_pid) {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c

index f45827e205d3f491a818a024dca5122c68924697..b4f1cb0c5ac7104c3f12f9ed5c1e3fe159c57824 100644 (file)
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1162,12 +1162,12 @@ out:
         LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
                 off = IMM;
  load_word:
-               /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are
-                * only appearing in the programs where ctx ==
-                * skb. All programs keep 'ctx' in regs[BPF_REG_CTX]
-                * == BPF_R6, bpf_convert_filter() saves it in BPF_R6,
-                * internal BPF verifier will check that BPF_R6 ==
-                * ctx.
+               /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only
+                * appearing in the programs where ctx == skb
+                * (see may_access_skb() in the verifier). All programs
+                * keep 'ctx' in regs[BPF_REG_CTX] == BPF_R6,
+                * bpf_convert_filter() saves it in BPF_R6, internal BPF
+                * verifier will check that BPF_R6 == ctx.
                  *
                  * BPF_ABS and BPF_IND are wrappers of function calls,
                  * so they scratch BPF_R1-BPF_R5 registers, preserve
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c

index 3ea87fb19a9416771985d9236f148ffb927ad19b..361a69dfe5434d6afb554477b899e91db90fb29f 100644 (file)
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -13,11 +13,12 @@
  #include <linux/bpf.h>
  #include <linux/jhash.h>
  #include <linux/filter.h>
+#include <linux/rculist_nulls.h>
  #include "percpu_freelist.h"
  #include "bpf_lru_list.h"
  
  struct bucket {
-       struct hlist_head head;
+       struct hlist_nulls_head head;
         raw_spinlock_t lock;
  };
  
@@ -29,28 +30,26 @@ struct bpf_htab {
                 struct pcpu_freelist freelist;
                 struct bpf_lru lru;
         };
-       void __percpu *extra_elems;
+       struct htab_elem *__percpu *extra_elems;
         atomic_t count; /* number of elements in this hashtable */
         u32 n_buckets;  /* number of hash buckets */
         u32 elem_size;  /* size of each element in bytes */
  };
  
-enum extra_elem_state {
-       HTAB_NOT_AN_EXTRA_ELEM = 0,
-       HTAB_EXTRA_ELEM_FREE,
-       HTAB_EXTRA_ELEM_USED
-};
-
  /* each htab element is struct htab_elem + key + value */
  struct htab_elem {
         union {
-               struct hlist_node hash_node;
-               struct bpf_htab *htab;
-               struct pcpu_freelist_node fnode;
+               struct hlist_nulls_node hash_node;
+               struct {
+                       void *padding;
+                       union {
+                               struct bpf_htab *htab;
+                               struct pcpu_freelist_node fnode;
+                       };
+               };
         };
         union {
                 struct rcu_head rcu;
-               enum extra_elem_state state;
                 struct bpf_lru_node lru_node;
         };
         u32 hash;
@@ -71,6 +70,11 @@ static bool htab_is_percpu(const struct bpf_htab *htab)
                 htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
  }
  
+static bool htab_is_prealloc(const struct bpf_htab *htab)
+{
+       return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
+}
+
  static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
                                      void __percpu *pptr)
  {
@@ -122,17 +126,20 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
  
  static int prealloc_init(struct bpf_htab *htab)
  {
+       u32 num_entries = htab->map.max_entries;
         int err = -ENOMEM, i;
  
-       htab->elems = bpf_map_area_alloc(htab->elem_size *
-                                        htab->map.max_entries);
+       if (!htab_is_percpu(htab) && !htab_is_lru(htab))
+               num_entries += num_possible_cpus();
+
+       htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries);
         if (!htab->elems)
                 return -ENOMEM;
  
         if (!htab_is_percpu(htab))
                 goto skip_percpu_elems;
  
-       for (i = 0; i < htab->map.max_entries; i++) {
+       for (i = 0; i < num_entries; i++) {
                 u32 size = round_up(htab->map.value_size, 8);
                 void __percpu *pptr;
  
@@ -160,10 +167,11 @@ skip_percpu_elems:
         if (htab_is_lru(htab))
                 bpf_lru_populate(&htab->lru, htab->elems,
                                  offsetof(struct htab_elem, lru_node),
-                                htab->elem_size, htab->map.max_entries);
+                                htab->elem_size, num_entries);
         else
-               pcpu_freelist_populate(&htab->freelist, htab->elems,
-                                      htab->elem_size, htab->map.max_entries);
+               pcpu_freelist_populate(&htab->freelist,
+                                      htab->elems + offsetof(struct htab_elem, fnode),
+                                      htab->elem_size, num_entries);
  
         return 0;
  
@@ -184,16 +192,22 @@ static void prealloc_destroy(struct bpf_htab *htab)
  
  static int alloc_extra_elems(struct bpf_htab *htab)
  {
-       void __percpu *pptr;
+       struct htab_elem *__percpu *pptr, *l_new;
+       struct pcpu_freelist_node *l;
         int cpu;
  
-       pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
+       pptr = __alloc_percpu_gfp(sizeof(struct htab_elem *), 8,
+                                 GFP_USER | __GFP_NOWARN);
         if (!pptr)
                 return -ENOMEM;
  
         for_each_possible_cpu(cpu) {
-               ((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
-                       HTAB_EXTRA_ELEM_FREE;
+               l = pcpu_freelist_pop(&htab->freelist);
+               /* pop will succeed, since prealloc_init()
+                * preallocated extra num_possible_cpus elements
+                */
+               l_new = container_of(l, struct htab_elem, fnode);
+               *per_cpu_ptr(pptr, cpu) = l_new;
         }
         htab->extra_elems = pptr;
         return 0;
@@ -217,6 +231,11 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
         int err, i;
         u64 cost;
  
+       BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
+                    offsetof(struct htab_elem, hash_node.pprev));
+       BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
+                    offsetof(struct htab_elem, hash_node.pprev));
+
         if (lru && !capable(CAP_SYS_ADMIN))
                 /* LRU implementation is much complicated than other
                  * maps.  Hence, limit to CAP_SYS_ADMIN for now.
@@ -326,29 +345,29 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
                 goto free_htab;
  
         for (i = 0; i < htab->n_buckets; i++) {
-               INIT_HLIST_HEAD(&htab->buckets[i].head);
+               INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
                 raw_spin_lock_init(&htab->buckets[i].lock);
         }
  
-       if (!percpu && !lru) {
-               /* lru itself can remove the least used element, so
-                * there is no need for an extra elem during map_update.
-                */
-               err = alloc_extra_elems(htab);
-               if (err)
-                       goto free_buckets;
-       }
-
         if (prealloc) {
                 err = prealloc_init(htab);
                 if (err)
-                       goto free_extra_elems;
+                       goto free_buckets;
+
+               if (!percpu && !lru) {
+                       /* lru itself can remove the least used element, so
+                        * there is no need for an extra elem during map_update.
+                        */
+                       err = alloc_extra_elems(htab);
+                       if (err)
+                               goto free_prealloc;
+               }
         }
  
         return &htab->map;
  
-free_extra_elems:
-       free_percpu(htab->extra_elems);
+free_prealloc:
+       prealloc_destroy(htab);
  free_buckets:
         bpf_map_area_free(htab->buckets);
  free_htab:
@@ -366,20 +385,44 @@ static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
         return &htab->buckets[hash & (htab->n_buckets - 1)];
  }
  
-static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
+static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash)
  {
         return &__select_bucket(htab, hash)->head;
  }
  
-static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
+/* this lookup function can only be called with bucket lock taken */
+static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
                                          void *key, u32 key_size)
  {
+       struct hlist_nulls_node *n;
+       struct htab_elem *l;
+
+       hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
+               if (l->hash == hash && !memcmp(&l->key, key, key_size))
+                       return l;
+
+       return NULL;
+}
+
+/* can be called without bucket lock. it will repeat the loop in
+ * the unlikely event when elements moved from one bucket into another
+ * while link list is being walked
+ */
+static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
+                                              u32 hash, void *key,
+                                              u32 key_size, u32 n_buckets)
+{
+       struct hlist_nulls_node *n;
         struct htab_elem *l;
  
-       hlist_for_each_entry_rcu(l, head, hash_node)
+again:
+       hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
                 if (l->hash == hash && !memcmp(&l->key, key, key_size))
                         return l;
  
+       if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
+               goto again;
+
         return NULL;
  }
  
@@ -387,7 +430,7 @@ static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
  static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
  {
         struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-       struct hlist_head *head;
+       struct hlist_nulls_head *head;
         struct htab_elem *l;
         u32 hash, key_size;
  
@@ -400,7 +443,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
  
         head = select_bucket(htab, hash);
  
-       l = lookup_elem_raw(head, hash, key, key_size);
+       l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
  
         return l;
  }
@@ -433,8 +476,9 @@ static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
  static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
  {
         struct bpf_htab *htab = (struct bpf_htab *)arg;
-       struct htab_elem *l, *tgt_l;
-       struct hlist_head *head;
+       struct htab_elem *l = NULL, *tgt_l;
+       struct hlist_nulls_head *head;
+       struct hlist_nulls_node *n;
         unsigned long flags;
         struct bucket *b;
  
@@ -444,9 +488,9 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
  
         raw_spin_lock_irqsave(&b->lock, flags);
  
-       hlist_for_each_entry_rcu(l, head, hash_node)
+       hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
                 if (l == tgt_l) {
-                       hlist_del_rcu(&l->hash_node);
+                       hlist_nulls_del_rcu(&l->hash_node);
                         break;
                 }
  
@@ -459,7 +503,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
  static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
  {
         struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-       struct hlist_head *head;
+       struct hlist_nulls_head *head;
         struct htab_elem *l, *next_l;
         u32 hash, key_size;
         int i;
@@ -473,7 +517,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
         head = select_bucket(htab, hash);
  
         /* lookup the key */
-       l = lookup_elem_raw(head, hash, key, key_size);
+       l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
  
         if (!l) {
                 i = 0;
@@ -481,7 +525,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
         }
  
         /* key was found, get next key in the same bucket */
-       next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)),
+       next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)),
                                   struct htab_elem, hash_node);
  
         if (next_l) {
@@ -500,7 +544,7 @@ find_first_elem:
                 head = select_bucket(htab, i);
  
                 /* pick first element in the bucket */
-               next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
+               next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)),
                                           struct htab_elem, hash_node);
                 if (next_l) {
                         /* if it's not empty, just return it */
@@ -538,12 +582,7 @@ static void htab_elem_free_rcu(struct rcu_head *head)
  
  static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
  {
-       if (l->state == HTAB_EXTRA_ELEM_USED) {
-               l->state = HTAB_EXTRA_ELEM_FREE;
-               return;
-       }
-
-       if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
+       if (htab_is_prealloc(htab)) {
                 pcpu_freelist_push(&htab->freelist, &l->fnode);
         } else {
                 atomic_dec(&htab->count);
@@ -573,43 +612,43 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
  static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
                                          void *value, u32 key_size, u32 hash,
                                          bool percpu, bool onallcpus,
-                                        bool old_elem_exists)
+                                        struct htab_elem *old_elem)
  {
         u32 size = htab->map.value_size;
-       bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
-       struct htab_elem *l_new;
+       bool prealloc = htab_is_prealloc(htab);
+       struct htab_elem *l_new, **pl_new;
         void __percpu *pptr;
-       int err = 0;
  
         if (prealloc) {
-               l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
-               if (!l_new)
-                       err = -E2BIG;
-       } else {
-               if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
-                       atomic_dec(&htab->count);
-                       err = -E2BIG;
+               if (old_elem) {
+                       /* if we're updating the existing element,
+                        * use per-cpu extra elems to avoid freelist_pop/push
+                        */
+                       pl_new = this_cpu_ptr(htab->extra_elems);
+                       l_new = *pl_new;
+                       *pl_new = old_elem;
                 } else {
-                       l_new = kmalloc(htab->elem_size,
-                                       GFP_ATOMIC | __GFP_NOWARN);
-                       if (!l_new)
-                               return ERR_PTR(-ENOMEM);
-               }
-       }
+                       struct pcpu_freelist_node *l;
  
-       if (err) {
-               if (!old_elem_exists)
-                       return ERR_PTR(err);
-
-               /* if we're updating the existing element and the hash table
-                * is full, use per-cpu extra elems
-                */
-               l_new = this_cpu_ptr(htab->extra_elems);
-               if (l_new->state != HTAB_EXTRA_ELEM_FREE)
-                       return ERR_PTR(-E2BIG);
-               l_new->state = HTAB_EXTRA_ELEM_USED;
+                       l = pcpu_freelist_pop(&htab->freelist);
+                       if (!l)
+                               return ERR_PTR(-E2BIG);
+                       l_new = container_of(l, struct htab_elem, fnode);
+               }
         } else {
-               l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
+               if (atomic_inc_return(&htab->count) > htab->map.max_entries)
+                       if (!old_elem) {
+                               /* when map is full and update() is replacing
+                                * old element, it's ok to allocate, since
+                                * old element will be freed immediately.
+                                * Otherwise return an error
+                                */
+                               atomic_dec(&htab->count);
+                               return ERR_PTR(-E2BIG);
+                       }
+               l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
+               if (!l_new)
+                       return ERR_PTR(-ENOMEM);
         }
  
         memcpy(l_new->key, key, key_size);
@@ -661,7 +700,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
  {
         struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
         struct htab_elem *l_new = NULL, *l_old;
-       struct hlist_head *head;
+       struct hlist_nulls_head *head;
         unsigned long flags;
         struct bucket *b;
         u32 key_size, hash;
@@ -690,7 +729,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
                 goto err;
  
         l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
-                               !!l_old);
+                               l_old);
         if (IS_ERR(l_new)) {
                 /* all pre-allocated elements are in use or memory exhausted */
                 ret = PTR_ERR(l_new);
@@ -700,10 +739,11 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
         /* add new element to the head of the list, so that
          * concurrent search will find it before old elem
          */
-       hlist_add_head_rcu(&l_new->hash_node, head);
+       hlist_nulls_add_head_rcu(&l_new->hash_node, head);
         if (l_old) {
-               hlist_del_rcu(&l_old->hash_node);
-               free_htab_elem(htab, l_old);
+               hlist_nulls_del_rcu(&l_old->hash_node);
+               if (!htab_is_prealloc(htab))
+                       free_htab_elem(htab, l_old);
         }
         ret = 0;
  err:
@@ -716,7 +756,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
  {
         struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
         struct htab_elem *l_new, *l_old = NULL;
-       struct hlist_head *head;
+       struct hlist_nulls_head *head;
         unsigned long flags;
         struct bucket *b;
         u32 key_size, hash;
@@ -757,10 +797,10 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
         /* add new element to the head of the list, so that
          * concurrent search will find it before old elem
          */
-       hlist_add_head_rcu(&l_new->hash_node, head);
+       hlist_nulls_add_head_rcu(&l_new->hash_node, head);
         if (l_old) {
                 bpf_lru_node_set_ref(&l_new->lru_node);
-               hlist_del_rcu(&l_old->hash_node);
+               hlist_nulls_del_rcu(&l_old->hash_node);
         }
         ret = 0;
  
@@ -781,7 +821,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
  {
         struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
         struct htab_elem *l_new = NULL, *l_old;
-       struct hlist_head *head;
+       struct hlist_nulls_head *head;
         unsigned long flags;
         struct bucket *b;
         u32 key_size, hash;
@@ -815,12 +855,12 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
                                 value, onallcpus);
         } else {
                 l_new = alloc_htab_elem(htab, key, value, key_size,
-                                       hash, true, onallcpus, false);
+                                       hash, true, onallcpus, NULL);
                 if (IS_ERR(l_new)) {
                         ret = PTR_ERR(l_new);
                         goto err;
                 }
-               hlist_add_head_rcu(&l_new->hash_node, head);
+               hlist_nulls_add_head_rcu(&l_new->hash_node, head);
         }
         ret = 0;
  err:
@@ -834,7 +874,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
  {
         struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
         struct htab_elem *l_new = NULL, *l_old;
-       struct hlist_head *head;
+       struct hlist_nulls_head *head;
         unsigned long flags;
         struct bucket *b;
         u32 key_size, hash;
@@ -882,7 +922,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
         } else {
                 pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
                                 value, onallcpus);
-               hlist_add_head_rcu(&l_new->hash_node, head);
+               hlist_nulls_add_head_rcu(&l_new->hash_node, head);
                 l_new = NULL;
         }
         ret = 0;
@@ -910,7 +950,7 @@ static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
  static int htab_map_delete_elem(struct bpf_map *map, void *key)
  {
         struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-       struct hlist_head *head;
+       struct hlist_nulls_head *head;
         struct bucket *b;
         struct htab_elem *l;
         unsigned long flags;
@@ -930,7 +970,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
         l = lookup_elem_raw(head, hash, key, key_size);
  
         if (l) {
-               hlist_del_rcu(&l->hash_node);
+               hlist_nulls_del_rcu(&l->hash_node);
                 free_htab_elem(htab, l);
                 ret = 0;
         }
@@ -942,7 +982,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
  static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
  {
         struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-       struct hlist_head *head;
+       struct hlist_nulls_head *head;
         struct bucket *b;
         struct htab_elem *l;
         unsigned long flags;
@@ -962,7 +1002,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
         l = lookup_elem_raw(head, hash, key, key_size);
  
         if (l) {
-               hlist_del_rcu(&l->hash_node);
+               hlist_nulls_del_rcu(&l->hash_node);
                 ret = 0;
         }
  
@@ -977,14 +1017,13 @@ static void delete_all_elements(struct bpf_htab *htab)
         int i;
  
         for (i = 0; i < htab->n_buckets; i++) {
-               struct hlist_head *head = select_bucket(htab, i);
-               struct hlist_node *n;
+               struct hlist_nulls_head *head = select_bucket(htab, i);
+               struct hlist_nulls_node *n;
                 struct htab_elem *l;
  
-               hlist_for_each_entry_safe(l, n, head, hash_node) {
-                       hlist_del_rcu(&l->hash_node);
-                       if (l->state != HTAB_EXTRA_ELEM_USED)
-                               htab_elem_free(htab, l);
+               hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
+                       hlist_nulls_del_rcu(&l->hash_node);
+                       htab_elem_free(htab, l);
                 }
         }
  }
@@ -1004,7 +1043,7 @@ static void htab_map_free(struct bpf_map *map)
          * not have executed. Wait for them.
          */
         rcu_barrier();
-       if (htab->map.map_flags & BPF_F_NO_PREALLOC)
+       if (!htab_is_prealloc(htab))
                 delete_all_elements(htab);
         else
                 prealloc_destroy(htab);
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c

index 8bfe0afaee1082186f805a2a37a556247cbfa608..b37bd9ab7f574242722c1d9b0503b896019488b2 100644 (file)
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -500,9 +500,15 @@ unlock:
         raw_spin_unlock(&trie->lock);
  }
  
+static int trie_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+       return -ENOTSUPP;
+}
+
  static const struct bpf_map_ops trie_ops = {
         .map_alloc = trie_alloc,
         .map_free = trie_free,
+       .map_get_next_key = trie_get_next_key,
         .map_lookup_elem = trie_lookup_elem,
         .map_update_elem = trie_update_elem,
         .map_delete_elem = trie_delete_elem,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c

index 7af0dcc5d7555679cea6c08395ab54710e7066e6..821f9e807de5705d5b4d65e502fb13a06d3215bb 100644 (file)
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -617,6 +617,14 @@ static void fixup_bpf_calls(struct bpf_prog *prog)
                         if (insn->imm == BPF_FUNC_xdp_adjust_head)
                                 prog->xdp_adjust_head = 1;
                         if (insn->imm == BPF_FUNC_tail_call) {
+                               /* If we tail call into other programs, we
+                                * cannot make any assumptions since they
+                                * can be replaced dynamically during runtime
+                                * in the program array.
+                                */
+                               prog->cb_access = 1;
+                               prog->xdp_adjust_head = 1;
+
                                 /* mark bpf_tail_call as different opcode
                                  * to avoid conditional branch in
                                  * interpeter for every normal call
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c

index 796b68d001198a39186cba850fe8161476a17bfa..a834068a400e279f963097489ed165c6ad1301b2 100644 (file)
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -765,38 +765,56 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
         }
  }
  
-static int check_ptr_alignment(struct bpf_verifier_env *env,
-                              struct bpf_reg_state *reg, int off, int size)
+static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
+                                  int off, int size)
  {
-       if (reg->type != PTR_TO_PACKET && reg->type != PTR_TO_MAP_VALUE_ADJ) {
-               if (off % size != 0) {
-                       verbose("misaligned access off %d size %d\n",
-                               off, size);
-                       return -EACCES;
-               } else {
-                       return 0;
-               }
-       }
-
-       if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
-               /* misaligned access to packet is ok on x86,arm,arm64 */
-               return 0;
-
         if (reg->id && size != 1) {
-               verbose("Unknown packet alignment. Only byte-sized access allowed\n");
+               verbose("Unknown alignment. Only byte-sized access allowed in packet access.\n");
                 return -EACCES;
         }
  
         /* skb->data is NET_IP_ALIGN-ed */
-       if (reg->type == PTR_TO_PACKET &&
-           (NET_IP_ALIGN + reg->off + off) % size != 0) {
+       if ((NET_IP_ALIGN + reg->off + off) % size != 0) {
                 verbose("misaligned packet access off %d+%d+%d size %d\n",
                         NET_IP_ALIGN, reg->off, off, size);
                 return -EACCES;
         }
+
         return 0;
  }
  
+static int check_val_ptr_alignment(const struct bpf_reg_state *reg,
+                                  int size)
+{
+       if (size != 1) {
+               verbose("Unknown alignment. Only byte-sized access allowed in value access.\n");
+               return -EACCES;
+       }
+
+       return 0;
+}
+
+static int check_ptr_alignment(const struct bpf_reg_state *reg,
+                              int off, int size)
+{
+       switch (reg->type) {
+       case PTR_TO_PACKET:
+               return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 :
+                      check_pkt_ptr_alignment(reg, off, size);
+       case PTR_TO_MAP_VALUE_ADJ:
+               return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 :
+                      check_val_ptr_alignment(reg, size);
+       default:
+               if (off % size != 0) {
+                       verbose("misaligned access off %d size %d\n",
+                               off, size);
+                       return -EACCES;
+               }
+
+               return 0;
+       }
+}
+
  /* check whether memory at (regno + off) is accessible for t = (read | write)
   * if t==write, value_regno is a register which value is stored into memory
   * if t==read, value_regno is a register which will receive the value from memory
@@ -818,7 +836,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
         if (size < 0)
                 return size;
  
-       err = check_ptr_alignment(env, reg, off, size);
+       err = check_ptr_alignment(reg, off, size);
         if (err)
                 return err;
  
@@ -1925,6 +1943,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
                  * register as unknown.
                  */
                 if (env->allow_ptr_leaks &&
+                   BPF_CLASS(insn->code) == BPF_ALU64 && opcode == BPF_ADD &&
                     (dst_reg->type == PTR_TO_MAP_VALUE ||
                      dst_reg->type == PTR_TO_MAP_VALUE_ADJ))
                         dst_reg->type = PTR_TO_MAP_VALUE_ADJ;
@@ -1973,14 +1992,15 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
  
         for (i = 0; i < MAX_BPF_REG; i++)
                 if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
-                       regs[i].range = dst_reg->off;
+                       /* keep the maximum range already checked */
+                       regs[i].range = max(regs[i].range, dst_reg->off);
  
         for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
                 if (state->stack_slot_type[i] != STACK_SPILL)
                         continue;
                 reg = &state->spilled_regs[i / BPF_REG_SIZE];
                 if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id)
-                       reg->range = dst_reg->off;
+                       reg->range = max(reg->range, dst_reg->off);
         }
  }
  
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c

index 56eba9caa632adcc118114d8aa55cbab00895495..1dc22f6b49f5e06c4af22222dfb1b32c885ce16a 100644 (file)
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -1329,7 +1329,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
                 struct task_struct *task;
                 int count = 0;
  
-               seq_printf(seq, "css_set %p\n", cset);
+               seq_printf(seq, "css_set %pK\n", cset);
  
                 list_for_each_entry(task, &cset->tasks, cg_list) {
                         if (count++ > MAX_TASKS_SHOWN_PER_CSS)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c

index 0125589c742841ddbff14639c1ded5e0590b00b4..687f5e0194efccbadce199c0570cd08b8c96181a 100644 (file)
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2425,11 +2425,12 @@ ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
                 tsk = tsk->group_leader;
  
         /*
-        * Workqueue threads may acquire PF_NO_SETAFFINITY and become
-        * trapped in a cpuset, or RT worker may be born in a cgroup
-        * with no rt_runtime allocated.  Just say no.
+        * kthreads may acquire PF_NO_SETAFFINITY during initialization.
+        * If userland migrates such a kthread to a non-root cgroup, it can
+        * become trapped in a cpuset, or RT kthread may be born in a
+        * cgroup with no rt_runtime allocated.  Just say no.
          */
-       if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
+       if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) {
                 ret = -EINVAL;
                 goto out_unlock_rcu;
         }
@@ -2669,7 +2670,7 @@ static bool css_visible(struct cgroup_subsys_state *css)
   *
   * Returns 0 on success, -errno on failure.  On failure, csses which have
   * been processed already aren't cleaned up.  The caller is responsible for
- * cleaning up with cgroup_apply_control_disble().
+ * cleaning up with cgroup_apply_control_disable().
   */
  static int cgroup_apply_control_enable(struct cgroup *cgrp)
  {
diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c

index e756dae493008e4bc4bf9f87846f818d7349ede8..2237201d66d5dacf1fe952a15d9acc54b1e90b50 100644 (file)
--- a/kernel/cgroup/pids.c
+++ b/kernel/cgroup/pids.c
@@ -229,7 +229,7 @@ static int pids_can_fork(struct task_struct *task)
                 /* Only log the first time events_limit is incremented. */
                 if (atomic64_inc_return(&pids->events_limit) == 1) {
                         pr_info("cgroup: fork rejected by pids controller in ");
-                       pr_cont_cgroup_path(task_cgroup(current, pids_cgrp_id));
+                       pr_cont_cgroup_path(css->cgroup);
                         pr_cont("\n");
                 }
                 cgroup_file_notify(&pids->events_file);
diff --git a/kernel/cpu.c b/kernel/cpu.c

index f7c063239fa5c74636922743ddb094052b9044c9..37b223e4fc05b74fc50aa51df0c307d65da026c3 100644 (file)
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1335,26 +1335,21 @@ static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
         struct cpuhp_step *sp;
         int ret = 0;
  
-       mutex_lock(&cpuhp_state_mutex);
-
         if (state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN) {
                 ret = cpuhp_reserve_state(state);
                 if (ret < 0)
-                       goto out;
+                       return ret;
                 state = ret;
         }
         sp = cpuhp_get_step(state);
-       if (name && sp->name) {
-               ret = -EBUSY;
-               goto out;
-       }
+       if (name && sp->name)
+               return -EBUSY;
+
         sp->startup.single = startup;
         sp->teardown.single = teardown;
         sp->name = name;
         sp->multi_instance = multi_instance;
         INIT_HLIST_HEAD(&sp->list);
-out:
-       mutex_unlock(&cpuhp_state_mutex);
         return ret;
  }
  
@@ -1428,6 +1423,7 @@ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
                 return -EINVAL;
  
         get_online_cpus();
+       mutex_lock(&cpuhp_state_mutex);
  
         if (!invoke || !sp->startup.multi)
                 goto add_node;
@@ -1447,16 +1443,14 @@ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
                 if (ret) {
                         if (sp->teardown.multi)
                                 cpuhp_rollback_install(cpu, state, node);
-                       goto err;
+                       goto unlock;
                 }
         }
  add_node:
         ret = 0;
-       mutex_lock(&cpuhp_state_mutex);
         hlist_add_head(node, &sp->list);
+unlock:
         mutex_unlock(&cpuhp_state_mutex);
-
-err:
         put_online_cpus();
         return ret;
  }
@@ -1491,6 +1485,7 @@ int __cpuhp_setup_state(enum cpuhp_state state,
                 return -EINVAL;
  
         get_online_cpus();
+       mutex_lock(&cpuhp_state_mutex);
  
         ret = cpuhp_store_callbacks(state, name, startup, teardown,
                                     multi_instance);
@@ -1524,6 +1519,7 @@ int __cpuhp_setup_state(enum cpuhp_state state,
                 }
         }
  out:
+       mutex_unlock(&cpuhp_state_mutex);
         put_online_cpus();
         /*
          * If the requested state is CPUHP_AP_ONLINE_DYN, return the
@@ -1547,6 +1543,8 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state,
                 return -EINVAL;
  
         get_online_cpus();
+       mutex_lock(&cpuhp_state_mutex);
+
         if (!invoke || !cpuhp_get_teardown_cb(state))
                 goto remove;
         /*
@@ -1563,7 +1561,6 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state,
         }
  
  remove:
-       mutex_lock(&cpuhp_state_mutex);
         hlist_del(node);
         mutex_unlock(&cpuhp_state_mutex);
         put_online_cpus();
@@ -1571,6 +1568,7 @@ remove:
         return 0;
  }
  EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
+
  /**
   * __cpuhp_remove_state - Remove the callbacks for an hotplug machine state
   * @state:     The state to remove
@@ -1589,6 +1587,7 @@ void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
  
         get_online_cpus();
  
+       mutex_lock(&cpuhp_state_mutex);
         if (sp->multi_instance) {
                 WARN(!hlist_empty(&sp->list),
                      "Error: Removing state %d which has instances left.\n",
@@ -1613,6 +1612,7 @@ void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
         }
  remove:
         cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
+       mutex_unlock(&cpuhp_state_mutex);
         put_online_cpus();
  }
  EXPORT_SYMBOL(__cpuhp_remove_state);
diff --git a/kernel/events/core.c b/kernel/events/core.c

index 6f41548f2e320a98182f4fe4b10700bcab7e6b86..ff01cba86f430fd29916ab73c755698bf81feff0 100644 (file)
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -998,7 +998,7 @@ list_update_cgroup_event(struct perf_event *event,
   */
  #define PERF_CPU_HRTIMER (1000 / HZ)
  /*
- * function must be called with interrupts disbled
+ * function must be called with interrupts disabled
   */
  static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr)
  {
@@ -4256,7 +4256,7 @@ int perf_event_release_kernel(struct perf_event *event)
  
         raw_spin_lock_irq(&ctx->lock);
         /*
-        * Mark this even as STATE_DEAD, there is no external reference to it
+        * Mark this event as STATE_DEAD, there is no external reference to it
          * anymore.
          *
          * Anybody acquiring event->child_mutex after the below loop _must_
@@ -10417,21 +10417,22 @@ void perf_event_free_task(struct task_struct *task)
                         continue;
  
                 mutex_lock(&ctx->mutex);
-again:
-               list_for_each_entry_safe(event, tmp, &ctx->pinned_groups,
-                               group_entry)
-                       perf_free_event(event, ctx);
+               raw_spin_lock_irq(&ctx->lock);
+               /*
+                * Destroy the task <-> ctx relation and mark the context dead.
+                *
+                * This is important because even though the task hasn't been
+                * exposed yet the context has been (through child_list).
+                */
+               RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], NULL);
+               WRITE_ONCE(ctx->task, TASK_TOMBSTONE);
+               put_task_struct(task); /* cannot be last */
+               raw_spin_unlock_irq(&ctx->lock);
  
-               list_for_each_entry_safe(event, tmp, &ctx->flexible_groups,
-                               group_entry)
+               list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry)
                         perf_free_event(event, ctx);
  
-               if (!list_empty(&ctx->pinned_groups) ||
-                               !list_empty(&ctx->flexible_groups))
-                       goto again;
-
                 mutex_unlock(&ctx->mutex);
-
                 put_ctx(ctx);
         }
  }
@@ -10469,7 +10470,12 @@ const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
  }
  
  /*
- * inherit a event from parent task to child task:
+ * Inherit a event from parent task to child task.
+ *
+ * Returns:
+ *  - valid pointer on success
+ *  - NULL for orphaned events
+ *  - IS_ERR() on error
   */
  static struct perf_event *
  inherit_event(struct perf_event *parent_event,
@@ -10563,6 +10569,16 @@ inherit_event(struct perf_event *parent_event,
         return child_event;
  }
  
+/*
+ * Inherits an event group.
+ *
+ * This will quietly suppress orphaned events; !inherit_event() is not an error.
+ * This matches with perf_event_release_kernel() removing all child events.
+ *
+ * Returns:
+ *  - 0 on success
+ *  - <0 on error
+ */
  static int inherit_group(struct perf_event *parent_event,
               struct task_struct *parent,
               struct perf_event_context *parent_ctx,
@@ -10577,6 +10593,11 @@ static int inherit_group(struct perf_event *parent_event,
                                  child, NULL, child_ctx);
         if (IS_ERR(leader))
                 return PTR_ERR(leader);
+       /*
+        * @leader can be NULL here because of is_orphaned_event(). In this
+        * case inherit_event() will create individual events, similar to what
+        * perf_group_detach() would do anyway.
+        */
         list_for_each_entry(sub, &parent_event->sibling_list, group_entry) {
                 child_ctr = inherit_event(sub, parent, parent_ctx,
                                             child, leader, child_ctx);
@@ -10586,6 +10607,17 @@ static int inherit_group(struct perf_event *parent_event,
         return 0;
  }
  
+/*
+ * Creates the child task context and tries to inherit the event-group.
+ *
+ * Clears @inherited_all on !attr.inherited or error. Note that we'll leave
+ * inherited_all set when we 'fail' to inherit an orphaned event; this is
+ * consistent with perf_event_release_kernel() removing all child events.
+ *
+ * Returns:
+ *  - 0 on success
+ *  - <0 on error
+ */
  static int
  inherit_task_group(struct perf_event *event, struct task_struct *parent,
                    struct perf_event_context *parent_ctx,
@@ -10608,7 +10640,6 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent,
                  * First allocate and initialize a context for the
                  * child.
                  */
-
                 child_ctx = alloc_perf_context(parent_ctx->pmu, child);
                 if (!child_ctx)
                         return -ENOMEM;
@@ -10670,7 +10701,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn)
                 ret = inherit_task_group(event, parent, parent_ctx,
                                          child, ctxn, &inherited_all);
                 if (ret)
-                       break;
+                       goto out_unlock;
         }
  
         /*
@@ -10686,7 +10717,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn)
                 ret = inherit_task_group(event, parent, parent_ctx,
                                          child, ctxn, &inherited_all);
                 if (ret)
-                       break;
+                       goto out_unlock;
         }
  
         raw_spin_lock_irqsave(&parent_ctx->lock, flags);
@@ -10714,6 +10745,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn)
         }
  
         raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
+out_unlock:
         mutex_unlock(&parent_ctx->mutex);
  
         perf_unpin_context(parent_ctx);
diff --git a/kernel/exit.c b/kernel/exit.c

index e126ebf2400c221adfb8a73508d883ec9accd63d..516acdb0e0ec9bd48e3006a8ede165437b3e121f 100644 (file)
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -554,7 +554,6 @@ static void exit_mm(void)
         enter_lazy_tlb(mm, current);
         task_unlock(current);
         mm_update_next_owner(mm);
-       userfaultfd_exit(mm);
         mmput(mm);
         if (test_thread_flag(TIF_MEMDIE))
                 exit_oom_victim();
diff --git a/kernel/futex.c b/kernel/futex.c

index 229a744b1781be2e4fccc1b5c290bd246d8b8694..45858ec739411f5741667e560552757697441e6b 100644 (file)
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2815,7 +2815,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
  {
         struct hrtimer_sleeper timeout, *to = NULL;
         struct rt_mutex_waiter rt_waiter;
-       struct rt_mutex *pi_mutex = NULL;
         struct futex_hash_bucket *hb;
         union futex_key key2 = FUTEX_KEY_INIT;
         struct futex_q q = futex_q_init;
@@ -2899,6 +2898,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
                 if (q.pi_state && (q.pi_state->owner != current)) {
                         spin_lock(q.lock_ptr);
                         ret = fixup_pi_state_owner(uaddr2, &q, current);
+                       if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current)
+                               rt_mutex_unlock(&q.pi_state->pi_mutex);
                         /*
                          * Drop the reference to the pi state which
                          * the requeue_pi() code acquired for us.
@@ -2907,6 +2908,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
                         spin_unlock(q.lock_ptr);
                 }
         } else {
+               struct rt_mutex *pi_mutex;
+
                 /*
                  * We have been woken up by futex_unlock_pi(), a timeout, or a
                  * signal.  futex_unlock_pi() will not destroy the lock_ptr nor
@@ -2930,18 +2933,19 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
                 if (res)
                         ret = (res < 0) ? res : 0;
  
+               /*
+                * If fixup_pi_state_owner() faulted and was unable to handle
+                * the fault, unlock the rt_mutex and return the fault to
+                * userspace.
+                */
+               if (ret && rt_mutex_owner(pi_mutex) == current)
+                       rt_mutex_unlock(pi_mutex);
+
                 /* Unqueue and drop the lock. */
                 unqueue_me_pi(&q);
         }
  
-       /*
-        * If fixup_pi_state_owner() faulted and was unable to handle the
-        * fault, unlock the rt_mutex and return the fault to userspace.
-        */
-       if (ret == -EFAULT) {
-               if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
-                       rt_mutex_unlock(pi_mutex);
-       } else if (ret == -EINTR) {
+       if (ret == -EINTR) {
                 /*
                  * We've already been requeued, but cannot restart by calling
                  * futex_lock_pi() directly. We could restart this syscall, but
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c

index 4544b115f5eb85d4b01ec966f933f191cd2cae1e..e2d356dd75812df8e42dfac3feb132edd5612e33 100644 (file)
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -59,7 +59,7 @@ static int get_nodes_in_cpumask(const struct cpumask *mask, nodemask_t *nodemsk)
  struct cpumask *
  irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
  {
-       int n, nodes, vecs_per_node, cpus_per_vec, extra_vecs, curvec;
+       int n, nodes, cpus_per_vec, extra_vecs, curvec;
         int affv = nvecs - affd->pre_vectors - affd->post_vectors;
         int last_affv = affv + affd->pre_vectors;
         nodemask_t nodemsk = NODE_MASK_NONE;
@@ -94,19 +94,21 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
                 goto done;
         }
  
-       /* Spread the vectors per node */
-       vecs_per_node = affv / nodes;
-       /* Account for rounding errors */
-       extra_vecs = affv - (nodes * vecs_per_node);
-
         for_each_node_mask(n, nodemsk) {
-               int ncpus, v, vecs_to_assign = vecs_per_node;
+               int ncpus, v, vecs_to_assign, vecs_per_node;
+
+               /* Spread the vectors per node */
+               vecs_per_node = (affv - (curvec - affd->pre_vectors)) / nodes;
  
                 /* Get the cpus on this node which are in the mask */
                 cpumask_and(nmsk, cpu_online_mask, cpumask_of_node(n));
  
                 /* Calculate the number of cpus per vector */
                 ncpus = cpumask_weight(nmsk);
+               vecs_to_assign = min(vecs_per_node, ncpus);
+
+               /* Account for rounding errors */
+               extra_vecs = ncpus - vecs_to_assign * (ncpus / vecs_to_assign);
  
                 for (v = 0; curvec < last_affv && v < vecs_to_assign;
                      curvec++, v++) {
@@ -115,14 +117,14 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
                         /* Account for extra vectors to compensate rounding errors */
                         if (extra_vecs) {
                                 cpus_per_vec++;
-                               if (!--extra_vecs)
-                                       vecs_per_node++;
+                               --extra_vecs;
                         }
                         irq_spread_init_one(masks + curvec, nmsk, cpus_per_vec);
                 }
  
                 if (curvec >= last_affv)
                         break;
+               --nodes;
         }
  
  done:
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c

index b56a558e406db6375bea4b07e5873a4c6f0b401e..b118735fea9da471a15ba627c87af523b891bafa 100644 (file)
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -614,13 +614,13 @@ static int kexec_calculate_store_digests(struct kimage *image)
                 ret = crypto_shash_final(desc, digest);
                 if (ret)
                         goto out_free_digest;
-               ret = kexec_purgatory_get_set_symbol(image, "sha_regions",
-                                               sha_regions, sha_region_sz, 0);
+               ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha_regions",
+                                                    sha_regions, sha_region_sz, 0);
                 if (ret)
                         goto out_free_digest;
  
-               ret = kexec_purgatory_get_set_symbol(image, "sha256_digest",
-                                               digest, SHA256_DIGEST_SIZE, 0);
+               ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha256_digest",
+                                                    digest, SHA256_DIGEST_SIZE, 0);
                 if (ret)
                         goto out_free_digest;
         }
diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h

index 4cef7e4706b098d7918b53ff1e1b931d1a5ec8dc..799a8a4521870a6444818fef64c0ae1e2dfad671 100644 (file)
--- a/kernel/kexec_internal.h
+++ b/kernel/kexec_internal.h
@@ -15,11 +15,7 @@ int kimage_is_destination_range(struct kimage *image,
  extern struct mutex kexec_mutex;
  
  #ifdef CONFIG_KEXEC_FILE
-struct kexec_sha_region {
-       unsigned long start;
-       unsigned long len;
-};
-
+#include <linux/purgatory.h>
  void kimage_file_post_load_cleanup(struct kimage *image);
  #else /* CONFIG_KEXEC_FILE */
  static inline void kimage_file_post_load_cleanup(struct kimage *image) { }
diff --git a/kernel/kthread.c b/kernel/kthread.c

index 2f26adea0f84d21f4dae6d1ffdbbf94a73f40c67..26db528c1d881bf371ea5b53b7ade0815c990bf1 100644 (file)
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -20,6 +20,7 @@
  #include <linux/freezer.h>
  #include <linux/ptrace.h>
  #include <linux/uaccess.h>
+#include <linux/cgroup.h>
  #include <trace/events/sched.h>
  
  static DEFINE_SPINLOCK(kthread_create_lock);
@@ -225,6 +226,7 @@ static int kthread(void *_create)
  
         ret = -EINTR;
         if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
+               cgroup_kthread_ready();
                 __kthread_parkme(self);
                 ret = threadfn(data);
         }
@@ -538,6 +540,7 @@ int kthreadd(void *unused)
         set_mems_allowed(node_states[N_MEMORY]);
  
         current->flags |= PF_NOFREEZE;
+       cgroup_init_kthreadd();
  
         for (;;) {
                 set_current_state(TASK_INTERRUPTIBLE);
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c

index 12e38c213b70111c1b343af3b1b0bb4eaa99f829..a95e5d1f4a9c447de6aa4b0b1b85e5f56de9f729 100644 (file)
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3262,10 +3262,17 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
         if (depth) {
                 hlock = curr->held_locks + depth - 1;
                 if (hlock->class_idx == class_idx && nest_lock) {
-                       if (hlock->references)
+                       if (hlock->references) {
+                               /*
+                                * Check: unsigned int references:12, overflow.
+                                */
+                               if (DEBUG_LOCKS_WARN_ON(hlock->references == (1 << 12)-1))
+                                       return 0;
+
                                 hlock->references++;
-                       else
+                       } else {
                                 hlock->references = 2;
+                       }
  
                         return 1;
                 }
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h

index c2b88490d857583026a35090b62f7891446b7ba2..c08fbd2f5ba9fa2a806f326a3a85f5d021d74027 100644 (file)
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -46,13 +46,13 @@ enum {
                 (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ)
  
  /*
- * CONFIG_PROVE_LOCKING_SMALL is defined for sparc. Sparc requires .text,
+ * CONFIG_LOCKDEP_SMALL is defined for sparc. Sparc requires .text,
   * .data and .bss to fit in required 32MB limit for the kernel. With
- * PROVE_LOCKING we could go over this limit and cause system boot-up problems.
+ * CONFIG_LOCKDEP we could go over this limit and cause system boot-up problems.
   * So, reduce the static allocations for lockdeps related structures so that
   * everything fits in current required size limit.
   */
-#ifdef CONFIG_PROVE_LOCKING_SMALL
+#ifdef CONFIG_LOCKDEP_SMALL
  /*
   * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies
   * we track.
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c

index 7bc24d477805d868b932aab7acc6997120931fc5..c65f7989f850d12508045896a2cb98d5b691c068 100644 (file)
--- a/kernel/locking/rwsem-spinlock.c
+++ b/kernel/locking/rwsem-spinlock.c
@@ -213,10 +213,9 @@ int __sched __down_write_common(struct rw_semaphore *sem, int state)
                  */
                 if (sem->count == 0)
                         break;
-               if (signal_pending_state(state, current)) {
-                       ret = -EINTR;
-                       goto out;
-               }
+               if (signal_pending_state(state, current))
+                       goto out_nolock;
+
                 set_current_state(state);
                 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
                 schedule();
@@ -224,12 +223,19 @@ int __sched __down_write_common(struct rw_semaphore *sem, int state)
         }
         /* got the lock */
         sem->count = -1;
-out:
         list_del(&waiter.list);
  
         raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
  
         return ret;
+
+out_nolock:
+       list_del(&waiter.list);
+       if (!list_empty(&sem->wait_list))
+               __rwsem_do_wake(sem, 1);
+       raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+       return -EINTR;
  }
  
  void __sched __down_write(struct rw_semaphore *sem)
diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c

index da6c9a34f62f5c74f17eada78d6bb843c8fbf01d..6b7abb334ca6027dd2b189a211671fa98a33be82 100644 (file)
--- a/kernel/locking/test-ww_mutex.c
+++ b/kernel/locking/test-ww_mutex.c
@@ -50,7 +50,7 @@ static void test_mutex_work(struct work_struct *work)
  
         if (mtx->flags & TEST_MTX_TRY) {
                 while (!ww_mutex_trylock(&mtx->mutex))
-                       cpu_relax();
+                       cond_resched();
         } else {
                 ww_mutex_lock(&mtx->mutex, NULL);
         }
@@ -88,7 +88,7 @@ static int __test_mutex(unsigned int flags)
                                 ret = -EINVAL;
                                 break;
                         }
-                       cpu_relax();
+                       cond_resched();
                 } while (time_before(jiffies, timeout));
         } else {
                 ret = wait_for_completion_timeout(&mtx.done, TIMEOUT);
@@ -627,7 +627,7 @@ static int __init test_ww_mutex_init(void)
         if (ret)
                 return ret;
  
-       ret = stress(4096, hweight32(STRESS_ALL)*ncpus, 1<<12, STRESS_ALL);
+       ret = stress(4095, hweight32(STRESS_ALL)*ncpus, 1<<12, STRESS_ALL);
         if (ret)
                 return ret;
  
diff --git a/kernel/memremap.c b/kernel/memremap.c

index 06123234f1189c86ee42dffdc2d14873b6b16895..07e85e5229da849d33391f97234c1e1fff2c5ce1 100644 (file)
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -247,11 +247,9 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
         align_start = res->start & ~(SECTION_SIZE - 1);
         align_size = ALIGN(resource_size(res), SECTION_SIZE);
  
-       lock_device_hotplug();
         mem_hotplug_begin();
         arch_remove_memory(align_start, align_size);
         mem_hotplug_done();
-       unlock_device_hotplug();
  
         untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
         pgmap_radix_release(res);
@@ -364,11 +362,9 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
         if (error)
                 goto err_pfn_remap;
  
-       lock_device_hotplug();
         mem_hotplug_begin();
         error = arch_add_memory(nid, align_start, align_size, true);
         mem_hotplug_done();
-       unlock_device_hotplug();
         if (error)
                 goto err_add_memory;
  
diff --git a/kernel/padata.c b/kernel/padata.c

index 05316c9f32da9d0e20b3d3c92eeaf3eb49f1deef..3202aa17492c808af5331044de710a2f34e277a3 100644 (file)
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -186,19 +186,20 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
  
         reorder = &next_queue->reorder;
  
+       spin_lock(&reorder->lock);
         if (!list_empty(&reorder->list)) {
                 padata = list_entry(reorder->list.next,
                                     struct padata_priv, list);
  
-               spin_lock(&reorder->lock);
                 list_del_init(&padata->list);
                 atomic_dec(&pd->reorder_objects);
-               spin_unlock(&reorder->lock);
  
                 pd->processed++;
  
+               spin_unlock(&reorder->lock);
                 goto out;
         }
+       spin_unlock(&reorder->lock);
  
         if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) {
                 padata = ERR_PTR(-ENODATA);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c

index 0af9287121746d1b198429d52a99e862c4f1a8f0..266ddcc1d8bbbc6af7bceda3657618beef2a9c59 100644 (file)
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -184,11 +184,17 @@ static void ptrace_unfreeze_traced(struct task_struct *task)
  
         WARN_ON(!task->ptrace || task->parent != current);
  
+       /*
+        * PTRACE_LISTEN can allow ptrace_trap_notify to wake us up remotely.
+        * Recheck state under the lock to close this race.
+        */
         spin_lock_irq(&task->sighand->siglock);
-       if (__fatal_signal_pending(task))
-               wake_up_state(task, __TASK_TRACED);
-       else
-               task->state = TASK_TRACED;
+       if (task->state == __TASK_TRACED) {
+               if (__fatal_signal_pending(task))
+                       wake_up_state(task, __TASK_TRACED);
+               else
+                       task->state = TASK_TRACED;
+       }
         spin_unlock_irq(&task->sighand->siglock);
  }
  
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c

index a08795e216283f1292f82720bb1ff758e6a2629c..00a45c45beca09829ad479aad9ba299f5498a42e 100644 (file)
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -96,10 +96,10 @@ static DEFINE_STATIC_KEY_FALSE(__sched_clock_stable);
  static int __sched_clock_stable_early = 1;
  
  /*
- * We want: ktime_get_ns() + gtod_offset == sched_clock() + raw_offset
+ * We want: ktime_get_ns() + __gtod_offset == sched_clock() + __sched_clock_offset
   */
-static __read_mostly u64 raw_offset;
-static __read_mostly u64 gtod_offset;
+__read_mostly u64 __sched_clock_offset;
+static __read_mostly u64 __gtod_offset;
  
  struct sched_clock_data {
         u64                     tick_raw;
@@ -131,17 +131,24 @@ static void __set_sched_clock_stable(void)
         /*
          * Attempt to make the (initial) unstable->stable transition continuous.
          */
-       raw_offset = (scd->tick_gtod + gtod_offset) - (scd->tick_raw);
+       __sched_clock_offset = (scd->tick_gtod + __gtod_offset) - (scd->tick_raw);
  
         printk(KERN_INFO "sched_clock: Marking stable (%lld, %lld)->(%lld, %lld)\n",
-                       scd->tick_gtod, gtod_offset,
-                       scd->tick_raw,  raw_offset);
+                       scd->tick_gtod, __gtod_offset,
+                       scd->tick_raw,  __sched_clock_offset);
  
         static_branch_enable(&__sched_clock_stable);
         tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
  }
  
-static void __clear_sched_clock_stable(struct work_struct *work)
+static void __sched_clock_work(struct work_struct *work)
+{
+       static_branch_disable(&__sched_clock_stable);
+}
+
+static DECLARE_WORK(sched_clock_work, __sched_clock_work);
+
+static void __clear_sched_clock_stable(void)
  {
         struct sched_clock_data *scd = this_scd();
  
@@ -154,17 +161,17 @@ static void __clear_sched_clock_stable(struct work_struct *work)
          *
          * Still do what we can.
          */
-       gtod_offset = (scd->tick_raw + raw_offset) - (scd->tick_gtod);
+       __gtod_offset = (scd->tick_raw + __sched_clock_offset) - (scd->tick_gtod);
  
         printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n",
-                       scd->tick_gtod, gtod_offset,
-                       scd->tick_raw,  raw_offset);
+                       scd->tick_gtod, __gtod_offset,
+                       scd->tick_raw,  __sched_clock_offset);
  
-       static_branch_disable(&__sched_clock_stable);
         tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
-}
  
-static DECLARE_WORK(sched_clock_work, __clear_sched_clock_stable);
+       if (sched_clock_stable())
+               schedule_work(&sched_clock_work);
+}
  
  void clear_sched_clock_stable(void)
  {
@@ -173,7 +180,7 @@ void clear_sched_clock_stable(void)
         smp_mb(); /* matches sched_clock_init_late() */
  
         if (sched_clock_running == 2)
-               schedule_work(&sched_clock_work);
+               __clear_sched_clock_stable();
  }
  
  void sched_clock_init_late(void)
@@ -214,7 +221,7 @@ static inline u64 wrap_max(u64 x, u64 y)
   */
  static u64 sched_clock_local(struct sched_clock_data *scd)
  {
-       u64 now, clock, old_clock, min_clock, max_clock;
+       u64 now, clock, old_clock, min_clock, max_clock, gtod;
         s64 delta;
  
  again:
@@ -231,9 +238,10 @@ again:
          *                    scd->tick_gtod + TICK_NSEC);
          */
  
-       clock = scd->tick_gtod + gtod_offset + delta;
-       min_clock = wrap_max(scd->tick_gtod, old_clock);
-       max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC);
+       gtod = scd->tick_gtod + __gtod_offset;
+       clock = gtod + delta;
+       min_clock = wrap_max(gtod, old_clock);
+       max_clock = wrap_max(old_clock, gtod + TICK_NSEC);
  
         clock = wrap_max(clock, min_clock);
         clock = wrap_min(clock, max_clock);
@@ -317,7 +325,7 @@ u64 sched_clock_cpu(int cpu)
         u64 clock;
  
         if (sched_clock_stable())
-               return sched_clock() + raw_offset;
+               return sched_clock() + __sched_clock_offset;
  
         if (unlikely(!sched_clock_running))
                 return 0ull;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 956383844116ab456f8552abd4b5dcc00e09f347..3b31fc05a0f1e45be5985b860a5fde95ee969832 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3287,10 +3287,15 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
         struct task_struct *p;
  
         /*
-        * Optimization: we know that if all tasks are in
-        * the fair class we can call that function directly:
+        * Optimization: we know that if all tasks are in the fair class we can
+        * call that function directly, but only if the @prev task wasn't of a
+        * higher scheduling class, because otherwise those lose the
+        * opportunity to pull in more work from other CPUs.
          */
-       if (likely(rq->nr_running == rq->cfs.h_nr_running)) {
+       if (likely((prev->sched_class == &idle_sched_class ||
+                   prev->sched_class == &fair_sched_class) &&
+                  rq->nr_running == rq->cfs.h_nr_running)) {
+
                 p = fair_sched_class.pick_next_task(rq, prev, rf);
                 if (unlikely(p == RETRY_TASK))
                         goto again;
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c

index 8f8de3d4d6b7a3c71358ac1c6660f2e645b98477..54c577578da6899160cf4a611e87a386a2fd7db2 100644 (file)
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -36,6 +36,7 @@ struct sugov_policy {
         u64 last_freq_update_time;
         s64 freq_update_delay_ns;
         unsigned int next_freq;
+       unsigned int cached_raw_freq;
  
         /* The next fields are only needed if fast switch cannot be used. */
         struct irq_work irq_work;
@@ -52,7 +53,6 @@ struct sugov_cpu {
         struct update_util_data update_util;
         struct sugov_policy *sg_policy;
  
-       unsigned int cached_raw_freq;
         unsigned long iowait_boost;
         unsigned long iowait_boost_max;
         u64 last_update;
@@ -116,7 +116,7 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
  
  /**
   * get_next_freq - Compute a new frequency for a given cpufreq policy.
- * @sg_cpu: schedutil cpu object to compute the new frequency for.
+ * @sg_policy: schedutil policy object to compute the new frequency for.
   * @util: Current CPU utilization.
   * @max: CPU capacity.
   *
@@ -136,19 +136,18 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
   * next_freq (as calculated above) is returned, subject to policy min/max and
   * cpufreq driver limitations.
   */
-static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util,
-                                 unsigned long max)
+static unsigned int get_next_freq(struct sugov_policy *sg_policy,
+                                 unsigned long util, unsigned long max)
  {
-       struct sugov_policy *sg_policy = sg_cpu->sg_policy;
         struct cpufreq_policy *policy = sg_policy->policy;
         unsigned int freq = arch_scale_freq_invariant() ?
                                 policy->cpuinfo.max_freq : policy->cur;
  
         freq = (freq + (freq >> 2)) * util / max;
  
-       if (freq == sg_cpu->cached_raw_freq && sg_policy->next_freq != UINT_MAX)
+       if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX)
                 return sg_policy->next_freq;
-       sg_cpu->cached_raw_freq = freq;
+       sg_policy->cached_raw_freq = freq;
         return cpufreq_driver_resolve_freq(policy, freq);
  }
  
@@ -213,7 +212,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
         } else {
                 sugov_get_util(&util, &max);
                 sugov_iowait_boost(sg_cpu, &util, &max);
-               next_f = get_next_freq(sg_cpu, util, max);
+               next_f = get_next_freq(sg_policy, util, max);
         }
         sugov_update_commit(sg_policy, time, next_f);
  }
@@ -267,7 +266,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
                 sugov_iowait_boost(j_sg_cpu, &util, &max);
         }
  
-       return get_next_freq(sg_cpu, util, max);
+       return get_next_freq(sg_policy, util, max);
  }
  
  static void sugov_update_shared(struct update_util_data *hook, u64 time,
@@ -580,25 +579,19 @@ static int sugov_start(struct cpufreq_policy *policy)
         sg_policy->next_freq = UINT_MAX;
         sg_policy->work_in_progress = false;
         sg_policy->need_freq_update = false;
+       sg_policy->cached_raw_freq = 0;
  
         for_each_cpu(cpu, policy->cpus) {
                 struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
  
+               memset(sg_cpu, 0, sizeof(*sg_cpu));
                 sg_cpu->sg_policy = sg_policy;
-               if (policy_is_shared(policy)) {
-                       sg_cpu->util = 0;
-                       sg_cpu->max = 0;
-                       sg_cpu->flags = SCHED_CPUFREQ_RT;
-                       sg_cpu->last_update = 0;
-                       sg_cpu->cached_raw_freq = 0;
-                       sg_cpu->iowait_boost = 0;
-                       sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
-                       cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
-                                                    sugov_update_shared);
-               } else {
-                       cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
-                                                    sugov_update_single);
-               }
+               sg_cpu->flags = SCHED_CPUFREQ_RT;
+               sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
+               cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
+                                            policy_is_shared(policy) ?
+                                                       sugov_update_shared :
+                                                       sugov_update_single);
         }
         return 0;
  }
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c

index f3778e2b46c8dc00c90d9165f6ae9efbf1f16dc7..aea3135c5d90f434ee72980c30f0db1129ef752b 100644 (file)
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -34,6 +34,18 @@ void disable_sched_clock_irqtime(void)
         sched_clock_irqtime = 0;
  }
  
+static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
+                                 enum cpu_usage_stat idx)
+{
+       u64 *cpustat = kcpustat_this_cpu->cpustat;
+
+       u64_stats_update_begin(&irqtime->sync);
+       cpustat[idx] += delta;
+       irqtime->total += delta;
+       irqtime->tick_delta += delta;
+       u64_stats_update_end(&irqtime->sync);
+}
+
  /*
   * Called before incrementing preempt_count on {soft,}irq_enter
   * and before decrementing preempt_count on {soft,}irq_exit.
@@ -41,7 +53,6 @@ void disable_sched_clock_irqtime(void)
  void irqtime_account_irq(struct task_struct *curr)
  {
         struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
-       u64 *cpustat = kcpustat_this_cpu->cpustat;
         s64 delta;
         int cpu;
  
@@ -52,22 +63,16 @@ void irqtime_account_irq(struct task_struct *curr)
         delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
         irqtime->irq_start_time += delta;
  
-       u64_stats_update_begin(&irqtime->sync);
         /*
          * We do not account for softirq time from ksoftirqd here.
          * We want to continue accounting softirq time to ksoftirqd thread
          * in that case, so as not to confuse scheduler with a special task
          * that do not consume any time, but still wants to run.
          */
-       if (hardirq_count()) {
-               cpustat[CPUTIME_IRQ] += delta;
-               irqtime->tick_delta += delta;
-       } else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) {
-               cpustat[CPUTIME_SOFTIRQ] += delta;
-               irqtime->tick_delta += delta;
-       }
-
-       u64_stats_update_end(&irqtime->sync);
+       if (hardirq_count())
+               irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
+       else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
+               irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
  }
  EXPORT_SYMBOL_GPL(irqtime_account_irq);
  
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c

index 99b2c33a9fbcb4411fd7b75d6dbaff36bf07f803..a2ce59015642c3ccc753006837a9485b2d9fbcd3 100644 (file)
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -445,13 +445,13 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
   *
   * This function returns true if:
   *
- *   runtime / (deadline - t) > dl_runtime / dl_period ,
+ *   runtime / (deadline - t) > dl_runtime / dl_deadline ,
   *
   * IOW we can't recycle current parameters.
   *
- * Notice that the bandwidth check is done against the period. For
+ * Notice that the bandwidth check is done against the deadline. For
   * task with deadline equal to period this is the same of using
- * dl_deadline instead of dl_period in the equation above.
+ * dl_period instead of dl_deadline in the equation above.
   */
  static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
                                struct sched_dl_entity *pi_se, u64 t)
@@ -476,7 +476,7 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
          * of anything below microseconds resolution is actually fiction
          * (but still we want to give the user that illusion >;).
          */
-       left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
+       left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
         right = ((dl_se->deadline - t) >> DL_SCALE) *
                 (pi_se->dl_runtime >> DL_SCALE);
  
@@ -505,10 +505,15 @@ static void update_dl_entity(struct sched_dl_entity *dl_se,
         }
  }
  
+static inline u64 dl_next_period(struct sched_dl_entity *dl_se)
+{
+       return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period;
+}
+
  /*
   * If the entity depleted all its runtime, and if we want it to sleep
   * while waiting for some new execution time to become available, we
- * set the bandwidth enforcement timer to the replenishment instant
+ * set the bandwidth replenishment timer to the replenishment instant
   * and try to activate it.
   *
   * Notice that it is important for the caller to know if the timer
@@ -530,7 +535,7 @@ static int start_dl_timer(struct task_struct *p)
          * that it is actually coming from rq->clock and not from
          * hrtimer's time base reading.
          */
-       act = ns_to_ktime(dl_se->deadline);
+       act = ns_to_ktime(dl_next_period(dl_se));
         now = hrtimer_cb_get_time(timer);
         delta = ktime_to_ns(now) - rq_clock(rq);
         act = ktime_add_ns(act, delta);
@@ -638,6 +643,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
                 lockdep_unpin_lock(&rq->lock, rf.cookie);
                 rq = dl_task_offline_migration(rq, p);
                 rf.cookie = lockdep_pin_lock(&rq->lock);
+               update_rq_clock(rq);
  
                 /*
                  * Now that the task has been migrated to the new RQ and we
@@ -689,6 +695,37 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
         timer->function = dl_task_timer;
  }
  
+/*
+ * During the activation, CBS checks if it can reuse the current task's
+ * runtime and period. If the deadline of the task is in the past, CBS
+ * cannot use the runtime, and so it replenishes the task. This rule
+ * works fine for implicit deadline tasks (deadline == period), and the
+ * CBS was designed for implicit deadline tasks. However, a task with
+ * constrained deadline (deadline < period) might be awakened after the
+ * deadline, but before the next period. In this case, replenishing the
+ * task would allow it to run for runtime / deadline. As in this case
+ * deadline < period, CBS enables a task to run for more than the
+ * runtime / period. In a very loaded system, this can cause a domino
+ * effect, making other tasks miss their deadlines.
+ *
+ * To avoid this problem, in the activation of a constrained deadline
+ * task after the deadline but before the next period, throttle the
+ * task and set the replenishing timer to the beginning of the next period,
+ * unless it is boosted.
+ */
+static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
+{
+       struct task_struct *p = dl_task_of(dl_se);
+       struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));
+
+       if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
+           dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
+               if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
+                       return;
+               dl_se->dl_throttled = 1;
+       }
+}
+
  static
  int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
  {
@@ -922,6 +959,11 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
         __dequeue_dl_entity(dl_se);
  }
  
+static inline bool dl_is_constrained(struct sched_dl_entity *dl_se)
+{
+       return dl_se->dl_deadline < dl_se->dl_period;
+}
+
  static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
  {
         struct task_struct *pi_task = rt_mutex_get_top_task(p);
@@ -947,6 +989,15 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
                 return;
         }
  
+       /*
+        * Check if a constrained deadline task was activated
+        * after the deadline but before the next period.
+        * If that is the case, the task will be throttled and
+        * the replenishment timer will be set to the next period.
+        */
+       if (!p->dl.dl_throttled && dl_is_constrained(&p->dl))
+               dl_check_constrained_dl(&p->dl);
+
         /*
          * If p is throttled, we do nothing. In fact, if it exhausted
          * its budget it needs a replenishment and, since it now is on
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index 3e88b35ac1571cd2dc1719378902ca75c08cfbb9..dea138964b9107b3e22542a8b80f5cf1d43c1dee 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5799,7 +5799,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
          * Due to large variance we need a large fuzz factor; hackbench in
          * particularly is sensitive here.
          */
-       if ((avg_idle / 512) < avg_cost)
+       if (sched_feat(SIS_AVG_CPU) && (avg_idle / 512) < avg_cost)
                 return -1;
  
         time = local_clock();
diff --git a/kernel/sched/features.h b/kernel/sched/features.h

index 69631fa46c2f84fecd3e15599cba0e5935c1148e..1b3c8189b28656d2644a714ff60ceab7d015d97b 100644 (file)
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -51,6 +51,11 @@ SCHED_FEAT(NONTASK_CAPACITY, true)
   */
  SCHED_FEAT(TTWU_QUEUE, true)
  
+/*
+ * When doing wakeups, attempt to limit superfluous scans of the LLC domain.
+ */
+SCHED_FEAT(SIS_AVG_CPU, false)
+
  #ifdef HAVE_RT_PUSH_IPI
  /*
   * In order to avoid a thundering herd attack of CPUs that are
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c

index 7296b7308ecaebb6cca949e1a9e2d4361750f7c6..f15fb2bdbc0dee60d770da951424f8cf0635f5f6 100644 (file)
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -169,7 +169,7 @@ static inline int calc_load_write_idx(void)
          * If the folding window started, make sure we start writing in the
          * next idle-delta.
          */
-       if (!time_before(jiffies, calc_load_update))
+       if (!time_before(jiffies, READ_ONCE(calc_load_update)))
                 idx++;
  
         return idx & 1;
@@ -202,8 +202,9 @@ void calc_load_exit_idle(void)
         struct rq *this_rq = this_rq();
  
         /*
-        * If we're still before the sample window, we're done.
+        * If we're still before the pending sample window, we're done.
          */
+       this_rq->calc_load_update = READ_ONCE(calc_load_update);
         if (time_before(jiffies, this_rq->calc_load_update))
                 return;
  
@@ -212,7 +213,6 @@ void calc_load_exit_idle(void)
          * accounted through the nohz accounting, so skip the entire deal and
          * sync up for the next window.
          */
-       this_rq->calc_load_update = calc_load_update;
         if (time_before(jiffies, this_rq->calc_load_update + 10))
                 this_rq->calc_load_update += LOAD_FREQ;
  }
@@ -308,13 +308,15 @@ calc_load_n(unsigned long load, unsigned long exp,
   */
  static void calc_global_nohz(void)
  {
+       unsigned long sample_window;
         long delta, active, n;
  
-       if (!time_before(jiffies, calc_load_update + 10)) {
+       sample_window = READ_ONCE(calc_load_update);
+       if (!time_before(jiffies, sample_window + 10)) {
                 /*
                  * Catch-up, fold however many we are behind still
                  */
-               delta = jiffies - calc_load_update - 10;
+               delta = jiffies - sample_window - 10;
                 n = 1 + (delta / LOAD_FREQ);
  
                 active = atomic_long_read(&calc_load_tasks);
@@ -324,7 +326,7 @@ static void calc_global_nohz(void)
                 avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
                 avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
  
-               calc_load_update += n * LOAD_FREQ;
+               WRITE_ONCE(calc_load_update, sample_window + n * LOAD_FREQ);
         }
  
         /*
@@ -352,9 +354,11 @@ static inline void calc_global_nohz(void) { }
   */
  void calc_global_load(unsigned long ticks)
  {
+       unsigned long sample_window;
         long active, delta;
  
-       if (time_before(jiffies, calc_load_update + 10))
+       sample_window = READ_ONCE(calc_load_update);
+       if (time_before(jiffies, sample_window + 10))
                 return;
  
         /*
@@ -371,7 +375,7 @@ void calc_global_load(unsigned long ticks)
         avenrun[1] = calc_load(avenrun[1], EXP_5, active);
         avenrun[2] = calc_load(avenrun[2], EXP_15, active);
  
-       calc_load_update += LOAD_FREQ;
+       WRITE_ONCE(calc_load_update, sample_window + LOAD_FREQ);
  
         /*
          * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk.
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h

index 5cbf92214ad89287d111ab8300e5b55923d83ffe..767aab3505a81d14789382686f9c4ccba095cfaa 100644 (file)
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1869,6 +1869,7 @@ static inline void nohz_balance_exit_idle(unsigned int cpu) { }
  
  #ifdef CONFIG_IRQ_TIME_ACCOUNTING
  struct irqtime {
+       u64                     total;
         u64                     tick_delta;
         u64                     irq_start_time;
         struct u64_stats_sync   sync;
@@ -1876,16 +1877,20 @@ struct irqtime {
  
  DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
  
+/*
+ * Returns the irqtime minus the softirq time computed by ksoftirqd.
+ * Otherwise ksoftirqd's sum_exec_runtime would have its own runtime
+ * subtracted from it and never move forward.
+ */
  static inline u64 irq_time_read(int cpu)
  {
         struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
-       u64 *cpustat = kcpustat_cpu(cpu).cpustat;
         unsigned int seq;
         u64 total;
  
         do {
                 seq = __u64_stats_fetch_begin(&irqtime->sync);
-               total = cpustat[CPUTIME_SOFTIRQ] + cpustat[CPUTIME_IRQ];
+               total = irqtime->total;
         } while (__u64_stats_fetch_retry(&irqtime->sync, seq));
  
         return total;
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c

index 4d2ea6f255683811f6eefb5d95fb18eb9e7c7192..b8c84c6dee64bd31ca28b4cfe7283a55945aa596 100644 (file)
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -242,6 +242,45 @@ long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
  }
  EXPORT_SYMBOL(prepare_to_wait_event);
  
+/*
+ * Note! These two wait functions are entered with the
+ * wait-queue lock held (and interrupts off in the _irq
+ * case), so there is no race with testing the wakeup
+ * condition in the caller before they add the wait
+ * entry to the wake queue.
+ */
+int do_wait_intr(wait_queue_head_t *wq, wait_queue_t *wait)
+{
+       if (likely(list_empty(&wait->task_list)))
+               __add_wait_queue_tail(wq, wait);
+
+       set_current_state(TASK_INTERRUPTIBLE);
+       if (signal_pending(current))
+               return -ERESTARTSYS;
+
+       spin_unlock(&wq->lock);
+       schedule();
+       spin_lock(&wq->lock);
+       return 0;
+}
+EXPORT_SYMBOL(do_wait_intr);
+
+int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_t *wait)
+{
+       if (likely(list_empty(&wait->task_list)))
+               __add_wait_queue_tail(wq, wait);
+
+       set_current_state(TASK_INTERRUPTIBLE);
+       if (signal_pending(current))
+               return -ERESTARTSYS;
+
+       spin_unlock_irq(&wq->lock);
+       schedule();
+       spin_lock_irq(&wq->lock);
+       return 0;
+}
+EXPORT_SYMBOL(do_wait_intr_irq);
+
  /**
   * finish_wait - clean up after waiting in a queue
   * @q: waitqueue waited on
diff --git a/kernel/sysctl.c b/kernel/sysctl.c

index acf0a5a06da7c0c47003982e83bb74fcd4ebba5f..8c8714fcb53c35a390becf14f2fa8e1bfc20142c 100644 (file)
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2133,9 +2133,12 @@ static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp,
         if (write) {
                 if (*negp)
                         return -EINVAL;
+               if (*lvalp > UINT_MAX)
+                       return -EINVAL;
                 *valp = *lvalp;
         } else {
                 unsigned int val = *valp;
+               *negp = false;
                 *lvalp = (unsigned long)val;
         }
         return 0;
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c

index 7906b3f0c41a1a5b662c703c428a892ad7816670..497719127bf9f65c1c992874abc33f0c52d5c766 100644 (file)
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -125,7 +125,7 @@ int register_refined_jiffies(long cycles_per_second)
         shift_hz += cycles_per_tick/2;
         do_div(shift_hz, cycles_per_tick);
         /* Calculate nsec_per_tick using shift_hz */
-       nsec_per_tick = (u64)TICK_NSEC << 8;
+       nsec_per_tick = (u64)NSEC_PER_SEC << 8;
         nsec_per_tick += (u32)shift_hz/2;
         do_div(nsec_per_tick, (u32)shift_hz);
  
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig

index d5038005eb5dc06dd8432cc17e5be3c2c1de7e81..d4a06e714645df56f75db97ba6bb052534a4bb41 100644 (file)
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -429,7 +429,7 @@ config BLK_DEV_IO_TRACE
  
           If unsure, say N.
  
-config KPROBE_EVENT
+config KPROBE_EVENTS
         depends on KPROBES
         depends on HAVE_REGS_AND_STACK_ACCESS_API
         bool "Enable kprobes-based dynamic events"
@@ -447,7 +447,7 @@ config KPROBE_EVENT
           This option is also required by perf-probe subcommand of perf tools.
           If you want to use perf tools, this option is strongly recommended.
  
-config UPROBE_EVENT
+config UPROBE_EVENTS
         bool "Enable uprobes-based dynamic events"
         depends on ARCH_SUPPORTS_UPROBES
         depends on MMU
@@ -466,7 +466,7 @@ config UPROBE_EVENT
  
  config BPF_EVENTS
         depends on BPF_SYSCALL
-       depends on (KPROBE_EVENT || UPROBE_EVENT) && PERF_EVENTS
+       depends on (KPROBE_EVENTS || UPROBE_EVENTS) && PERF_EVENTS
         bool
         default y
         help
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile

index e5798084554911440844e1757c9ee656dc40cf12..90f2701d92a7eee98334f2b10e515b369307df2b 100644 (file)
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -57,7 +57,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
  obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
  obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o
  obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o
-obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
+obj-$(CONFIG_KPROBE_EVENTS) += trace_kprobe.o
  obj-$(CONFIG_TRACEPOINTS) += power-traces.o
  ifeq ($(CONFIG_PM),y)
  obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o
@@ -66,7 +66,7 @@ ifeq ($(CONFIG_TRACING),y)
  obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
  endif
  obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
-obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o
+obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o
  
  obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
  
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c

index b2058a7f94bd8797f5629158aeed8bf045c20e4f..bd8ae8d5ae9ca865f3738c6b4df579a3a1ca12cc 100644 (file)
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -690,8 +690,8 @@ void blk_trace_shutdown(struct request_queue *q)
  
  /**
   * blk_add_trace_rq - Add a trace for a request oriented action
- * @q:         queue the io is for
   * @rq:                the source request
+ * @error:     return status to log
   * @nr_bytes:  number of completed bytes
   * @what:      the action
   *
@@ -699,10 +699,10 @@ void blk_trace_shutdown(struct request_queue *q)
   *     Records an action against a request. Will log the bio offset + size.
   *
   **/
-static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
+static void blk_add_trace_rq(struct request *rq, int error,
                              unsigned int nr_bytes, u32 what)
  {
-       struct blk_trace *bt = q->blk_trace;
+       struct blk_trace *bt = rq->q->blk_trace;
  
         if (likely(!bt))
                 return;
@@ -713,40 +713,32 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
                 what |= BLK_TC_ACT(BLK_TC_FS);
  
         __blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq),
-                       rq->cmd_flags, what, rq->errors, 0, NULL);
-}
-
-static void blk_add_trace_rq_abort(void *ignore,
-                                  struct request_queue *q, struct request *rq)
-{
-       blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_ABORT);
+                       rq->cmd_flags, what, error, 0, NULL);
  }
  
  static void blk_add_trace_rq_insert(void *ignore,
                                     struct request_queue *q, struct request *rq)
  {
-       blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_INSERT);
+       blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_INSERT);
  }
  
  static void blk_add_trace_rq_issue(void *ignore,
                                    struct request_queue *q, struct request *rq)
  {
-       blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_ISSUE);
+       blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_ISSUE);
  }
  
  static void blk_add_trace_rq_requeue(void *ignore,
                                      struct request_queue *q,
                                      struct request *rq)
  {
-       blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_REQUEUE);
+       blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_REQUEUE);
  }
  
-static void blk_add_trace_rq_complete(void *ignore,
-                                     struct request_queue *q,
-                                     struct request *rq,
-                                     unsigned int nr_bytes)
+static void blk_add_trace_rq_complete(void *ignore, struct request *rq,
+                       int error, unsigned int nr_bytes)
  {
-       blk_add_trace_rq(q, rq, nr_bytes, BLK_TA_COMPLETE);
+       blk_add_trace_rq(rq, error, nr_bytes, BLK_TA_COMPLETE);
  }
  
  /**
@@ -941,7 +933,7 @@ static void blk_add_trace_rq_remap(void *ignore,
         r.sector_from = cpu_to_be64(from);
  
         __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
-                       rq_data_dir(rq), 0, BLK_TA_REMAP, !!rq->errors,
+                       rq_data_dir(rq), 0, BLK_TA_REMAP, 0,
                         sizeof(r), &r);
  }
  
@@ -966,7 +958,7 @@ void blk_add_driver_data(struct request_queue *q,
                 return;
  
         __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0,
-                               BLK_TA_DRV_DATA, rq->errors, len, data);
+                               BLK_TA_DRV_DATA, 0, len, data);
  }
  EXPORT_SYMBOL_GPL(blk_add_driver_data);
  
@@ -974,8 +966,6 @@ static void blk_register_tracepoints(void)
  {
         int ret;
  
-       ret = register_trace_block_rq_abort(blk_add_trace_rq_abort, NULL);
-       WARN_ON(ret);
         ret = register_trace_block_rq_insert(blk_add_trace_rq_insert, NULL);
         WARN_ON(ret);
         ret = register_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
@@ -1028,7 +1018,6 @@ static void blk_unregister_tracepoints(void)
         unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
         unregister_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
         unregister_trace_block_rq_insert(blk_add_trace_rq_insert, NULL);
-       unregister_trace_block_rq_abort(blk_add_trace_rq_abort, NULL);
  
         tracepoint_synchronize_unregister();
  }
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c

index 0d1597c9ee305780ff65200001b55928dcdf09e3..dd3e91d68dc73053e7ba0ca5bed0681b374ea8fc 100644 (file)
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3755,23 +3755,24 @@ static void __enable_ftrace_function_probe(struct ftrace_ops_hash *old_hash)
         ftrace_probe_registered = 1;
  }
  
-static void __disable_ftrace_function_probe(void)
+static bool __disable_ftrace_function_probe(void)
  {
         int i;
  
         if (!ftrace_probe_registered)
-               return;
+               return false;
  
         for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
                 struct hlist_head *hhd = &ftrace_func_hash[i];
                 if (hhd->first)
-                       return;
+                       return false;
         }
  
         /* no more funcs left */
         ftrace_shutdown(&trace_probe_ops, 0);
  
         ftrace_probe_registered = 0;
+       return true;
  }
  
  
@@ -3901,6 +3902,7 @@ static void
  __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
                                   void *data, int flags)
  {
+       struct ftrace_ops_hash old_hash_ops;
         struct ftrace_func_entry *rec_entry;
         struct ftrace_func_probe *entry;
         struct ftrace_func_probe *p;
@@ -3912,6 +3914,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
         struct hlist_node *tmp;
         char str[KSYM_SYMBOL_LEN];
         int i, ret;
+       bool disabled;
  
         if (glob && (strcmp(glob, "*") == 0 || !strlen(glob)))
                 func_g.search = NULL;
@@ -3930,6 +3933,10 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
  
         mutex_lock(&trace_probe_ops.func_hash->regex_lock);
  
+       old_hash_ops.filter_hash = old_hash;
+       /* Probes only have filters */
+       old_hash_ops.notrace_hash = NULL;
+
         hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
         if (!hash)
                 /* Hmm, should report this somehow */
@@ -3967,12 +3974,17 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
                 }
         }
         mutex_lock(&ftrace_lock);
-       __disable_ftrace_function_probe();
+       disabled = __disable_ftrace_function_probe();
         /*
          * Remove after the disable is called. Otherwise, if the last
          * probe is removed, a null hash means *all enabled*.
          */
         ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash);
+
+       /* still need to update the function call sites */
+       if (ftrace_enabled && !disabled)
+               ftrace_run_modify_code(&trace_probe_ops, FTRACE_UPDATE_CALLS,
+                                      &old_hash_ops);
         synchronize_sched();
         if (!ret)
                 free_ftrace_hash_rcu(old_hash);
@@ -4416,16 +4428,24 @@ static int __init set_graph_notrace_function(char *str)
  }
  __setup("ftrace_graph_notrace=", set_graph_notrace_function);
  
+static int __init set_graph_max_depth_function(char *str)
+{
+       if (!str)
+               return 0;
+       fgraph_max_depth = simple_strtoul(str, NULL, 0);
+       return 1;
+}
+__setup("ftrace_graph_max_depth=", set_graph_max_depth_function);
+
  static void __init set_ftrace_early_graph(char *buf, int enable)
  {
         int ret;
         char *func;
         struct ftrace_hash *hash;
  
-       if (enable)
-               hash = ftrace_graph_hash;
-       else
-               hash = ftrace_graph_notrace_hash;
+       hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
+       if (WARN_ON(!hash))
+               return;
  
         while (buf) {
                 func = strsep(&buf, ",");
@@ -4435,6 +4455,11 @@ static void __init set_ftrace_early_graph(char *buf, int enable)
                         printk(KERN_DEBUG "ftrace: function %s not "
                                           "traceable\n", func);
         }
+
+       if (enable)
+               ftrace_graph_hash = hash;
+       else
+               ftrace_graph_notrace_hash = hash;
  }
  #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
  
@@ -5488,7 +5513,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
   * Normally the mcount trampoline will call the ops->func, but there
   * are times that it should not. For example, if the ops does not
   * have its own recursion protection, then it should call the
- * ftrace_ops_recurs_func() instead.
+ * ftrace_ops_assist_func() instead.
   *
   * Returns the function that the trampoline should call for @ops.
   */
@@ -5541,6 +5566,15 @@ static void clear_ftrace_pids(struct trace_array *tr)
         trace_free_pid_list(pid_list);
  }
  
+void ftrace_clear_pids(struct trace_array *tr)
+{
+       mutex_lock(&ftrace_lock);
+
+       clear_ftrace_pids(tr);
+
+       mutex_unlock(&ftrace_lock);
+}
+
  static void ftrace_pid_reset(struct trace_array *tr)
  {
         mutex_lock(&ftrace_lock);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c

index 96fc3c043ad654e0c2de8a9d65a38695788b46b2..ca47a4fa2986c953dffa0b74fc791647bf0409fb 100644 (file)
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3405,11 +3405,23 @@ EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
  int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
  {
         struct ring_buffer_per_cpu *cpu_buffer;
+       struct buffer_page *reader;
+       struct buffer_page *head_page;
+       struct buffer_page *commit_page;
+       unsigned commit;
  
         cpu_buffer = iter->cpu_buffer;
  
-       return iter->head_page == cpu_buffer->commit_page &&
-               iter->head == rb_commit_index(cpu_buffer);
+       /* Remember, trace recording is off when iterator is in use */
+       reader = cpu_buffer->reader_page;
+       head_page = cpu_buffer->head_page;
+       commit_page = cpu_buffer->commit_page;
+       commit = rb_page_commit(commit_page);
+
+       return ((iter->head_page == commit_page && iter->head == commit) ||
+               (iter->head_page == reader && commit_page == head_page &&
+                head_page->read == commit &&
+                iter->head == rb_page_commit(cpu_buffer->reader_page)));
  }
  EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
  
@@ -4826,9 +4838,9 @@ static __init int test_ringbuffer(void)
                 rb_data[cpu].cnt = cpu;
                 rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu],
                                                  "rbtester/%d", cpu);
-               if (WARN_ON(!rb_threads[cpu])) {
+               if (WARN_ON(IS_ERR(rb_threads[cpu]))) {
                         pr_cont("FAILED\n");
-                       ret = -1;
+                       ret = PTR_ERR(rb_threads[cpu]);
                         goto out_free;
                 }
  
@@ -4838,9 +4850,9 @@ static __init int test_ringbuffer(void)
  
         /* Now create the rb hammer! */
         rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer");
-       if (WARN_ON(!rb_hammer)) {
+       if (WARN_ON(IS_ERR(rb_hammer))) {
                 pr_cont("FAILED\n");
-               ret = -1;
+               ret = PTR_ERR(rb_hammer);
                 goto out_free;
         }
  
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c

index 707445ceb7efd4e098ba3ad5a129bb19a03122f9..0ad75e9698f6b0f918df83af90da1204e99d8308 100644 (file)
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4341,22 +4341,22 @@ static const char readme_msg[] =
         "\t\t\t  traces\n"
  #endif
  #endif /* CONFIG_STACK_TRACER */
-#ifdef CONFIG_KPROBE_EVENT
+#ifdef CONFIG_KPROBE_EVENTS
         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
         "\t\t\t  Write into this file to define/undefine new trace events.\n"
  #endif
-#ifdef CONFIG_UPROBE_EVENT
+#ifdef CONFIG_UPROBE_EVENTS
         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
         "\t\t\t  Write into this file to define/undefine new trace events.\n"
  #endif
-#if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
+#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
         "\t  accepts: event-definitions (one definition per line)\n"
         "\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
         "\t           -:[<group>/]<event>\n"
-#ifdef CONFIG_KPROBE_EVENT
+#ifdef CONFIG_KPROBE_EVENTS
         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
  #endif
-#ifdef CONFIG_UPROBE_EVENT
+#ifdef CONFIG_UPROBE_EVENTS
         "\t    place: <path>:<offset>\n"
  #endif
         "\t     args: <name>=fetcharg[:type]\n"
@@ -6733,11 +6733,13 @@ ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
                 return ret;
  
   out_reg:
-       ret = register_ftrace_function_probe(glob, ops, count);
+       ret = alloc_snapshot(&global_trace);
+       if (ret < 0)
+               goto out;
  
-       if (ret >= 0)
-               alloc_snapshot(&global_trace);
+       ret = register_ftrace_function_probe(glob, ops, count);
  
+ out:
         return ret < 0 ? ret : 0;
  }
  
@@ -7402,6 +7404,7 @@ static int instance_rmdir(const char *name)
  
         tracing_set_nop(tr);
         event_trace_del_tracer(tr);
+       ftrace_clear_pids(tr);
         ftrace_destroy_function_files(tr);
         tracefs_remove_recursive(tr->dir);
         free_trace_buffers(tr);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h

index ae1cce91fead25a065899109e426a6cc1e597d28..d19d52d600d623e9d9f0676891e19c6e5e880bce 100644 (file)
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -896,6 +896,7 @@ int using_ftrace_ops_list_func(void);
  void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer);
  void ftrace_init_tracefs_toplevel(struct trace_array *tr,
                                   struct dentry *d_tracer);
+void ftrace_clear_pids(struct trace_array *tr);
  #else
  static inline int ftrace_trace_task(struct trace_array *tr)
  {
@@ -914,6 +915,7 @@ ftrace_init_global_array_ops(struct trace_array *tr) { }
  static inline void ftrace_reset_array_ops(struct trace_array *tr) { }
  static inline void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d) { }
  static inline void ftrace_init_tracefs_toplevel(struct trace_array *tr, struct dentry *d) { }
+static inline void ftrace_clear_pids(struct trace_array *tr) { }
  /* ftace_func_t type is not defined, use macro instead of static inline */
  #define ftrace_init_array_ops(tr, func) do { } while (0)
  #endif /* CONFIG_FUNCTION_TRACER */
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h

index 0c0ae54d44c616d5d09876165c2516c3e032af77..903273c93e6167afcbe2de99451a906c2e79ab1f 100644 (file)
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -248,7 +248,7 @@ ASSIGN_FETCH_FUNC(file_offset, ftype),                      \
  #define FETCH_TYPE_STRING      0
  #define FETCH_TYPE_STRSIZE     1
  
-#ifdef CONFIG_KPROBE_EVENT
+#ifdef CONFIG_KPROBE_EVENTS
  struct symbol_cache;
  unsigned long update_symbol_cache(struct symbol_cache *sc);
  void free_symbol_cache(struct symbol_cache *sc);
@@ -278,7 +278,7 @@ alloc_symbol_cache(const char *sym, long offset)
  {
         return NULL;
  }
-#endif /* CONFIG_KPROBE_EVENT */
+#endif /* CONFIG_KPROBE_EVENTS */
  
  struct probe_arg {
         struct fetch_param      fetch;
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c

index 1d68b5b7ad4133d102a39006f575bdfe49d808ea..5fb1f2c87e6b846b7f9d32823ef3aede4b28db9e 100644 (file)
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -65,7 +65,7 @@ void stack_trace_print(void)
  }
  
  /*
- * When arch-specific code overides this function, the following
+ * When arch-specific code overrides this function, the following
   * data should be filled up, assuming stack_trace_max_lock is held to
   * prevent concurrent updates.
   *     stack_trace_index[]
diff --git a/kernel/ucount.c b/kernel/ucount.c

index 62630a40ab3a4225291c7804ff67917aa997646c..b4eeee03934fe8f083b70e9907be0721759bc3be 100644 (file)
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -144,7 +144,7 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid)
  
                 new->ns = ns;
                 new->uid = uid;
-               atomic_set(&new->count, 0);
+               new->count = 0;
  
                 spin_lock_irq(&ucounts_lock);
                 ucounts = find_ucounts(ns, uid, hashent);
@@ -155,8 +155,10 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid)
                         ucounts = new;
                 }
         }
-       if (!atomic_add_unless(&ucounts->count, 1, INT_MAX))
+       if (ucounts->count == INT_MAX)
                 ucounts = NULL;
+       else
+               ucounts->count += 1;
         spin_unlock_irq(&ucounts_lock);
         return ucounts;
  }
@@ -165,13 +167,15 @@ static void put_ucounts(struct ucounts *ucounts)
  {
         unsigned long flags;
  
-       if (atomic_dec_and_test(&ucounts->count)) {
-               spin_lock_irqsave(&ucounts_lock, flags);
+       spin_lock_irqsave(&ucounts_lock, flags);
+       ucounts->count -= 1;
+       if (!ucounts->count)
                 hlist_del_init(&ucounts->node);
-               spin_unlock_irqrestore(&ucounts_lock, flags);
+       else
+               ucounts = NULL;
+       spin_unlock_irqrestore(&ucounts_lock, flags);
  
-               kfree(ucounts);
-       }
+       kfree(ucounts);
  }
  
  static inline bool atomic_inc_below(atomic_t *v, int u)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c

index 072cbc9b175dc1efbe95c14858f810f92db12130..c0168b7da1eaf22c216147ca5ebd03ef7311dca8 100644 (file)
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1507,6 +1507,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
         struct timer_list *timer = &dwork->timer;
         struct work_struct *work = &dwork->work;
  
+       WARN_ON_ONCE(!wq);
         WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
                      timer->data != (unsigned long)dwork);
         WARN_ON_ONCE(timer_pending(timer));
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug

index 97d62c2da6c25dd5721f8c1c75264c83201f7247..fa16c0f82d6e4c159ac4b8751a1fc52631974438 100644 (file)
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1103,9 +1103,6 @@ config PROVE_LOCKING
  
          For more details, see Documentation/locking/lockdep-design.txt.
  
-config PROVE_LOCKING_SMALL
-       bool
-
  config LOCKDEP
         bool
         depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
@@ -1114,6 +1111,9 @@ config LOCKDEP
         select KALLSYMS
         select KALLSYMS_ALL
  
+config LOCKDEP_SMALL
+       bool
+
  config LOCK_STAT
         bool "Lock usage statistics"
         depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
diff --git a/lib/ioremap.c b/lib/ioremap.c

index a3e14ce92a5684a662c2c8f80f97e6fef95943b7..4bb30206b9426f1fcece4324cc0dfe76b8855c65 100644 (file)
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -14,6 +14,7 @@
  #include <asm/pgtable.h>
  
  #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+static int __read_mostly ioremap_p4d_capable;
  static int __read_mostly ioremap_pud_capable;
  static int __read_mostly ioremap_pmd_capable;
  static int __read_mostly ioremap_huge_disabled;
@@ -35,6 +36,11 @@ void __init ioremap_huge_init(void)
         }
  }
  
+static inline int ioremap_p4d_enabled(void)
+{
+       return ioremap_p4d_capable;
+}
+
  static inline int ioremap_pud_enabled(void)
  {
         return ioremap_pud_capable;
@@ -46,6 +52,7 @@ static inline int ioremap_pmd_enabled(void)
  }
  
  #else  /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
+static inline int ioremap_p4d_enabled(void) { return 0; }
  static inline int ioremap_pud_enabled(void) { return 0; }
  static inline int ioremap_pmd_enabled(void) { return 0; }
  #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
@@ -94,14 +101,14 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
         return 0;
  }
  
-static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
+static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr,
                 unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
  {
         pud_t *pud;
         unsigned long next;
  
         phys_addr -= addr;
-       pud = pud_alloc(&init_mm, pgd, addr);
+       pud = pud_alloc(&init_mm, p4d, addr);
         if (!pud)
                 return -ENOMEM;
         do {
@@ -120,6 +127,32 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
         return 0;
  }
  
+static inline int ioremap_p4d_range(pgd_t *pgd, unsigned long addr,
+               unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
+{
+       p4d_t *p4d;
+       unsigned long next;
+
+       phys_addr -= addr;
+       p4d = p4d_alloc(&init_mm, pgd, addr);
+       if (!p4d)
+               return -ENOMEM;
+       do {
+               next = p4d_addr_end(addr, end);
+
+               if (ioremap_p4d_enabled() &&
+                   ((next - addr) == P4D_SIZE) &&
+                   IS_ALIGNED(phys_addr + addr, P4D_SIZE)) {
+                       if (p4d_set_huge(p4d, phys_addr + addr, prot))
+                               continue;
+               }
+
+               if (ioremap_pud_range(p4d, addr, next, phys_addr + addr, prot))
+                       return -ENOMEM;
+       } while (p4d++, addr = next, addr != end);
+       return 0;
+}
+
  int ioremap_page_range(unsigned long addr,
                        unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
  {
@@ -135,7 +168,7 @@ int ioremap_page_range(unsigned long addr,
         pgd = pgd_offset_k(addr);
         do {
                 next = pgd_addr_end(addr, end);
-               err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, prot);
+               err = ioremap_p4d_range(pgd, addr, next, phys_addr+addr, prot);
                 if (err)
                         break;
         } while (pgd++, addr = next, addr != end);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c

index e68604ae3cedf41ce98bc06de2142629fa115cbd..cc001a542cb55326ba83e69805db3185d0c4301e 100644 (file)
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -786,6 +786,68 @@ void iov_iter_advance(struct iov_iter *i, size_t size)
  }
  EXPORT_SYMBOL(iov_iter_advance);
  
+void iov_iter_revert(struct iov_iter *i, size_t unroll)
+{
+       if (!unroll)
+               return;
+       i->count += unroll;
+       if (unlikely(i->type & ITER_PIPE)) {
+               struct pipe_inode_info *pipe = i->pipe;
+               int idx = i->idx;
+               size_t off = i->iov_offset;
+               while (1) {
+                       size_t n = off - pipe->bufs[idx].offset;
+                       if (unroll < n) {
+                               off -= unroll;
+                               break;
+                       }
+                       unroll -= n;
+                       if (!unroll && idx == i->start_idx) {
+                               off = 0;
+                               break;
+                       }
+                       if (!idx--)
+                               idx = pipe->buffers - 1;
+                       off = pipe->bufs[idx].offset + pipe->bufs[idx].len;
+               }
+               i->iov_offset = off;
+               i->idx = idx;
+               pipe_truncate(i);
+               return;
+       }
+       if (unroll <= i->iov_offset) {
+               i->iov_offset -= unroll;
+               return;
+       }
+       unroll -= i->iov_offset;
+       if (i->type & ITER_BVEC) {
+               const struct bio_vec *bvec = i->bvec;
+               while (1) {
+                       size_t n = (--bvec)->bv_len;
+                       i->nr_segs++;
+                       if (unroll <= n) {
+                               i->bvec = bvec;
+                               i->iov_offset = n - unroll;
+                               return;
+                       }
+                       unroll -= n;
+               }
+       } else { /* same logics for iovec and kvec */
+               const struct iovec *iov = i->iov;
+               while (1) {
+                       size_t n = (--iov)->iov_len;
+                       i->nr_segs++;
+                       if (unroll <= n) {
+                               i->iov = iov;
+                               i->iov_offset = n - unroll;
+                               return;
+                       }
+                       unroll -= n;
+               }
+       }
+}
+EXPORT_SYMBOL(iov_iter_revert);
+
  /*
   * Return the count of just the current iov_iter segment.
   */
@@ -839,6 +901,7 @@ void iov_iter_pipe(struct iov_iter *i, int direction,
         i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
         i->iov_offset = 0;
         i->count = count;
+       i->start_idx = i->idx;
  }
  EXPORT_SYMBOL(iov_iter_pipe);
  
diff --git a/lib/kobject.c b/lib/kobject.c

index 445dcaeb0f56de7882a1b1278a90ef7931f4099c..763d70a189410cb7307b87474801a662f631f806 100644 (file)
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -601,12 +601,15 @@ struct kobject *kobject_get(struct kobject *kobj)
  }
  EXPORT_SYMBOL(kobject_get);
  
-static struct kobject * __must_check kobject_get_unless_zero(struct kobject *kobj)
+struct kobject * __must_check kobject_get_unless_zero(struct kobject *kobj)
  {
+       if (!kobj)
+               return NULL;
         if (!kref_get_unless_zero(&kobj->kref))
                 kobj = NULL;
         return kobj;
  }
+EXPORT_SYMBOL(kobject_get_unless_zero);
  
  /*
   * kobject_cleanup - free kobject resources.
diff --git a/lib/radix-tree.c b/lib/radix-tree.c

index 5ed506d648c4e53ee955e9c19b942fd0d666eee1..691a9ad48497b02e3b09304d6565165ef2317b16 100644 (file)
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -2129,8 +2129,8 @@ int ida_pre_get(struct ida *ida, gfp_t gfp)
                 struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp);
                 if (!bitmap)
                         return 0;
-               bitmap = this_cpu_cmpxchg(ida_bitmap, NULL, bitmap);
-               kfree(bitmap);
+               if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap))
+                       kfree(bitmap);
         }
  
         return 1;
diff --git a/lib/refcount.c b/lib/refcount.c

index 1d33366189d10c88bf10616e6a9b25c64c4bf570..aa09ad3c30b0dc37a920c46f0da711f366d29423 100644 (file)
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -58,7 +58,7 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r)
                 val = old;
         }
  
-       WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
+       WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
  
         return true;
  }
@@ -66,7 +66,7 @@ EXPORT_SYMBOL_GPL(refcount_add_not_zero);
  
  void refcount_add(unsigned int i, refcount_t *r)
  {
-       WARN(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n");
+       WARN_ONCE(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n");
  }
  EXPORT_SYMBOL_GPL(refcount_add);
  
@@ -97,7 +97,7 @@ bool refcount_inc_not_zero(refcount_t *r)
                 val = old;
         }
  
-       WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
+       WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
  
         return true;
  }
@@ -111,7 +111,7 @@ EXPORT_SYMBOL_GPL(refcount_inc_not_zero);
   */
  void refcount_inc(refcount_t *r)
  {
-       WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n");
+       WARN_ONCE(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n");
  }
  EXPORT_SYMBOL_GPL(refcount_inc);
  
@@ -125,7 +125,7 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r)
  
                 new = val - i;
                 if (new > val) {
-                       WARN(new > val, "refcount_t: underflow; use-after-free.\n");
+                       WARN_ONCE(new > val, "refcount_t: underflow; use-after-free.\n");
                         return false;
                 }
  
@@ -164,7 +164,7 @@ EXPORT_SYMBOL_GPL(refcount_dec_and_test);
  
  void refcount_dec(refcount_t *r)
  {
-       WARN(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n");
+       WARN_ONCE(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n");
  }
  EXPORT_SYMBOL_GPL(refcount_dec);
  
@@ -204,7 +204,7 @@ bool refcount_dec_not_one(refcount_t *r)
  
                 new = val - 1;
                 if (new > val) {
-                       WARN(new > val, "refcount_t: underflow; use-after-free.\n");
+                       WARN_ONCE(new > val, "refcount_t: underflow; use-after-free.\n");
                         return true;
                 }
  
diff --git a/lib/sbitmap.c b/lib/sbitmap.c

index 60e800e0b5a0d986ec0e2664dd9e4185aa50328a..80aa8d5463faf9f4c39d5c58bb7efdb927836cdb 100644 (file)
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -79,15 +79,15 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth)
  }
  EXPORT_SYMBOL_GPL(sbitmap_resize);
  
-static int __sbitmap_get_word(struct sbitmap_word *word, unsigned int hint,
-                             bool wrap)
+static int __sbitmap_get_word(unsigned long *word, unsigned long depth,
+                             unsigned int hint, bool wrap)
  {
         unsigned int orig_hint = hint;
         int nr;
  
         while (1) {
-               nr = find_next_zero_bit(&word->word, word->depth, hint);
-               if (unlikely(nr >= word->depth)) {
+               nr = find_next_zero_bit(word, depth, hint);
+               if (unlikely(nr >= depth)) {
                         /*
                          * We started with an offset, and we didn't reset the
                          * offset to 0 in a failure case, so start from 0 to
@@ -100,11 +100,11 @@ static int __sbitmap_get_word(struct sbitmap_word *word, unsigned int hint,
                         return -1;
                 }
  
-               if (!test_and_set_bit(nr, &word->word))
+               if (!test_and_set_bit(nr, word))
                         break;
  
                 hint = nr + 1;
-               if (hint >= word->depth - 1)
+               if (hint >= depth - 1)
                         hint = 0;
         }
  
@@ -119,7 +119,8 @@ int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
         index = SB_NR_TO_INDEX(sb, alloc_hint);
  
         for (i = 0; i < sb->map_nr; i++) {
-               nr = __sbitmap_get_word(&sb->map[index],
+               nr = __sbitmap_get_word(&sb->map[index].word,
+                                       sb->map[index].depth,
                                         SB_NR_TO_BIT(sb, alloc_hint),
                                         !round_robin);
                 if (nr != -1) {
@@ -141,6 +142,37 @@ int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
  }
  EXPORT_SYMBOL_GPL(sbitmap_get);
  
+int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint,
+                       unsigned long shallow_depth)
+{
+       unsigned int i, index;
+       int nr = -1;
+
+       index = SB_NR_TO_INDEX(sb, alloc_hint);
+
+       for (i = 0; i < sb->map_nr; i++) {
+               nr = __sbitmap_get_word(&sb->map[index].word,
+                                       min(sb->map[index].depth, shallow_depth),
+                                       SB_NR_TO_BIT(sb, alloc_hint), true);
+               if (nr != -1) {
+                       nr += index << sb->shift;
+                       break;
+               }
+
+               /* Jump to next index. */
+               index++;
+               alloc_hint = index << sb->shift;
+
+               if (index >= sb->map_nr) {
+                       index = 0;
+                       alloc_hint = 0;
+               }
+       }
+
+       return nr;
+}
+EXPORT_SYMBOL_GPL(sbitmap_get_shallow);
+
  bool sbitmap_any_bit_set(const struct sbitmap *sb)
  {
         unsigned int i;
@@ -342,6 +374,35 @@ int __sbitmap_queue_get(struct sbitmap_queue *sbq)
  }
  EXPORT_SYMBOL_GPL(__sbitmap_queue_get);
  
+int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
+                               unsigned int shallow_depth)
+{
+       unsigned int hint, depth;
+       int nr;
+
+       hint = this_cpu_read(*sbq->alloc_hint);
+       depth = READ_ONCE(sbq->sb.depth);
+       if (unlikely(hint >= depth)) {
+               hint = depth ? prandom_u32() % depth : 0;
+               this_cpu_write(*sbq->alloc_hint, hint);
+       }
+       nr = sbitmap_get_shallow(&sbq->sb, hint, shallow_depth);
+
+       if (nr == -1) {
+               /* If the map is full, a hint won't do us much good. */
+               this_cpu_write(*sbq->alloc_hint, 0);
+       } else if (nr == hint || unlikely(sbq->round_robin)) {
+               /* Only update the hint if we used it. */
+               hint = nr + 1;
+               if (hint >= depth - 1)
+                       hint = 0;
+               this_cpu_write(*sbq->alloc_hint, hint);
+       }
+
+       return nr;
+}
+EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow);
+
  static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
  {
         int i, wake_index;
diff --git a/lib/syscall.c b/lib/syscall.c

index 17d5ff5fa6a388c4dc947a47615608bca79b2c40..2c6cd1b5c3ea86668bc73196c4aa980c724ad34c 100644 (file)
--- a/lib/syscall.c
+++ b/lib/syscall.c
@@ -12,6 +12,7 @@ static int collect_syscall(struct task_struct *target, long *callno,
  
         if (!try_get_task_stack(target)) {
                 /* Task has no stack, so the task isn't in a syscall. */
+               *sp = *pc = 0;
                 *callno = -1;
                 return 0;
         }
diff --git a/lib/test_kasan.c b/lib/test_kasan.c

index 0b1d3140fbb87738ec37031997075e50c5670575..a25c9763fce19f17c723b9db3645ae93ba47dcb6 100644 (file)
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -20,6 +20,7 @@
  #include <linux/string.h>
  #include <linux/uaccess.h>
  #include <linux/module.h>
+#include <linux/kasan.h>
  
  /*
   * Note: test functions are marked noinline so that their names appear in
@@ -474,6 +475,12 @@ static noinline void __init use_after_scope_test(void)
  
  static int __init kmalloc_tests_init(void)
  {
+       /*
+        * Temporarily enable multi-shot mode. Otherwise, we'd only get a
+        * report for the first case.
+        */
+       bool multishot = kasan_save_enable_multi_shot();
+
         kmalloc_oob_right();
         kmalloc_oob_left();
         kmalloc_node_oob_right();
@@ -499,6 +506,9 @@ static int __init kmalloc_tests_init(void)
         ksize_unpoisons_memory();
         copy_user_test();
         use_after_scope_test();
+
+       kasan_restore_multi_shot(multishot);
+
         return -EAGAIN;
  }
  
diff --git a/mm/backing-dev.c b/mm/backing-dev.c

index 6d861d090e9fc79d39e2b48f57b1d9f4bc91463f..f028a9a472fd9b2c7098bce8fe622fd58ba2f140 100644 (file)
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -12,8 +12,6 @@
  #include <linux/device.h>
  #include <trace/events/writeback.h>
  
-static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
-
  struct backing_dev_info noop_backing_dev_info = {
         .name           = "noop",
         .capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK,
@@ -242,6 +240,8 @@ static __init int bdi_class_init(void)
  }
  postcore_initcall(bdi_class_init);
  
+static int bdi_init(struct backing_dev_info *bdi);
+
  static int __init default_bdi_init(void)
  {
         int err;
@@ -294,6 +294,8 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
  
         memset(wb, 0, sizeof(*wb));
  
+       if (wb != &bdi->wb)
+               bdi_get(bdi);
         wb->bdi = bdi;
         wb->last_old_flush = jiffies;
         INIT_LIST_HEAD(&wb->b_dirty);
@@ -314,8 +316,10 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
         wb->dirty_sleep = jiffies;
  
         wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp);
-       if (!wb->congested)
-               return -ENOMEM;
+       if (!wb->congested) {
+               err = -ENOMEM;
+               goto out_put_bdi;
+       }
  
         err = fprop_local_init_percpu(&wb->completions, gfp);
         if (err)
@@ -335,9 +339,14 @@ out_destroy_stat:
         fprop_local_destroy_percpu(&wb->completions);
  out_put_cong:
         wb_congested_put(wb->congested);
+out_put_bdi:
+       if (wb != &bdi->wb)
+               bdi_put(bdi);
         return err;
  }
  
+static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb);
+
  /*
   * Remove bdi from the global list and shutdown any threads we have running
   */
@@ -347,10 +356,18 @@ static void wb_shutdown(struct bdi_writeback *wb)
         spin_lock_bh(&wb->work_lock);
         if (!test_and_clear_bit(WB_registered, &wb->state)) {
                 spin_unlock_bh(&wb->work_lock);
+               /*
+                * Wait for wb shutdown to finish if someone else is just
+                * running wb_shutdown(). Otherwise we could proceed to wb /
+                * bdi destruction before wb_shutdown() is finished.
+                */
+               wait_on_bit(&wb->state, WB_shutting_down, TASK_UNINTERRUPTIBLE);
                 return;
         }
+       set_bit(WB_shutting_down, &wb->state);
         spin_unlock_bh(&wb->work_lock);
  
+       cgwb_remove_from_bdi_list(wb);
         /*
          * Drain work list and shutdown the delayed_work.  !WB_registered
          * tells wb_workfn() that @wb is dying and its work_list needs to
@@ -359,6 +376,12 @@ static void wb_shutdown(struct bdi_writeback *wb)
         mod_delayed_work(bdi_wq, &wb->dwork, 0);
         flush_delayed_work(&wb->dwork);
         WARN_ON(!list_empty(&wb->work_list));
+       /*
+        * Make sure bit gets cleared after shutdown is finished. Matches with
+        * the barrier provided by test_and_clear_bit() above.
+        */
+       smp_wmb();
+       clear_bit(WB_shutting_down, &wb->state);
  }
  
  static void wb_exit(struct bdi_writeback *wb)
@@ -372,6 +395,8 @@ static void wb_exit(struct bdi_writeback *wb)
  
         fprop_local_destroy_percpu(&wb->completions);
         wb_congested_put(wb->congested);
+       if (wb != &wb->bdi->wb)
+               bdi_put(wb->bdi);
  }
  
  #ifdef CONFIG_CGROUP_WRITEBACK
@@ -381,11 +406,9 @@ static void wb_exit(struct bdi_writeback *wb)
  /*
   * cgwb_lock protects bdi->cgwb_tree, bdi->cgwb_congested_tree,
   * blkcg->cgwb_list, and memcg->cgwb_list.  bdi->cgwb_tree is also RCU
- * protected.  cgwb_release_wait is used to wait for the completion of cgwb
- * releases from bdi destruction path.
+ * protected.
   */
  static DEFINE_SPINLOCK(cgwb_lock);
-static DECLARE_WAIT_QUEUE_HEAD(cgwb_release_wait);
  
  /**
   * wb_congested_get_create - get or create a wb_congested
@@ -438,7 +461,7 @@ retry:
                 return NULL;
  
         atomic_set(&new_congested->refcnt, 0);
-       new_congested->bdi = bdi;
+       new_congested->__bdi = bdi;
         new_congested->blkcg_id = blkcg_id;
         goto retry;
  
@@ -466,10 +489,10 @@ void wb_congested_put(struct bdi_writeback_congested *congested)
         }
  
         /* bdi might already have been destroyed leaving @congested unlinked */
-       if (congested->bdi) {
+       if (congested->__bdi) {
                 rb_erase(&congested->rb_node,
-                        &congested->bdi->cgwb_congested_tree);
-               congested->bdi = NULL;
+                        &congested->__bdi->cgwb_congested_tree);
+               congested->__bdi = NULL;
         }
  
         spin_unlock_irqrestore(&cgwb_lock, flags);
@@ -480,11 +503,6 @@ static void cgwb_release_workfn(struct work_struct *work)
  {
         struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
                                                 release_work);
-       struct backing_dev_info *bdi = wb->bdi;
-
-       spin_lock_irq(&cgwb_lock);
-       list_del_rcu(&wb->bdi_node);
-       spin_unlock_irq(&cgwb_lock);
  
         wb_shutdown(wb);
  
@@ -495,9 +513,6 @@ static void cgwb_release_workfn(struct work_struct *work)
         percpu_ref_exit(&wb->refcnt);
         wb_exit(wb);
         kfree_rcu(wb, rcu);
-
-       if (atomic_dec_and_test(&bdi->usage_cnt))
-               wake_up_all(&cgwb_release_wait);
  }
  
  static void cgwb_release(struct percpu_ref *refcnt)
@@ -517,6 +532,13 @@ static void cgwb_kill(struct bdi_writeback *wb)
         percpu_ref_kill(&wb->refcnt);
  }
  
+static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb)
+{
+       spin_lock_irq(&cgwb_lock);
+       list_del_rcu(&wb->bdi_node);
+       spin_unlock_irq(&cgwb_lock);
+}
+
  static int cgwb_create(struct backing_dev_info *bdi,
                        struct cgroup_subsys_state *memcg_css, gfp_t gfp)
  {
@@ -580,7 +602,6 @@ static int cgwb_create(struct backing_dev_info *bdi,
                 /* we might have raced another instance of this function */
                 ret = radix_tree_insert(&bdi->cgwb_tree, memcg_css->id, wb);
                 if (!ret) {
-                       atomic_inc(&bdi->usage_cnt);
                         list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
                         list_add(&wb->memcg_node, memcg_cgwb_list);
                         list_add(&wb->blkcg_node, blkcg_cgwb_list);
@@ -670,7 +691,6 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
  
         INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
         bdi->cgwb_congested_tree = RB_ROOT;
-       atomic_set(&bdi->usage_cnt, 1);
  
         ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
         if (!ret) {
@@ -680,36 +700,26 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
         return ret;
  }
  
-static void cgwb_bdi_destroy(struct backing_dev_info *bdi)
+static void cgwb_bdi_unregister(struct backing_dev_info *bdi)
  {
         struct radix_tree_iter iter;
-       struct rb_node *rbn;
         void **slot;
+       struct bdi_writeback *wb;
  
         WARN_ON(test_bit(WB_registered, &bdi->wb.state));
  
         spin_lock_irq(&cgwb_lock);
-
         radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0)
                 cgwb_kill(*slot);
  
-       while ((rbn = rb_first(&bdi->cgwb_congested_tree))) {
-               struct bdi_writeback_congested *congested =
-                       rb_entry(rbn, struct bdi_writeback_congested, rb_node);
-
-               rb_erase(rbn, &bdi->cgwb_congested_tree);
-               congested->bdi = NULL;  /* mark @congested unlinked */
+       while (!list_empty(&bdi->wb_list)) {
+               wb = list_first_entry(&bdi->wb_list, struct bdi_writeback,
+                                     bdi_node);
+               spin_unlock_irq(&cgwb_lock);
+               wb_shutdown(wb);
+               spin_lock_irq(&cgwb_lock);
         }
-
         spin_unlock_irq(&cgwb_lock);
-
-       /*
-        * All cgwb's and their congested states must be shutdown and
-        * released before returning.  Drain the usage counter to wait for
-        * all cgwb's and cgwb_congested's ever created on @bdi.
-        */
-       atomic_dec(&bdi->usage_cnt);
-       wait_event(cgwb_release_wait, !atomic_read(&bdi->usage_cnt));
  }
  
  /**
@@ -749,6 +759,28 @@ void wb_blkcg_offline(struct blkcg *blkcg)
         spin_unlock_irq(&cgwb_lock);
  }
  
+static void cgwb_bdi_exit(struct backing_dev_info *bdi)
+{
+       struct rb_node *rbn;
+
+       spin_lock_irq(&cgwb_lock);
+       while ((rbn = rb_first(&bdi->cgwb_congested_tree))) {
+               struct bdi_writeback_congested *congested =
+                       rb_entry(rbn, struct bdi_writeback_congested, rb_node);
+
+               rb_erase(rbn, &bdi->cgwb_congested_tree);
+               congested->__bdi = NULL;        /* mark @congested unlinked */
+       }
+       spin_unlock_irq(&cgwb_lock);
+}
+
+static void cgwb_bdi_register(struct backing_dev_info *bdi)
+{
+       spin_lock_irq(&cgwb_lock);
+       list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list);
+       spin_unlock_irq(&cgwb_lock);
+}
+
  #else  /* CONFIG_CGROUP_WRITEBACK */
  
  static int cgwb_bdi_init(struct backing_dev_info *bdi)
@@ -769,14 +801,26 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
         return 0;
  }
  
-static void cgwb_bdi_destroy(struct backing_dev_info *bdi)
+static void cgwb_bdi_unregister(struct backing_dev_info *bdi) { }
+
+static void cgwb_bdi_exit(struct backing_dev_info *bdi)
  {
         wb_congested_put(bdi->wb_congested);
  }
  
+static void cgwb_bdi_register(struct backing_dev_info *bdi)
+{
+       list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list);
+}
+
+static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb)
+{
+       list_del_rcu(&wb->bdi_node);
+}
+
  #endif /* CONFIG_CGROUP_WRITEBACK */
  
-int bdi_init(struct backing_dev_info *bdi)
+static int bdi_init(struct backing_dev_info *bdi)
  {
         int ret;
  
@@ -792,11 +836,8 @@ int bdi_init(struct backing_dev_info *bdi)
  
         ret = cgwb_bdi_init(bdi);
  
-       list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list);
-
         return ret;
  }
-EXPORT_SYMBOL(bdi_init);
  
  struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id)
  {
@@ -813,22 +854,20 @@ struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id)
         }
         return bdi;
  }
+EXPORT_SYMBOL(bdi_alloc_node);
  
-int bdi_register(struct backing_dev_info *bdi, struct device *parent,
-               const char *fmt, ...)
+int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
  {
-       va_list args;
         struct device *dev;
  
         if (bdi->dev)   /* The driver needs to use separate queues per device */
                 return 0;
  
-       va_start(args, fmt);
-       dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args);
-       va_end(args);
+       dev = device_create_vargs(bdi_class, NULL, MKDEV(0, 0), bdi, fmt, args);
         if (IS_ERR(dev))
                 return PTR_ERR(dev);
  
+       cgwb_bdi_register(bdi);
         bdi->dev = dev;
  
         bdi_debug_register(bdi, dev_name(dev));
@@ -841,22 +880,29 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
         trace_writeback_bdi_register(bdi);
         return 0;
  }
-EXPORT_SYMBOL(bdi_register);
+EXPORT_SYMBOL(bdi_register_va);
  
-int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
+int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...)
  {
-       return bdi_register(bdi, NULL, "%u:%u", MAJOR(dev), MINOR(dev));
+       va_list args;
+       int ret;
+
+       va_start(args, fmt);
+       ret = bdi_register_va(bdi, fmt, args);
+       va_end(args);
+       return ret;
  }
-EXPORT_SYMBOL(bdi_register_dev);
+EXPORT_SYMBOL(bdi_register);
  
  int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner)
  {
         int rc;
  
-       rc = bdi_register(bdi, NULL, "%u:%u", MAJOR(owner->devt),
-                       MINOR(owner->devt));
+       rc = bdi_register(bdi, "%u:%u", MAJOR(owner->devt), MINOR(owner->devt));
         if (rc)
                 return rc;
+       /* Leaking owner reference... */
+       WARN_ON(bdi->owner);
         bdi->owner = owner;
         get_device(owner);
         return 0;
@@ -880,7 +926,7 @@ void bdi_unregister(struct backing_dev_info *bdi)
         /* make sure nobody finds us on the bdi_list anymore */
         bdi_remove_from_list(bdi);
         wb_shutdown(&bdi->wb);
-       cgwb_bdi_destroy(bdi);
+       cgwb_bdi_unregister(bdi);
  
         if (bdi->dev) {
                 bdi_debug_unregister(bdi);
@@ -894,18 +940,16 @@ void bdi_unregister(struct backing_dev_info *bdi)
         }
  }
  
-static void bdi_exit(struct backing_dev_info *bdi)
-{
-       WARN_ON_ONCE(bdi->dev);
-       wb_exit(&bdi->wb);
-}
-
  static void release_bdi(struct kref *ref)
  {
         struct backing_dev_info *bdi =
                         container_of(ref, struct backing_dev_info, refcnt);
  
-       bdi_exit(bdi);
+       if (test_bit(WB_registered, &bdi->wb.state))
+               bdi_unregister(bdi);
+       WARN_ON_ONCE(bdi->dev);
+       wb_exit(&bdi->wb);
+       cgwb_bdi_exit(bdi);
         kfree(bdi);
  }
  
@@ -913,38 +957,7 @@ void bdi_put(struct backing_dev_info *bdi)
  {
         kref_put(&bdi->refcnt, release_bdi);
  }
-
-void bdi_destroy(struct backing_dev_info *bdi)
-{
-       bdi_unregister(bdi);
-       bdi_exit(bdi);
-}
-EXPORT_SYMBOL(bdi_destroy);
-
-/*
- * For use from filesystems to quickly init and register a bdi associated
- * with dirty writeback
- */
-int bdi_setup_and_register(struct backing_dev_info *bdi, char *name)
-{
-       int err;
-
-       bdi->name = name;
-       bdi->capabilities = 0;
-       err = bdi_init(bdi);
-       if (err)
-               return err;
-
-       err = bdi_register(bdi, NULL, "%.28s-%ld", name,
-                          atomic_long_inc_return(&bdi_seq));
-       if (err) {
-               bdi_destroy(bdi);
-               return err;
-       }
-
-       return 0;
-}
-EXPORT_SYMBOL(bdi_setup_and_register);
+EXPORT_SYMBOL(bdi_put);
  
  static wait_queue_head_t congestion_wqh[2] = {
                 __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
diff --git a/mm/gup.c b/mm/gup.c

index 9c047e951aa3d0399f331eb3914b09d207dc989b..04aa405350dce8656db4293a34e95e9bfbe166d8 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -226,6 +226,7 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
                               unsigned int *page_mask)
  {
         pgd_t *pgd;
+       p4d_t *p4d;
         pud_t *pud;
         pmd_t *pmd;
         spinlock_t *ptl;
@@ -243,8 +244,13 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
         pgd = pgd_offset(mm, address);
         if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
                 return no_page_table(vma, flags);
-
-       pud = pud_offset(pgd, address);
+       p4d = p4d_offset(pgd, address);
+       if (p4d_none(*p4d))
+               return no_page_table(vma, flags);
+       BUILD_BUG_ON(p4d_huge(*p4d));
+       if (unlikely(p4d_bad(*p4d)))
+               return no_page_table(vma, flags);
+       pud = pud_offset(p4d, address);
         if (pud_none(*pud))
                 return no_page_table(vma, flags);
         if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
@@ -325,6 +331,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
                 struct page **page)
  {
         pgd_t *pgd;
+       p4d_t *p4d;
         pud_t *pud;
         pmd_t *pmd;
         pte_t *pte;
@@ -338,7 +345,9 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
         else
                 pgd = pgd_offset_gate(mm, address);
         BUG_ON(pgd_none(*pgd));
-       pud = pud_offset(pgd, address);
+       p4d = p4d_offset(pgd, address);
+       BUG_ON(p4d_none(*p4d));
+       pud = pud_offset(p4d, address);
         BUG_ON(pud_none(*pud));
         pmd = pmd_offset(pud, address);
         if (pmd_none(*pmd))
@@ -1400,13 +1409,13 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
         return 1;
  }
  
-static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
+static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
                          int write, struct page **pages, int *nr)
  {
         unsigned long next;
         pud_t *pudp;
  
-       pudp = pud_offset(&pgd, addr);
+       pudp = pud_offset(&p4d, addr);
         do {
                 pud_t pud = READ_ONCE(*pudp);
  
@@ -1428,6 +1437,31 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
         return 1;
  }
  
+static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
+                        int write, struct page **pages, int *nr)
+{
+       unsigned long next;
+       p4d_t *p4dp;
+
+       p4dp = p4d_offset(&pgd, addr);
+       do {
+               p4d_t p4d = READ_ONCE(*p4dp);
+
+               next = p4d_addr_end(addr, end);
+               if (p4d_none(p4d))
+                       return 0;
+               BUILD_BUG_ON(p4d_huge(p4d));
+               if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {
+                       if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
+                                        P4D_SHIFT, next, write, pages, nr))
+                               return 0;
+               } else if (!gup_pud_range(p4d, addr, next, write, pages, nr))
+                       return 0;
+       } while (p4dp++, addr = next, addr != end);
+
+       return 1;
+}
+
  /*
   * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
   * the regular GUP. It will only return non-negative values.
@@ -1478,7 +1512,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
                         if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
                                          PGDIR_SHIFT, next, write, pages, &nr))
                                 break;
-               } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+               } else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
                         break;
         } while (pgdp++, addr = next, addr != end);
         local_irq_restore(flags);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c

index d36b2af4d1bf4b6621974823f36c52dda405f181..f3c4f9d22821f889104340332eee93c5e124df4d 100644 (file)
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -240,18 +240,18 @@ static ssize_t defrag_store(struct kobject *kobj,
                 clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
                 clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
                 set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
-       } else if (!memcmp("defer", buf,
-                   min(sizeof("defer")-1, count))) {
-               clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
-               clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
-               clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
-               set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
         } else if (!memcmp("defer+madvise", buf,
                     min(sizeof("defer+madvise")-1, count))) {
                 clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
                 clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
                 clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
                 set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
+       } else if (!memcmp("defer", buf,
+                   min(sizeof("defer")-1, count))) {
+               clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
+               clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
+               clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
+               set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
         } else if (!memcmp("madvise", buf,
                            min(sizeof("madvise")-1, count))) {
                 clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
@@ -1568,8 +1568,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                 deactivate_page(page);
  
         if (pmd_young(orig_pmd) || pmd_dirty(orig_pmd)) {
-               orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd,
-                       tlb->fullmm);
+               pmdp_invalidate(vma, addr, pmd);
                 orig_pmd = pmd_mkold(orig_pmd);
                 orig_pmd = pmd_mkclean(orig_pmd);
  
@@ -1724,37 +1723,69 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
  {
         struct mm_struct *mm = vma->vm_mm;
         spinlock_t *ptl;
-       int ret = 0;
+       pmd_t entry;
+       bool preserve_write;
+       int ret;
  
         ptl = __pmd_trans_huge_lock(pmd, vma);
-       if (ptl) {
-               pmd_t entry;
-               bool preserve_write = prot_numa && pmd_write(*pmd);
-               ret = 1;
+       if (!ptl)
+               return 0;
  
-               /*
-                * Avoid trapping faults against the zero page. The read-only
-                * data is likely to be read-cached on the local CPU and
-                * local/remote hits to the zero page are not interesting.
-                */
-               if (prot_numa && is_huge_zero_pmd(*pmd)) {
-                       spin_unlock(ptl);
-                       return ret;
-               }
+       preserve_write = prot_numa && pmd_write(*pmd);
+       ret = 1;
  
-               if (!prot_numa || !pmd_protnone(*pmd)) {
-                       entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd);
-                       entry = pmd_modify(entry, newprot);
-                       if (preserve_write)
-                               entry = pmd_mk_savedwrite(entry);
-                       ret = HPAGE_PMD_NR;
-                       set_pmd_at(mm, addr, pmd, entry);
-                       BUG_ON(vma_is_anonymous(vma) && !preserve_write &&
-                                       pmd_write(entry));
-               }
-               spin_unlock(ptl);
-       }
+       /*
+        * Avoid trapping faults against the zero page. The read-only
+        * data is likely to be read-cached on the local CPU and
+        * local/remote hits to the zero page are not interesting.
+        */
+       if (prot_numa && is_huge_zero_pmd(*pmd))
+               goto unlock;
+
+       if (prot_numa && pmd_protnone(*pmd))
+               goto unlock;
+
+       /*
+        * In case prot_numa, we are under down_read(mmap_sem). It's critical
+        * to not clear pmd intermittently to avoid race with MADV_DONTNEED
+        * which is also under down_read(mmap_sem):
+        *
+        *      CPU0:                           CPU1:
+        *                              change_huge_pmd(prot_numa=1)
+        *                               pmdp_huge_get_and_clear_notify()
+        * madvise_dontneed()
+        *  zap_pmd_range()
+        *   pmd_trans_huge(*pmd) == 0 (without ptl)
+        *   // skip the pmd
+        *                               set_pmd_at();
+        *                               // pmd is re-established
+        *
+        * The race makes MADV_DONTNEED miss the huge pmd and not clear it
+        * which may break userspace.
+        *
+        * pmdp_invalidate() is required to make sure we don't miss
+        * dirty/young flags set by hardware.
+        */
+       entry = *pmd;
+       pmdp_invalidate(vma, addr, pmd);
+
+       /*
+        * Recover dirty/young flags.  It relies on pmdp_invalidate to not
+        * corrupt them.
+        */
+       if (pmd_dirty(*pmd))
+               entry = pmd_mkdirty(entry);
+       if (pmd_young(*pmd))
+               entry = pmd_mkyoung(entry);
  
+       entry = pmd_modify(entry, newprot);
+       if (preserve_write)
+               entry = pmd_mk_savedwrite(entry);
+       ret = HPAGE_PMD_NR;
+       set_pmd_at(mm, addr, pmd, entry);
+       BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry));
+unlock:
+       spin_unlock(ptl);
         return ret;
  }
  
@@ -1828,7 +1859,7 @@ static void __split_huge_pud_locked(struct vm_area_struct *vma, pud_t *pud,
         VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PUD_SIZE, vma);
         VM_BUG_ON(!pud_trans_huge(*pud) && !pud_devmap(*pud));
  
-       count_vm_event(THP_SPLIT_PMD);
+       count_vm_event(THP_SPLIT_PUD);
  
         pudp_huge_clear_flush_notify(vma, haddr, pud);
  }
@@ -2048,6 +2079,7 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
                 bool freeze, struct page *page)
  {
         pgd_t *pgd;
+       p4d_t *p4d;
         pud_t *pud;
         pmd_t *pmd;
  
@@ -2055,7 +2087,11 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
         if (!pgd_present(*pgd))
                 return;
  
-       pud = pud_offset(pgd, address);
+       p4d = p4d_offset(pgd, address);
+       if (!p4d_present(*p4d))
+               return;
+
+       pud = pud_offset(p4d, address);
         if (!pud_present(*pud))
                 return;
  
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index a7aa811b7d14c5a6f3825ad32df773fee5526991..e5828875f7bbd7a770d5c23334a0e3994ffe544f 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4403,7 +4403,9 @@ int hugetlb_reserve_pages(struct inode *inode,
         return 0;
  out_err:
         if (!vma || vma->vm_flags & VM_MAYSHARE)
-               region_abort(resv_map, from, to);
+               /* Don't call region_abort if region_chg failed */
+               if (chg >= 0)
+                       region_abort(resv_map, from, to);
         if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER))
                 kref_put(&resv_map->refs, resv_map_release);
         return ret;
@@ -4555,7 +4557,8 @@ out:
  int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
  {
         pgd_t *pgd = pgd_offset(mm, *addr);
-       pud_t *pud = pud_offset(pgd, *addr);
+       p4d_t *p4d = p4d_offset(pgd, *addr);
+       pud_t *pud = pud_offset(p4d, *addr);
  
         BUG_ON(page_count(virt_to_page(ptep)) == 0);
         if (page_count(virt_to_page(ptep)) == 1)
@@ -4586,11 +4589,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
                         unsigned long addr, unsigned long sz)
  {
         pgd_t *pgd;
+       p4d_t *p4d;
         pud_t *pud;
         pte_t *pte = NULL;
  
         pgd = pgd_offset(mm, addr);
-       pud = pud_alloc(mm, pgd, addr);
+       p4d = p4d_offset(pgd, addr);
+       pud = pud_alloc(mm, p4d, addr);
         if (pud) {
                 if (sz == PUD_SIZE) {
                         pte = (pte_t *)pud;
@@ -4610,18 +4615,22 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
  pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
  {
         pgd_t *pgd;
+       p4d_t *p4d;
         pud_t *pud;
-       pmd_t *pmd = NULL;
+       pmd_t *pmd;
  
         pgd = pgd_offset(mm, addr);
-       if (pgd_present(*pgd)) {
-               pud = pud_offset(pgd, addr);
-               if (pud_present(*pud)) {
-                       if (pud_huge(*pud))
-                               return (pte_t *)pud;
-                       pmd = pmd_offset(pud, addr);
-               }
-       }
+       if (!pgd_present(*pgd))
+               return NULL;
+       p4d = p4d_offset(pgd, addr);
+       if (!p4d_present(*p4d))
+               return NULL;
+       pud = pud_offset(p4d, addr);
+       if (!pud_present(*pud))
+               return NULL;
+       if (pud_huge(*pud))
+               return (pte_t *)pud;
+       pmd = pmd_offset(pud, addr);
         return (pte_t *) pmd;
  }
  
@@ -4644,6 +4653,7 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
  {
         struct page *page = NULL;
         spinlock_t *ptl;
+       pte_t pte;
  retry:
         ptl = pmd_lockptr(mm, pmd);
         spin_lock(ptl);
@@ -4653,12 +4663,13 @@ retry:
          */
         if (!pmd_huge(*pmd))
                 goto out;
-       if (pmd_present(*pmd)) {
+       pte = huge_ptep_get((pte_t *)pmd);
+       if (pte_present(pte)) {
                 page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT);
                 if (flags & FOLL_GET)
                         get_page(page);
         } else {
-               if (is_hugetlb_entry_migration(huge_ptep_get((pte_t *)pmd))) {
+               if (is_hugetlb_entry_migration(pte)) {
                         spin_unlock(ptl);
                         __migration_entry_wait(mm, (pte_t *)pmd, ptl);
                         goto retry;
diff --git a/mm/internal.h b/mm/internal.h

index ccfc2a2969f4402bdbfb27e0b48df151f4da68b7..266efaeaa370a46debcc5b6b614a72e33833ac4d 100644 (file)
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -481,6 +481,13 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
  enum ttu_flags;
  struct tlbflush_unmap_batch;
  
+
+/*
+ * only for MM internal work items which do not depend on
+ * any allocations or locks which might depend on allocations
+ */
+extern struct workqueue_struct *mm_percpu_wq;
+
  #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
  void try_to_unmap_flush(void);
  void try_to_unmap_flush_dirty(void);
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h

index 1c260e6b3b3c6a1f26fc1e13a0fdb39099bbbf68..dd2dea8eb0771a506c0b510efc79c3fc5253bda5 100644 (file)
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -96,11 +96,6 @@ static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
                 << KASAN_SHADOW_SCALE_SHIFT);
  }
  
-static inline bool kasan_report_enabled(void)
-{
-       return !current->kasan_depth;
-}
-
  void kasan_report(unsigned long addr, size_t size,
                 bool is_write, unsigned long ip);
  void kasan_report_double_free(struct kmem_cache *cache, void *object,
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c

index 31238dad85fbc6c630963323c236811c0e4f44f5..b96a5f773d880869c1c84510fbb0063ee63faed9 100644 (file)
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -30,6 +30,9 @@
   */
  unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss;
  
+#if CONFIG_PGTABLE_LEVELS > 4
+p4d_t kasan_zero_p4d[PTRS_PER_P4D] __page_aligned_bss;
+#endif
  #if CONFIG_PGTABLE_LEVELS > 3
  pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss;
  #endif
@@ -82,10 +85,10 @@ static void __init zero_pmd_populate(pud_t *pud, unsigned long addr,
         } while (pmd++, addr = next, addr != end);
  }
  
-static void __init zero_pud_populate(pgd_t *pgd, unsigned long addr,
+static void __init zero_pud_populate(p4d_t *p4d, unsigned long addr,
                                 unsigned long end)
  {
-       pud_t *pud = pud_offset(pgd, addr);
+       pud_t *pud = pud_offset(p4d, addr);
         unsigned long next;
  
         do {
@@ -107,6 +110,23 @@ static void __init zero_pud_populate(pgd_t *pgd, unsigned long addr,
         } while (pud++, addr = next, addr != end);
  }
  
+static void __init zero_p4d_populate(pgd_t *pgd, unsigned long addr,
+                               unsigned long end)
+{
+       p4d_t *p4d = p4d_offset(pgd, addr);
+       unsigned long next;
+
+       do {
+               next = p4d_addr_end(addr, end);
+
+               if (p4d_none(*p4d)) {
+                       p4d_populate(&init_mm, p4d,
+                               early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+               }
+               zero_pud_populate(p4d, addr, next);
+       } while (p4d++, addr = next, addr != end);
+}
+
  /**
   * kasan_populate_zero_shadow - populate shadow memory region with
   *                               kasan_zero_page
@@ -125,6 +145,7 @@ void __init kasan_populate_zero_shadow(const void *shadow_start,
                 next = pgd_addr_end(addr, end);
  
                 if (IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) {
+                       p4d_t *p4d;
                         pud_t *pud;
                         pmd_t *pmd;
  
@@ -135,9 +156,22 @@ void __init kasan_populate_zero_shadow(const void *shadow_start,
                          * 3,2 - level page tables where we don't have
                          * puds,pmds, so pgd_populate(), pud_populate()
                          * is noops.
+                        *
+                        * The ifndef is required to avoid build breakage.
+                        *
+                        * With 5level-fixup.h, pgd_populate() is not a nop and
+                        * we reference kasan_zero_p4d. It's not defined
+                        * unless 5-level paging is enabled.
+                        *
+                        * The ifndef can be dropped once all KASAN-enabled
+                        * architectures have switched to pgtable-nop4d.h.
                          */
-                       pgd_populate(&init_mm, pgd, lm_alias(kasan_zero_pud));
-                       pud = pud_offset(pgd, addr);
+#ifndef __ARCH_HAS_5LEVEL_HACK
+                       pgd_populate(&init_mm, pgd, lm_alias(kasan_zero_p4d));
+#endif
+                       p4d = p4d_offset(pgd, addr);
+                       p4d_populate(&init_mm, p4d, lm_alias(kasan_zero_pud));
+                       pud = pud_offset(p4d, addr);
                         pud_populate(&init_mm, pud, lm_alias(kasan_zero_pmd));
                         pmd = pmd_offset(pud, addr);
                         pmd_populate_kernel(&init_mm, pmd, lm_alias(kasan_zero_pte));
@@ -148,6 +182,6 @@ void __init kasan_populate_zero_shadow(const void *shadow_start,
                         pgd_populate(&init_mm, pgd,
                                 early_alloc(PAGE_SIZE, NUMA_NO_NODE));
                 }
-               zero_pud_populate(pgd, addr, next);
+               zero_p4d_populate(pgd, addr, next);
         } while (pgd++, addr = next, addr != end);
  }
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c

index 6f1ed16308736918730ea836b5fecafc908e487b..3a8ddf8baf7dc3d52597bf0e53753c0cc17503cd 100644 (file)
--- a/mm/kasan/quarantine.c
+++ b/mm/kasan/quarantine.c
@@ -25,6 +25,7 @@
  #include <linux/printk.h>
  #include <linux/shrinker.h>
  #include <linux/slab.h>
+#include <linux/srcu.h>
  #include <linux/string.h>
  #include <linux/types.h>
  
@@ -103,6 +104,7 @@ static int quarantine_tail;
  /* Total size of all objects in global_quarantine across all batches. */
  static unsigned long quarantine_size;
  static DEFINE_SPINLOCK(quarantine_lock);
+DEFINE_STATIC_SRCU(remove_cache_srcu);
  
  /* Maximum size of the global queue. */
  static unsigned long quarantine_max_size;
@@ -173,17 +175,22 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
         struct qlist_head *q;
         struct qlist_head temp = QLIST_INIT;
  
+       /*
+        * Note: irq must be disabled until after we move the batch to the
+        * global quarantine. Otherwise quarantine_remove_cache() can miss
+        * some objects belonging to the cache if they are in our local temp
+        * list. quarantine_remove_cache() executes on_each_cpu() at the
+        * beginning which ensures that it either sees the objects in per-cpu
+        * lists or in the global quarantine.
+        */
         local_irq_save(flags);
  
         q = this_cpu_ptr(&cpu_quarantine);
         qlist_put(q, &info->quarantine_link, cache->size);
-       if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE))
+       if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) {
                 qlist_move_all(q, &temp);
  
-       local_irq_restore(flags);
-
-       if (unlikely(!qlist_empty(&temp))) {
-               spin_lock_irqsave(&quarantine_lock, flags);
+               spin_lock(&quarantine_lock);
                 WRITE_ONCE(quarantine_size, quarantine_size + temp.bytes);
                 qlist_move_all(&temp, &global_quarantine[quarantine_tail]);
                 if (global_quarantine[quarantine_tail].bytes >=
@@ -196,20 +203,33 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
                         if (new_tail != quarantine_head)
                                 quarantine_tail = new_tail;
                 }
-               spin_unlock_irqrestore(&quarantine_lock, flags);
+               spin_unlock(&quarantine_lock);
         }
+
+       local_irq_restore(flags);
  }
  
  void quarantine_reduce(void)
  {
         size_t total_size, new_quarantine_size, percpu_quarantines;
         unsigned long flags;
+       int srcu_idx;
         struct qlist_head to_free = QLIST_INIT;
  
         if (likely(READ_ONCE(quarantine_size) <=
                    READ_ONCE(quarantine_max_size)))
                 return;
  
+       /*
+        * srcu critical section ensures that quarantine_remove_cache()
+        * will not miss objects belonging to the cache while they are in our
+        * local to_free list. srcu is chosen because (1) it gives us private
+        * grace period domain that does not interfere with anything else,
+        * and (2) it allows synchronize_srcu() to return without waiting
+        * if there are no pending read critical sections (which is the
+        * expected case).
+        */
+       srcu_idx = srcu_read_lock(&remove_cache_srcu);
         spin_lock_irqsave(&quarantine_lock, flags);
  
         /*
@@ -237,6 +257,7 @@ void quarantine_reduce(void)
         spin_unlock_irqrestore(&quarantine_lock, flags);
  
         qlist_free_all(&to_free, NULL);
+       srcu_read_unlock(&remove_cache_srcu, srcu_idx);
  }
  
  static void qlist_move_cache(struct qlist_head *from,
@@ -280,12 +301,28 @@ void quarantine_remove_cache(struct kmem_cache *cache)
         unsigned long flags, i;
         struct qlist_head to_free = QLIST_INIT;
  
+       /*
+        * Must be careful to not miss any objects that are being moved from
+        * per-cpu list to the global quarantine in quarantine_put(),
+        * nor objects being freed in quarantine_reduce(). on_each_cpu()
+        * achieves the first goal, while synchronize_srcu() achieves the
+        * second.
+        */
         on_each_cpu(per_cpu_remove_cache, cache, 1);
  
         spin_lock_irqsave(&quarantine_lock, flags);
-       for (i = 0; i < QUARANTINE_BATCHES; i++)
+       for (i = 0; i < QUARANTINE_BATCHES; i++) {
+               if (qlist_empty(&global_quarantine[i]))
+                       continue;
                 qlist_move_cache(&global_quarantine[i], &to_free, cache);
+               /* Scanning whole quarantine can take a while. */
+               spin_unlock_irqrestore(&quarantine_lock, flags);
+               cond_resched();
+               spin_lock_irqsave(&quarantine_lock, flags);
+       }
         spin_unlock_irqrestore(&quarantine_lock, flags);
  
         qlist_free_all(&to_free, cache);
+
+       synchronize_srcu(&remove_cache_srcu);
  }
diff --git a/mm/kasan/report.c b/mm/kasan/report.c

index f479365530b6484bbd5cae42064521fed362961e..ab42a0803f161c6834b1362aefd5ded1990eb04f 100644 (file)
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -13,7 +13,9 @@
   *
   */
  
+#include <linux/bitops.h>
  #include <linux/ftrace.h>
+#include <linux/init.h>
  #include <linux/kernel.h>
  #include <linux/mm.h>
  #include <linux/printk.h>
@@ -293,6 +295,40 @@ static void kasan_report_error(struct kasan_access_info *info)
         kasan_end_report(&flags);
  }
  
+static unsigned long kasan_flags;
+
+#define KASAN_BIT_REPORTED     0
+#define KASAN_BIT_MULTI_SHOT   1
+
+bool kasan_save_enable_multi_shot(void)
+{
+       return test_and_set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags);
+}
+EXPORT_SYMBOL_GPL(kasan_save_enable_multi_shot);
+
+void kasan_restore_multi_shot(bool enabled)
+{
+       if (!enabled)
+               clear_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags);
+}
+EXPORT_SYMBOL_GPL(kasan_restore_multi_shot);
+
+static int __init kasan_set_multi_shot(char *str)
+{
+       set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags);
+       return 1;
+}
+__setup("kasan_multi_shot", kasan_set_multi_shot);
+
+static inline bool kasan_report_enabled(void)
+{
+       if (current->kasan_depth)
+               return false;
+       if (test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags))
+               return true;
+       return !test_and_set_bit(KASAN_BIT_REPORTED, &kasan_flags);
+}
+
  void kasan_report(unsigned long addr, size_t size,
                 bool is_write, unsigned long ip)
  {
diff --git a/mm/kmemleak.c b/mm/kmemleak.c

index 26c874e90b12ef164d7b80171bb8bea979df5b1a..20036d4f9f13d4dc7b5b091e389b8a7b6b2ca32f 100644 (file)
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1416,7 +1416,7 @@ static void kmemleak_scan(void)
         /* data/bss scanning */
         scan_large_block(_sdata, _edata);
         scan_large_block(__bss_start, __bss_stop);
-       scan_large_block(__start_data_ro_after_init, __end_data_ro_after_init);
+       scan_large_block(__start_ro_after_init, __end_ro_after_init);
  
  #ifdef CONFIG_SMP
         /* per-cpu sections scanning */
diff --git a/mm/madvise.c b/mm/madvise.c

index dc5927c812d3d1f9a209fbdbea3a36a61cbde17d..7a2abf0127aef7a9d4879278293d8cab766133e1 100644 (file)
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -513,7 +513,43 @@ static long madvise_dontneed(struct vm_area_struct *vma,
         if (!can_madv_dontneed_vma(vma))
                 return -EINVAL;
  
-       userfaultfd_remove(vma, prev, start, end);
+       if (!userfaultfd_remove(vma, start, end)) {
+               *prev = NULL; /* mmap_sem has been dropped, prev is stale */
+
+               down_read(&current->mm->mmap_sem);
+               vma = find_vma(current->mm, start);
+               if (!vma)
+                       return -ENOMEM;
+               if (start < vma->vm_start) {
+                       /*
+                        * This "vma" under revalidation is the one
+                        * with the lowest vma->vm_start where start
+                        * is also < vma->vm_end. If start <
+                        * vma->vm_start it means a hole materialized
+                        * in the user address space within the
+                        * virtual range passed to MADV_DONTNEED.
+                        */
+                       return -ENOMEM;
+               }
+               if (!can_madv_dontneed_vma(vma))
+                       return -EINVAL;
+               if (end > vma->vm_end) {
+                       /*
+                        * Don't fail if end > vma->vm_end. If the old
+                        * vma was split while the mmap_sem was
+                        * released the effect of the concurrent
+                        * operation may not cause MADV_DONTNEED to
+                        * have an undefined result. There may be an
+                        * adjacent next vma that we'll walk
+                        * next. userfaultfd_remove() will generate an
+                        * UFFD_EVENT_REMOVE repetition on the
+                        * end-vma->vm_end range, but the manager can
+                        * handle a repetition fine.
+                        */
+                       end = vma->vm_end;
+               }
+               VM_WARN_ON(start >= end);
+       }
         zap_page_range(vma, start, end - start);
         return 0;
  }
@@ -554,8 +590,10 @@ static long madvise_remove(struct vm_area_struct *vma,
          * mmap_sem.
          */
         get_file(f);
-       userfaultfd_remove(vma, prev, start, end);
-       up_read(&current->mm->mmap_sem);
+       if (userfaultfd_remove(vma, start, end)) {
+               /* mmap_sem was not released by userfaultfd_remove() */
+               up_read(&current->mm->mmap_sem);
+       }
         error = vfs_fallocate(f,
                                 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                                 offset, end - start);
diff --git a/mm/memblock.c b/mm/memblock.c

index b64b47803e529a87d87f3e3f022e97f17ff606be..696f06d17c4e89b676f19c3c3a5a4c1908697caf 100644 (file)
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1118,7 +1118,10 @@ unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn,
                 }
         } while (left < right);
  
-       return min(PHYS_PFN(type->regions[right].base), max_pfn);
+       if (right == type->cnt)
+               return max_pfn;
+       else
+               return min(PHYS_PFN(type->regions[right].base), max_pfn);
  }
  
  /**
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index c52ec893e241cf6b52764797f6aea5ed56219e23..2bd7541d7c11231431c060ca6cfe84a89f096fe3 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -466,6 +466,8 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
         struct mem_cgroup_tree_per_node *mctz;
  
         mctz = soft_limit_tree_from_page(page);
+       if (!mctz)
+               return;
         /*
          * Necessary to update all ancestors when hierarchy is used.
          * because their event counter is not touched.
@@ -503,7 +505,8 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
         for_each_node(nid) {
                 mz = mem_cgroup_nodeinfo(memcg, nid);
                 mctz = soft_limit_tree_node(nid);
-               mem_cgroup_remove_exceeded(mz, mctz);
+               if (mctz)
+                       mem_cgroup_remove_exceeded(mz, mctz);
         }
  }
  
@@ -2558,7 +2561,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
          * is empty. Do it lockless to prevent lock bouncing. Races
          * are acceptable as soft limit is best effort anyway.
          */
-       if (RB_EMPTY_ROOT(&mctz->rb_root))
+       if (!mctz || RB_EMPTY_ROOT(&mctz->rb_root))
                 return 0;
  
         /*
@@ -4135,17 +4138,22 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
         kfree(memcg->nodeinfo[node]);
  }
  
-static void mem_cgroup_free(struct mem_cgroup *memcg)
+static void __mem_cgroup_free(struct mem_cgroup *memcg)
  {
         int node;
  
-       memcg_wb_domain_exit(memcg);
         for_each_node(node)
                 free_mem_cgroup_per_node_info(memcg, node);
         free_percpu(memcg->stat);
         kfree(memcg);
  }
  
+static void mem_cgroup_free(struct mem_cgroup *memcg)
+{
+       memcg_wb_domain_exit(memcg);
+       __mem_cgroup_free(memcg);
+}
+
  static struct mem_cgroup *mem_cgroup_alloc(void)
  {
         struct mem_cgroup *memcg;
@@ -4196,7 +4204,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
  fail:
         if (memcg->id.id > 0)
                 idr_remove(&mem_cgroup_idr, memcg->id.id);
-       mem_cgroup_free(memcg);
+       __mem_cgroup_free(memcg);
         return NULL;
  }
  
diff --git a/mm/memory.c b/mm/memory.c

index a97a4cec2e1fcd94c5e1eb5f1af334749efe64fa..235ba51b2fbf07ffeeeb6b70d6522b4b0addb3de 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -445,7 +445,7 @@ static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
         mm_dec_nr_pmds(tlb->mm);
  }
  
-static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+static inline void free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
                                 unsigned long addr, unsigned long end,
                                 unsigned long floor, unsigned long ceiling)
  {
@@ -454,7 +454,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
         unsigned long start;
  
         start = addr;
-       pud = pud_offset(pgd, addr);
+       pud = pud_offset(p4d, addr);
         do {
                 next = pud_addr_end(addr, end);
                 if (pud_none_or_clear_bad(pud))
@@ -462,6 +462,39 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
                 free_pmd_range(tlb, pud, addr, next, floor, ceiling);
         } while (pud++, addr = next, addr != end);
  
+       start &= P4D_MASK;
+       if (start < floor)
+               return;
+       if (ceiling) {
+               ceiling &= P4D_MASK;
+               if (!ceiling)
+                       return;
+       }
+       if (end - 1 > ceiling - 1)
+               return;
+
+       pud = pud_offset(p4d, start);
+       p4d_clear(p4d);
+       pud_free_tlb(tlb, pud, start);
+}
+
+static inline void free_p4d_range(struct mmu_gather *tlb, pgd_t *pgd,
+                               unsigned long addr, unsigned long end,
+                               unsigned long floor, unsigned long ceiling)
+{
+       p4d_t *p4d;
+       unsigned long next;
+       unsigned long start;
+
+       start = addr;
+       p4d = p4d_offset(pgd, addr);
+       do {
+               next = p4d_addr_end(addr, end);
+               if (p4d_none_or_clear_bad(p4d))
+                       continue;
+               free_pud_range(tlb, p4d, addr, next, floor, ceiling);
+       } while (p4d++, addr = next, addr != end);
+
         start &= PGDIR_MASK;
         if (start < floor)
                 return;
@@ -473,9 +506,9 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
         if (end - 1 > ceiling - 1)
                 return;
  
-       pud = pud_offset(pgd, start);
+       p4d = p4d_offset(pgd, start);
         pgd_clear(pgd);
-       pud_free_tlb(tlb, pud, start);
+       p4d_free_tlb(tlb, p4d, start);
  }
  
  /*
@@ -539,7 +572,7 @@ void free_pgd_range(struct mmu_gather *tlb,
                 next = pgd_addr_end(addr, end);
                 if (pgd_none_or_clear_bad(pgd))
                         continue;
-               free_pud_range(tlb, pgd, addr, next, floor, ceiling);
+               free_p4d_range(tlb, pgd, addr, next, floor, ceiling);
         } while (pgd++, addr = next, addr != end);
  }
  
@@ -658,7 +691,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
                           pte_t pte, struct page *page)
  {
         pgd_t *pgd = pgd_offset(vma->vm_mm, addr);
-       pud_t *pud = pud_offset(pgd, addr);
+       p4d_t *p4d = p4d_offset(pgd, addr);
+       pud_t *pud = pud_offset(p4d, addr);
         pmd_t *pmd = pmd_offset(pud, addr);
         struct address_space *mapping;
         pgoff_t index;
@@ -1023,16 +1057,16 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src
  }
  
  static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-               pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
+               p4d_t *dst_p4d, p4d_t *src_p4d, struct vm_area_struct *vma,
                 unsigned long addr, unsigned long end)
  {
         pud_t *src_pud, *dst_pud;
         unsigned long next;
  
-       dst_pud = pud_alloc(dst_mm, dst_pgd, addr);
+       dst_pud = pud_alloc(dst_mm, dst_p4d, addr);
         if (!dst_pud)
                 return -ENOMEM;
-       src_pud = pud_offset(src_pgd, addr);
+       src_pud = pud_offset(src_p4d, addr);
         do {
                 next = pud_addr_end(addr, end);
                 if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) {
@@ -1056,6 +1090,28 @@ static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src
         return 0;
  }
  
+static inline int copy_p4d_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+               pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
+               unsigned long addr, unsigned long end)
+{
+       p4d_t *src_p4d, *dst_p4d;
+       unsigned long next;
+
+       dst_p4d = p4d_alloc(dst_mm, dst_pgd, addr);
+       if (!dst_p4d)
+               return -ENOMEM;
+       src_p4d = p4d_offset(src_pgd, addr);
+       do {
+               next = p4d_addr_end(addr, end);
+               if (p4d_none_or_clear_bad(src_p4d))
+                       continue;
+               if (copy_pud_range(dst_mm, src_mm, dst_p4d, src_p4d,
+                                               vma, addr, next))
+                       return -ENOMEM;
+       } while (dst_p4d++, src_p4d++, addr = next, addr != end);
+       return 0;
+}
+
  int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                 struct vm_area_struct *vma)
  {
@@ -1111,7 +1167,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                 next = pgd_addr_end(addr, end);
                 if (pgd_none_or_clear_bad(src_pgd))
                         continue;
-               if (unlikely(copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
+               if (unlikely(copy_p4d_range(dst_mm, src_mm, dst_pgd, src_pgd,
                                             vma, addr, next))) {
                         ret = -ENOMEM;
                         break;
@@ -1267,14 +1323,14 @@ next:
  }
  
  static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
-                               struct vm_area_struct *vma, pgd_t *pgd,
+                               struct vm_area_struct *vma, p4d_t *p4d,
                                 unsigned long addr, unsigned long end,
                                 struct zap_details *details)
  {
         pud_t *pud;
         unsigned long next;
  
-       pud = pud_offset(pgd, addr);
+       pud = pud_offset(p4d, addr);
         do {
                 next = pud_addr_end(addr, end);
                 if (pud_trans_huge(*pud) || pud_devmap(*pud)) {
@@ -1295,6 +1351,25 @@ next:
         return addr;
  }
  
+static inline unsigned long zap_p4d_range(struct mmu_gather *tlb,
+                               struct vm_area_struct *vma, pgd_t *pgd,
+                               unsigned long addr, unsigned long end,
+                               struct zap_details *details)
+{
+       p4d_t *p4d;
+       unsigned long next;
+
+       p4d = p4d_offset(pgd, addr);
+       do {
+               next = p4d_addr_end(addr, end);
+               if (p4d_none_or_clear_bad(p4d))
+                       continue;
+               next = zap_pud_range(tlb, vma, p4d, addr, next, details);
+       } while (p4d++, addr = next, addr != end);
+
+       return addr;
+}
+
  void unmap_page_range(struct mmu_gather *tlb,
                              struct vm_area_struct *vma,
                              unsigned long addr, unsigned long end,
@@ -1310,7 +1385,7 @@ void unmap_page_range(struct mmu_gather *tlb,
                 next = pgd_addr_end(addr, end);
                 if (pgd_none_or_clear_bad(pgd))
                         continue;
-               next = zap_pud_range(tlb, vma, pgd, addr, next, details);
+               next = zap_p4d_range(tlb, vma, pgd, addr, next, details);
         } while (pgd++, addr = next, addr != end);
         tlb_end_vma(tlb, vma);
  }
@@ -1465,16 +1540,24 @@ EXPORT_SYMBOL_GPL(zap_vma_ptes);
  pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
                         spinlock_t **ptl)
  {
-       pgd_t *pgd = pgd_offset(mm, addr);
-       pud_t *pud = pud_alloc(mm, pgd, addr);
-       if (pud) {
-               pmd_t *pmd = pmd_alloc(mm, pud, addr);
-               if (pmd) {
-                       VM_BUG_ON(pmd_trans_huge(*pmd));
-                       return pte_alloc_map_lock(mm, pmd, addr, ptl);
-               }
-       }
-       return NULL;
+       pgd_t *pgd;
+       p4d_t *p4d;
+       pud_t *pud;
+       pmd_t *pmd;
+
+       pgd = pgd_offset(mm, addr);
+       p4d = p4d_alloc(mm, pgd, addr);
+       if (!p4d)
+               return NULL;
+       pud = pud_alloc(mm, p4d, addr);
+       if (!pud)
+               return NULL;
+       pmd = pmd_alloc(mm, pud, addr);
+       if (!pmd)
+               return NULL;
+
+       VM_BUG_ON(pmd_trans_huge(*pmd));
+       return pte_alloc_map_lock(mm, pmd, addr, ptl);
  }
  
  /*
@@ -1740,7 +1823,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
         return 0;
  }
  
-static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
+static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d,
                         unsigned long addr, unsigned long end,
                         unsigned long pfn, pgprot_t prot)
  {
@@ -1748,7 +1831,7 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
         unsigned long next;
  
         pfn -= addr >> PAGE_SHIFT;
-       pud = pud_alloc(mm, pgd, addr);
+       pud = pud_alloc(mm, p4d, addr);
         if (!pud)
                 return -ENOMEM;
         do {
@@ -1760,6 +1843,26 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
         return 0;
  }
  
+static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
+                       unsigned long addr, unsigned long end,
+                       unsigned long pfn, pgprot_t prot)
+{
+       p4d_t *p4d;
+       unsigned long next;
+
+       pfn -= addr >> PAGE_SHIFT;
+       p4d = p4d_alloc(mm, pgd, addr);
+       if (!p4d)
+               return -ENOMEM;
+       do {
+               next = p4d_addr_end(addr, end);
+               if (remap_pud_range(mm, p4d, addr, next,
+                               pfn + (addr >> PAGE_SHIFT), prot))
+                       return -ENOMEM;
+       } while (p4d++, addr = next, addr != end);
+       return 0;
+}
+
  /**
   * remap_pfn_range - remap kernel memory to userspace
   * @vma: user vma to map to
@@ -1816,7 +1919,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
         flush_cache_range(vma, addr, end);
         do {
                 next = pgd_addr_end(addr, end);
-               err = remap_pud_range(mm, pgd, addr, next,
+               err = remap_p4d_range(mm, pgd, addr, next,
                                 pfn + (addr >> PAGE_SHIFT), prot);
                 if (err)
                         break;
@@ -1932,7 +2035,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
         return err;
  }
  
-static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
+static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,
                                      unsigned long addr, unsigned long end,
                                      pte_fn_t fn, void *data)
  {
@@ -1940,7 +2043,7 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
         unsigned long next;
         int err;
  
-       pud = pud_alloc(mm, pgd, addr);
+       pud = pud_alloc(mm, p4d, addr);
         if (!pud)
                 return -ENOMEM;
         do {
@@ -1952,6 +2055,26 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
         return err;
  }
  
+static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd,
+                                    unsigned long addr, unsigned long end,
+                                    pte_fn_t fn, void *data)
+{
+       p4d_t *p4d;
+       unsigned long next;
+       int err;
+
+       p4d = p4d_alloc(mm, pgd, addr);
+       if (!p4d)
+               return -ENOMEM;
+       do {
+               next = p4d_addr_end(addr, end);
+               err = apply_to_pud_range(mm, p4d, addr, next, fn, data);
+               if (err)
+                       break;
+       } while (p4d++, addr = next, addr != end);
+       return err;
+}
+
  /*
   * Scan a region of virtual memory, filling in page tables as necessary
   * and calling a provided function on each leaf page table.
@@ -1970,7 +2093,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
         pgd = pgd_offset(mm, addr);
         do {
                 next = pgd_addr_end(addr, end);
-               err = apply_to_pud_range(mm, pgd, addr, next, fn, data);
+               err = apply_to_p4d_range(mm, pgd, addr, next, fn, data);
                 if (err)
                         break;
         } while (pgd++, addr = next, addr != end);
@@ -3653,11 +3776,15 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
         };
         struct mm_struct *mm = vma->vm_mm;
         pgd_t *pgd;
+       p4d_t *p4d;
         int ret;
  
         pgd = pgd_offset(mm, address);
+       p4d = p4d_alloc(mm, pgd, address);
+       if (!p4d)
+               return VM_FAULT_OOM;
  
-       vmf.pud = pud_alloc(mm, pgd, address);
+       vmf.pud = pud_alloc(mm, p4d, address);
         if (!vmf.pud)
                 return VM_FAULT_OOM;
         if (pud_none(*vmf.pud) && transparent_hugepage_enabled(vma)) {
@@ -3779,12 +3906,35 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
  }
  EXPORT_SYMBOL_GPL(handle_mm_fault);
  
+#ifndef __PAGETABLE_P4D_FOLDED
+/*
+ * Allocate p4d page table.
+ * We've already handled the fast-path in-line.
+ */
+int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+{
+       p4d_t *new = p4d_alloc_one(mm, address);
+       if (!new)
+               return -ENOMEM;
+
+       smp_wmb(); /* See comment in __pte_alloc */
+
+       spin_lock(&mm->page_table_lock);
+       if (pgd_present(*pgd))          /* Another has populated it */
+               p4d_free(mm, new);
+       else
+               pgd_populate(mm, pgd, new);
+       spin_unlock(&mm->page_table_lock);
+       return 0;
+}
+#endif /* __PAGETABLE_P4D_FOLDED */
+
  #ifndef __PAGETABLE_PUD_FOLDED
  /*
   * Allocate page upper directory.
   * We've already handled the fast-path in-line.
   */
-int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address)
  {
         pud_t *new = pud_alloc_one(mm, address);
         if (!new)
@@ -3793,10 +3943,17 @@ int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
         smp_wmb(); /* See comment in __pte_alloc */
  
         spin_lock(&mm->page_table_lock);
-       if (pgd_present(*pgd))          /* Another has populated it */
+#ifndef __ARCH_HAS_5LEVEL_HACK
+       if (p4d_present(*p4d))          /* Another has populated it */
                 pud_free(mm, new);
         else
-               pgd_populate(mm, pgd, new);
+               p4d_populate(mm, p4d, new);
+#else
+       if (pgd_present(*p4d))          /* Another has populated it */
+               pud_free(mm, new);
+       else
+               pgd_populate(mm, p4d, new);
+#endif /* __ARCH_HAS_5LEVEL_HACK */
         spin_unlock(&mm->page_table_lock);
         return 0;
  }
@@ -3839,6 +3996,7 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
                 pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
  {
         pgd_t *pgd;
+       p4d_t *p4d;
         pud_t *pud;
         pmd_t *pmd;
         pte_t *ptep;
@@ -3847,7 +4005,11 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
         if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
                 goto out;
  
-       pud = pud_offset(pgd, address);
+       p4d = p4d_offset(pgd, address);
+       if (p4d_none(*p4d) || unlikely(p4d_bad(*p4d)))
+               goto out;
+
+       pud = pud_offset(p4d, address);
         if (pud_none(*pud) || unlikely(pud_bad(*pud)))
                 goto out;
  
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c

index 295479b792ec488b6d984ef98e7e715f6ac162b4..6fa7208bcd564ec8fb6bcf25e206aef9bd724ecb 100644 (file)
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -125,9 +125,12 @@ void put_online_mems(void)
  
  }
  
+/* Serializes write accesses to mem_hotplug.active_writer. */
+static DEFINE_MUTEX(memory_add_remove_lock);
+
  void mem_hotplug_begin(void)
  {
-       assert_held_device_hotplug();
+       mutex_lock(&memory_add_remove_lock);
  
         mem_hotplug.active_writer = current;
  
@@ -147,6 +150,7 @@ void mem_hotplug_done(void)
         mem_hotplug.active_writer = NULL;
         mutex_unlock(&mem_hotplug.lock);
         memhp_lock_release();
+       mutex_unlock(&memory_add_remove_lock);
  }
  
  /* add this memory to iomem resource */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c

index 75b2745bac4145933a5b969e7b8b2a36fc73d45d..37d0b334bfe9f09222a9626a99b91f4bfca9d784 100644 (file)
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1529,7 +1529,6 @@ COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
  COMPAT_SYSCALL_DEFINE3(set_mempolicy, int, mode, compat_ulong_t __user *, nmask,
                        compat_ulong_t, maxnode)
  {
-       long err = 0;
         unsigned long __user *nm = NULL;
         unsigned long nr_bits, alloc_size;
         DECLARE_BITMAP(bm, MAX_NUMNODES);
@@ -1538,14 +1537,13 @@ COMPAT_SYSCALL_DEFINE3(set_mempolicy, int, mode, compat_ulong_t __user *, nmask,
         alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
  
         if (nmask) {
-               err = compat_get_bitmap(bm, nmask, nr_bits);
+               if (compat_get_bitmap(bm, nmask, nr_bits))
+                       return -EFAULT;
                 nm = compat_alloc_user_space(alloc_size);
-               err |= copy_to_user(nm, bm, alloc_size);
+               if (copy_to_user(nm, bm, alloc_size))
+                       return -EFAULT;
         }
  
-       if (err)
-               return -EFAULT;
-
         return sys_set_mempolicy(mode, nm, nr_bits+1);
  }
  
@@ -1553,7 +1551,6 @@ COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len,
                        compat_ulong_t, mode, compat_ulong_t __user *, nmask,
                        compat_ulong_t, maxnode, compat_ulong_t, flags)
  {
-       long err = 0;
         unsigned long __user *nm = NULL;
         unsigned long nr_bits, alloc_size;
         nodemask_t bm;
@@ -1562,14 +1559,13 @@ COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len,
         alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
  
         if (nmask) {
-               err = compat_get_bitmap(nodes_addr(bm), nmask, nr_bits);
+               if (compat_get_bitmap(nodes_addr(bm), nmask, nr_bits))
+                       return -EFAULT;
                 nm = compat_alloc_user_space(alloc_size);
-               err |= copy_to_user(nm, nodes_addr(bm), alloc_size);
+               if (copy_to_user(nm, nodes_addr(bm), alloc_size))
+                       return -EFAULT;
         }
  
-       if (err)
-               return -EFAULT;
-
         return sys_mbind(start, len, mode, nm, nr_bits+1, flags);
  }
  
diff --git a/mm/migrate.c b/mm/migrate.c

index 9a0897a14d37be3d7759d577f98060fa75c7be0f..738f1d5f83503e546960d005a034abf2dde2c0e7 100644 (file)
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -184,9 +184,9 @@ void putback_movable_pages(struct list_head *l)
                         unlock_page(page);
                         put_page(page);
                 } else {
-                       putback_lru_page(page);
                         dec_node_page_state(page, NR_ISOLATED_ANON +
                                         page_is_file_cache(page));
+                       putback_lru_page(page);
                 }
         }
  }
@@ -209,8 +209,11 @@ static int remove_migration_pte(struct page *page, struct vm_area_struct *vma,
  
         VM_BUG_ON_PAGE(PageTail(page), page);
         while (page_vma_mapped_walk(&pvmw)) {
-               new = page - pvmw.page->index +
-                       linear_page_index(vma, pvmw.address);
+               if (PageKsm(page))
+                       new = page;
+               else
+                       new = page - pvmw.page->index +
+                               linear_page_index(vma, pvmw.address);
  
                 get_page(new);
                 pte = pte_mkold(mk_pte(new, READ_ONCE(vma->vm_page_prot)));
diff --git a/mm/mlock.c b/mm/mlock.c

index 1050511f8b2bdbfbd55a69b8fcfc41ea6cd51b72..0dd9ca18e19ed7ddb499a480c5831c312791b10a 100644 (file)
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -380,6 +380,7 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
         pte = get_locked_pte(vma->vm_mm, start, &ptl);
         /* Make sure we do not cross the page table boundary */
         end = pgd_addr_end(start, end);
+       end = p4d_addr_end(start, end);
         end = pud_addr_end(start, end);
         end = pmd_addr_end(start, end);
  
@@ -442,7 +443,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
  
         while (start < end) {
                 struct page *page;
-               unsigned int page_mask;
+               unsigned int page_mask = 0;
                 unsigned long page_increm;
                 struct pagevec pvec;
                 struct zone *zone;
@@ -456,8 +457,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
                  * suits munlock very well (and if somehow an abnormal page
                  * has sneaked into the range, we won't oops here: great).
                  */
-               page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP,
-                               &page_mask);
+               page = follow_page(vma, start, FOLL_GET | FOLL_DUMP);
  
                 if (page && !IS_ERR(page)) {
                         if (PageTransTail(page)) {
@@ -468,8 +468,8 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
                                 /*
                                  * Any THP page found by follow_page_mask() may
                                  * have gotten split before reaching
-                                * munlock_vma_page(), so we need to recompute
-                                * the page_mask here.
+                                * munlock_vma_page(), so we need to compute
+                                * the page_mask here instead.
                                  */
                                 page_mask = munlock_vma_page(page);
                                 unlock_page(page);
diff --git a/mm/mprotect.c b/mm/mprotect.c

index 848e946b08e58e31bf6482bd091338a43bb66fe1..8edd0d576254d4c6a3974a42dd2a27eff17fa8bf 100644 (file)
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -193,14 +193,14 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
  }
  
  static inline unsigned long change_pud_range(struct vm_area_struct *vma,
-               pgd_t *pgd, unsigned long addr, unsigned long end,
+               p4d_t *p4d, unsigned long addr, unsigned long end,
                 pgprot_t newprot, int dirty_accountable, int prot_numa)
  {
         pud_t *pud;
         unsigned long next;
         unsigned long pages = 0;
  
-       pud = pud_offset(pgd, addr);
+       pud = pud_offset(p4d, addr);
         do {
                 next = pud_addr_end(addr, end);
                 if (pud_none_or_clear_bad(pud))
@@ -212,6 +212,26 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma,
         return pages;
  }
  
+static inline unsigned long change_p4d_range(struct vm_area_struct *vma,
+               pgd_t *pgd, unsigned long addr, unsigned long end,
+               pgprot_t newprot, int dirty_accountable, int prot_numa)
+{
+       p4d_t *p4d;
+       unsigned long next;
+       unsigned long pages = 0;
+
+       p4d = p4d_offset(pgd, addr);
+       do {
+               next = p4d_addr_end(addr, end);
+               if (p4d_none_or_clear_bad(p4d))
+                       continue;
+               pages += change_pud_range(vma, p4d, addr, next, newprot,
+                                dirty_accountable, prot_numa);
+       } while (p4d++, addr = next, addr != end);
+
+       return pages;
+}
+
  static unsigned long change_protection_range(struct vm_area_struct *vma,
                 unsigned long addr, unsigned long end, pgprot_t newprot,
                 int dirty_accountable, int prot_numa)
@@ -230,7 +250,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
                 next = pgd_addr_end(addr, end);
                 if (pgd_none_or_clear_bad(pgd))
                         continue;
-               pages += change_pud_range(vma, pgd, addr, next, newprot,
+               pages += change_p4d_range(vma, pgd, addr, next, newprot,
                                  dirty_accountable, prot_numa);
         } while (pgd++, addr = next, addr != end);
  
diff --git a/mm/mremap.c b/mm/mremap.c

index 8233b0105c8258ec5757c42c0a65e34b2908272c..cd8a1b199ef9496ef63a50d97f92e648b8eecd58 100644 (file)
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -32,6 +32,7 @@
  static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
  {
         pgd_t *pgd;
+       p4d_t *p4d;
         pud_t *pud;
         pmd_t *pmd;
  
@@ -39,7 +40,11 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
         if (pgd_none_or_clear_bad(pgd))
                 return NULL;
  
-       pud = pud_offset(pgd, addr);
+       p4d = p4d_offset(pgd, addr);
+       if (p4d_none_or_clear_bad(p4d))
+               return NULL;
+
+       pud = pud_offset(p4d, addr);
         if (pud_none_or_clear_bad(pud))
                 return NULL;
  
@@ -54,11 +59,15 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
                             unsigned long addr)
  {
         pgd_t *pgd;
+       p4d_t *p4d;
         pud_t *pud;
         pmd_t *pmd;
  
         pgd = pgd_offset(mm, addr);
-       pud = pud_alloc(mm, pgd, addr);
+       p4d = p4d_alloc(mm, pgd, addr);
+       if (!p4d)
+               return NULL;
+       pud = pud_alloc(mm, p4d, addr);
         if (!pud)
                 return NULL;
  
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index eaa64d2ffdc553af8ce6146bfa60ba73908f8b6a..07efbc3a86567676986105005f77c64f9f99597a 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -873,7 +873,8 @@ done_merging:
                 higher_page = page + (combined_pfn - pfn);
                 buddy_pfn = __find_buddy_pfn(combined_pfn, order + 1);
                 higher_buddy = higher_page + (buddy_pfn - combined_pfn);
-               if (page_is_buddy(higher_page, higher_buddy, order + 1)) {
+               if (pfn_valid_within(buddy_pfn) &&
+                   page_is_buddy(higher_page, higher_buddy, order + 1)) {
                         list_add_tail(&page->lru,
                                 &zone->free_area[order].free_list[migratetype]);
                         goto out;
@@ -1089,10 +1090,10 @@ static void free_pcppages_bulk(struct zone *zone, int count,
  {
         int migratetype = 0;
         int batch_free = 0;
-       unsigned long nr_scanned, flags;
+       unsigned long nr_scanned;
         bool isolated_pageblocks;
  
-       spin_lock_irqsave(&zone->lock, flags);
+       spin_lock(&zone->lock);
         isolated_pageblocks = has_isolate_pageblock(zone);
         nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
         if (nr_scanned)
@@ -1141,7 +1142,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
                         trace_mm_page_pcpu_drain(page, 0, mt);
                 } while (--count && --batch_free && !list_empty(list));
         }
-       spin_unlock_irqrestore(&zone->lock, flags);
+       spin_unlock(&zone->lock);
  }
  
  static void free_one_page(struct zone *zone,
@@ -1149,9 +1150,8 @@ static void free_one_page(struct zone *zone,
                                 unsigned int order,
                                 int migratetype)
  {
-       unsigned long nr_scanned, flags;
-       spin_lock_irqsave(&zone->lock, flags);
-       __count_vm_events(PGFREE, 1 << order);
+       unsigned long nr_scanned;
+       spin_lock(&zone->lock);
         nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
         if (nr_scanned)
                 __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);
@@ -1161,7 +1161,7 @@ static void free_one_page(struct zone *zone,
                 migratetype = get_pfnblock_migratetype(page, pfn);
         }
         __free_one_page(page, pfn, zone, order, migratetype);
-       spin_unlock_irqrestore(&zone->lock, flags);
+       spin_unlock(&zone->lock);
  }
  
  static void __meminit __init_single_page(struct page *page, unsigned long pfn,
@@ -1239,6 +1239,7 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
  
  static void __free_pages_ok(struct page *page, unsigned int order)
  {
+       unsigned long flags;
         int migratetype;
         unsigned long pfn = page_to_pfn(page);
  
@@ -1246,7 +1247,10 @@ static void __free_pages_ok(struct page *page, unsigned int order)
                 return;
  
         migratetype = get_pfnblock_migratetype(page, pfn);
+       local_irq_save(flags);
+       __count_vm_events(PGFREE, 1 << order);
         free_one_page(page_zone(page), page, pfn, order, migratetype);
+       local_irq_restore(flags);
  }
  
  static void __init __free_pages_boot_core(struct page *page, unsigned int order)
@@ -2218,9 +2222,8 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
                         int migratetype, bool cold)
  {
         int i, alloced = 0;
-       unsigned long flags;
  
-       spin_lock_irqsave(&zone->lock, flags);
+       spin_lock(&zone->lock);
         for (i = 0; i < count; ++i) {
                 struct page *page = __rmqueue(zone, order, migratetype);
                 if (unlikely(page == NULL))
@@ -2256,7 +2259,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
          * pages added to the pcp list.
          */
         __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
-       spin_unlock_irqrestore(&zone->lock, flags);
+       spin_unlock(&zone->lock);
         return alloced;
  }
  
@@ -2372,6 +2375,13 @@ void drain_all_pages(struct zone *zone)
          */
         static cpumask_t cpus_with_pcps;
  
+       /*
+        * Make sure nobody triggers this path before mm_percpu_wq is fully
+        * initialized.
+        */
+       if (WARN_ON_ONCE(!mm_percpu_wq))
+               return;
+
         /* Workqueues cannot recurse */
         if (current->flags & PF_WQ_WORKER)
                 return;
@@ -2421,7 +2431,7 @@ void drain_all_pages(struct zone *zone)
         for_each_cpu(cpu, &cpus_with_pcps) {
                 struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);
                 INIT_WORK(work, drain_local_pages_wq);
-               schedule_work_on(cpu, work);
+               queue_work_on(cpu, mm_percpu_wq, work);
         }
         for_each_cpu(cpu, &cpus_with_pcps)
                 flush_work(per_cpu_ptr(&pcpu_drain, cpu));
@@ -2477,20 +2487,17 @@ void free_hot_cold_page(struct page *page, bool cold)
  {
         struct zone *zone = page_zone(page);
         struct per_cpu_pages *pcp;
+       unsigned long flags;
         unsigned long pfn = page_to_pfn(page);
         int migratetype;
  
-       if (in_interrupt()) {
-               __free_pages_ok(page, 0);
-               return;
-       }
-
         if (!free_pcp_prepare(page))
                 return;
  
         migratetype = get_pfnblock_migratetype(page, pfn);
         set_pcppage_migratetype(page, migratetype);
-       preempt_disable();
+       local_irq_save(flags);
+       __count_vm_event(PGFREE);
  
         /*
          * We only track unmovable, reclaimable and movable on pcp lists.
@@ -2507,7 +2514,6 @@ void free_hot_cold_page(struct page *page, bool cold)
                 migratetype = MIGRATE_MOVABLE;
         }
  
-       __count_vm_event(PGFREE);
         pcp = &this_cpu_ptr(zone->pageset)->pcp;
         if (!cold)
                 list_add(&page->lru, &pcp->lists[migratetype]);
@@ -2521,7 +2527,7 @@ void free_hot_cold_page(struct page *page, bool cold)
         }
  
  out:
-       preempt_enable();
+       local_irq_restore(flags);
  }
  
  /*
@@ -2646,8 +2652,6 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
  {
         struct page *page;
  
-       VM_BUG_ON(in_interrupt());
-
         do {
                 if (list_empty(list)) {
                         pcp->count += rmqueue_bulk(zone, 0,
@@ -2678,8 +2682,9 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
         struct list_head *list;
         bool cold = ((gfp_flags & __GFP_COLD) != 0);
         struct page *page;
+       unsigned long flags;
  
-       preempt_disable();
+       local_irq_save(flags);
         pcp = &this_cpu_ptr(zone->pageset)->pcp;
         list = &pcp->lists[migratetype];
         page = __rmqueue_pcplist(zone,  migratetype, cold, pcp, list);
@@ -2687,7 +2692,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
                 __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
                 zone_statistics(preferred_zone, zone);
         }
-       preempt_enable();
+       local_irq_restore(flags);
         return page;
  }
  
@@ -2703,7 +2708,7 @@ struct page *rmqueue(struct zone *preferred_zone,
         unsigned long flags;
         struct page *page;
  
-       if (likely(order == 0) && !in_interrupt()) {
+       if (likely(order == 0)) {
                 page = rmqueue_pcplist(preferred_zone, zone, order,
                                 gfp_flags, migratetype);
                 goto out;
@@ -4518,13 +4523,13 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                         K(node_page_state(pgdat, NR_FILE_MAPPED)),
                         K(node_page_state(pgdat, NR_FILE_DIRTY)),
                         K(node_page_state(pgdat, NR_WRITEBACK)),
+                       K(node_page_state(pgdat, NR_SHMEM)),
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
                         K(node_page_state(pgdat, NR_SHMEM_THPS) * HPAGE_PMD_NR),
                         K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)
                                         * HPAGE_PMD_NR),
                         K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR),
  #endif
-                       K(node_page_state(pgdat, NR_SHMEM)),
                         K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
                         K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
                         node_page_state(pgdat, NR_PAGES_SCANNED),
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c

index a23001a22c151886919d3c24508d64ed1a7c8c16..de9c40d7304aa0e714bdd32abe79517ec3d73038 100644 (file)
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -104,18 +104,15 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
         struct mm_struct *mm = pvmw->vma->vm_mm;
         struct page *page = pvmw->page;
         pgd_t *pgd;
+       p4d_t *p4d;
         pud_t *pud;
  
         /* The only possible pmd mapping has been handled on last iteration */
         if (pvmw->pmd && !pvmw->pte)
                 return not_found(pvmw);
  
-       /* Only for THP, seek to next pte entry makes sense */
-       if (pvmw->pte) {
-               if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
-                       return not_found(pvmw);
+       if (pvmw->pte)
                 goto next_pte;
-       }
  
         if (unlikely(PageHuge(pvmw->page))) {
                 /* when pud is not present, pte will be NULL */
@@ -133,7 +130,10 @@ restart:
         pgd = pgd_offset(mm, pvmw->address);
         if (!pgd_present(*pgd))
                 return false;
-       pud = pud_offset(pgd, pvmw->address);
+       p4d = p4d_offset(pgd, pvmw->address);
+       if (!p4d_present(*p4d))
+               return false;
+       pud = pud_offset(p4d, pvmw->address);
         if (!pud_present(*pud))
                 return false;
         pvmw->pmd = pmd_offset(pud, pvmw->address);
@@ -161,9 +161,14 @@ restart:
         while (1) {
                 if (check_pte(pvmw))
                         return true;
-next_pte:      do {
+next_pte:
+               /* Seek to next pte only makes sense for THP */
+               if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
+                       return not_found(pvmw);
+               do {
                         pvmw->address += PAGE_SIZE;
-                       if (pvmw->address >=
+                       if (pvmw->address >= pvmw->vma->vm_end ||
+                           pvmw->address >=
                                         __vma_address(pvmw->page, pvmw->vma) +
                                         hpage_nr_pages(pvmw->page) * PAGE_SIZE)
                                 return not_found(pvmw);
diff --git a/mm/pagewalk.c b/mm/pagewalk.c

index 03761577ae86e462cf2a7218892be7a28ae5877d..60f7856e508fb90e6010feadad2233f4d148341e 100644 (file)
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -69,14 +69,14 @@ again:
         return err;
  }
  
-static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
+static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
                           struct mm_walk *walk)
  {
         pud_t *pud;
         unsigned long next;
         int err = 0;
  
-       pud = pud_offset(pgd, addr);
+       pud = pud_offset(p4d, addr);
         do {
   again:
                 next = pud_addr_end(addr, end);
@@ -113,6 +113,32 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
         return err;
  }
  
+static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
+                         struct mm_walk *walk)
+{      /* p4d-level step of the page walk: visits each p4d entry for [addr, end) */
+       p4d_t *p4d;
+       unsigned long next;     /* end of the span handled by the current entry */
+       int err = 0;            /* first non-zero callback/sub-walk result; returned as-is */
+
+       p4d = p4d_offset(pgd, addr);
+       do {
+               next = p4d_addr_end(addr, end);
+               if (p4d_none_or_clear_bad(p4d)) {       /* nothing to descend into for [addr, next) */
+                       if (walk->pte_hole)             /* hole callback is optional */
+                               err = walk->pte_hole(addr, next, walk);
+                       if (err)
+                               break;
+                       continue;                       /* skip to the next p4d entry */
+               }
+               if (walk->pmd_entry || walk->pte_entry) /* descend only if a lower-level callback is set */
+                       err = walk_pud_range(p4d, addr, next, walk);
+               if (err)
+                       break;                          /* any non-zero result stops the walk */
+       } while (p4d++, addr = next, addr != end);
+
+       return err;
+}
+
  static int walk_pgd_range(unsigned long addr, unsigned long end,
                           struct mm_walk *walk)
  {
@@ -131,7 +157,7 @@ static int walk_pgd_range(unsigned long addr, unsigned long end,
                         continue;
                 }
                 if (walk->pmd_entry || walk->pte_entry)
-                       err = walk_pud_range(pgd, addr, next, walk);
+                       err = walk_p4d_range(pgd, addr, next, walk);
                 if (err)
                         break;
         } while (pgd++, addr = next, addr != end);
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c

index 538998a137d24e069969dcc3ed00cedc6c25616f..9ac639499bd1146347557141b10f1135ee2c0048 100644 (file)
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -21,7 +21,6 @@ static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
  
  /**
   * pcpu_get_pages - get temp pages array
- * @chunk: chunk of interest
   *
   * Returns pointer to array of pointers to struct page which can be indexed
   * with pcpu_page_idx().  Note that there is only one array and accesses
@@ -30,7 +29,7 @@ static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
   * RETURNS:
   * Pointer to temp pages array on success.
   */
-static struct page **pcpu_get_pages(struct pcpu_chunk *chunk_alloc)
+static struct page **pcpu_get_pages(void)
  {
         static struct page **pages;
         size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]);
@@ -275,7 +274,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
  {
         struct page **pages;
  
-       pages = pcpu_get_pages(chunk);
+       pages = pcpu_get_pages();
         if (!pages)
                 return -ENOMEM;
  
@@ -313,7 +312,7 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
          * successful population attempt so the temp pages array must
          * be available now.
          */
-       pages = pcpu_get_pages(chunk);
+       pages = pcpu_get_pages();
         BUG_ON(!pages);
  
         /* unmap and free */
diff --git a/mm/percpu.c b/mm/percpu.c

index 5696039b5c0707eddcb652bc120a8556ca3cc58b..60a6488e9e6d49d5e9c5d4b611a5f5b037342316 100644 (file)
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1011,8 +1011,11 @@ area_found:
                 mutex_unlock(&pcpu_alloc_mutex);
         }
  
-       if (chunk != pcpu_reserved_chunk)
+       if (chunk != pcpu_reserved_chunk) {
+               spin_lock_irqsave(&pcpu_lock, flags);
                 pcpu_nr_empty_pop_pages -= occ_pages;
+               spin_unlock_irqrestore(&pcpu_lock, flags);
+       }
  
         if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW)
                 pcpu_schedule_balance_work();
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c

index 4ed5908c65b0f17d29f128f0f0b6e21ca9a53c36..c99d9512a45b8a1599f0d679ec92d9e4511b3d68 100644 (file)
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -22,6 +22,12 @@ void pgd_clear_bad(pgd_t *pgd)
         pgd_clear(pgd);
  }
  
+void p4d_clear_bad(p4d_t *p4d)
+{      /* p4d-level sibling of pgd_clear_bad()/pud_clear_bad() in this file */
+       p4d_ERROR(*p4d);        /* pass the entry value to the p4d_ERROR() macro - presumably logs it */
+       p4d_clear(p4d);         /* then clear the entry in place */
+}
+
  void pud_clear_bad(pud_t *pud)
  {
         pud_ERROR(*pud);
diff --git a/mm/rmap.c b/mm/rmap.c

index 2da487d6cea83b4f51db93bcbd05feaad31b927c..f6838015810f5610abe039daec170aa1da634422 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -684,6 +684,7 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
  pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
  {
         pgd_t *pgd;
+       p4d_t *p4d;
         pud_t *pud;
         pmd_t *pmd = NULL;
         pmd_t pmde;
@@ -692,7 +693,11 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
         if (!pgd_present(*pgd))
                 goto out;
  
-       pud = pud_offset(pgd, address);
+       p4d = p4d_offset(pgd, address);
+       if (!p4d_present(*p4d))
+               goto out;
+
+       pud = pud_offset(p4d, address);
         if (!pud_present(*pud))
                 goto out;
  
@@ -1154,7 +1159,7 @@ void page_add_file_rmap(struct page *page, bool compound)
                         goto out;
         }
         __mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, nr);
-       mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
+       mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, nr);
  out:
         unlock_page_memcg(page);
  }
@@ -1194,7 +1199,7 @@ static void page_remove_file_rmap(struct page *page, bool compound)
          * pte lock(a spinlock) is held, which implies preemption disabled.
          */
         __mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, -nr);
-       mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
+       mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, -nr);
  
         if (unlikely(PageMlocked(page)))
                 clear_page_mlock(page);
@@ -1316,12 +1321,6 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
         }
  
         while (page_vma_mapped_walk(&pvmw)) {
-               subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
-               address = pvmw.address;
-
-               /* Unexpected PMD-mapped THP? */
-               VM_BUG_ON_PAGE(!pvmw.pte, page);
-
                 /*
                  * If the page is mlock()d, we cannot swap it out.
                  * If it's recently referenced (perhaps page_referenced
@@ -1345,6 +1344,13 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                                 continue;
                 }
  
+               /* Unexpected PMD-mapped THP? */
+               VM_BUG_ON_PAGE(!pvmw.pte, page);
+
+               subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
+               address = pvmw.address;
+
+
                 if (!(flags & TTU_IGNORE_ACCESS)) {
                         if (ptep_clear_flush_young_notify(vma, address,
                                                 pvmw.pte)) {
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c

index 574c67b663fe8a6ef802b36cb0379d21c96cb77c..a56c3989f77312085f31124f7705908a5f69609a 100644 (file)
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -196,9 +196,9 @@ pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
         return pmd;
  }
  
-pud_t * __meminit vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node)
+pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
  {
-       pud_t *pud = pud_offset(pgd, addr);
+       pud_t *pud = pud_offset(p4d, addr);
         if (pud_none(*pud)) {
                 void *p = vmemmap_alloc_block(PAGE_SIZE, node);
                 if (!p)
@@ -208,6 +208,18 @@ pud_t * __meminit vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node)
         return pud;
  }
  
+p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
+{      /* returns the p4d entry for addr, backing it with a new block first if empty */
+       p4d_t *p4d = p4d_offset(pgd, addr);
+       if (p4d_none(*p4d)) {   /* entry is empty: allocate something for it to point at */
+               void *p = vmemmap_alloc_block(PAGE_SIZE, node);        /* one PAGE_SIZE block requested for 'node' */
+               if (!p)
+                       return NULL;    /* allocation failed; the entry is left untouched */
+               p4d_populate(&init_mm, p4d, p); /* install the block via init_mm */
+       }
+       return p4d;             /* already-populated entries are returned unchanged */
+}
+
  pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
  {
         pgd_t *pgd = pgd_offset_k(addr);
@@ -225,6 +237,7 @@ int __meminit vmemmap_populate_basepages(unsigned long start,
  {
         unsigned long addr = start;
         pgd_t *pgd;
+       p4d_t *p4d;
         pud_t *pud;
         pmd_t *pmd;
         pte_t *pte;
@@ -233,7 +246,10 @@ int __meminit vmemmap_populate_basepages(unsigned long start,
                 pgd = vmemmap_pgd_populate(addr, node);
                 if (!pgd)
                         return -ENOMEM;
-               pud = vmemmap_pud_populate(pgd, addr, node);
+               p4d = vmemmap_p4d_populate(pgd, addr, node);
+               if (!p4d)
+                       return -ENOMEM;
+               pud = vmemmap_pud_populate(p4d, addr, node);
                 if (!pud)
                         return -ENOMEM;
                 pmd = vmemmap_pmd_populate(pud, addr, node);
diff --git a/mm/swap.c b/mm/swap.c

index c4910f14f9579ef1d8b165355f9294715968bf2d..5dabf444d724db98595567b0f7daed7d53fc877e 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -670,30 +670,19 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
  
  static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
  
-/*
- * lru_add_drain_wq is used to do lru_add_drain_all() from a WQ_MEM_RECLAIM
- * workqueue, aiding in getting memory freed.
- */
-static struct workqueue_struct *lru_add_drain_wq;
-
-static int __init lru_init(void)
-{
-       lru_add_drain_wq = alloc_workqueue("lru-add-drain", WQ_MEM_RECLAIM, 0);
-
-       if (WARN(!lru_add_drain_wq,
-               "Failed to create workqueue lru_add_drain_wq"))
-               return -ENOMEM;
-
-       return 0;
-}
-early_initcall(lru_init);
-
  void lru_add_drain_all(void)
  {
         static DEFINE_MUTEX(lock);
         static struct cpumask has_work;
         int cpu;
  
+       /*
+        * Make sure nobody triggers this path before mm_percpu_wq is fully
+        * initialized.
+        */
+       if (WARN_ON(!mm_percpu_wq))
+               return;
+
         mutex_lock(&lock);
         get_online_cpus();
         cpumask_clear(&has_work);
@@ -707,7 +696,7 @@ void lru_add_drain_all(void)
                     pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
                     need_activate_page_drain(cpu)) {
                         INIT_WORK(work, lru_add_drain_per_cpu);
-                       queue_work_on(cpu, lru_add_drain_wq, work);
+                       queue_work_on(cpu, mm_percpu_wq, work);
                         cpumask_set_cpu(cpu, &has_work);
                 }
         }
diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c

index 310ac0b8f9746c53eff9306be52585fb9094fc00..ac6318a064d35e6dcc5385d1dc8062ff6e46554c 100644 (file)
--- a/mm/swap_cgroup.c
+++ b/mm/swap_cgroup.c
@@ -201,6 +201,8 @@ void swap_cgroup_swapoff(int type)
                         struct page *page = map[i];
                         if (page)
                                 __free_page(page);
+                       if (!(i % SWAP_CLUSTER_MAX))
+                               cond_resched();
                 }
                 vfree(map);
         }
diff --git a/mm/swap_slots.c b/mm/swap_slots.c

index 9b5bc86f96ad731269e2051719583f168a74bc51..b1ccb58ad397403214a220e4a0ac7901a6b6ae1e 100644 (file)
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -267,8 +267,6 @@ int free_swap_slot(swp_entry_t entry)
  {
         struct swap_slots_cache *cache;
  
-       BUG_ON(!swap_slot_cache_initialized);
-
         cache = &get_cpu_var(swp_slots);
         if (use_swap_slot_cache && cache->slots_ret) {
                 spin_lock_irq(&cache->free_lock);
diff --git a/mm/swapfile.c b/mm/swapfile.c

index 521ef9b6064fea1429ba196447855a69591e6963..178130880b908515a105eccf9fa428f7cf61719a 100644 (file)
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1517,7 +1517,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
         return 0;
  }
  
-static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
+static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d,
                                 unsigned long addr, unsigned long end,
                                 swp_entry_t entry, struct page *page)
  {
@@ -1525,7 +1525,7 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
         unsigned long next;
         int ret;
  
-       pud = pud_offset(pgd, addr);
+       pud = pud_offset(p4d, addr);
         do {
                 next = pud_addr_end(addr, end);
                 if (pud_none_or_clear_bad(pud))
@@ -1537,6 +1537,26 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
         return 0;
  }
  
+static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd,
+                               unsigned long addr, unsigned long end,
+                               swp_entry_t entry, struct page *page)
+{      /* p4d-level step of unuse_vma(): runs unuse_pud_range() per usable p4d entry */
+       p4d_t *p4d;
+       unsigned long next;     /* end of the span handled by the current entry */
+       int ret;
+
+       p4d = p4d_offset(pgd, addr);
+       do {
+               next = p4d_addr_end(addr, end);
+               if (p4d_none_or_clear_bad(p4d)) /* nothing below this entry: skip it */
+                       continue;
+               ret = unuse_pud_range(vma, p4d, addr, next, entry, page);
+               if (ret)
+                       return ret;             /* non-zero from the pud level ends the scan at once */
+       } while (p4d++, addr = next, addr != end);
+       return 0;               /* whole [addr, end) covered with no non-zero result */
+}
+
  static int unuse_vma(struct vm_area_struct *vma,
                                 swp_entry_t entry, struct page *page)
  {
@@ -1560,7 +1580,7 @@ static int unuse_vma(struct vm_area_struct *vma,
                 next = pgd_addr_end(addr, end);
                 if (pgd_none_or_clear_bad(pgd))
                         continue;
-               ret = unuse_pud_range(vma, pgd, addr, next, entry, page);
+               ret = unuse_p4d_range(vma, pgd, addr, next, entry, page);
                 if (ret)
                         return ret;
         } while (pgd++, addr = next, addr != end);
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c

index 479e631d43c2f609466b1dee97ca2d0314ae12ba..8bcb501bce60b84f8bbc3c79cb2790bae2daa86a 100644 (file)
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -128,19 +128,22 @@ out_unlock:
  static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
  {
         pgd_t *pgd;
+       p4d_t *p4d;
         pud_t *pud;
-       pmd_t *pmd = NULL;
  
         pgd = pgd_offset(mm, address);
-       pud = pud_alloc(mm, pgd, address);
-       if (pud)
-               /*
-                * Note that we didn't run this because the pmd was
-                * missing, the *pmd may be already established and in
-                * turn it may also be a trans_huge_pmd.
-                */
-               pmd = pmd_alloc(mm, pud, address);
-       return pmd;
+       p4d = p4d_alloc(mm, pgd, address);      /* new p4d step between pgd and pud */
+       if (!p4d)
+               return NULL;                    /* p4d_alloc() returned NULL: give up */
+       pud = pud_alloc(mm, p4d, address);      /* pud is now looked up under the p4d, not the pgd */
+       if (!pud)
+               return NULL;                    /* pud_alloc() returned NULL: give up */
+       /*
+        * Note that we didn't run this because the pmd was
+        * missing, the *pmd may be already established and in
+        * turn it may also be a trans_huge_pmd.
+        */
+       return pmd_alloc(mm, pud, address);     /* result (possibly NULL) goes straight to the caller */
  }
  
  #ifdef CONFIG_HUGETLB_PAGE
diff --git a/mm/vmalloc.c b/mm/vmalloc.c

index b4024d688f38698bdbea86034e9a72f71f052da4..0b057628a7ba5c45d722710082ce32df3f7e8e13 100644 (file)
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -86,12 +86,12 @@ static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
         } while (pmd++, addr = next, addr != end);
  }
  
-static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
+static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end)
  {
         pud_t *pud;
         unsigned long next;
  
-       pud = pud_offset(pgd, addr);
+       pud = pud_offset(p4d, addr);
         do {
                 next = pud_addr_end(addr, end);
                 if (pud_clear_huge(pud))
@@ -102,6 +102,22 @@ static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
         } while (pud++, addr = next, addr != end);
  }
  
+static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end)
+{      /* p4d-level step of vunmap_page_range(); same shape as vunmap_pud_range() */
+       p4d_t *p4d;
+       unsigned long next;     /* end of the span handled by the current entry */
+
+       p4d = p4d_offset(pgd, addr);
+       do {
+               next = p4d_addr_end(addr, end);
+               if (p4d_clear_huge(p4d))        /* non-zero: entry was handled at this level, nothing below to visit */
+                       continue;
+               if (p4d_none_or_clear_bad(p4d)) /* empty or bad entry: nothing to unmap */
+                       continue;
+               vunmap_pud_range(p4d, addr, next);      /* descend for [addr, next) */
+       } while (p4d++, addr = next, addr != end);
+}
+
  static void vunmap_page_range(unsigned long addr, unsigned long end)
  {
         pgd_t *pgd;
@@ -113,7 +129,7 @@ static void vunmap_page_range(unsigned long addr, unsigned long end)
                 next = pgd_addr_end(addr, end);
                 if (pgd_none_or_clear_bad(pgd))
                         continue;
-               vunmap_pud_range(pgd, addr, next);
+               vunmap_p4d_range(pgd, addr, next);
         } while (pgd++, addr = next, addr != end);
  }
  
@@ -160,13 +176,13 @@ static int vmap_pmd_range(pud_t *pud, unsigned long addr,
         return 0;
  }
  
-static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
+static int vmap_pud_range(p4d_t *p4d, unsigned long addr,
                 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
  {
         pud_t *pud;
         unsigned long next;
  
-       pud = pud_alloc(&init_mm, pgd, addr);
+       pud = pud_alloc(&init_mm, p4d, addr);
         if (!pud)
                 return -ENOMEM;
         do {
@@ -177,6 +193,23 @@ static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
         return 0;
  }
  
+static int vmap_p4d_range(pgd_t *pgd, unsigned long addr,
+               unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+{      /* p4d-level step of vmap_page_range_noflush(): returns 0 or -ENOMEM */
+       p4d_t *p4d;
+       unsigned long next;     /* end of the span handled by the current entry */
+
+       p4d = p4d_alloc(&init_mm, pgd, addr);   /* tables are allocated against init_mm */
+       if (!p4d)
+               return -ENOMEM;
+       do {
+               next = p4d_addr_end(addr, end);
+               if (vmap_pud_range(p4d, addr, next, prot, pages, nr))   /* prot/pages/nr are passed down untouched */
+                       return -ENOMEM;         /* any non-zero pud-level result is reported as -ENOMEM */
+       } while (p4d++, addr = next, addr != end);
+       return 0;
+}
+
  /*
   * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and
   * will have pfns corresponding to the "pages" array.
@@ -196,7 +229,7 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end,
         pgd = pgd_offset_k(addr);
         do {
                 next = pgd_addr_end(addr, end);
-               err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
+               err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr);
                 if (err)
                         return err;
         } while (pgd++, addr = next, addr != end);
@@ -237,6 +270,10 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
         unsigned long addr = (unsigned long) vmalloc_addr;
         struct page *page = NULL;
         pgd_t *pgd = pgd_offset_k(addr);
+       p4d_t *p4d;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *ptep, pte;
  
         /*
          * XXX we might need to change this if we add VIRTUAL_BUG_ON for
@@ -244,21 +281,23 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
          */
         VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));
  
-       if (!pgd_none(*pgd)) {
-               pud_t *pud = pud_offset(pgd, addr);
-               if (!pud_none(*pud)) {
-                       pmd_t *pmd = pmd_offset(pud, addr);
-                       if (!pmd_none(*pmd)) {
-                               pte_t *ptep, pte;
-
-                               ptep = pte_offset_map(pmd, addr);
-                               pte = *ptep;
-                               if (pte_present(pte))
-                                       page = pte_page(pte);
-                               pte_unmap(ptep);
-                       }
-               }
-       }
+       if (pgd_none(*pgd))
+               return NULL;
+       p4d = p4d_offset(pgd, addr);
+       if (p4d_none(*p4d))
+               return NULL;
+       pud = pud_offset(p4d, addr);
+       if (pud_none(*pud))
+               return NULL;
+       pmd = pmd_offset(pud, addr);
+       if (pmd_none(*pmd))
+               return NULL;
+
+       ptep = pte_offset_map(pmd, addr);
+       pte = *ptep;
+       if (pte_present(pte))
+               page = pte_page(pte);
+       pte_unmap(ptep);
         return page;
  }
  EXPORT_SYMBOL(vmalloc_to_page);
@@ -1644,7 +1683,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
  
                 if (fatal_signal_pending(current)) {
                         area->nr_pages = i;
-                       goto fail;
+                       goto fail_no_warn;
                 }
  
                 if (node == NUMA_NO_NODE)
@@ -1670,6 +1709,7 @@ fail:
         warn_alloc(gfp_mask, NULL,
                           "vmalloc: allocation failure, allocated %ld of %ld bytes",
                           (area->nr_pages*PAGE_SIZE), area->size);
+fail_no_warn:
         vfree(area->addr);
         return NULL;
  }
diff --git a/mm/vmstat.c b/mm/vmstat.c

index 69f9aff39a2eaf608d4f7cfaed8904bd3c3312c8..5a4f5c5a31e88ee558f536d22f61f05a3fd13c45 100644 (file)
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1065,6 +1065,9 @@ const char * const vmstat_text[] = {
         "thp_split_page_failed",
         "thp_deferred_split_page",
         "thp_split_pmd",
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+       "thp_split_pud",
+#endif
         "thp_zero_page_alloc",
         "thp_zero_page_alloc_failed",
  #endif
@@ -1549,7 +1552,6 @@ static const struct file_operations proc_vmstat_file_operations = {
  #endif /* CONFIG_PROC_FS */
  
  #ifdef CONFIG_SMP
-static struct workqueue_struct *vmstat_wq;
  static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
  int sysctl_stat_interval __read_mostly = HZ;
  
@@ -1620,7 +1622,7 @@ static void vmstat_update(struct work_struct *w)
                  * to occur in the future. Keep on running the
                  * update worker thread.
                  */
-               queue_delayed_work_on(smp_processor_id(), vmstat_wq,
+               queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
                                 this_cpu_ptr(&vmstat_work),
                                 round_jiffies_relative(sysctl_stat_interval));
         }
@@ -1699,7 +1701,7 @@ static void vmstat_shepherd(struct work_struct *w)
                 struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
  
                 if (!delayed_work_pending(dw) && need_update(cpu))
-                       queue_delayed_work_on(cpu, vmstat_wq, dw, 0);
+                       queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
         }
         put_online_cpus();
  
@@ -1715,7 +1717,6 @@ static void __init start_shepherd_timer(void)
                 INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
                         vmstat_update);
  
-       vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
         schedule_delayed_work(&shepherd,
                 round_jiffies_relative(sysctl_stat_interval));
  }
@@ -1761,11 +1762,15 @@ static int vmstat_cpu_dead(unsigned int cpu)
  
  #endif
  
-static int __init setup_vmstat(void)
+struct workqueue_struct *mm_percpu_wq;
+
+void __init init_mm_internals(void)
  {
-#ifdef CONFIG_SMP
-       int ret;
+       int ret __maybe_unused;
  
+       mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);
+
+#ifdef CONFIG_SMP
         ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
                                         NULL, vmstat_cpu_dead);
         if (ret < 0)
@@ -1789,9 +1794,7 @@ static int __init setup_vmstat(void)
         proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
         proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
  #endif
-       return 0;
  }
-module_init(setup_vmstat)
  
  #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
  
diff --git a/mm/workingset.c b/mm/workingset.c

index ac839fca0e76ae3cc5a025684cb1516301922d92..eda05c71fa49e6e1e4f93a4029ddef04a4f8ab4c 100644 (file)
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -532,7 +532,7 @@ static int __init workingset_init(void)
         pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n",
                timestamp_bits, max_order, bucket_order);
  
-       ret = list_lru_init_key(&shadow_nodes, &shadow_nodes_key);
+       ret = __list_lru_init(&shadow_nodes, true, &shadow_nodes_key);
         if (ret)
                 goto err;
         ret = register_shrinker(&workingset_shadow_shrinker);
diff --git a/mm/z3fold.c b/mm/z3fold.c

index 8970a2fd3b1a5354fb4bc843292a1c7358eed51c..54f63c4a809ae123248200ee84642629c6db8ffc 100644 (file)
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -185,6 +185,12 @@ static inline void z3fold_page_lock(struct z3fold_header *zhdr)
         spin_lock(&zhdr->page_lock);
  }
  
+/* Try to lock a z3fold page */
+static inline int z3fold_page_trylock(struct z3fold_header *zhdr)
+{      /* trylock counterpart of z3fold_page_lock(); takes the same zhdr->page_lock */
+       return spin_trylock(&zhdr->page_lock);  /* spin_trylock() result returned unchanged (non-zero = lock taken) */
+}
+
  /* Unlock a z3fold page */
  static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
  {
@@ -385,7 +391,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
                         spin_lock(&pool->lock);
                         zhdr = list_first_entry_or_null(&pool->unbuddied[i],
                                                 struct z3fold_header, buddy);
-                       if (!zhdr) {
+                       if (!zhdr || !z3fold_page_trylock(zhdr)) {
                                 spin_unlock(&pool->lock);
                                 continue;
                         }
@@ -394,7 +400,6 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
                         spin_unlock(&pool->lock);
  
                         page = virt_to_page(zhdr);
-                       z3fold_page_lock(zhdr);
                         if (zhdr->first_chunks == 0) {
                                 if (zhdr->middle_chunks != 0 &&
                                     chunks >= zhdr->start_middle)
@@ -667,6 +672,7 @@ next:
                         z3fold_page_unlock(zhdr);
                         spin_lock(&pool->lock);
                         if (kref_put(&zhdr->refcount, release_z3fold_page)) {
+                               spin_unlock(&pool->lock);
                                 atomic64_dec(&pool->pages_nr);
                                 return 0;
                         }
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c

index b7ee9c34dbd678fc984db3295a70c117ee2fc2f2..d41edd28298b68ff335e6324df7e4e793a481d93 100644 (file)
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -276,7 +276,7 @@ struct zs_pool {
  struct zspage {
         struct {
                 unsigned int fullness:FULLNESS_BITS;
-               unsigned int class:CLASS_BITS;
+               unsigned int class:CLASS_BITS + 1;
                 unsigned int isolated:ISOLATED_BITS;
                 unsigned int magic:MAGIC_VAL_BITS;
         };
diff --git a/net/9p/client.c b/net/9p/client.c

index 3ce672af1596cfdb8fbd67ce558366e7997b7695..8e5c6a8d0a3733d03a9c36ca8854f828cb8cc678 100644 (file)
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -2101,6 +2101,10 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
                 trace_9p_protocol_dump(clnt, req->rc);
                 goto free_and_error;
         }
+       if (rsize < count) {
+               pr_err("bogus RREADDIR count (%d > %d)\n", count, rsize);
+               count = rsize;
+       }
  
         p9_debug(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count);
  
diff --git a/net/atm/svc.c b/net/atm/svc.c

index db9794ec61d88efe16419a6c4534daf7c8770bc1..5589de7086af4eca7634e786918600a81cf6b09c 100644 (file)
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -318,7 +318,8 @@ out:
         return error;
  }
  
-static int svc_accept(struct socket *sock, struct socket *newsock, int flags)
+static int svc_accept(struct socket *sock, struct socket *newsock, int flags,
+                     bool kern)
  {
         struct sock *sk = sock->sk;
         struct sk_buff *skb;
@@ -329,7 +330,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags)
  
         lock_sock(sk);
  
-       error = svc_create(sock_net(sk), newsock, 0, 0);
+       error = svc_create(sock_net(sk), newsock, 0, kern);
         if (error)
                 goto out;
  
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c

index a8e42cedf1dbc7e11a5803a3dbe857e1e4cd54e1..b7c486752b3acf64b821ccb8b0e1a9bc25c945da 100644 (file)
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1320,7 +1320,8 @@ out_release:
         return err;
  }
  
-static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
+static int ax25_accept(struct socket *sock, struct socket *newsock, int flags,
+                      bool kern)
  {
         struct sk_buff *skb;
         struct sock *newsk;
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c

index 7c3d994e90d87b868f2b1614cc5d26e2413e70ee..71343d0fec94b55f7318ec8578abc956148f7791 100644 (file)
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -2477,6 +2477,16 @@ static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface)
         batadv_iv_ogm_schedule(hard_iface);
  }
  
+/**
+ * batadv_iv_init_sel_class - initialize GW selection class
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+static void batadv_iv_init_sel_class(struct batadv_priv *bat_priv)
+{      /* installed as the .gw.init_sel_class hook of batadv_batman_iv */
+       /* set default TQ difference threshold to 20 */
+       atomic_set(&bat_priv->gw.sel_class, 20);        /* only bat_priv->gw.sel_class is written */
+}
+
  static struct batadv_gw_node *
  batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
  {
@@ -2823,6 +2833,7 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
                 .del_if = batadv_iv_ogm_orig_del_if,
         },
         .gw = {
+               .init_sel_class = batadv_iv_init_sel_class,
                 .get_best_gw_node = batadv_iv_gw_get_best_gw_node,
                 .is_eligible = batadv_iv_gw_is_eligible,
  #ifdef CONFIG_BATMAN_ADV_DEBUGFS
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c

index 0acd081dd286996444d121b526f4530c4c1c0845..a36c8e7291d61f171cdb128dee865739d22cb00e 100644 (file)
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -668,6 +668,16 @@ err_ifinfo1:
         return ret;
  }
  
+/**
+ * batadv_v_init_sel_class - initialize GW selection class
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+static void batadv_v_init_sel_class(struct batadv_priv *bat_priv)
+{      /* installed as the .gw.init_sel_class hook of batadv_batman_v */
+       /* set default throughput difference threshold to 5Mbps */
+       atomic_set(&bat_priv->gw.sel_class, 50);        /* NOTE(review): 50 == 5Mbps implies 100 kbit/s units - confirm */
+}
+
  static ssize_t batadv_v_store_sel_class(struct batadv_priv *bat_priv,
                                         char *buff, size_t count)
  {
@@ -1052,6 +1062,7 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = {
                 .dump = batadv_v_orig_dump,
         },
         .gw = {
+               .init_sel_class = batadv_v_init_sel_class,
                 .store_sel_class = batadv_v_store_sel_class,
                 .show_sel_class = batadv_v_show_sel_class,
                 .get_best_gw_node = batadv_v_gw_get_best_gw_node,
@@ -1092,9 +1103,6 @@ int batadv_v_mesh_init(struct batadv_priv *bat_priv)
         if (ret < 0)
                 return ret;
  
-       /* set default throughput difference threshold to 5Mbps */
-       atomic_set(&bat_priv->gw.sel_class, 50);
-
         return 0;
  }
  
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c

index 11a23fd6e1a07fa0c541fa3ea0a13775f9933893..8f964beaac284905c487ecfc5babaf2dd72d822c 100644 (file)
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -404,7 +404,7 @@ out:
   * batadv_frag_create - create a fragment from skb
   * @skb: skb to create fragment from
   * @frag_head: header to use in new fragment
- * @mtu: size of new fragment
+ * @fragment_size: size of new fragment
   *
   * Split the passed skb into two fragments: A new one with size matching the
   * passed mtu and the old one with the rest. The new skb contains data from the
@@ -414,11 +414,11 @@ out:
   */
  static struct sk_buff *batadv_frag_create(struct sk_buff *skb,
                                           struct batadv_frag_packet *frag_head,
-                                         unsigned int mtu)
+                                         unsigned int fragment_size)
  {
         struct sk_buff *skb_fragment;
         unsigned int header_size = sizeof(*frag_head);
-       unsigned int fragment_size = mtu - header_size;
+       unsigned int mtu = fragment_size + header_size;
  
         skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN);
         if (!skb_fragment)
@@ -456,7 +456,7 @@ int batadv_frag_send_packet(struct sk_buff *skb,
         struct sk_buff *skb_fragment;
         unsigned int mtu = neigh_node->if_incoming->net_dev->mtu;
         unsigned int header_size = sizeof(frag_header);
-       unsigned int max_fragment_size, max_packet_size;
+       unsigned int max_fragment_size, num_fragments;
         int ret;
  
         /* To avoid merge and refragmentation at next-hops we never send
@@ -464,10 +464,15 @@ int batadv_frag_send_packet(struct sk_buff *skb,
          */
         mtu = min_t(unsigned int, mtu, BATADV_FRAG_MAX_FRAG_SIZE);
         max_fragment_size = mtu - header_size;
-       max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS;
+
+       if (skb->len == 0 || max_fragment_size == 0)
+               return -EINVAL;
+
+       num_fragments = (skb->len - 1) / max_fragment_size + 1;
+       max_fragment_size = (skb->len - 1) / num_fragments + 1;
  
         /* Don't even try to fragment, if we need more than 16 fragments */
-       if (skb->len > max_packet_size) {
+       if (num_fragments > BATADV_FRAG_MAX_FRAGMENTS) {
                 ret = -EAGAIN;
                 goto free_skb;
         }
@@ -507,7 +512,8 @@ int batadv_frag_send_packet(struct sk_buff *skb,
                         goto put_primary_if;
                 }
  
-               skb_fragment = batadv_frag_create(skb, &frag_header, mtu);
+               skb_fragment = batadv_frag_create(skb, &frag_header,
+                                                 max_fragment_size);
                 if (!skb_fragment) {
                         ret = -ENOMEM;
                         goto put_primary_if;
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c

index 5db2e43e3775ef40fc3832984c93411c7f0dbb08..33940c5c74a8730c4ed3e06f7246e022cfb798da 100644 (file)
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -253,6 +253,11 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
   */
  void batadv_gw_init(struct batadv_priv *bat_priv)
  {
+       if (bat_priv->algo_ops->gw.init_sel_class)
+               bat_priv->algo_ops->gw.init_sel_class(bat_priv);
+       else
+               atomic_set(&bat_priv->gw.sel_class, 1);
+
         batadv_tvlv_handler_register(bat_priv, batadv_gw_tvlv_ogm_handler_v1,
                                      NULL, BATADV_TVLV_GW, 1,
                                      BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c

index 5d099b2e6cfccb8a436d98a10a6d513d89e31dc1..d042c99af028e2083307de1ba8978f2061fee45d 100644 (file)
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -819,7 +819,6 @@ static int batadv_softif_init_late(struct net_device *dev)
         atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0);
  #endif
         atomic_set(&bat_priv->gw.mode, BATADV_GW_MODE_OFF);
-       atomic_set(&bat_priv->gw.sel_class, 20);
         atomic_set(&bat_priv->gw.bandwidth_down, 100);
         atomic_set(&bat_priv->gw.bandwidth_up, 20);
         atomic_set(&bat_priv->orig_interval, 1000);
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h

index 66b25e410a41375e5c70bd7400a5b353bdff4520..246f21b4973bc39d0678273ad831da1f5b7e0df3 100644 (file)
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1489,6 +1489,7 @@ struct batadv_algo_orig_ops {
  
  /**
   * struct batadv_algo_gw_ops - mesh algorithm callbacks (GW specific)
+ * @init_sel_class: initialize GW selection class (optional)
   * @store_sel_class: parse and stores a new GW selection class (optional)
   * @show_sel_class: prints the current GW selection class (optional)
   * @get_best_gw_node: select the best GW from the list of available nodes
@@ -1499,6 +1500,7 @@ struct batadv_algo_orig_ops {
   * @dump: dump gateways to a netlink socket (optional)
   */
  struct batadv_algo_gw_ops {
+       void (*init_sel_class)(struct batadv_priv *bat_priv);
         ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff,
                                    size_t count);
         ssize_t (*show_sel_class)(struct batadv_priv *bat_priv, char *buff);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c

index f307b145ea5405482434a9c37cafeb6d3f32dee4..507b80d59dec4fd3b0eb3c50ed1cd95a78adfcb7 100644 (file)
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -301,7 +301,7 @@ done:
  }
  
  static int l2cap_sock_accept(struct socket *sock, struct socket *newsock,
-                            int flags)
+                            int flags, bool kern)
  {
         DEFINE_WAIT_FUNC(wait, woken_wake_function);
         struct sock *sk = sock->sk, *nsk;
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c

index aa1a814ceddca77f790f0c570e9c89ef08ebe186..ac3c650cb234f9985ddf0b54924db9000c4586c3 100644 (file)
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -471,7 +471,8 @@ done:
         return err;
  }
  
-static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags)
+static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags,
+                             bool kern)
  {
         DEFINE_WAIT_FUNC(wait, woken_wake_function);
         struct sock *sk = sock->sk, *nsk;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c

index e4e9a2da1e7e7a0e4b9764fe6de8819b9908ef3f..728e0c8dc8e74ccb134b8ed1d493ea8ee49bf49b 100644 (file)
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -627,7 +627,7 @@ done:
  }
  
  static int sco_sock_accept(struct socket *sock, struct socket *newsock,
-                          int flags)
+                          int flags, bool kern)
  {
         DEFINE_WAIT_FUNC(wait, woken_wake_function);
         struct sock *sk = sock->sk, *ch;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c

index ea71513fca21a0aea0dd569b482717a1b1dbe673..430b53e7d941def09220a1c97a2e82d288304595 100644 (file)
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -119,6 +119,16 @@ static int br_dev_init(struct net_device *dev)
         return err;
  }
  
+static void br_dev_uninit(struct net_device *dev)
+{
+       struct net_bridge *br = netdev_priv(dev);
+
+       br_multicast_dev_del(br);
+       br_multicast_uninit_stats(br);
+       br_vlan_flush(br);
+       free_percpu(br->stats);
+}
+
  static int br_dev_open(struct net_device *dev)
  {
         struct net_bridge *br = netdev_priv(dev);
@@ -332,6 +342,7 @@ static const struct net_device_ops br_netdev_ops = {
         .ndo_open                = br_dev_open,
         .ndo_stop                = br_dev_stop,
         .ndo_init                = br_dev_init,
+       .ndo_uninit              = br_dev_uninit,
         .ndo_start_xmit          = br_dev_xmit,
         .ndo_get_stats64         = br_get_stats64,
         .ndo_set_mac_address     = br_set_mac_address,
@@ -356,14 +367,6 @@ static const struct net_device_ops br_netdev_ops = {
         .ndo_features_check      = passthru_features_check,
  };
  
-static void br_dev_free(struct net_device *dev)
-{
-       struct net_bridge *br = netdev_priv(dev);
-
-       free_percpu(br->stats);
-       free_netdev(dev);
-}
-
  static struct device_type br_type = {
         .name   = "bridge",
  };
@@ -376,7 +379,7 @@ void br_dev_setup(struct net_device *dev)
         ether_setup(dev);
  
         dev->netdev_ops = &br_netdev_ops;
-       dev->destructor = br_dev_free;
+       dev->destructor = free_netdev;
         dev->ethtool_ops = &br_ethtool_ops;
         SET_NETDEV_DEVTYPE(dev, &br_type);
         dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c

index 4f598dc2d9168cd323a3027d77d601854aa35f04..6e08b7199dd7442acdbd4f85e5ef6315b121ca06 100644 (file)
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -106,7 +106,7 @@ static struct net_bridge_fdb_entry *br_fdb_find(struct net_bridge *br,
         struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
         struct net_bridge_fdb_entry *fdb;
  
-       WARN_ON_ONCE(!br_hash_lock_held(br));
+       lockdep_assert_held_once(&br->hash_lock);
  
         rcu_read_lock();
         fdb = fdb_find_rcu(head, addr, vid);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c

index 8ac1770aa222f21f89027d303a218c49be9dc650..a8d0ed282a109a1f4075565a0934e742febb387b 100644 (file)
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -311,8 +311,6 @@ void br_dev_delete(struct net_device *dev, struct list_head *head)
  
         br_fdb_delete_by_port(br, NULL, 0, 1);
  
-       br_vlan_flush(br);
-       br_multicast_dev_del(br);
         cancel_delayed_work_sync(&br->gc_work);
  
         br_sysfs_delbr(br->dev);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c

index 236f34244dbe1f2cd2bdfaf9d4eceb0765276882..013f2290bfa56df90708879437a762c812dec101 100644 (file)
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -30,6 +30,7 @@ EXPORT_SYMBOL(br_should_route_hook);
  static int
  br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
  {
+       br_drop_fake_rtable(skb);
         return netif_receive_skb(skb);
  }
  
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c

index b760f2620abf320307a65c3f5baf86ff91221545..faa7261a992fa6df54afd4656b0e810102d59c30 100644 (file)
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -2031,8 +2031,6 @@ void br_multicast_dev_del(struct net_bridge *br)
  
  out:
         spin_unlock_bh(&br->multicast_lock);
-
-       free_percpu(br->mcast_stats);
  }
  
  int br_multicast_set_router(struct net_bridge *br, unsigned long val)
@@ -2531,6 +2529,11 @@ int br_multicast_init_stats(struct net_bridge *br)
         return 0;
  }
  
+void br_multicast_uninit_stats(struct net_bridge *br)
+{
+       free_percpu(br->mcast_stats);
+}
+
  static void mcast_stats_add_dir(u64 *dst, u64 *src)
  {
         dst[BR_MCAST_DIR_RX] += src[BR_MCAST_DIR_RX];
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c

index 95087e6e8258366af95579bb308d1a6e18266f0e..1f1e62095464f99eaca8de49a772289f057bd943 100644 (file)
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -521,21 +521,6 @@ static unsigned int br_nf_pre_routing(void *priv,
  }
  
  
-/* PF_BRIDGE/LOCAL_IN ************************************************/
-/* The packet is locally destined, which requires a real
- * dst_entry, so detach the fake one.  On the way up, the
- * packet would pass through PRE_ROUTING again (which already
- * took place when the packet entered the bridge), but we
- * register an IPv4 PRE_ROUTING 'sabotage' hook that will
- * prevent this from happening. */
-static unsigned int br_nf_local_in(void *priv,
-                                  struct sk_buff *skb,
-                                  const struct nf_hook_state *state)
-{
-       br_drop_fake_rtable(skb);
-       return NF_ACCEPT;
-}
-
  /* PF_BRIDGE/FORWARD *************************************************/
  static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
  {
@@ -721,18 +706,20 @@ static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
  
  static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
  {
-       struct nf_bridge_info *nf_bridge;
-       unsigned int mtu_reserved;
+       struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+       unsigned int mtu, mtu_reserved;
  
         mtu_reserved = nf_bridge_mtu_reduction(skb);
+       mtu = skb->dev->mtu;
  
-       if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) {
+       if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu)
+               mtu = nf_bridge->frag_max_size;
+
+       if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) {
                 nf_bridge_info_free(skb);
                 return br_dev_queue_push_xmit(net, sk, skb);
         }
  
-       nf_bridge = nf_bridge_info_get(skb);
-
         /* This is wrong! We should preserve the original fragment
          * boundaries by preserving frag_list rather than refragmenting.
          */
@@ -907,12 +894,6 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = {
                 .hooknum = NF_BR_PRE_ROUTING,
                 .priority = NF_BR_PRI_BRNF,
         },
-       {
-               .hook = br_nf_local_in,
-               .pf = NFPROTO_BRIDGE,
-               .hooknum = NF_BR_LOCAL_IN,
-               .priority = NF_BR_PRI_BRNF,
-       },
         {
                 .hook = br_nf_forward_ip,
                 .pf = NFPROTO_BRIDGE,
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c

index a8f6acd23e309dcf51e6825076d9b3ba00996a9c..225ef7d5370166baff69080996cf64ff68b055f6 100644 (file)
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1165,11 +1165,14 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev,
                 spin_unlock_bh(&br->lock);
         }
  
-       err = br_changelink(dev, tb, data);
+       err = register_netdevice(dev);
         if (err)
                 return err;
  
-       return register_netdevice(dev);
+       err = br_changelink(dev, tb, data);
+       if (err)
+               unregister_netdevice(dev);
+       return err;
  }
  
  static size_t br_get_size(const struct net_device *brdev)
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h

index 2288fca7756c5103fc4e8420ad61a2f9e633c097..0d177280aa849bf1bc3ba3de79d3a3f3c748d5eb 100644 (file)
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -531,15 +531,6 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
  int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
                               const unsigned char *addr, u16 vid);
  
-static inline bool br_hash_lock_held(struct net_bridge *br)
-{
-#ifdef CONFIG_LOCKDEP
-       return lockdep_is_held(&br->hash_lock);
-#else
-       return true;
-#endif
-}
-
  /* br_forward.c */
  enum br_pkt_type {
         BR_PKT_UNICAST,
@@ -629,6 +620,7 @@ void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
  void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
                         const struct sk_buff *skb, u8 type, u8 dir);
  int br_multicast_init_stats(struct net_bridge *br);
+void br_multicast_uninit_stats(struct net_bridge *br);
  void br_multicast_get_stats(const struct net_bridge *br,
                             const struct net_bridge_port *p,
                             struct br_mcast_stats *dest);
@@ -769,6 +761,10 @@ static inline int br_multicast_init_stats(struct net_bridge *br)
         return 0;
  }
  
+static inline void br_multicast_uninit_stats(struct net_bridge *br)
+{
+}
+
  static inline int br_multicast_igmp_type(const struct sk_buff *skb)
  {
         return 0;
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c

index 464e88599b9d2918f191eae62e1fab7b80a4806e..108533859a53292cde61a3cedd052a2579684e87 100644 (file)
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -230,6 +230,7 @@ enum {
         Opt_osdkeepalivetimeout,
         Opt_mount_timeout,
         Opt_osd_idle_ttl,
+       Opt_osd_request_timeout,
         Opt_last_int,
         /* int args above */
         Opt_fsid,
@@ -256,6 +257,7 @@ static match_table_t opt_tokens = {
         {Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
         {Opt_mount_timeout, "mount_timeout=%d"},
         {Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
+       {Opt_osd_request_timeout, "osd_request_timeout=%d"},
         /* int args above */
         {Opt_fsid, "fsid=%s"},
         {Opt_name, "name=%s"},
@@ -361,6 +363,7 @@ ceph_parse_options(char *options, const char *dev_name,
         opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
         opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
         opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
+       opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;
  
         /* get mon ip(s) */
         /* ip1[:port1][,ip2[:port2]...] */
@@ -473,6 +476,15 @@ ceph_parse_options(char *options, const char *dev_name,
                         }
                         opt->mount_timeout = msecs_to_jiffies(intval * 1000);
                         break;
+               case Opt_osd_request_timeout:
+                       /* 0 is "wait forever" (i.e. infinite timeout) */
+                       if (intval < 0 || intval > INT_MAX / 1000) {
+                               pr_err("osd_request_timeout out of range\n");
+                               err = -EINVAL;
+                               goto out;
+                       }
+                       opt->osd_request_timeout = msecs_to_jiffies(intval * 1000);
+                       break;
  
                 case Opt_share:
                         opt->flags &= ~CEPH_OPT_NOSHARE;
@@ -557,6 +569,9 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
         if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
                 seq_printf(m, "osdkeepalivetimeout=%d,",
                     jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000);
+       if (opt->osd_request_timeout != CEPH_OSD_REQUEST_TIMEOUT_DEFAULT)
+               seq_printf(m, "osd_request_timeout=%d,",
+                          jiffies_to_msecs(opt->osd_request_timeout) / 1000);
  
         /* drop redundant comma */
         if (m->count != pos)
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c

index 38dcf1eb427de562776934b1c2dfff2c46f3ca12..f76bb333261384257490b0f5125207028e8352aa 100644 (file)
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -7,6 +7,7 @@
  #include <linux/kthread.h>
  #include <linux/net.h>
  #include <linux/nsproxy.h>
+#include <linux/sched/mm.h>
  #include <linux/slab.h>
  #include <linux/socket.h>
  #include <linux/string.h>
@@ -469,11 +470,16 @@ static int ceph_tcp_connect(struct ceph_connection *con)
  {
         struct sockaddr_storage *paddr = &con->peer_addr.in_addr;
         struct socket *sock;
+       unsigned int noio_flag;
         int ret;
  
         BUG_ON(con->sock);
+
+       /* sock_create_kern() allocates with GFP_KERNEL */
+       noio_flag = memalloc_noio_save();
         ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family,
                                SOCK_STREAM, IPPROTO_TCP, &sock);
+       memalloc_noio_restore(noio_flag);
         if (ret)
                 return ret;
         sock->sk->sk_allocation = GFP_NOFS;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c

index b65bbf9f45ebb22c8ac51af34c6b1c29ef7ed17c..e15ea9e4c4955fbd697e545cedfdb7f7925c347e 100644 (file)
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1709,6 +1709,8 @@ static void account_request(struct ceph_osd_request *req)
  
         req->r_flags |= CEPH_OSD_FLAG_ONDISK;
         atomic_inc(&req->r_osdc->num_requests);
+
+       req->r_start_stamp = jiffies;
  }
  
  static void submit_request(struct ceph_osd_request *req, bool wrlocked)
@@ -1789,6 +1791,14 @@ static void cancel_request(struct ceph_osd_request *req)
         ceph_osdc_put_request(req);
  }
  
+static void abort_request(struct ceph_osd_request *req, int err)
+{
+       dout("%s req %p tid %llu err %d\n", __func__, req, req->r_tid, err);
+
+       cancel_map_check(req);
+       complete_request(req, err);
+}
+
  static void check_pool_dne(struct ceph_osd_request *req)
  {
         struct ceph_osd_client *osdc = req->r_osdc;
@@ -2487,6 +2497,7 @@ static void handle_timeout(struct work_struct *work)
                 container_of(work, struct ceph_osd_client, timeout_work.work);
         struct ceph_options *opts = osdc->client->options;
         unsigned long cutoff = jiffies - opts->osd_keepalive_timeout;
+       unsigned long expiry_cutoff = jiffies - opts->osd_request_timeout;
         LIST_HEAD(slow_osds);
         struct rb_node *n, *p;
  
@@ -2502,15 +2513,23 @@ static void handle_timeout(struct work_struct *work)
                 struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
                 bool found = false;
  
-               for (p = rb_first(&osd->o_requests); p; p = rb_next(p)) {
+               for (p = rb_first(&osd->o_requests); p; ) {
                         struct ceph_osd_request *req =
                             rb_entry(p, struct ceph_osd_request, r_node);
  
+                       p = rb_next(p); /* abort_request() */
+
                         if (time_before(req->r_stamp, cutoff)) {
                                 dout(" req %p tid %llu on osd%d is laggy\n",
                                      req, req->r_tid, osd->o_osd);
                                 found = true;
                         }
+                       if (opts->osd_request_timeout &&
+                           time_before(req->r_start_stamp, expiry_cutoff)) {
+                               pr_err_ratelimited("tid %llu on osd%d timeout\n",
+                                      req->r_tid, osd->o_osd);
+                               abort_request(req, -ETIMEDOUT);
+                       }
                 }
                 for (p = rb_first(&osd->o_linger_requests); p; p = rb_next(p)) {
                         struct ceph_osd_linger_request *lreq =
@@ -2530,6 +2549,21 @@ static void handle_timeout(struct work_struct *work)
                         list_move_tail(&osd->o_keepalive_item, &slow_osds);
         }
  
+       if (opts->osd_request_timeout) {
+               for (p = rb_first(&osdc->homeless_osd.o_requests); p; ) {
+                       struct ceph_osd_request *req =
+                           rb_entry(p, struct ceph_osd_request, r_node);
+
+                       p = rb_next(p); /* abort_request() */
+
+                       if (time_before(req->r_start_stamp, expiry_cutoff)) {
+                               pr_err_ratelimited("tid %llu on osd%d timeout\n",
+                                      req->r_tid, osdc->homeless_osd.o_osd);
+                               abort_request(req, -ETIMEDOUT);
+                       }
+               }
+       }
+
         if (atomic_read(&osdc->num_homeless) || !list_empty(&slow_osds))
                 maybe_request_map(osdc);
  
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c

index 6824c0ec8373e721ac9ca2d837f488ff22233e1f..ffe9e904d4d1d130b0353edbe45d50d236b4f74e 100644 (file)
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -390,9 +390,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
         dout("crush decode tunable chooseleaf_stable = %d\n",
              c->chooseleaf_stable);
  
-       crush_finalize(c);
-
  done:
+       crush_finalize(c);
         dout("crush_decode success\n");
         return c;
  
@@ -1380,7 +1379,6 @@ static int decode_new_up_state_weight(void **p, void *end,
                 if ((map->osd_state[osd] & CEPH_OSD_EXISTS) &&
                     (xorstate & CEPH_OSD_EXISTS)) {
                         pr_info("osd%d does not exist\n", osd);
-                       map->osd_weight[osd] = CEPH_OSD_IN;
                         ret = set_primary_affinity(map, osd,
                                                    CEPH_OSD_DEFAULT_PRIMARY_AFFINITY);
                         if (ret)
diff --git a/net/core/datagram.c b/net/core/datagram.c

index ea633342ab0d046cbc49e55b679440ef9e015c2d..f4947e737f34a0d5dafb2e8232260f46a43fbc27 100644 (file)
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -398,7 +398,7 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
                            struct iov_iter *to, int len)
  {
         int start = skb_headlen(skb);
-       int i, copy = start - offset;
+       int i, copy = start - offset, start_off = offset, n;
         struct sk_buff *frag_iter;
  
         trace_skb_copy_datagram_iovec(skb, len);
@@ -407,11 +407,12 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
         if (copy > 0) {
                 if (copy > len)
                         copy = len;
-               if (copy_to_iter(skb->data + offset, copy, to) != copy)
+               n = copy_to_iter(skb->data + offset, copy, to);
+               offset += n;
+               if (n != copy)
                         goto short_copy;
                 if ((len -= copy) == 0)
                         return 0;
-               offset += copy;
         }
  
         /* Copy paged appendix. Hmm... why does this look so complicated? */
@@ -425,13 +426,14 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
                 if ((copy = end - offset) > 0) {
                         if (copy > len)
                                 copy = len;
-                       if (copy_page_to_iter(skb_frag_page(frag),
+                       n = copy_page_to_iter(skb_frag_page(frag),
                                               frag->page_offset + offset -
-                                             start, copy, to) != copy)
+                                             start, copy, to);
+                       offset += n;
+                       if (n != copy)
                                 goto short_copy;
                         if (!(len -= copy))
                                 return 0;
-                       offset += copy;
                 }
                 start = end;
         }
@@ -463,6 +465,7 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
          */
  
  fault:
+       iov_iter_revert(to, offset - start_off);
         return -EFAULT;
  
  short_copy:
@@ -613,7 +616,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
                                       __wsum *csump)
  {
         int start = skb_headlen(skb);
-       int i, copy = start - offset;
+       int i, copy = start - offset, start_off = offset;
         struct sk_buff *frag_iter;
         int pos = 0;
         int n;
@@ -623,11 +626,11 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
                 if (copy > len)
                         copy = len;
                 n = csum_and_copy_to_iter(skb->data + offset, copy, csump, to);
+               offset += n;
                 if (n != copy)
                         goto fault;
                 if ((len -= copy) == 0)
                         return 0;
-               offset += copy;
                 pos = copy;
         }
  
@@ -649,12 +652,12 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
                                                   offset - start, copy,
                                                   &csum2, to);
                         kunmap(page);
+                       offset += n;
                         if (n != copy)
                                 goto fault;
                         *csump = csum_block_add(*csump, csum2, pos);
                         if (!(len -= copy))
                                 return 0;
-                       offset += copy;
                         pos += copy;
                 }
                 start = end;
@@ -687,6 +690,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
                 return 0;
  
  fault:
+       iov_iter_revert(to, offset - start_off);
         return -EFAULT;
  }
  
@@ -771,6 +775,7 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
         }
         return 0;
  csum_error:
+       iov_iter_revert(&msg->msg_iter, chunk);
         return -EINVAL;
  fault:
         return -EFAULT;
diff --git a/net/core/dev.c b/net/core/dev.c

index 8637b2b71f3d4751366a2ca5ba46579e6a5fa953..9b5875388c23c4f3306124697fd291c40fb6e6cd 100644 (file)
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1304,6 +1304,7 @@ void netdev_notify_peers(struct net_device *dev)
  {
         rtnl_lock();
         call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
+       call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev);
         rtnl_unlock();
  }
  EXPORT_SYMBOL(netdev_notify_peers);
@@ -2449,6 +2450,9 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
  {
         unsigned long flags;
  
+       if (unlikely(!skb))
+               return;
+
         if (likely(atomic_read(&skb->users) == 1)) {
                 smp_rmb();
                 atomic_set(&skb->users, 0);
@@ -6756,7 +6760,6 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
  
         return err;
  }
-EXPORT_SYMBOL(dev_change_xdp_fd);
  
  /**
   *     dev_new_index   -       allocate an ifindex
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c

index c35aae13c8d22680cb07222cbd9f1ee976f0bd64..d98d4998213da6103665d62d5a85613631236f19 100644 (file)
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -390,7 +390,7 @@ mpls:
                         unsigned char ar_tip[4];
                 } *arp_eth, _arp_eth;
                 const struct arphdr *arp;
-               struct arphdr *_arp;
+               struct arphdr _arp;
  
                 arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data,
                                            hlen, &_arp);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c

index e7c12caa20c88acc9a5dd86f07d11644fb58341d..4526cbd7e28a1fcdecfc06a41985fd4d19634457 100644 (file)
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -860,7 +860,8 @@ static void neigh_probe(struct neighbour *neigh)
         if (skb)
                 skb = skb_clone(skb, GFP_ATOMIC);
         write_unlock(&neigh->lock);
-       neigh->ops->solicit(neigh, skb);
+       if (neigh->ops->solicit)
+               neigh->ops->solicit(neigh, skb);
         atomic_inc(&neigh->probes);
         kfree_skb(skb);
  }
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c

index 3945821e9c1f8f8c33290e55d33aba28ff68a9cd..65ea0ff4017c166fea648f3ef3db57966f44aa66 100644 (file)
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -953,7 +953,7 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
         while (--i >= new_num) {
                 struct kobject *kobj = &dev->_rx[i].kobj;
  
-               if (!list_empty(&dev_net(dev)->exit_list))
+               if (!atomic_read(&dev_net(dev)->count))
                         kobj->uevent_suppress = 1;
                 if (dev->sysfs_rx_queue_group)
                         sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
@@ -1371,7 +1371,7 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
         while (--i >= new_num) {
                 struct netdev_queue *queue = dev->_tx + i;
  
-               if (!list_empty(&dev_net(dev)->exit_list))
+               if (!atomic_read(&dev_net(dev)->count))
                         queue->kobj.uevent_suppress = 1;
  #ifdef CONFIG_BQL
                 sysfs_remove_group(&queue->kobj, &dql_group);
@@ -1558,7 +1558,7 @@ void netdev_unregister_kobject(struct net_device *ndev)
  {
         struct device *dev = &(ndev->dev);
  
-       if (!list_empty(&dev_net(ndev)->exit_list))
+       if (!atomic_read(&dev_net(ndev)->count))
                 dev_set_uevent_suppress(dev, 1);
  
         kobject_get(&dev->kobj);
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c

index 6ae56037bb1336d9cb6b6fc36043a203f3978202..029a61ac6cdd8a0b4dd54d2be3c5bdf047a82cb0 100644 (file)
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -71,27 +71,17 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n)
         return 0;
  }
  
-static void update_classid(struct cgroup_subsys_state *css, void *v)
+static void cgrp_attach(struct cgroup_taskset *tset)
  {
-       struct css_task_iter it;
+       struct cgroup_subsys_state *css;
         struct task_struct *p;
  
-       css_task_iter_start(css, &it);
-       while ((p = css_task_iter_next(&it))) {
+       cgroup_taskset_for_each(p, css, tset) {
                 task_lock(p);
-               iterate_fd(p->files, 0, update_classid_sock, v);
+               iterate_fd(p->files, 0, update_classid_sock,
+                          (void *)(unsigned long)css_cls_state(css)->classid);
                 task_unlock(p);
         }
-       css_task_iter_end(&it);
-}
-
-static void cgrp_attach(struct cgroup_taskset *tset)
-{
-       struct cgroup_subsys_state *css;
-
-       cgroup_taskset_first(tset, &css);
-       update_classid(css,
-                      (void *)(unsigned long)css_cls_state(css)->classid);
  }
  
  static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
@@ -103,12 +93,22 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
                          u64 value)
  {
         struct cgroup_cls_state *cs = css_cls_state(css);
+       struct css_task_iter it;
+       struct task_struct *p;
  
         cgroup_sk_alloc_disable();
  
         cs->classid = (u32)value;
  
-       update_classid(css, (void *)(unsigned long)cs->classid);
+       css_task_iter_start(css, &it);
+       while ((p = css_task_iter_next(&it))) {
+               task_lock(p);
+               iterate_fd(p->files, 0, update_classid_sock,
+                          (void *)(unsigned long)cs->classid);
+               task_unlock(p);
+       }
+       css_task_iter_end(&it);
+
         return 0;
  }
  
diff --git a/net/core/netpoll.c b/net/core/netpoll.c

index 9424673009c14e0fb288b8e4041dba596b37ee8d..29be2466970cd670daa7a8abdd54929c9af39026 100644 (file)
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -105,15 +105,21 @@ static void queue_process(struct work_struct *work)
         while ((skb = skb_dequeue(&npinfo->txq))) {
                 struct net_device *dev = skb->dev;
                 struct netdev_queue *txq;
+               unsigned int q_index;
  
                 if (!netif_device_present(dev) || !netif_running(dev)) {
                         kfree_skb(skb);
                         continue;
                 }
  
-               txq = skb_get_tx_queue(dev, skb);
-
                 local_irq_save(flags);
+               /* check if skb->queue_mapping is still valid */
+               q_index = skb_get_queue_mapping(skb);
+               if (unlikely(q_index >= dev->real_num_tx_queues)) {
+                       q_index = q_index % dev->real_num_tx_queues;
+                       skb_set_queue_mapping(skb, q_index);
+               }
+               txq = netdev_get_tx_queue(dev, q_index);
                 HARD_TX_LOCK(dev, txq, smp_processor_id());
                 if (netif_xmit_frozen_or_stopped(txq) ||
                     netpoll_start_xmit(skb, dev, txq) != NETDEV_TX_OK) {
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c

index 758f140b6bedc51669fed973b39ee317c2bf1570..d28da7d363f170f35d88623e2b864f04a67c3de5 100644 (file)
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -20,9 +20,11 @@
  #include <net/tcp.h>
  
  static siphash_key_t net_secret __read_mostly;
+static siphash_key_t ts_secret __read_mostly;
  
  static __always_inline void net_secret_init(void)
  {
+       net_get_random_once(&ts_secret, sizeof(ts_secret));
         net_get_random_once(&net_secret, sizeof(net_secret));
  }
  #endif
@@ -45,6 +47,23 @@ static u32 seq_scale(u32 seq)
  #endif
  
  #if IS_ENABLED(CONFIG_IPV6)
+static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr)
+{
+       const struct {
+               struct in6_addr saddr;
+               struct in6_addr daddr;
+       } __aligned(SIPHASH_ALIGNMENT) combined = {
+               .saddr = *(struct in6_addr *)saddr,
+               .daddr = *(struct in6_addr *)daddr,
+       };
+
+       if (sysctl_tcp_timestamps != 1)
+               return 0;
+
+       return siphash(&combined, offsetofend(typeof(combined), daddr),
+                      &ts_secret);
+}
+
  u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
                                  __be16 sport, __be16 dport, u32 *tsoff)
  {
@@ -63,7 +82,7 @@ u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
         net_secret_init();
         hash = siphash(&combined, offsetofend(typeof(combined), dport),
                        &net_secret);
-       *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0;
+       *tsoff = secure_tcpv6_ts_off(saddr, daddr);
         return seq_scale(hash);
  }
  EXPORT_SYMBOL(secure_tcpv6_sequence_number);
@@ -88,6 +107,14 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
  #endif
  
  #ifdef CONFIG_INET
+static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr)
+{
+       if (sysctl_tcp_timestamps != 1)
+               return 0;
+
+       return siphash_2u32((__force u32)saddr, (__force u32)daddr,
+                           &ts_secret);
+}
  
  /* secure_tcp_sequence_number(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d),
   * but fortunately, `sport' cannot be 0 in any circumstances. If this changes,
@@ -103,7 +130,7 @@ u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
         hash = siphash_3u32((__force u32)saddr, (__force u32)daddr,
                             (__force u32)sport << 16 | (__force u32)dport,
                             &net_secret);
-       *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0;
+       *tsoff = secure_tcp_ts_off(saddr, daddr);
         return seq_scale(hash);
  }
  
diff --git a/net/core/skbuff.c b/net/core/skbuff.c

index f3557958e9bf147631a90b51fef0630920acd97b..f1d04592ace02f32efa6e05df89c9a5e0023157f 100644 (file)
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1576,6 +1576,8 @@ done:
                 skb_set_tail_pointer(skb, len);
         }
  
+       if (!skb->sk || skb->destructor == sock_edemux)
+               skb_condense(skb);
         return 0;
  }
  EXPORT_SYMBOL(___pskb_trim);
@@ -3082,22 +3084,32 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
         if (sg && csum && (mss != GSO_BY_FRAGS))  {
                 if (!(features & NETIF_F_GSO_PARTIAL)) {
                         struct sk_buff *iter;
+                       unsigned int frag_len;
  
                         if (!list_skb ||
                             !net_gso_ok(features, skb_shinfo(head_skb)->gso_type))
                                 goto normal;
  
-                       /* Split the buffer at the frag_list pointer.
-                        * This is based on the assumption that all
-                        * buffers in the chain excluding the last
-                        * containing the same amount of data.
+                       /* If we get here then all the required
+                        * GSO features except frag_list are supported.
+                        * Try to split the SKB to multiple GSO SKBs
+                        * with no frag_list.
+                        * Currently we can do that only when the buffers don't
+                        * have a linear part and all the buffers except
+                        * the last are of the same length.
                          */
+                       frag_len = list_skb->len;
                         skb_walk_frags(head_skb, iter) {
+                               if (frag_len != iter->len && iter->next)
+                                       goto normal;
                                 if (skb_headlen(iter))
                                         goto normal;
  
                                 len -= iter->len;
                         }
+
+                       if (len != frag_len)
+                               goto normal;
                 }
  
                 /* GSO partial only requires that we trim off any excess that
@@ -3694,6 +3706,15 @@ static void sock_rmem_free(struct sk_buff *skb)
         atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
  }
  
+static void skb_set_err_queue(struct sk_buff *skb)
+{
+       /* pkt_type of skbs received on local sockets is never PACKET_OUTGOING.
+        * So, it is safe to (mis)use it to mark skbs on the error queue.
+        */
+       skb->pkt_type = PACKET_OUTGOING;
+       BUILD_BUG_ON(PACKET_OUTGOING == 0);
+}
+
  /*
   * Note: We dont mem charge error packets (no sk_forward_alloc changes)
   */
@@ -3707,6 +3728,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
         skb->sk = sk;
         skb->destructor = sock_rmem_free;
         atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+       skb_set_err_queue(skb);
  
         /* before exiting rcu section, make sure dst is refcounted */
         skb_dst_force(skb);
@@ -3783,16 +3805,21 @@ EXPORT_SYMBOL(skb_clone_sk);
  
  static void __skb_complete_tx_timestamp(struct sk_buff *skb,
                                         struct sock *sk,
-                                       int tstype)
+                                       int tstype,
+                                       bool opt_stats)
  {
         struct sock_exterr_skb *serr;
         int err;
  
+       BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb));
+
         serr = SKB_EXT_ERR(skb);
         memset(serr, 0, sizeof(*serr));
         serr->ee.ee_errno = ENOMSG;
         serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
         serr->ee.ee_info = tstype;
+       serr->opt_stats = opt_stats;
+       serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
         if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
                 serr->ee.ee_data = skb_shinfo(skb)->tskey;
                 if (sk->sk_protocol == IPPROTO_TCP &&
@@ -3828,13 +3855,14 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
         if (!skb_may_tx_timestamp(sk, false))
                 return;
  
-       /* take a reference to prevent skb_orphan() from freeing the socket */
-       sock_hold(sk);
-
-       *skb_hwtstamps(skb) = *hwtstamps;
-       __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
-
-       sock_put(sk);
+       /* Take a reference to prevent skb_orphan() from freeing the socket,
+        * but only if the socket refcount is not zero.
+        */
+       if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
+               *skb_hwtstamps(skb) = *hwtstamps;
+               __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false);
+               sock_put(sk);
+       }
  }
  EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
  
@@ -3843,7 +3871,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
                      struct sock *sk, int tstype)
  {
         struct sk_buff *skb;
-       bool tsonly;
+       bool tsonly, opt_stats = false;
  
         if (!sk)
                 return;
@@ -3856,9 +3884,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
  #ifdef CONFIG_INET
                 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
                     sk->sk_protocol == IPPROTO_TCP &&
-                   sk->sk_type == SOCK_STREAM)
+                   sk->sk_type == SOCK_STREAM) {
                         skb = tcp_get_timestamping_opt_stats(sk);
-               else
+                       opt_stats = true;
+               } else
  #endif
                         skb = alloc_skb(0, GFP_ATOMIC);
         } else {
@@ -3877,7 +3906,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
         else
                 skb->tstamp = ktime_get_real();
  
-       __skb_complete_tx_timestamp(skb, sk, tstype);
+       __skb_complete_tx_timestamp(skb, sk, tstype, opt_stats);
  }
  EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
  
@@ -3893,7 +3922,7 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
  {
         struct sock *sk = skb->sk;
         struct sock_exterr_skb *serr;
-       int err;
+       int err = 1;
  
         skb->wifi_acked_valid = 1;
         skb->wifi_acked = acked;
@@ -3903,14 +3932,15 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
         serr->ee.ee_errno = ENOMSG;
         serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;
  
-       /* take a reference to prevent skb_orphan() from freeing the socket */
-       sock_hold(sk);
-
-       err = sock_queue_err_skb(sk, skb);
+       /* Take a reference to prevent skb_orphan() from freeing the socket,
+        * but only if the socket refcount is not zero.
+        */
+       if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
+               err = sock_queue_err_skb(sk, skb);
+               sock_put(sk);
+       }
         if (err)
                 kfree_skb(skb);
-
-       sock_put(sk);
  }
  EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
  
diff --git a/net/core/sock.c b/net/core/sock.c

index f6fd79f33097f3fa279fcb0b610286259af9b111..2c4f574168fbdcebc0cc82c0c8a36214992d6224 100644 (file)
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -197,66 +197,55 @@ EXPORT_SYMBOL(sk_net_capable);
  
  /*
   * Each address family might have different locking rules, so we have
- * one slock key per address family:
+ * one slock key per address family and separate keys for internal and
+ * userspace sockets.
   */
  static struct lock_class_key af_family_keys[AF_MAX];
+static struct lock_class_key af_family_kern_keys[AF_MAX];
  static struct lock_class_key af_family_slock_keys[AF_MAX];
+static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
  
  /*
   * Make lock validator output more readable. (we pre-construct these
   * strings build-time, so that runtime initialization of socket
   * locks is fast):
   */
+
+#define _sock_locks(x)                                           \
+  x "AF_UNSPEC",       x "AF_UNIX"     ,       x "AF_INET"     , \
+  x "AF_AX25"  ,       x "AF_IPX"      ,       x "AF_APPLETALK", \
+  x "AF_NETROM",       x "AF_BRIDGE"   ,       x "AF_ATMPVC"   , \
+  x "AF_X25"   ,       x "AF_INET6"    ,       x "AF_ROSE"     , \
+  x "AF_DECnet",       x "AF_NETBEUI"  ,       x "AF_SECURITY" , \
+  x "AF_KEY"   ,       x "AF_NETLINK"  ,       x "AF_PACKET"   , \
+  x "AF_ASH"   ,       x "AF_ECONET"   ,       x "AF_ATMSVC"   , \
+  x "AF_RDS"   ,       x "AF_SNA"      ,       x "AF_IRDA"     , \
+  x "AF_PPPOX" ,       x "AF_WANPIPE"  ,       x "AF_LLC"      , \
+  x "27"       ,       x "28"          ,       x "AF_CAN"      , \
+  x "AF_TIPC"  ,       x "AF_BLUETOOTH",       x "IUCV"        , \
+  x "AF_RXRPC" ,       x "AF_ISDN"     ,       x "AF_PHONET"   , \
+  x "AF_IEEE802154",   x "AF_CAIF"     ,       x "AF_ALG"      , \
+  x "AF_NFC"   ,       x "AF_VSOCK"    ,       x "AF_KCM"      , \
+  x "AF_QIPCRTR",      x "AF_SMC"      ,       x "AF_MAX"
+
  static const char *const af_family_key_strings[AF_MAX+1] = {
-  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
-  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
-  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
-  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
-  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
-  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
-  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
-  "sk_lock-AF_RDS"   , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
-  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
-  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
-  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
-  "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
-  "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG"      ,
-  "sk_lock-AF_NFC"   , "sk_lock-AF_VSOCK"    , "sk_lock-AF_KCM"      ,
-  "sk_lock-AF_QIPCRTR", "sk_lock-AF_SMC"     , "sk_lock-AF_MAX"
+       _sock_locks("sk_lock-")
  };
  static const char *const af_family_slock_key_strings[AF_MAX+1] = {
-  "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
-  "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
-  "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
-  "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
-  "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
-  "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
-  "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
-  "slock-AF_RDS"   , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
-  "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
-  "slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
-  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
-  "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
-  "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG"      ,
-  "slock-AF_NFC"   , "slock-AF_VSOCK"    ,"slock-AF_KCM"       ,
-  "slock-AF_QIPCRTR", "slock-AF_SMC"     , "slock-AF_MAX"
+       _sock_locks("slock-")
  };
  static const char *const af_family_clock_key_strings[AF_MAX+1] = {
-  "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
-  "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
-  "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
-  "clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
-  "clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
-  "clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
-  "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
-  "clock-AF_RDS"   , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
-  "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
-  "clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
-  "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
-  "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
-  "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG"      ,
-  "clock-AF_NFC"   , "clock-AF_VSOCK"    , "clock-AF_KCM"      ,
-  "clock-AF_QIPCRTR", "clock-AF_SMC"     , "clock-AF_MAX"
+       _sock_locks("clock-")
+};
+
+static const char *const af_family_kern_key_strings[AF_MAX+1] = {
+       _sock_locks("k-sk_lock-")
+};
+static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
+       _sock_locks("k-slock-")
+};
+static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
+       _sock_locks("k-clock-")
  };
  
  /*
@@ -264,6 +253,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
   * so split the lock classes by using a per-AF key:
   */
  static struct lock_class_key af_callback_keys[AF_MAX];
+static struct lock_class_key af_kern_callback_keys[AF_MAX];
  
  /* Take into consideration the size of the struct sk_buff overhead in the
   * determination of these values, since that is non-constant across
@@ -1293,7 +1283,16 @@ lenout:
   */
  static inline void sock_lock_init(struct sock *sk)
  {
-       sock_lock_init_class_and_name(sk,
+       if (sk->sk_kern_sock)
+               sock_lock_init_class_and_name(
+                       sk,
+                       af_family_kern_slock_key_strings[sk->sk_family],
+                       af_family_kern_slock_keys + sk->sk_family,
+                       af_family_kern_key_strings[sk->sk_family],
+                       af_family_kern_keys + sk->sk_family);
+       else
+               sock_lock_init_class_and_name(
+                       sk,
                         af_family_slock_key_strings[sk->sk_family],
                         af_family_slock_keys + sk->sk_family,
                         af_family_key_strings[sk->sk_family],
@@ -1399,6 +1398,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
                  * why we need sk_prot_creator -acme
                  */
                 sk->sk_prot = sk->sk_prot_creator = prot;
+               sk->sk_kern_sock = kern;
                 sock_lock_init(sk);
                 sk->sk_net_refcnt = kern ? 0 : 1;
                 if (likely(sk->sk_net_refcnt))
@@ -1442,6 +1442,11 @@ static void __sk_destruct(struct rcu_head *head)
                 pr_debug("%s: optmem leakage (%d bytes) detected\n",
                          __func__, atomic_read(&sk->sk_omem_alloc));
  
+       if (sk->sk_frag.page) {
+               put_page(sk->sk_frag.page);
+               sk->sk_frag.page = NULL;
+       }
+
         if (sk->sk_peer_cred)
                 put_cred(sk->sk_peer_cred);
         put_pid(sk->sk_peer_pid);
@@ -1539,6 +1544,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
                         is_charged = sk_filter_charge(newsk, filter);
  
                 if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
+                       /* We need to make sure that we don't uncharge the new
+                        * socket if we couldn't charge it in the first place
+                        * as otherwise we uncharge the parent's filter.
+                        */
+                       if (!is_charged)
+                               RCU_INIT_POINTER(newsk->sk_filter, NULL);
                         sk_free_unlock_clone(newsk);
                         newsk = NULL;
                         goto out;
@@ -2277,7 +2288,8 @@ int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
  }
  EXPORT_SYMBOL(sock_no_socketpair);
  
-int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
+int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
+                  bool kern)
  {
         return -EOPNOTSUPP;
  }
@@ -2481,7 +2493,14 @@ void sock_init_data(struct socket *sock, struct sock *sk)
         }
  
         rwlock_init(&sk->sk_callback_lock);
-       lockdep_set_class_and_name(&sk->sk_callback_lock,
+       if (sk->sk_kern_sock)
+               lockdep_set_class_and_name(
+                       &sk->sk_callback_lock,
+                       af_kern_callback_keys + sk->sk_family,
+                       af_family_kern_clock_key_strings[sk->sk_family]);
+       else
+               lockdep_set_class_and_name(
+                       &sk->sk_callback_lock,
                         af_callback_keys + sk->sk_family,
                         af_family_clock_key_strings[sk->sk_family]);
  
@@ -2779,11 +2798,6 @@ void sk_common_release(struct sock *sk)
  
         sk_refcnt_debug_release(sk);
  
-       if (sk->sk_frag.page) {
-               put_page(sk->sk_frag.page);
-               sk->sk_frag.page = NULL;
-       }
-
         sock_put(sk);
  }
  EXPORT_SYMBOL(sk_common_release);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c

index 4ead336e14ea0b8fc5fdcf8e679da54dfca0716b..7f9cc400eca08c01c9014476aa4daf0852505b20 100644 (file)
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -408,14 +408,16 @@ static struct ctl_table net_core_table[] = {
                 .data           = &sysctl_net_busy_poll,
                 .maxlen         = sizeof(unsigned int),
                 .mode           = 0644,
-               .proc_handler   = proc_dointvec
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
         },
         {
                 .procname       = "busy_read",
                 .data           = &sysctl_net_busy_read,
                 .maxlen         = sizeof(unsigned int),
                 .mode           = 0644,
-               .proc_handler   = proc_dointvec
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
         },
  #endif
  #ifdef CONFIG_NET_SCHED
diff --git a/net/core/utils.c b/net/core/utils.c

index 6592d7bbed394086a8ba8efcb370fb1d75db4449..32c467cf52d65605ef63d63c6c5007636a7f65e5 100644 (file)
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -26,9 +26,11 @@
  #include <linux/percpu.h>
  #include <linux/init.h>
  #include <linux/ratelimit.h>
+#include <linux/socket.h>
  
  #include <net/sock.h>
  #include <net/net_ratelimit.h>
+#include <net/ipv6.h>
  
  #include <asm/byteorder.h>
  #include <linux/uaccess.h>
@@ -300,6 +302,107 @@ out:
  }
  EXPORT_SYMBOL(in6_pton);
  
+static int inet4_pton(const char *src, u16 port_num,
+               struct sockaddr_storage *addr)
+{
+       struct sockaddr_in *addr4 = (struct sockaddr_in *)addr;
+       int srclen = strlen(src);
+
+       if (srclen > INET_ADDRSTRLEN)
+               return -EINVAL;
+
+       if (in4_pton(src, srclen, (u8 *)&addr4->sin_addr.s_addr,
+                    '\n', NULL) == 0)
+               return -EINVAL;
+
+       addr4->sin_family = AF_INET;
+       addr4->sin_port = htons(port_num);
+
+       return 0;
+}
+
+static int inet6_pton(struct net *net, const char *src, u16 port_num,
+               struct sockaddr_storage *addr)
+{
+       struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr;
+       const char *scope_delim;
+       int srclen = strlen(src);
+
+       if (srclen > INET6_ADDRSTRLEN)
+               return -EINVAL;
+
+       if (in6_pton(src, srclen, (u8 *)&addr6->sin6_addr.s6_addr,
+                    '%', &scope_delim) == 0)
+               return -EINVAL;
+
+       if (ipv6_addr_type(&addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL &&
+           src + srclen != scope_delim && *scope_delim == '%') {
+               struct net_device *dev;
+               char scope_id[16];
+               size_t scope_len = min_t(size_t, sizeof(scope_id) - 1,
+                                        src + srclen - scope_delim - 1);
+
+               memcpy(scope_id, scope_delim + 1, scope_len);
+               scope_id[scope_len] = '\0';
+
+               dev = dev_get_by_name(net, scope_id);
+               if (dev) {
+                       addr6->sin6_scope_id = dev->ifindex;
+                       dev_put(dev);
+               } else if (kstrtouint(scope_id, 0, &addr6->sin6_scope_id)) {
+                       return -EINVAL;
+               }
+       }
+
+       addr6->sin6_family = AF_INET6;
+       addr6->sin6_port = htons(port_num);
+
+       return 0;
+}
+
+/**
+ * inet_pton_with_scope - convert an IPv4/IPv6 and port to socket address
+ * @net: net namespace (used for scope handling)
+ * @af: address family, AF_INET, AF_INET6 or AF_UNSPEC for either
+ * @src: the start of the address string
+ * @port: the start of the port string (or NULL for none)
+ * @addr: output socket address
+ *
+ * Return zero on success, return errno when any error occurs.
+ */
+int inet_pton_with_scope(struct net *net, __kernel_sa_family_t af,
+               const char *src, const char *port, struct sockaddr_storage *addr)
+{
+       u16 port_num;
+       int ret = -EINVAL;
+
+       if (port) {
+               if (kstrtou16(port, 0, &port_num))
+                       return -EINVAL;
+       } else {
+               port_num = 0;
+       }
+
+       switch (af) {
+       case AF_INET:
+               ret = inet4_pton(src, port_num, addr);
+               break;
+       case AF_INET6:
+               ret = inet6_pton(net, src, port_num, addr);
+               break;
+       case AF_UNSPEC:
+               ret = inet4_pton(src, port_num, addr);
+               if (ret)
+                       ret = inet6_pton(net, src, port_num, addr);
+               break;
+       default:
+               pr_err("unexpected address family %d\n", af);
+       };
+
+       return ret;
+}
+EXPORT_SYMBOL(inet_pton_with_scope);
+
  void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
                               __be32 from, __be32 to, bool pseudohdr)
  {
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c

index f053198e730c48c7ea8114706c3d4904228f41fb..5e3a7302f7747e4c4f3134eacab2f2c65b13402f 100644 (file)
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -749,6 +749,7 @@ static void ccid2_hc_tx_exit(struct sock *sk)
         for (i = 0; i < hc->tx_seqbufc; i++)
                 kfree(hc->tx_seqbuf[i]);
         hc->tx_seqbufc = 0;
+       dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
  }
  
  static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c

index 409d0cfd34474812c3bf74f26cd423a3d65ee441..b99168b0fabf2a8c65defdd0b93d362630774e1a 100644 (file)
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -289,7 +289,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
  
         switch (type) {
         case ICMP_REDIRECT:
-               dccp_do_redirect(skb, sk);
+               if (!sock_owned_by_user(sk))
+                       dccp_do_redirect(skb, sk);
                 goto out;
         case ICMP_SOURCE_QUENCH:
                 /* Just silently ignore these. */
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c

index 233b57367758c64c09ed40f7359cb8fcb1918d93..d9b6a4e403e701fd9b9ecf92bac496e45570054e 100644 (file)
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -122,10 +122,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
         np = inet6_sk(sk);
  
         if (type == NDISC_REDIRECT) {
-               struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
+               if (!sock_owned_by_user(sk)) {
+                       struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
  
-               if (dst)
-                       dst->ops->redirect(dst, sk, skb);
+                       if (dst)
+                               dst->ops->redirect(dst, sk, skb);
+               }
                 goto out;
         }
  
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c

index e267e6f4c9a5566b369a03a600a408e5bd41cbad..abd07a443219853b022bef41cb072e90ff8f07f0 100644 (file)
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -142,6 +142,13 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
         struct dccp_request_sock *dreq = dccp_rsk(req);
         bool own_req;
  
+       /* TCP/DCCP listeners became lockless.
+        * DCCP stores complex state in its request_sock, so we need
+        * a protection for them, now this code runs without being protected
+        * by the parent (listener) lock.
+        */
+       spin_lock_bh(&dreq->dreq_lock);
+
         /* Check for retransmitted REQUEST */
         if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
  
@@ -156,7 +163,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
                         inet_rtx_syn_ack(sk, req);
                 }
                 /* Network Duplicate, discard packet */
-               return NULL;
+               goto out;
         }
  
         DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
@@ -182,20 +189,20 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
  
         child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
                                                          req, &own_req);
-       if (!child)
-               goto listen_overflow;
-
-       return inet_csk_complete_hashdance(sk, child, req, own_req);
+       if (child) {
+               child = inet_csk_complete_hashdance(sk, child, req, own_req);
+               goto out;
+       }
  
-listen_overflow:
-       dccp_pr_debug("listen_overflow!\n");
         DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
  drop:
         if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
                 req->rsk_ops->send_reset(sk, skb);
  
         inet_csk_reqsk_queue_drop(sk, req);
-       return NULL;
+out:
+       spin_unlock_bh(&dreq->dreq_lock);
+       return child;
  }
  
  EXPORT_SYMBOL_GPL(dccp_check_req);
@@ -246,6 +253,7 @@ int dccp_reqsk_init(struct request_sock *req,
  {
         struct dccp_request_sock *dreq = dccp_rsk(req);
  
+       spin_lock_init(&dreq->dreq_lock);
         inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport;
         inet_rsk(req)->ir_num      = ntohs(dccp_hdr(skb)->dccph_dport);
         inet_rsk(req)->acked       = 0;
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c

index e6e79eda97636df6f6e0e6e914405381c3efeaaa..7de5b40a5d0d1245ad995877f779e0d87d1cf398 100644 (file)
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1070,7 +1070,8 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo)
         return skb == NULL ? ERR_PTR(err) : skb;
  }
  
-static int dn_accept(struct socket *sock, struct socket *newsock, int flags)
+static int dn_accept(struct socket *sock, struct socket *newsock, int flags,
+                    bool kern)
  {
         struct sock *sk = sock->sk, *newsk;
         struct sk_buff *skb = NULL;
@@ -1099,7 +1100,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags)
  
         cb = DN_SKB_CB(skb);
         sk->sk_ack_backlog--;
-       newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, 0);
+       newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, kern);
         if (newsk == NULL) {
                 release_sock(sk);
                 kfree_skb(skb);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c

index 602d40f43687c91db7250822439bacbe85318fa3..13a9a3297eae3ac48a77214e9365657202d44f08 100644 (file)
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -689,11 +689,12 @@ EXPORT_SYMBOL(inet_stream_connect);
   *     Accept a pending connection. The TCP layer now gives BSD semantics.
   */
  
-int inet_accept(struct socket *sock, struct socket *newsock, int flags)
+int inet_accept(struct socket *sock, struct socket *newsock, int flags,
+               bool kern)
  {
         struct sock *sk1 = sock->sk;
         int err = -EINVAL;
-       struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err);
+       struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err, kern);
  
         if (!sk2)
                 goto do_err;
@@ -1342,6 +1343,9 @@ struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb)
         if (*(u8 *)iph != 0x45)
                 goto out_unlock;
  
+       if (ip_is_fragment(iph))
+               goto out_unlock;
+
         if (unlikely(ip_fast_csum((u8 *)iph, 5)))
                 goto out_unlock;
  
@@ -1487,8 +1491,10 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff)
         int proto = iph->protocol;
         int err = -ENOSYS;
  
-       if (skb->encapsulation)
+       if (skb->encapsulation) {
+               skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IP));
                 skb_set_inner_network_header(skb, nhoff);
+       }
  
         csum_replace2(&iph->check, iph->tot_len, newlen);
         iph->tot_len = newlen;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c

index 42bfd08109dd78ab509493e8d2205d72845bb3eb..8f2133ffc2ff1b94871408a5f934cb938d3462b5 100644 (file)
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1083,7 +1083,8 @@ static void nl_fib_input(struct sk_buff *skb)
  
         net = sock_net(skb->sk);
         nlh = nlmsg_hdr(skb);
-       if (skb->len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len ||
+       if (skb->len < nlmsg_total_size(sizeof(*frn)) ||
+           skb->len < nlh->nlmsg_len ||
             nlmsg_len(nlh) < sizeof(*frn))
                 return;
  
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c

index b4d5980ade3b584c444d0f0c6523f03a2f71f884..5e313c1ac94fc88eca5fe3a0e9e46e551e955ff0 100644 (file)
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -424,7 +424,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
  /*
   * This will accept the next outstanding connection.
   */
-struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
+struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
  {
         struct inet_connection_sock *icsk = inet_csk(sk);
         struct request_sock_queue *queue = &icsk->icsk_accept_queue;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c

index bbe7f72db9c157ba2d6c5292637c2f58ad39a123..b3cdeec85f1f2c612c362590e828f50596a5c247 100644 (file)
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -198,6 +198,7 @@ static void ip_expire(unsigned long arg)
         qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
         net = container_of(qp->q.net, struct net, ipv4.frags);
  
+       rcu_read_lock();
         spin_lock(&qp->q.lock);
  
         if (qp->q.flags & INET_FRAG_COMPLETE)
@@ -207,7 +208,7 @@ static void ip_expire(unsigned long arg)
         __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
  
         if (!inet_frag_evicting(&qp->q)) {
-               struct sk_buff *head = qp->q.fragments;
+               struct sk_buff *clone, *head = qp->q.fragments;
                 const struct iphdr *iph;
                 int err;
  
@@ -216,32 +217,40 @@ static void ip_expire(unsigned long arg)
                 if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
                         goto out;
  
-               rcu_read_lock();
                 head->dev = dev_get_by_index_rcu(net, qp->iif);
                 if (!head->dev)
-                       goto out_rcu_unlock;
+                       goto out;
+
  
                 /* skb has no dst, perform route lookup again */
                 iph = ip_hdr(head);
                 err = ip_route_input_noref(head, iph->daddr, iph->saddr,
                                            iph->tos, head->dev);
                 if (err)
-                       goto out_rcu_unlock;
+                       goto out;
  
                 /* Only an end host needs to send an ICMP
                  * "Fragment Reassembly Timeout" message, per RFC792.
                  */
                 if (frag_expire_skip_icmp(qp->user) &&
                     (skb_rtable(head)->rt_type != RTN_LOCAL))
-                       goto out_rcu_unlock;
+                       goto out;
+
+               clone = skb_clone(head, GFP_ATOMIC);
  
                 /* Send an ICMP "Fragment Reassembly Timeout" message. */
-               icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
-out_rcu_unlock:
-               rcu_read_unlock();
+               if (clone) {
+                       spin_unlock(&qp->q.lock);
+                       icmp_send(clone, ICMP_TIME_EXCEEDED,
+                                 ICMP_EXC_FRAGTIME, 0);
+                       consume_skb(clone);
+                       goto out_rcu_unlock;
+               }
         }
  out:
         spin_unlock(&qp->q.lock);
+out_rcu_unlock:
+       rcu_read_unlock();
         ipq_put(qp);
  }
  
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c

index 737ce826d7ecfa040d07d7f8e8d6dedd01ca7330..7a3fd25e8913a99d0fcbb256bc9001f6f1d4dd6f 100644 (file)
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -966,7 +966,7 @@ static int __ip_append_data(struct sock *sk,
         cork->length += length;
         if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) &&
             (sk->sk_protocol == IPPROTO_UDP) &&
-           (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
+           (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
             (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
                 err = ip_ufo_append_data(sk, queue, getfrag, from, length,
                                          hh_len, fragheaderlen, transhdrlen,
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c

index ebd953bc5607f3b25fffddcb26a5c65e5490cb2b..1d46d05efb0ff067c35750ac43be8e7babd60446 100644 (file)
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -488,16 +488,15 @@ static bool ipv4_datagram_support_cmsg(const struct sock *sk,
                 return false;
  
         /* Support IP_PKTINFO on tstamp packets if requested, to correlate
-        * timestamp with egress dev. Not possible for packets without dev
+        * timestamp with egress dev. Not possible for packets without iif
          * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
          */
-       if ((!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) ||
-           (!skb->dev))
+       info = PKTINFO_SKB_CB(skb);
+       if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
+           !info->ipi_ifindex)
                 return false;
  
-       info = PKTINFO_SKB_CB(skb);
         info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
-       info->ipi_ifindex = skb->dev->ifindex;
         return true;
  }
  
@@ -591,6 +590,7 @@ static bool setsockopt_needs_rtnl(int optname)
         case MCAST_LEAVE_GROUP:
         case MCAST_LEAVE_SOURCE_GROUP:
         case MCAST_UNBLOCK_SOURCE:
+       case IP_ROUTER_ALERT:
                 return true;
         }
         return false;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c

index fd9f34bbd7408a0e9b0342ec6512c69cc30edc39..dfb2ab2dd3c84d93b8d77df41d1160d26f162fc3 100644 (file)
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -306,7 +306,7 @@ static void __init ic_close_devs(void)
         while ((d = next)) {
                 next = d->next;
                 dev = d->dev;
-               if ((!ic_dev || dev != ic_dev->dev) && !netdev_uses_dsa(dev)) {
+               if (d != ic_dev && !netdev_uses_dsa(dev)) {
                         pr_debug("IP-Config: Downing %s\n", dev->name);
                         dev_change_flags(dev, d->flags);
                 }
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c

index c0317c940bcdc303015f500b52198e0862440e17..b036e85e093b3e97cee1b0dbffc8d1dfeb6a2b72 100644 (file)
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1278,7 +1278,7 @@ static void mrtsock_destruct(struct sock *sk)
         struct net *net = sock_net(sk);
         struct mr_table *mrt;
  
-       rtnl_lock();
+       ASSERT_RTNL();
         ipmr_for_each_table(mrt, net) {
                 if (sk == rtnl_dereference(mrt->mroute_sk)) {
                         IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
@@ -1289,7 +1289,6 @@ static void mrtsock_destruct(struct sock *sk)
                         mroute_clean_tables(mrt, false);
                 }
         }
-       rtnl_unlock();
  }
  
  /* Socket options and virtual interface manipulation. The whole
@@ -1353,13 +1352,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
                 if (sk != rcu_access_pointer(mrt->mroute_sk)) {
                         ret = -EACCES;
                 } else {
-                       /* We need to unlock here because mrtsock_destruct takes
-                        * care of rtnl itself and we can't change that due to
-                        * the IP_ROUTER_ALERT setsockopt which runs without it.
-                        */
-                       rtnl_unlock();
                         ret = ip_ra_control(sk, 0, NULL);
-                       goto out;
+                       goto out_unlock;
                 }
                 break;
         case MRT_ADD_VIF:
@@ -1470,7 +1464,6 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
         }
  out_unlock:
         rtnl_unlock();
-out:
         return ret;
  }
  
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c

index 52f26459efc345a8a0c00d356306fb5fd398547e..9b8841316e7b94e375cc52d0dfd7f9fe89205195 100644 (file)
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -461,7 +461,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
  
         clusterip_config_put(cipinfo->config);
  
-       nf_ct_netns_get(par->net, par->family);
+       nf_ct_netns_put(par->net, par->family);
  }
  
  #ifdef CONFIG_COMPAT
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c

index bc1486f2c0643355ddac067cb79f075cafd788d1..2e14ed11a35cfc83db845e972521b2e8894f97c6 100644 (file)
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -165,6 +165,10 @@ static unsigned int ipv4_conntrack_local(void *priv,
         if (skb->len < sizeof(struct iphdr) ||
             ip_hdrlen(skb) < sizeof(struct iphdr))
                 return NF_ACCEPT;
+
+       if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */
+               return NF_ACCEPT;
+
         return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
  }
  
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c

index f8aad03d674b05008edb5b9883b3a26b2fa7461f..6f5e8d01b876933a68e5f6cf8b2a48f8c4e17262 100644 (file)
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -255,11 +255,6 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
         /* maniptype == SRC for postrouting. */
         enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook);
  
-       /* We never see fragments: conntrack defrags on pre-routing
-        * and local-out, and nf_nat_out protects post-routing.
-        */
-       NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
-
         ct = nf_ct_get(skb, &ctinfo);
         /* Can't track?  It's not due to stress, or conntrack would
          * have dropped it.  Hence it's the user's responsibilty to
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c

index c9b52c361da2e6acc746c2de86d8c7f3af0a9b39..53e49f5011d3ce482c5180edc47da7928db0f224 100644 (file)
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1260,16 +1260,6 @@ static const struct nf_conntrack_expect_policy snmp_exp_policy = {
         .timeout        = 180,
  };
  
-static struct nf_conntrack_helper snmp_helper __read_mostly = {
-       .me                     = THIS_MODULE,
-       .help                   = help,
-       .expect_policy          = &snmp_exp_policy,
-       .name                   = "snmp",
-       .tuple.src.l3num        = AF_INET,
-       .tuple.src.u.udp.port   = cpu_to_be16(SNMP_PORT),
-       .tuple.dst.protonum     = IPPROTO_UDP,
-};
-
  static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
         .me                     = THIS_MODULE,
         .help                   = help,
@@ -1288,22 +1278,16 @@ static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
  
  static int __init nf_nat_snmp_basic_init(void)
  {
-       int ret = 0;
-
         BUG_ON(nf_nat_snmp_hook != NULL);
         RCU_INIT_POINTER(nf_nat_snmp_hook, help);
  
-       ret = nf_conntrack_helper_register(&snmp_trap_helper);
-       if (ret < 0) {
-               nf_conntrack_helper_unregister(&snmp_helper);
-               return ret;
-       }
-       return ret;
+       return nf_conntrack_helper_register(&snmp_trap_helper);
  }
  
  static void __exit nf_nat_snmp_basic_fini(void)
  {
         RCU_INIT_POINTER(nf_nat_snmp_hook, NULL);
+       synchronize_rcu();
         nf_conntrack_helper_unregister(&snmp_trap_helper);
  }
  
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c

index a0ea8aad1bf150bcb9e8e0aa2e6b45a5347599e4..f18677277119305aeea043d81deb4e6ee7d20b7c 100644 (file)
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -26,10 +26,10 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr,
         memset(&range, 0, sizeof(range));
         range.flags = priv->flags;
         if (priv->sreg_proto_min) {
-               range.min_proto.all =
-                       *(__be16 *)&regs->data[priv->sreg_proto_min];
-               range.max_proto.all =
-                       *(__be16 *)&regs->data[priv->sreg_proto_max];
+               range.min_proto.all = (__force __be16)nft_reg_load16(
+                       &regs->data[priv->sreg_proto_min]);
+               range.max_proto.all = (__force __be16)nft_reg_load16(
+                       &regs->data[priv->sreg_proto_max]);
         }
         regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt),
                                                     &range, nft_out(pkt));
diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c

index 1650ed23c15dd00bb8e4bd741dc2d02d6cbf2c4e..5120be1d31185dd5c879419f8889d36ddb363591 100644 (file)
--- a/net/ipv4/netfilter/nft_redir_ipv4.c
+++ b/net/ipv4/netfilter/nft_redir_ipv4.c
@@ -26,10 +26,10 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr,
  
         memset(&mr, 0, sizeof(mr));
         if (priv->sreg_proto_min) {
-               mr.range[0].min.all =
-                       *(__be16 *)&regs->data[priv->sreg_proto_min];
-               mr.range[0].max.all =
-                       *(__be16 *)&regs->data[priv->sreg_proto_max];
+               mr.range[0].min.all = (__force __be16)nft_reg_load16(
+                       &regs->data[priv->sreg_proto_min]);
+               mr.range[0].max.all = (__force __be16)nft_reg_load16(
+                       &regs->data[priv->sreg_proto_max]);
                 mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
         }
  
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c

index 2af6244b83e27ae384e96cf071c10c5a89674804..ccfbce13a6333a65dab64e4847dd510dfafb1b43 100644 (file)
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -156,17 +156,18 @@ int ping_hash(struct sock *sk)
  void ping_unhash(struct sock *sk)
  {
         struct inet_sock *isk = inet_sk(sk);
+
         pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
+       write_lock_bh(&ping_table.lock);
         if (sk_hashed(sk)) {
-               write_lock_bh(&ping_table.lock);
                 hlist_nulls_del(&sk->sk_nulls_node);
                 sk_nulls_node_init(&sk->sk_nulls_node);
                 sock_put(sk);
                 isk->inet_num = 0;
                 isk->inet_sport = 0;
                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
-               write_unlock_bh(&ping_table.lock);
         }
+       write_unlock_bh(&ping_table.lock);
  }
  EXPORT_SYMBOL_GPL(ping_unhash);
  
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c

index 8119e1f66e036ad2a8372bf24dd943c7d9631d8e..9d943974de2b6d91c56b2ae2dee0019883f8f3cf 100644 (file)
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -682,7 +682,9 @@ static void raw_close(struct sock *sk, long timeout)
         /*
          * Raw sockets may have direct kernel references. Kill them.
          */
+       rtnl_lock();
         ip_ra_control(sk, 0, NULL);
+       rtnl_unlock();
  
         sk_common_release(sk);
  }
diff --git a/net/ipv4/route.c b/net/ipv4/route.c

index 8471dd116771462d149e1da2807e446b69b74bcc..d9724889ff09077aa88c98ec3e170dcfdb91d29b 100644 (file)
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2359,7 +2359,8 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
                 }
  
                 /* L3 master device is the loopback for that domain */
-               dev_out = l3mdev_master_dev_rcu(dev_out) ? : net->loopback_dev;
+               dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(res)) ? :
+                       net->loopback_dev;
                 fl4->flowi4_oif = dev_out->ifindex;
                 flags |= RTCF_LOCAL;
                 goto make_route;
@@ -2620,7 +2621,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
         skb_reset_network_header(skb);
  
         /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
-       ip_hdr(skb)->protocol = IPPROTO_ICMP;
+       ip_hdr(skb)->protocol = IPPROTO_UDP;
         skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
  
         src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c

index cf4555581282c608f920254078264e36e18584c6..40ba4249a58677671b68bf495f32f15b7c5f62d7 100644 (file)
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2322,6 +2322,7 @@ int tcp_disconnect(struct sock *sk, int flags)
         tcp_init_send_head(sk);
         memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
         __sk_dst_reset(sk);
+       tcp_saved_syn_free(tp);
  
         /* Clean up fastopen related fields */
         tcp_free_fastopen_req(tp);
@@ -2770,7 +2771,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
  {
         const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
         const struct inet_connection_sock *icsk = inet_csk(sk);
-       u32 now = tcp_time_stamp, intv;
+       u32 now, intv;
         u64 rate64;
         bool slow;
         u32 rate;
@@ -2839,6 +2840,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
         info->tcpi_retrans = tp->retrans_out;
         info->tcpi_fackets = tp->fackets_out;
  
+       now = tcp_time_stamp;
         info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
         info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime);
         info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c

index 79c4817abc94d08265edb2dfa995e3e479148a16..6e3c512054a60715e8e2d16ffedd12cba6a3d2d9 100644 (file)
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -168,12 +168,8 @@ void tcp_assign_congestion_control(struct sock *sk)
         }
  out:
         rcu_read_unlock();
+       memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
  
-       /* Clear out private data before diag gets it and
-        * the ca has not been initialized.
-        */
-       if (ca->get_info)
-               memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
         if (ca->flags & TCP_CONG_NEEDS_ECN)
                 INET_ECN_xmit(sk);
         else
@@ -200,11 +196,10 @@ static void tcp_reinit_congestion_control(struct sock *sk,
         tcp_cleanup_congestion_control(sk);
         icsk->icsk_ca_ops = ca;
         icsk->icsk_ca_setsockopt = 1;
+       memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
  
-       if (sk->sk_state != TCP_CLOSE) {
-               memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
+       if (sk->sk_state != TCP_CLOSE)
                 tcp_init_congestion_control(sk);
-       }
  }
  
  /* Manage refcounts on socket close. */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c

index 39c393cc0fd3c17130cd5d8d8b37f31ad3aeafd9..659d1baefb2bba36d96e412eb7ca5a02996fb6dd 100644 (file)
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -126,7 +126,8 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
  #define REXMIT_LOST    1 /* retransmit packets marked lost */
  #define REXMIT_NEW     2 /* FRTO-style transmit of unsent/new packets */
  
-static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb)
+static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
+                            unsigned int len)
  {
         static bool __once __read_mostly;
  
@@ -137,8 +138,9 @@ static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb)
  
                 rcu_read_lock();
                 dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
-               pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
-                       dev ? dev->name : "Unknown driver");
+               if (!dev || len >= dev->mtu)
+                       pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
+                               dev ? dev->name : "Unknown driver");
                 rcu_read_unlock();
         }
  }
@@ -161,8 +163,10 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
         if (len >= icsk->icsk_ack.rcv_mss) {
                 icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
                                                tcp_sk(sk)->advmss);
-               if (unlikely(icsk->icsk_ack.rcv_mss != len))
-                       tcp_gro_dev_warn(sk, skb);
+               /* Account for possibly-removed options */
+               if (unlikely(len > icsk->icsk_ack.rcv_mss +
+                                  MAX_TCP_OPTION_SPACE))
+                       tcp_gro_dev_warn(sk, skb, len);
         } else {
                 /* Otherwise, we make more careful check taking into account,
                  * that SACKs block is variable.
@@ -874,22 +878,11 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
                                   const int ts)
  {
         struct tcp_sock *tp = tcp_sk(sk);
-       if (metric > tp->reordering) {
-               int mib_idx;
+       int mib_idx;
  
+       if (metric > tp->reordering) {
                 tp->reordering = min(sysctl_tcp_max_reordering, metric);
  
-               /* This exciting event is worth to be remembered. 8) */
-               if (ts)
-                       mib_idx = LINUX_MIB_TCPTSREORDER;
-               else if (tcp_is_reno(tp))
-                       mib_idx = LINUX_MIB_TCPRENOREORDER;
-               else if (tcp_is_fack(tp))
-                       mib_idx = LINUX_MIB_TCPFACKREORDER;
-               else
-                       mib_idx = LINUX_MIB_TCPSACKREORDER;
-
-               NET_INC_STATS(sock_net(sk), mib_idx);
  #if FASTRETRANS_DEBUG > 1
                 pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
                          tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
@@ -902,6 +895,18 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
         }
  
         tp->rack.reord = 1;
+
+       /* This exciting event is worth to be remembered. 8) */
+       if (ts)
+               mib_idx = LINUX_MIB_TCPTSREORDER;
+       else if (tcp_is_reno(tp))
+               mib_idx = LINUX_MIB_TCPRENOREORDER;
+       else if (tcp_is_fack(tp))
+               mib_idx = LINUX_MIB_TCPFACKREORDER;
+       else
+               mib_idx = LINUX_MIB_TCPSACKREORDER;
+
+       NET_INC_STATS(sock_net(sk), mib_idx);
  }
  
  /* This must be called before lost_out is incremented */
@@ -1930,6 +1935,7 @@ void tcp_enter_loss(struct sock *sk)
         struct tcp_sock *tp = tcp_sk(sk);
         struct net *net = sock_net(sk);
         struct sk_buff *skb;
+       bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
         bool is_reneg;                  /* is receiver reneging on SACKs? */
         bool mark_lost;
  
@@ -1989,15 +1995,18 @@ void tcp_enter_loss(struct sock *sk)
         tp->high_seq = tp->snd_nxt;
         tcp_ecn_queue_cwr(tp);
  
-       /* F-RTO RFC5682 sec 3.1 step 1 mandates to disable F-RTO
-        * if a previous recovery is underway, otherwise it may incorrectly
-        * call a timeout spurious if some previously retransmitted packets
-        * are s/acked (sec 3.2). We do not apply that retriction since
-        * retransmitted skbs are permanently tagged with TCPCB_EVER_RETRANS
-        * so FLAG_ORIG_SACK_ACKED is always correct. But we do disable F-RTO
-        * on PTMU discovery to avoid sending new data.
+       /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
+        * loss recovery is underway except recurring timeout(s) on
+        * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
+        *
+        * In theory F-RTO can be used repeatedly during loss recovery.
+        * In practice this interacts badly with broken middle-boxes that
+        * falsely raise the receive window, which results in repeated
+        * timeouts and stop-and-go behavior.
          */
-       tp->frto = sysctl_tcp_frto && !inet_csk(sk)->icsk_mtup.probe_size;
+       tp->frto = sysctl_tcp_frto &&
+                  (new_recovery || icsk->icsk_retransmits) &&
+                  !inet_csk(sk)->icsk_mtup.probe_size;
  }
  
  /* If ACK arrived pointing to a remembered SACK, it means that our
@@ -5541,6 +5550,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
         struct inet_connection_sock *icsk = inet_csk(sk);
  
         tcp_set_state(sk, TCP_ESTABLISHED);
+       icsk->icsk_ack.lrcvtime = tcp_time_stamp;
  
         if (skb) {
                 icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
@@ -5759,7 +5769,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                          * to stand against the temptation 8)     --ANK
                          */
                         inet_csk_schedule_ack(sk);
-                       icsk->icsk_ack.lrcvtime = tcp_time_stamp;
                         tcp_enter_quickack_mode(sk);
                         inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
                                                   TCP_DELACK_MAX, TCP_RTO_MAX);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c

index 9a89b8deafae1e9b2e8d1d9bc211c9c30b8dd8ec..575e19dcc01763ef3fa938dea3ea51995b573163 100644 (file)
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -279,10 +279,13 @@ EXPORT_SYMBOL(tcp_v4_connect);
   */
  void tcp_v4_mtu_reduced(struct sock *sk)
  {
-       struct dst_entry *dst;
         struct inet_sock *inet = inet_sk(sk);
-       u32 mtu = tcp_sk(sk)->mtu_info;
+       struct dst_entry *dst;
+       u32 mtu;
  
+       if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
+               return;
+       mtu = tcp_sk(sk)->mtu_info;
         dst = inet_csk_update_pmtu(sk, mtu);
         if (!dst)
                 return;
@@ -428,7 +431,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
  
         switch (type) {
         case ICMP_REDIRECT:
-               do_redirect(icmp_skb, sk);
+               if (!sock_owned_by_user(sk))
+                       do_redirect(icmp_skb, sk);
                 goto out;
         case ICMP_SOURCE_QUENCH:
                 /* Just silently ignore these. */
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c

index 7e16243cdb58c830f869fe483730e86400e2eb00..65c0f3d13eca47c6394c09925decf54287d01b48 100644 (file)
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -460,6 +460,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
                 newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
                 minmax_reset(&newtp->rtt_min, tcp_time_stamp, ~0U);
                 newicsk->icsk_rto = TCP_TIMEOUT_INIT;
+               newicsk->icsk_ack.lrcvtime = tcp_time_stamp;
  
                 newtp->packets_out = 0;
                 newtp->retrans_out = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c

index 22548b5f05cbe5a655e0c53df2d31c5cc2e8a702..a85d863c44196e60fd22e25471cf773e72d2c133 100644 (file)
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1267,7 +1267,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
   * eventually). The difference is that pulled data not copied, but
   * immediately discarded.
   */
-static void __pskb_trim_head(struct sk_buff *skb, int len)
+static int __pskb_trim_head(struct sk_buff *skb, int len)
  {
         struct skb_shared_info *shinfo;
         int i, k, eat;
@@ -1277,7 +1277,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
                 __skb_pull(skb, eat);
                 len -= eat;
                 if (!len)
-                       return;
+                       return 0;
         }
         eat = len;
         k = 0;
@@ -1303,23 +1303,28 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
         skb_reset_tail_pointer(skb);
         skb->data_len -= len;
         skb->len = skb->data_len;
+       return len;
  }
  
  /* Remove acked data from a packet in the transmit queue. */
  int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
  {
+       u32 delta_truesize;
+
         if (skb_unclone(skb, GFP_ATOMIC))
                 return -ENOMEM;
  
-       __pskb_trim_head(skb, len);
+       delta_truesize = __pskb_trim_head(skb, len);
  
         TCP_SKB_CB(skb)->seq += len;
         skb->ip_summed = CHECKSUM_PARTIAL;
  
-       skb->truesize        -= len;
-       sk->sk_wmem_queued   -= len;
-       sk_mem_uncharge(sk, len);
-       sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
+       if (delta_truesize) {
+               skb->truesize      -= delta_truesize;
+               sk->sk_wmem_queued -= delta_truesize;
+               sk_mem_uncharge(sk, delta_truesize);
+               sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
+       }
  
         /* Any change of skb->len requires recalculation of tso factor. */
         if (tcp_skb_pcount(skb) > 1)
@@ -2999,6 +3004,8 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
  {
         struct sk_buff *skb;
  
+       TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
+
         /* NOTE: No TCP options attached and we never retransmit this. */
         skb = alloc_skb(MAX_TCP_HEADER, priority);
         if (!skb) {
@@ -3014,8 +3021,6 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
         /* Send it off. */
         if (tcp_transmit_skb(sk, skb, 0, priority))
                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
-
-       TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
  }
  
  /* Send a crossed SYN-ACK during socket establishment.
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c

index 4ecb38ae85042db7fa59e1aa6c74c9c3da0b1099..d8acbd9f477a2ac6b0f8eee1bf59f3ab43abff07 100644 (file)
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -12,7 +12,8 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
                 /* Account for retransmits that are lost again */
                 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
                 tp->retrans_out -= tcp_skb_pcount(skb);
-               NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
+               NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT,
+                             tcp_skb_pcount(skb));
         }
  }
  
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c

index 40d893556e6701ace6a02903e53c45822d6fa56d..b2ab411c6d3728fa7dbdebde045532a7317f5166 100644 (file)
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -249,7 +249,8 @@ void tcp_delack_timer_handler(struct sock *sk)
  
         sk_mem_reclaim_partial(sk);
  
-       if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
+       if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
+           !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
                 goto out;
  
         if (time_after(icsk->icsk_ack.timeout, jiffies)) {
@@ -552,7 +553,8 @@ void tcp_write_timer_handler(struct sock *sk)
         struct inet_connection_sock *icsk = inet_csk(sk);
         int event;
  
-       if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending)
+       if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
+           !icsk->icsk_pending)
                 goto out;
  
         if (time_after(icsk->icsk_timeout, jiffies)) {
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c

index b2be1d9757efb8ce8b82dc0a0fe3a475d193ea5b..781250151d40ee4559f7b90d15dccad8ffaeafd0 100644 (file)
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -29,6 +29,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
         u16 mac_len = skb->mac_len;
         int udp_offset, outer_hlen;
         __wsum partial;
+       bool need_ipsec;
  
         if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
                 goto out;
@@ -62,8 +63,10 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
  
         ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
  
+       need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb));
         /* Try to offload checksum if possible */
         offload_csum = !!(need_csum &&
+                         !need_ipsec &&
                           (skb->dev->features &
                            (is_ipv6 ? (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM) :
                                       (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM))));
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c

index 363172527e433e321cfa9fe8e96cfe32e4a78043..0ea96c4d334da2821a8d9c0e5e7d0d513dcb4228 100644 (file)
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3271,14 +3271,24 @@ static void addrconf_gre_config(struct net_device *dev)
  static int fixup_permanent_addr(struct inet6_dev *idev,
                                 struct inet6_ifaddr *ifp)
  {
-       if (!ifp->rt) {
-               struct rt6_info *rt;
+       /* rt6i_ref == 0 means the host route was removed from the
+        * FIB, for example, if 'lo' device is taken down. In that
+        * case regenerate the host route.
+        */
+       if (!ifp->rt || !atomic_read(&ifp->rt->rt6i_ref)) {
+               struct rt6_info *rt, *prev;
  
                 rt = addrconf_dst_alloc(idev, &ifp->addr, false);
                 if (unlikely(IS_ERR(rt)))
                         return PTR_ERR(rt);
  
+               /* ifp->rt can be accessed outside of rtnl */
+               spin_lock(&ifp->lock);
+               prev = ifp->rt;
                 ifp->rt = rt;
+               spin_unlock(&ifp->lock);
+
+               ip6_rt_put(prev);
         }
  
         if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) {
@@ -3626,14 +3636,19 @@ restart:
         INIT_LIST_HEAD(&del_list);
         list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
                 struct rt6_info *rt = NULL;
+               bool keep;
  
                 addrconf_del_dad_work(ifa);
  
+               keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) &&
+                       !addr_is_local(&ifa->addr);
+               if (!keep)
+                       list_move(&ifa->if_list, &del_list);
+
                 write_unlock_bh(&idev->lock);
                 spin_lock_bh(&ifa->lock);
  
-               if (keep_addr && (ifa->flags & IFA_F_PERMANENT) &&
-                   !addr_is_local(&ifa->addr)) {
+               if (keep) {
                         /* set state to skip the notifier below */
                         state = INET6_IFADDR_STATE_DEAD;
                         ifa->state = 0;
@@ -3645,8 +3660,6 @@ restart:
                 } else {
                         state = ifa->state;
                         ifa->state = INET6_IFADDR_STATE_DEAD;
-
-                       list_move(&ifa->if_list, &del_list);
                 }
  
                 spin_unlock_bh(&ifa->lock);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c

index 04db40620ea65c1f369ef63490383e92def722ff..e82e59f22dfc0e8eabe6b8dd3e12f5c25533142b 100644 (file)
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -920,12 +920,12 @@ static int __init inet6_init(void)
         err = register_pernet_subsys(&inet6_net_ops);
         if (err)
                 goto register_pernet_fail;
-       err = icmpv6_init();
-       if (err)
-               goto icmp_fail;
         err = ip6_mr_init();
         if (err)
                 goto ipmr_fail;
+       err = icmpv6_init();
+       if (err)
+               goto icmp_fail;
         err = ndisc_init();
         if (err)
                 goto ndisc_fail;
@@ -933,8 +933,6 @@ static int __init inet6_init(void)
         if (err)
                 goto igmp_fail;
  
-       ipv6_stub = &ipv6_stub_impl;
-
         err = ipv6_netfilter_init();
         if (err)
                 goto netfilter_fail;
@@ -1010,6 +1008,10 @@ static int __init inet6_init(void)
         if (err)
                 goto sysctl_fail;
  #endif
+
+       /* ensure that ipv6 stubs are visible only after ipv6 is ready */
+       wmb();
+       ipv6_stub = &ipv6_stub_impl;
  out:
         return err;
  
@@ -1061,10 +1063,10 @@ igmp_fail:
         ndisc_cleanup();
  ndisc_fail:
         ip6_mr_cleanup();
-ipmr_fail:
-       icmpv6_cleanup();
  icmp_fail:
         unregister_pernet_subsys(&inet6_net_ops);
+ipmr_fail:
+       icmpv6_cleanup();
  register_pernet_fail:
         sock_unregister(PF_INET6);
         rtnl_unregister_all(PF_INET6);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c

index eec27f87efaca15133cf1d5225e37e6a2f6a6f8a..e011122ebd43c190aec3812099345ec852444284 100644 (file)
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -405,9 +405,6 @@ static inline bool ipv6_datagram_support_addr(struct sock_exterr_skb *serr)
   * At one point, excluding local errors was a quick test to identify icmp/icmp6
   * errors. This is no longer true, but the test remained, so the v6 stack,
   * unlike v4, also honors cmsg requests on all wifi and timestamp errors.
- *
- * Timestamp code paths do not initialize the fields expected by cmsg:
- * the PKTINFO fields in skb->cb[]. Fill those in here.
   */
  static bool ip6_datagram_support_cmsg(struct sk_buff *skb,
                                       struct sock_exterr_skb *serr)
@@ -419,14 +416,9 @@ static bool ip6_datagram_support_cmsg(struct sk_buff *skb,
         if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL)
                 return false;
  
-       if (!skb->dev)
+       if (!IP6CB(skb)->iif)
                 return false;
  
-       if (skb->protocol == htons(ETH_P_IPV6))
-               IP6CB(skb)->iif = skb->dev->ifindex;
-       else
-               PKTINFO_SKB_CB(skb)->ipi_ifindex = skb->dev->ifindex;
-
         return true;
  }
  
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c

index 275cac628a95066f0a27e93f5015ddeb0172c28c..d32e2110aff286cf6c911048e96fc3abf6e10779 100644 (file)
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -388,7 +388,6 @@ looped_back:
                 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
                                   ((&hdr->segments_left) -
                                    skb_network_header(skb)));
-               kfree_skb(skb);
                 return -1;
         }
  
@@ -910,6 +909,8 @@ static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
  {
         switch (opt->type) {
         case IPV6_SRCRT_TYPE_0:
+       case IPV6_SRCRT_STRICT:
+       case IPV6_SRCRT_TYPE_2:
                 ipv6_push_rthdr0(skb, proto, opt, addr_p, saddr);
                 break;
         case IPV6_SRCRT_TYPE_4:
@@ -1164,6 +1165,8 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
  
         switch (opt->srcrt->type) {
         case IPV6_SRCRT_TYPE_0:
+       case IPV6_SRCRT_STRICT:
+       case IPV6_SRCRT_TYPE_2:
                 fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr;
                 break;
         case IPV6_SRCRT_TYPE_4:
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c

index e4266746e4a2af67562bb05dd50ace54e55d3edd..d4bf2c68a545b44873e433930e4e999920de78c9 100644 (file)
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -923,6 +923,8 @@ add:
                         ins = &rt->dst.rt6_next;
                         iter = *ins;
                         while (iter) {
+                               if (iter->rt6i_metric > rt->rt6i_metric)
+                                       break;
                                 if (rt6_qualify_for_ecmp(iter)) {
                                         *ins = iter->dst.rt6_next;
                                         fib6_purge_rt(iter, fn, info->nl_net);
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c

index aacfb4bce1533b3f3b38e1173c18cb1bb6b33099..c45b12b4431cbfcaef1f8452bae871bb176be478 100644 (file)
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -122,11 +122,14 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
                         max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
         /*
          * RFC4291 2.5.3
+        * The loopback address must not be used as the source address in IPv6
+        * packets that are sent outside of a single node. [..]
          * A packet received on an interface with a destination address
          * of loopback must be dropped.
          */
-       if (!(dev->flags & IFF_LOOPBACK) &&
-           ipv6_addr_loopback(&hdr->daddr))
+       if ((ipv6_addr_loopback(&hdr->saddr) ||
+            ipv6_addr_loopback(&hdr->daddr)) &&
+            !(dev->flags & IFF_LOOPBACK))
                 goto err;
  
         /* RFC4291 Errata ID: 3480
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c

index 0838e6d01d2e4979559cae63a20ca339a3e2c22c..93e58a5e18374bee41f5a17f0c5911e381acb142 100644 (file)
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -294,8 +294,10 @@ static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
         struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
         int err = -ENOSYS;
  
-       if (skb->encapsulation)
+       if (skb->encapsulation) {
+               skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IPV6));
                 skb_set_inner_network_header(skb, nhoff);
+       }
  
         iph->payload_len = htons(skb->len - nhoff - sizeof(*iph));
  
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c

index 528b3c1f3fdee4314e1c23007ae76333b4af7505..58f6288e9ba53e6964b74d71dde7615ead695c06 100644 (file)
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -768,13 +768,14 @@ slow_path:
          *      Fragment the datagram.
          */
  
-       *prevhdr = NEXTHDR_FRAGMENT;
         troom = rt->dst.dev->needed_tailroom;
  
         /*
          *      Keep copying data until we run out.
          */
         while (left > 0)        {
+               u8 *fragnexthdr_offset;
+
                 len = left;
                 /* IF: it doesn't fit, use 'mtu' - the data space left */
                 if (len > mtu)
@@ -819,6 +820,10 @@ slow_path:
                  */
                 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
  
+               fragnexthdr_offset = skb_network_header(frag);
+               fragnexthdr_offset += prevhdr - skb_network_header(skb);
+               *fragnexthdr_offset = NEXTHDR_FRAGMENT;
+
                 /*
                  *      Build fragment header.
                  */
@@ -1385,7 +1390,7 @@ emsgsize:
         if ((((length + fragheaderlen) > mtu) ||
              (skb && skb_is_gso(skb))) &&
             (sk->sk_protocol == IPPROTO_UDP) &&
-           (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
+           (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
             (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
                 err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
                                           hh_len, fragheaderlen, exthdrlen,
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c

index 75fac933c209a0f430279dea10b5dd2426a7ed31..a9692ec0cd6d0ba9fecba143d16191e8df0d9572 100644 (file)
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1037,7 +1037,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
         struct ip6_tnl *t = netdev_priv(dev);
         struct net *net = t->net;
         struct net_device_stats *stats = &t->dev->stats;
-       struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+       struct ipv6hdr *ipv6h;
         struct ipv6_tel_txoption opt;
         struct dst_entry *dst = NULL, *ndst = NULL;
         struct net_device *tdev;
@@ -1057,26 +1057,28 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
  
         /* NBMA tunnel */
         if (ipv6_addr_any(&t->parms.raddr)) {
-               struct in6_addr *addr6;
-               struct neighbour *neigh;
-               int addr_type;
+               if (skb->protocol == htons(ETH_P_IPV6)) {
+                       struct in6_addr *addr6;
+                       struct neighbour *neigh;
+                       int addr_type;
  
-               if (!skb_dst(skb))
-                       goto tx_err_link_failure;
+                       if (!skb_dst(skb))
+                               goto tx_err_link_failure;
  
-               neigh = dst_neigh_lookup(skb_dst(skb),
-                                        &ipv6_hdr(skb)->daddr);
-               if (!neigh)
-                       goto tx_err_link_failure;
+                       neigh = dst_neigh_lookup(skb_dst(skb),
+                                                &ipv6_hdr(skb)->daddr);
+                       if (!neigh)
+                               goto tx_err_link_failure;
  
-               addr6 = (struct in6_addr *)&neigh->primary_key;
-               addr_type = ipv6_addr_type(addr6);
+                       addr6 = (struct in6_addr *)&neigh->primary_key;
+                       addr_type = ipv6_addr_type(addr6);
  
-               if (addr_type == IPV6_ADDR_ANY)
-                       addr6 = &ipv6_hdr(skb)->daddr;
+                       if (addr_type == IPV6_ADDR_ANY)
+                               addr6 = &ipv6_hdr(skb)->daddr;
  
-               memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
-               neigh_release(neigh);
+                       memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
+                       neigh_release(neigh);
+               }
         } else if (!(t->parms.flags &
                      (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
                 /* enable the cache only only if the routing decision does
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c

index 644ba59fbd9d5ed8d6ba4a8082dd327589c9bb68..3d8a3b63b4fdbec7d488194e21e0c9013f0ff6da 100644 (file)
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -485,11 +485,15 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
         if (!skb->ignore_df && skb->len > mtu) {
                 skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu);
  
-               if (skb->protocol == htons(ETH_P_IPV6))
+               if (skb->protocol == htons(ETH_P_IPV6)) {
+                       if (mtu < IPV6_MIN_MTU)
+                               mtu = IPV6_MIN_MTU;
+
                         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-               else
+               } else {
                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
                                   htonl(mtu));
+               }
  
                 return -EMSGSIZE;
         }
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c

index 6ba6c900ebcf430cf313a2bef55ff69c114af218..bf34d0950752ba1466fa806fd4f8ce0b802b0087 100644 (file)
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -774,7 +774,8 @@ failure:
   *     Delete a VIF entry
   */
  
-static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
+static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
+                      struct list_head *head)
  {
         struct mif_device *v;
         struct net_device *dev;
@@ -820,7 +821,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
                                              dev->ifindex, &in6_dev->cnf);
         }
  
-       if (v->flags & MIFF_REGISTER)
+       if ((v->flags & MIFF_REGISTER) && !notify)
                 unregister_netdevice_queue(dev, head);
  
         dev_put(dev);
@@ -1331,7 +1332,6 @@ static int ip6mr_device_event(struct notifier_block *this,
         struct mr6_table *mrt;
         struct mif_device *v;
         int ct;
-       LIST_HEAD(list);
  
         if (event != NETDEV_UNREGISTER)
                 return NOTIFY_DONE;
@@ -1340,10 +1340,9 @@ static int ip6mr_device_event(struct notifier_block *this,
                 v = &mrt->vif6_table[0];
                 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
                         if (v->dev == dev)
-                               mif6_delete(mrt, ct, &list);
+                               mif6_delete(mrt, ct, 1, NULL);
                 }
         }
-       unregister_netdevice_many(&list);
  
         return NOTIFY_DONE;
  }
@@ -1552,7 +1551,7 @@ static void mroute_clean_tables(struct mr6_table *mrt, bool all)
         for (i = 0; i < mrt->maxvif; i++) {
                 if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
                         continue;
-               mif6_delete(mrt, i, &list);
+               mif6_delete(mrt, i, 0, &list);
         }
         unregister_netdevice_many(&list);
  
@@ -1707,7 +1706,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
                 if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
                         return -EFAULT;
                 rtnl_lock();
-               ret = mif6_delete(mrt, mifi, NULL);
+               ret = mif6_delete(mrt, mifi, 0, NULL);
                 rtnl_unlock();
                 return ret;
  
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c

index 7ebac630d3c603186be2fc0dcbaac7d7e74bfde6..cb1766724a4ca12ec9ccbc452c776261477c99f1 100644 (file)
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1749,7 +1749,8 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
                 idev = in6_dev_get(dev);
                 if (!idev)
                         break;
-               if (idev->cnf.ndisc_notify)
+               if (idev->cnf.ndisc_notify ||
+                   net->ipv6.devconf_all->ndisc_notify)
                         ndisc_send_unsol_na(dev);
                 in6_dev_put(idev);
                 break;
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c

index 6c5b5b1830a74f52e2dbc4f4f2bb8127a0ba2efb..4146536e9c1517fc5e2e0ad066a8e87154446dda 100644 (file)
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -27,10 +27,10 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr,
         memset(&range, 0, sizeof(range));
         range.flags = priv->flags;
         if (priv->sreg_proto_min) {
-               range.min_proto.all =
-                       *(__be16 *)&regs->data[priv->sreg_proto_min];
-               range.max_proto.all =
-                       *(__be16 *)&regs->data[priv->sreg_proto_max];
+               range.min_proto.all = (__force __be16)nft_reg_load16(
+                       &regs->data[priv->sreg_proto_min]);
+               range.max_proto.all = (__force __be16)nft_reg_load16(
+                       &regs->data[priv->sreg_proto_max]);
         }
         regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range,
                                                     nft_out(pkt));
diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c

index f5ac080fc0849b0f65751458432cf4e693353c8a..a27e424f690d699fafc5f2a7135637f36fb66388 100644 (file)
--- a/net/ipv6/netfilter/nft_redir_ipv6.c
+++ b/net/ipv6/netfilter/nft_redir_ipv6.c
@@ -26,10 +26,10 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr,
  
         memset(&range, 0, sizeof(range));
         if (priv->sreg_proto_min) {
-               range.min_proto.all =
-                       *(__be16 *)&regs->data[priv->sreg_proto_min],
-               range.max_proto.all =
-                       *(__be16 *)&regs->data[priv->sreg_proto_max],
+               range.min_proto.all = (__force __be16)nft_reg_load16(
+                       &regs->data[priv->sreg_proto_min]);
+               range.max_proto.all = (__force __be16)nft_reg_load16(
+                       &regs->data[priv->sreg_proto_max]);
                 range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
         }
  
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c

index f174e76e6505d4045e940c9fceef765d2aaa937d..0da6a12b5472e322d679572c7244e5c9bc467741 100644 (file)
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1178,8 +1178,7 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
                 spin_lock_bh(&sk->sk_receive_queue.lock);
                 skb = skb_peek(&sk->sk_receive_queue);
                 if (skb)
-                       amount = skb_tail_pointer(skb) -
-                               skb_transport_header(skb);
+                       amount = skb->len;
                 spin_unlock_bh(&sk->sk_receive_queue.lock);
                 return put_user(amount, (int __user *)arg);
         }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c

index 229bfcc451ef5004e9e9d14c071937c1b9658711..fb174b590fd3b443a7503207d822dd02e7171290 100644 (file)
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1854,6 +1854,10 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
         int addr_type;
         int err = -EINVAL;
  
+       /* RTF_PCPU is an internal flag; can not be set by userspace */
+       if (cfg->fc_flags & RTF_PCPU)
+               goto out;
+
         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
                 goto out;
  #ifndef CONFIG_IPV6_SUBTREES
@@ -3299,7 +3303,6 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt)
                 nexthop_len = nla_total_size(0)  /* RTA_MULTIPATH */
                             + NLA_ALIGN(sizeof(struct rtnexthop))
                             + nla_total_size(16) /* RTA_GATEWAY */
-                           + nla_total_size(4)  /* RTA_OIF */
                             + lwtunnel_get_encap_size(rt->dst.lwtstate);
  
                 nexthop_len *= rt->rt6i_nsiblings;
@@ -3323,7 +3326,7 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt)
  }
  
  static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
-                           unsigned int *flags)
+                           unsigned int *flags, bool skip_oif)
  {
         if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
                 *flags |= RTNH_F_LINKDOWN;
@@ -3336,7 +3339,8 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
                         goto nla_put_failure;
         }
  
-       if (rt->dst.dev &&
+       /* not needed for multipath encoding b/c it has a rtnexthop struct */
+       if (!skip_oif && rt->dst.dev &&
             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
                 goto nla_put_failure;
  
@@ -3350,6 +3354,7 @@ nla_put_failure:
         return -EMSGSIZE;
  }
  
+/* add multipath next hop */
  static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
  {
         struct rtnexthop *rtnh;
@@ -3362,7 +3367,7 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
         rtnh->rtnh_hops = 0;
         rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
  
-       if (rt6_nexthop_info(skb, rt, &flags) < 0)
+       if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
                 goto nla_put_failure;
  
         rtnh->rtnh_flags = flags;
@@ -3422,6 +3427,8 @@ static int rt6_fill_node(struct net *net,
         }
         else if (rt->rt6i_flags & RTF_LOCAL)
                 rtm->rtm_type = RTN_LOCAL;
+       else if (rt->rt6i_flags & RTF_ANYCAST)
+               rtm->rtm_type = RTN_ANYCAST;
         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
                 rtm->rtm_type = RTN_LOCAL;
         else
@@ -3515,7 +3522,7 @@ static int rt6_fill_node(struct net *net,
  
                 nla_nest_end(skb, mp);
         } else {
-               if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags) < 0)
+               if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
                         goto nla_put_failure;
         }
  
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c

index a855eb325b030a666fe92c56a2d432c77d9dfe7a..5f44ffed25768d83c31b31295474c5ecf623e986 100644 (file)
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -53,6 +53,9 @@ bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len)
                 struct sr6_tlv *tlv;
                 unsigned int tlv_len;
  
+               if (trailing < sizeof(*tlv))
+                       return false;
+
                 tlv = (struct sr6_tlv *)((unsigned char *)srh + tlv_offset);
                 tlv_len = sizeof(*tlv) + tlv->len;
  
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c

index 60a5295a7de6e877f5ab80ef32314c573c289d81..49fa2e8c3fa9212eef1198a1077a6726f0f1b6fc 100644 (file)
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -391,10 +391,12 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
         np = inet6_sk(sk);
  
         if (type == NDISC_REDIRECT) {
-               struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
+               if (!sock_owned_by_user(sk)) {
+                       struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
  
-               if (dst)
-                       dst->ops->redirect(dst, sk, skb);
+                       if (dst)
+                               dst->ops->redirect(dst, sk, skb);
+               }
                 goto out;
         }
  
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c

index 4e4c401e3bc69020deaa4af1c10633288faedf13..e28082f0a307eb68ac13987580d8d9f65358212f 100644 (file)
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1035,6 +1035,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
         ipc6.hlimit = -1;
         ipc6.tclass = -1;
         ipc6.dontfrag = -1;
+       sockc.tsflags = sk->sk_tsflags;
  
         /* destination address check */
         if (sin6) {
@@ -1159,7 +1160,6 @@ do_udp_sendmsg:
  
         fl6.flowi6_mark = sk->sk_mark;
         fl6.flowi6_uid = sk->sk_uid;
-       sockc.tsflags = sk->sk_tsflags;
  
         if (msg->msg_controllen) {
                 opt = &opt_space;
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c

index 81adc29a448dc5be56b96ddd5c42321417371d37..8d77ad5cadaff3aa1feb18f168e779c5a6e7f917 100644 (file)
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -828,7 +828,8 @@ out:
   *    Wait for incoming connection
   *
   */
-static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
+static int irda_accept(struct socket *sock, struct socket *newsock, int flags,
+                      bool kern)
  {
         struct sock *sk = sock->sk;
         struct irda_sock *new, *self = irda_sk(sk);
@@ -836,7 +837,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
         struct sk_buff *skb = NULL;
         int err;
  
-       err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0);
+       err = irda_create(sock_net(sk), newsock, sk->sk_protocol, kern);
         if (err)
                 return err;
  
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c

index 89bbde1081ce5eb56c0c6a1c7c18b030f3de1198..84de7b6326dcdf7fcf0d8cb73f738d9c21c2f9fe 100644 (file)
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -938,7 +938,7 @@ done:
  
  /* Accept a pending connection */
  static int iucv_sock_accept(struct socket *sock, struct socket *newsock,
-                           int flags)
+                           int flags, bool kern)
  {
         DECLARE_WAITQUEUE(wait, current);
         struct sock *sk = sock->sk, *nsk;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c

index 309062f3debe298c1cf7666f77505f8d353d76d8..31762f76cdb5f2a3ec322135068402be532218ed 100644 (file)
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1687,7 +1687,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
                 struct kcm_attach info;
  
                 if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
-                       err = -EFAULT;
+                       return -EFAULT;
  
                 err = kcm_attach_ioctl(sock, &info);
  
@@ -1697,7 +1697,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
                 struct kcm_unattach info;
  
                 if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
-                       err = -EFAULT;
+                       return -EFAULT;
  
                 err = kcm_unattach_ioctl(sock, &info);
  
@@ -1708,7 +1708,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
                 struct socket *newsock = NULL;
  
                 if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
-                       err = -EFAULT;
+                       return -EFAULT;
  
                 err = kcm_clone(sock, &info, &newsock);
  
diff --git a/net/key/af_key.c b/net/key/af_key.c

index c6252ed42c1de65dee149d7d869b62b96616e22a..be8cecc6500214de68cc8872b48b38c840d3304f 100644 (file)
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -63,8 +63,13 @@ struct pfkey_sock {
                 } u;
                 struct sk_buff  *skb;
         } dump;
+       struct mutex dump_lock;
  };
  
+static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len,
+                              xfrm_address_t *saddr, xfrm_address_t *daddr,
+                              u16 *family);
+
  static inline struct pfkey_sock *pfkey_sk(struct sock *sk)
  {
         return (struct pfkey_sock *)sk;
@@ -139,6 +144,7 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol,
  {
         struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
         struct sock *sk;
+       struct pfkey_sock *pfk;
         int err;
  
         if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
@@ -153,6 +159,9 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol,
         if (sk == NULL)
                 goto out;
  
+       pfk = pfkey_sk(sk);
+       mutex_init(&pfk->dump_lock);
+
         sock->ops = &pfkey_ops;
         sock_init_data(sock, sk);
  
@@ -281,13 +290,23 @@ static int pfkey_do_dump(struct pfkey_sock *pfk)
         struct sadb_msg *hdr;
         int rc;
  
+       mutex_lock(&pfk->dump_lock);
+       if (!pfk->dump.dump) {
+               rc = 0;
+               goto out;
+       }
+
         rc = pfk->dump.dump(pfk);
-       if (rc == -ENOBUFS)
-               return 0;
+       if (rc == -ENOBUFS) {
+               rc = 0;
+               goto out;
+       }
  
         if (pfk->dump.skb) {
-               if (!pfkey_can_dump(&pfk->sk))
-                       return 0;
+               if (!pfkey_can_dump(&pfk->sk)) {
+                       rc = 0;
+                       goto out;
+               }
  
                 hdr = (struct sadb_msg *) pfk->dump.skb->data;
                 hdr->sadb_msg_seq = 0;
@@ -298,6 +317,9 @@ static int pfkey_do_dump(struct pfkey_sock *pfk)
         }
  
         pfkey_terminate_dump(pfk);
+
+out:
+       mutex_unlock(&pfk->dump_lock);
         return rc;
  }
  
@@ -1793,19 +1815,26 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms
         struct xfrm_address_filter *filter = NULL;
         struct pfkey_sock *pfk = pfkey_sk(sk);
  
-       if (pfk->dump.dump != NULL)
+       mutex_lock(&pfk->dump_lock);
+       if (pfk->dump.dump != NULL) {
+               mutex_unlock(&pfk->dump_lock);
                 return -EBUSY;
+       }
  
         proto = pfkey_satype2proto(hdr->sadb_msg_satype);
-       if (proto == 0)
+       if (proto == 0) {
+               mutex_unlock(&pfk->dump_lock);
                 return -EINVAL;
+       }
  
         if (ext_hdrs[SADB_X_EXT_FILTER - 1]) {
                 struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1];
  
                 filter = kmalloc(sizeof(*filter), GFP_KERNEL);
-               if (filter == NULL)
+               if (filter == NULL) {
+                       mutex_unlock(&pfk->dump_lock);
                         return -ENOMEM;
+               }
  
                 memcpy(&filter->saddr, &xfilter->sadb_x_filter_saddr,
                        sizeof(xfrm_address_t));
@@ -1821,6 +1850,7 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms
         pfk->dump.dump = pfkey_dump_sa;
         pfk->dump.done = pfkey_dump_sa_done;
         xfrm_state_walk_init(&pfk->dump.u.state, proto, filter);
+       mutex_unlock(&pfk->dump_lock);
  
         return pfkey_do_dump(pfk);
  }
@@ -1913,19 +1943,14 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
  
         /* addresses present only in tunnel mode */
         if (t->mode == XFRM_MODE_TUNNEL) {
-               u8 *sa = (u8 *) (rq + 1);
-               int family, socklen;
+               int err;
  
-               family = pfkey_sockaddr_extract((struct sockaddr *)sa,
-                                               &t->saddr);
-               if (!family)
-                       return -EINVAL;
-
-               socklen = pfkey_sockaddr_len(family);
-               if (pfkey_sockaddr_extract((struct sockaddr *)(sa + socklen),
-                                          &t->id.daddr) != family)
-                       return -EINVAL;
-               t->encap_family = family;
+               err = parse_sockaddr_pair(
+                       (struct sockaddr *)(rq + 1),
+                       rq->sadb_x_ipsecrequest_len - sizeof(*rq),
+                       &t->saddr, &t->id.daddr, &t->encap_family);
+               if (err)
+                       return err;
         } else
                 t->encap_family = xp->family;
  
@@ -1945,7 +1970,11 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol)
         if (pol->sadb_x_policy_len * 8 < sizeof(struct sadb_x_policy))
                 return -EINVAL;
  
-       while (len >= sizeof(struct sadb_x_ipsecrequest)) {
+       while (len >= sizeof(*rq)) {
+               if (len < rq->sadb_x_ipsecrequest_len ||
+                   rq->sadb_x_ipsecrequest_len < sizeof(*rq))
+                       return -EINVAL;
+
                 if ((err = parse_ipsecrequest(xp, rq)) < 0)
                         return err;
                 len -= rq->sadb_x_ipsecrequest_len;
@@ -2408,7 +2437,6 @@ out:
         return err;
  }
  
-#ifdef CONFIG_NET_KEY_MIGRATE
  static int pfkey_sockaddr_pair_size(sa_family_t family)
  {
         return PFKEY_ALIGN8(pfkey_sockaddr_len(family) * 2);
@@ -2420,7 +2448,7 @@ static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len,
  {
         int af, socklen;
  
-       if (ext_len < pfkey_sockaddr_pair_size(sa->sa_family))
+       if (ext_len < 2 || ext_len < pfkey_sockaddr_pair_size(sa->sa_family))
                 return -EINVAL;
  
         af = pfkey_sockaddr_extract(sa, saddr);
@@ -2436,6 +2464,7 @@ static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len,
         return 0;
  }
  
+#ifdef CONFIG_NET_KEY_MIGRATE
  static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len,
                                     struct xfrm_migrate *m)
  {
@@ -2443,13 +2472,14 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len,
         struct sadb_x_ipsecrequest *rq2;
         int mode;
  
-       if (len <= sizeof(struct sadb_x_ipsecrequest) ||
-           len < rq1->sadb_x_ipsecrequest_len)
+       if (len < sizeof(*rq1) ||
+           len < rq1->sadb_x_ipsecrequest_len ||
+           rq1->sadb_x_ipsecrequest_len < sizeof(*rq1))
                 return -EINVAL;
  
         /* old endoints */
         err = parse_sockaddr_pair((struct sockaddr *)(rq1 + 1),
-                                 rq1->sadb_x_ipsecrequest_len,
+                                 rq1->sadb_x_ipsecrequest_len - sizeof(*rq1),
                                   &m->old_saddr, &m->old_daddr,
                                   &m->old_family);
         if (err)
@@ -2458,13 +2488,14 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len,
         rq2 = (struct sadb_x_ipsecrequest *)((u8 *)rq1 + rq1->sadb_x_ipsecrequest_len);
         len -= rq1->sadb_x_ipsecrequest_len;
  
-       if (len <= sizeof(struct sadb_x_ipsecrequest) ||
-           len < rq2->sadb_x_ipsecrequest_len)
+       if (len <= sizeof(*rq2) ||
+           len < rq2->sadb_x_ipsecrequest_len ||
+           rq2->sadb_x_ipsecrequest_len < sizeof(*rq2))
                 return -EINVAL;
  
         /* new endpoints */
         err = parse_sockaddr_pair((struct sockaddr *)(rq2 + 1),
-                                 rq2->sadb_x_ipsecrequest_len,
+                                 rq2->sadb_x_ipsecrequest_len - sizeof(*rq2),
                                   &m->new_saddr, &m->new_daddr,
                                   &m->new_family);
         if (err)
@@ -2679,14 +2710,18 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb
  {
         struct pfkey_sock *pfk = pfkey_sk(sk);
  
-       if (pfk->dump.dump != NULL)
+       mutex_lock(&pfk->dump_lock);
+       if (pfk->dump.dump != NULL) {
+               mutex_unlock(&pfk->dump_lock);
                 return -EBUSY;
+       }
  
         pfk->dump.msg_version = hdr->sadb_msg_version;
         pfk->dump.msg_portid = hdr->sadb_msg_pid;
         pfk->dump.dump = pfkey_dump_sp;
         pfk->dump.done = pfkey_dump_sp_done;
         xfrm_policy_walk_init(&pfk->dump.u.policy, XFRM_POLICY_TYPE_MAIN);
+       mutex_unlock(&pfk->dump_lock);
  
         return pfkey_do_dump(pfk);
  }
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c

index 8adab6335ced9f1018318094be20c132a70f8475..e37d9554da7b47df0571c41478e6e758b8a8e6f9 100644 (file)
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -278,7 +278,57 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunn
  }
  EXPORT_SYMBOL_GPL(l2tp_session_find);
  
-struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth)
+/* Like l2tp_session_find() but takes a reference on the returned session.
+ * Optionally calls session->ref() too if do_ref is true.
+ */
+struct l2tp_session *l2tp_session_get(struct net *net,
+                                     struct l2tp_tunnel *tunnel,
+                                     u32 session_id, bool do_ref)
+{
+       struct hlist_head *session_list;
+       struct l2tp_session *session;
+
+       if (!tunnel) {
+               struct l2tp_net *pn = l2tp_pernet(net);
+
+               session_list = l2tp_session_id_hash_2(pn, session_id);
+
+               rcu_read_lock_bh();
+               hlist_for_each_entry_rcu(session, session_list, global_hlist) {
+                       if (session->session_id == session_id) {
+                               l2tp_session_inc_refcount(session);
+                               if (do_ref && session->ref)
+                                       session->ref(session);
+                               rcu_read_unlock_bh();
+
+                               return session;
+                       }
+               }
+               rcu_read_unlock_bh();
+
+               return NULL;
+       }
+
+       session_list = l2tp_session_id_hash(tunnel, session_id);
+       read_lock_bh(&tunnel->hlist_lock);
+       hlist_for_each_entry(session, session_list, hlist) {
+               if (session->session_id == session_id) {
+                       l2tp_session_inc_refcount(session);
+                       if (do_ref && session->ref)
+                               session->ref(session);
+                       read_unlock_bh(&tunnel->hlist_lock);
+
+                       return session;
+               }
+       }
+       read_unlock_bh(&tunnel->hlist_lock);
+
+       return NULL;
+}
+EXPORT_SYMBOL_GPL(l2tp_session_get);
+
+struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
+                                         bool do_ref)
  {
         int hash;
         struct l2tp_session *session;
@@ -288,6 +338,9 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth)
         for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
                 hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) {
                         if (++count > nth) {
+                               l2tp_session_inc_refcount(session);
+                               if (do_ref && session->ref)
+                                       session->ref(session);
                                 read_unlock_bh(&tunnel->hlist_lock);
                                 return session;
                         }
@@ -298,12 +351,13 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth)
  
         return NULL;
  }
-EXPORT_SYMBOL_GPL(l2tp_session_find_nth);
+EXPORT_SYMBOL_GPL(l2tp_session_get_nth);
  
  /* Lookup a session by interface name.
   * This is very inefficient but is only used by management interfaces.
   */
-struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname)
+struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname,
+                                               bool do_ref)
  {
         struct l2tp_net *pn = l2tp_pernet(net);
         int hash;
@@ -313,7 +367,11 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname)
         for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) {
                 hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) {
                         if (!strcmp(session->ifname, ifname)) {
+                               l2tp_session_inc_refcount(session);
+                               if (do_ref && session->ref)
+                                       session->ref(session);
                                 rcu_read_unlock_bh();
+
                                 return session;
                         }
                 }
@@ -323,7 +381,49 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname)
  
         return NULL;
  }
-EXPORT_SYMBOL_GPL(l2tp_session_find_by_ifname);
+EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname);
+
+static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel,
+                                     struct l2tp_session *session)
+{
+       struct l2tp_session *session_walk;
+       struct hlist_head *g_head;
+       struct hlist_head *head;
+       struct l2tp_net *pn;
+
+       head = l2tp_session_id_hash(tunnel, session->session_id);
+
+       write_lock_bh(&tunnel->hlist_lock);
+       hlist_for_each_entry(session_walk, head, hlist)
+               if (session_walk->session_id == session->session_id)
+                       goto exist;
+
+       if (tunnel->version == L2TP_HDR_VER_3) {
+               pn = l2tp_pernet(tunnel->l2tp_net);
+               g_head = l2tp_session_id_hash_2(l2tp_pernet(tunnel->l2tp_net),
+                                               session->session_id);
+
+               spin_lock_bh(&pn->l2tp_session_hlist_lock);
+               hlist_for_each_entry(session_walk, g_head, global_hlist)
+                       if (session_walk->session_id == session->session_id)
+                               goto exist_glob;
+
+               hlist_add_head_rcu(&session->global_hlist, g_head);
+               spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+       }
+
+       hlist_add_head(&session->hlist, head);
+       write_unlock_bh(&tunnel->hlist_lock);
+
+       return 0;
+
+exist_glob:
+       spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+exist:
+       write_unlock_bh(&tunnel->hlist_lock);
+
+       return -EEXIST;
+}
  
  /* Lookup a tunnel by id
   */
@@ -633,6 +733,9 @@ discard:
   * a data (not control) frame before coming here. Fields up to the
   * session-id have already been parsed and ptr points to the data
   * after the session-id.
+ *
+ * session->ref() must have been called prior to l2tp_recv_common().
+ * session->deref() will be called automatically after skb is processed.
   */
  void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
                       unsigned char *ptr, unsigned char *optr, u16 hdrflags,
@@ -642,14 +745,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
         int offset;
         u32 ns, nr;
  
-       /* The ref count is increased since we now hold a pointer to
-        * the session. Take care to decrement the refcnt when exiting
-        * this function from now on...
-        */
-       l2tp_session_inc_refcount(session);
-       if (session->ref)
-               (*session->ref)(session);
-
         /* Parse and check optional cookie */
         if (session->peer_cookie_len > 0) {
                 if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) {
@@ -802,8 +897,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
         /* Try to dequeue as many skbs from reorder_q as we can. */
         l2tp_recv_dequeue(session);
  
-       l2tp_session_dec_refcount(session);
-
         return;
  
  discard:
@@ -812,8 +905,6 @@ discard:
  
         if (session->deref)
                 (*session->deref)(session);
-
-       l2tp_session_dec_refcount(session);
  }
  EXPORT_SYMBOL(l2tp_recv_common);
  
@@ -920,8 +1011,14 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
         }
  
         /* Find the session context */
-       session = l2tp_session_find(tunnel->l2tp_net, tunnel, session_id);
+       session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id, true);
         if (!session || !session->recv_skb) {
+               if (session) {
+                       if (session->deref)
+                               session->deref(session);
+                       l2tp_session_dec_refcount(session);
+               }
+
                 /* Not found? Pass to userspace to deal with */
                 l2tp_info(tunnel, L2TP_MSG_DATA,
                           "%s: no session found (%u/%u). Passing up.\n",
@@ -930,6 +1027,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
         }
  
         l2tp_recv_common(session, skb, ptr, optr, hdrflags, length, payload_hook);
+       l2tp_session_dec_refcount(session);
  
         return 0;
  
@@ -1738,6 +1836,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_set_header_len);
  struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
  {
         struct l2tp_session *session;
+       int err;
  
         session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL);
         if (session != NULL) {
@@ -1793,6 +1892,13 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
  
                 l2tp_session_set_header_len(session, tunnel->version);
  
+               err = l2tp_session_add_to_tunnel(tunnel, session);
+               if (err) {
+                       kfree(session);
+
+                       return ERR_PTR(err);
+               }
+
                 /* Bump the reference count. The session context is deleted
                  * only when this drops to zero.
                  */
@@ -1802,28 +1908,14 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
                 /* Ensure tunnel socket isn't deleted */
                 sock_hold(tunnel->sock);
  
-               /* Add session to the tunnel's hash list */
-               write_lock_bh(&tunnel->hlist_lock);
-               hlist_add_head(&session->hlist,
-                              l2tp_session_id_hash(tunnel, session_id));
-               write_unlock_bh(&tunnel->hlist_lock);
-
-               /* And to the global session list if L2TPv3 */
-               if (tunnel->version != L2TP_HDR_VER_2) {
-                       struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
-
-                       spin_lock_bh(&pn->l2tp_session_hlist_lock);
-                       hlist_add_head_rcu(&session->global_hlist,
-                                          l2tp_session_id_hash_2(pn, session_id));
-                       spin_unlock_bh(&pn->l2tp_session_hlist_lock);
-               }
-
                 /* Ignore management session in session count value */
                 if (session->session_id != 0)
                         atomic_inc(&l2tp_session_count);
+
+               return session;
         }
  
-       return session;
+       return ERR_PTR(-ENOMEM);
  }
  EXPORT_SYMBOL_GPL(l2tp_session_create);
  
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h

index aebf281d09eeb31c531eb624bd2ddd78cab8da9b..8ce7818c7a9d0578b79e70eff1916d3248e7604a 100644 (file)
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -230,11 +230,16 @@ out:
         return tunnel;
  }
  
+struct l2tp_session *l2tp_session_get(struct net *net,
+                                     struct l2tp_tunnel *tunnel,
+                                     u32 session_id, bool do_ref);
  struct l2tp_session *l2tp_session_find(struct net *net,
                                        struct l2tp_tunnel *tunnel,
                                        u32 session_id);
-struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth);
-struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname);
+struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
+                                         bool do_ref);
+struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname,
+                                               bool do_ref);
  struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id);
  struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth);
  
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c

index 2d6760a2ae347b96d465e30192ab8a7957258d32..d100aed3d06fb63b8851a00c55350f1728b18599 100644 (file)
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -53,7 +53,7 @@ static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd)
  
  static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd)
  {
-       pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx);
+       pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true);
         pd->session_idx++;
  
         if (pd->session == NULL) {
@@ -238,10 +238,14 @@ static int l2tp_dfs_seq_show(struct seq_file *m, void *v)
         }
  
         /* Show the tunnel or session context */
-       if (pd->session == NULL)
+       if (!pd->session) {
                 l2tp_dfs_seq_tunnel_show(m, pd->tunnel);
-       else
+       } else {
                 l2tp_dfs_seq_session_show(m, pd->session);
+               if (pd->session->deref)
+                       pd->session->deref(pd->session);
+               l2tp_session_dec_refcount(pd->session);
+       }
  
  out:
         return 0;
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c

index 8bf18a5f66e0c465ef3640ae4168c875c4c9e1ed..6fd41d7afe1ef27592970de333c75928264dc275 100644 (file)
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -221,12 +221,6 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
                 goto out;
         }
  
-       session = l2tp_session_find(net, tunnel, session_id);
-       if (session) {
-               rc = -EEXIST;
-               goto out;
-       }
-
         if (cfg->ifname) {
                 dev = dev_get_by_name(net, cfg->ifname);
                 if (dev) {
@@ -240,8 +234,8 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
  
         session = l2tp_session_create(sizeof(*spriv), tunnel, session_id,
                                       peer_session_id, cfg);
-       if (!session) {
-               rc = -ENOMEM;
+       if (IS_ERR(session)) {
+               rc = PTR_ERR(session);
                 goto out;
         }
  
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c

index d25038cfd64e1ae5d5819fe1e7049529f4b5a2e4..4d322c1b7233e5b546ff75a585a3603503e076bc 100644 (file)
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -143,19 +143,19 @@ static int l2tp_ip_recv(struct sk_buff *skb)
         }
  
         /* Ok, this is a data packet. Lookup the session. */
-       session = l2tp_session_find(net, NULL, session_id);
-       if (session == NULL)
+       session = l2tp_session_get(net, NULL, session_id, true);
+       if (!session)
                 goto discard;
  
         tunnel = session->tunnel;
-       if (tunnel == NULL)
-               goto discard;
+       if (!tunnel)
+               goto discard_sess;
  
         /* Trace packet contents, if enabled */
         if (tunnel->debug & L2TP_MSG_DATA) {
                 length = min(32u, skb->len);
                 if (!pskb_may_pull(skb, length))
-                       goto discard;
+                       goto discard_sess;
  
                 /* Point to L2TP header */
                 optr = ptr = skb->data;
@@ -165,6 +165,7 @@ static int l2tp_ip_recv(struct sk_buff *skb)
         }
  
         l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook);
+       l2tp_session_dec_refcount(session);
  
         return 0;
  
@@ -178,9 +179,10 @@ pass_up:
  
         tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
         tunnel = l2tp_tunnel_find(net, tunnel_id);
-       if (tunnel != NULL)
+       if (tunnel) {
                 sk = tunnel->sock;
-       else {
+               sock_hold(sk);
+       } else {
                 struct iphdr *iph = (struct iphdr *) skb_network_header(skb);
  
                 read_lock_bh(&l2tp_ip_lock);
@@ -202,6 +204,12 @@ pass_up:
  
         return sk_receive_skb(sk, skb, 1);
  
+discard_sess:
+       if (session->deref)
+               session->deref(session);
+       l2tp_session_dec_refcount(session);
+       goto discard;
+
  discard_put:
         sock_put(sk);
  
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c

index a4abcbc4c09ae65424a701a1200b7535fa3635ac..88b397c30d86af8d6a22daeb466cedac36aac57e 100644 (file)
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -156,19 +156,19 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
         }
  
         /* Ok, this is a data packet. Lookup the session. */
-       session = l2tp_session_find(net, NULL, session_id);
-       if (session == NULL)
+       session = l2tp_session_get(net, NULL, session_id, true);
+       if (!session)
                 goto discard;
  
         tunnel = session->tunnel;
-       if (tunnel == NULL)
-               goto discard;
+       if (!tunnel)
+               goto discard_sess;
  
         /* Trace packet contents, if enabled */
         if (tunnel->debug & L2TP_MSG_DATA) {
                 length = min(32u, skb->len);
                 if (!pskb_may_pull(skb, length))
-                       goto discard;
+                       goto discard_sess;
  
                 /* Point to L2TP header */
                 optr = ptr = skb->data;
@@ -179,6 +179,8 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
  
         l2tp_recv_common(session, skb, ptr, optr, 0, skb->len,
                          tunnel->recv_payload_hook);
+       l2tp_session_dec_refcount(session);
+
         return 0;
  
  pass_up:
@@ -191,9 +193,10 @@ pass_up:
  
         tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
         tunnel = l2tp_tunnel_find(net, tunnel_id);
-       if (tunnel != NULL)
+       if (tunnel) {
                 sk = tunnel->sock;
-       else {
+               sock_hold(sk);
+       } else {
                 struct ipv6hdr *iph = ipv6_hdr(skb);
  
                 read_lock_bh(&l2tp_ip6_lock);
@@ -215,6 +218,12 @@ pass_up:
  
         return sk_receive_skb(sk, skb, 1);
  
+discard_sess:
+       if (session->deref)
+               session->deref(session);
+       l2tp_session_dec_refcount(session);
+       goto discard;
+
  discard_put:
         sock_put(sk);
  
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c

index 3620fba317863dc59c93c1089faf63451e831aa5..7e3e669baac42df9d0be2864b927ba4f390258e9 100644 (file)
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -48,7 +48,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq,
  /* Accessed under genl lock */
  static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX];
  
-static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info)
+static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info,
+                                               bool do_ref)
  {
         u32 tunnel_id;
         u32 session_id;
@@ -59,14 +60,15 @@ static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info)
  
         if (info->attrs[L2TP_ATTR_IFNAME]) {
                 ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]);
-               session = l2tp_session_find_by_ifname(net, ifname);
+               session = l2tp_session_get_by_ifname(net, ifname, do_ref);
         } else if ((info->attrs[L2TP_ATTR_SESSION_ID]) &&
                    (info->attrs[L2TP_ATTR_CONN_ID])) {
                 tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
                 session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
                 tunnel = l2tp_tunnel_find(net, tunnel_id);
                 if (tunnel)
-                       session = l2tp_session_find(net, tunnel, session_id);
+                       session = l2tp_session_get(net, tunnel, session_id,
+                                                  do_ref);
         }
  
         return session;
@@ -642,10 +644,12 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
                         session_id, peer_session_id, &cfg);
  
         if (ret >= 0) {
-               session = l2tp_session_find(net, tunnel, session_id);
-               if (session)
+               session = l2tp_session_get(net, tunnel, session_id, false);
+               if (session) {
                         ret = l2tp_session_notify(&l2tp_nl_family, info, session,
                                                   L2TP_CMD_SESSION_CREATE);
+                       l2tp_session_dec_refcount(session);
+               }
         }
  
  out:
@@ -658,7 +662,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
         struct l2tp_session *session;
         u16 pw_type;
  
-       session = l2tp_nl_session_find(info);
+       session = l2tp_nl_session_get(info, true);
         if (session == NULL) {
                 ret = -ENODEV;
                 goto out;
@@ -672,6 +676,10 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
                 if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete)
                         ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session);
  
+       if (session->deref)
+               session->deref(session);
+       l2tp_session_dec_refcount(session);
+
  out:
         return ret;
  }
@@ -681,7 +689,7 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
         int ret = 0;
         struct l2tp_session *session;
  
-       session = l2tp_nl_session_find(info);
+       session = l2tp_nl_session_get(info, false);
         if (session == NULL) {
                 ret = -ENODEV;
                 goto out;
@@ -716,6 +724,8 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
         ret = l2tp_session_notify(&l2tp_nl_family, info,
                                   session, L2TP_CMD_SESSION_MODIFY);
  
+       l2tp_session_dec_refcount(session);
+
  out:
         return ret;
  }
@@ -811,29 +821,34 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info)
         struct sk_buff *msg;
         int ret;
  
-       session = l2tp_nl_session_find(info);
+       session = l2tp_nl_session_get(info, false);
         if (session == NULL) {
                 ret = -ENODEV;
-               goto out;
+               goto err;
         }
  
         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
         if (!msg) {
                 ret = -ENOMEM;
-               goto out;
+               goto err_ref;
         }
  
         ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq,
                                    0, session, L2TP_CMD_SESSION_GET);
         if (ret < 0)
-               goto err_out;
+               goto err_ref_msg;
  
-       return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid);
+       ret = genlmsg_unicast(genl_info_net(info), msg, info->snd_portid);
  
-err_out:
-       nlmsg_free(msg);
+       l2tp_session_dec_refcount(session);
  
-out:
+       return ret;
+
+err_ref_msg:
+       nlmsg_free(msg);
+err_ref:
+       l2tp_session_dec_refcount(session);
+err:
         return ret;
  }
  
@@ -852,7 +867,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback
                                 goto out;
                 }
  
-               session = l2tp_session_find_nth(tunnel, si);
+               session = l2tp_session_get_nth(tunnel, si, false);
                 if (session == NULL) {
                         ti++;
                         tunnel = NULL;
@@ -862,8 +877,11 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback
  
                 if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid,
                                          cb->nlh->nlmsg_seq, NLM_F_MULTI,
-                                        session, L2TP_CMD_SESSION_GET) < 0)
+                                        session, L2TP_CMD_SESSION_GET) < 0) {
+                       l2tp_session_dec_refcount(session);
                         break;
+               }
+               l2tp_session_dec_refcount(session);
  
                 si++;
         }
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c

index 36cc56fd041871c73796cc0a52241ef3e38483c9..32ea0f3d868c6459c4194732ffcc14d8a20ec8bc 100644 (file)
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -450,6 +450,10 @@ static void pppol2tp_session_close(struct l2tp_session *session)
  static void pppol2tp_session_destruct(struct sock *sk)
  {
         struct l2tp_session *session = sk->sk_user_data;
+
+       skb_queue_purge(&sk->sk_receive_queue);
+       skb_queue_purge(&sk->sk_write_queue);
+
         if (session) {
                 sk->sk_user_data = NULL;
                 BUG_ON(session->magic != L2TP_SESSION_MAGIC);
@@ -488,9 +492,6 @@ static int pppol2tp_release(struct socket *sock)
                 l2tp_session_queue_purge(session);
                 sock_put(sk);
         }
-       skb_queue_purge(&sk->sk_receive_queue);
-       skb_queue_purge(&sk->sk_write_queue);
-
         release_sock(sk);
  
         /* This will delete the session context via
@@ -582,6 +583,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
         int error = 0;
         u32 tunnel_id, peer_tunnel_id;
         u32 session_id, peer_session_id;
+       bool drop_refcnt = false;
         int ver = 2;
         int fd;
  
@@ -683,36 +685,36 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
         if (tunnel->peer_tunnel_id == 0)
                 tunnel->peer_tunnel_id = peer_tunnel_id;
  
-       /* Create session if it doesn't already exist. We handle the
-        * case where a session was previously created by the netlink
-        * interface by checking that the session doesn't already have
-        * a socket and its tunnel socket are what we expect. If any
-        * of those checks fail, return EEXIST to the caller.
-        */
-       session = l2tp_session_find(sock_net(sk), tunnel, session_id);
-       if (session == NULL) {
-               /* Default MTU must allow space for UDP/L2TP/PPP
-                * headers.
+       session = l2tp_session_get(sock_net(sk), tunnel, session_id, false);
+       if (session) {
+               drop_refcnt = true;
+               ps = l2tp_session_priv(session);
+
+               /* Using a pre-existing session is fine as long as it hasn't
+                * been connected yet.
                  */
-               cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+               if (ps->sock) {
+                       error = -EEXIST;
+                       goto end;
+               }
  
-               /* Allocate and initialize a new session context. */
-               session = l2tp_session_create(sizeof(struct pppol2tp_session),
-                                             tunnel, session_id,
-                                             peer_session_id, &cfg);
-               if (session == NULL) {
-                       error = -ENOMEM;
+               /* consistency checks */
+               if (ps->tunnel_sock != tunnel->sock) {
+                       error = -EEXIST;
                         goto end;
                 }
         } else {
-               ps = l2tp_session_priv(session);
-               error = -EEXIST;
-               if (ps->sock != NULL)
-                       goto end;
+               /* Default MTU must allow space for UDP/L2TP/PPP headers */
+               cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+               cfg.mru = cfg.mtu;
  
-               /* consistency checks */
-               if (ps->tunnel_sock != tunnel->sock)
+               session = l2tp_session_create(sizeof(struct pppol2tp_session),
+                                             tunnel, session_id,
+                                             peer_session_id, &cfg);
+               if (IS_ERR(session)) {
+                       error = PTR_ERR(session);
                         goto end;
+               }
         }
  
         /* Associate session with its PPPoL2TP socket */
@@ -777,6 +779,8 @@ out_no_ppp:
                   session->name);
  
  end:
+       if (drop_refcnt)
+               l2tp_session_dec_refcount(session);
         release_sock(sk);
  
         return error;
@@ -804,12 +808,6 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i
         if (tunnel->sock == NULL)
                 goto out;
  
-       /* Check that this session doesn't already exist */
-       error = -EEXIST;
-       session = l2tp_session_find(net, tunnel, session_id);
-       if (session != NULL)
-               goto out;
-
         /* Default MTU values. */
         if (cfg->mtu == 0)
                 cfg->mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
@@ -817,12 +815,13 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i
                 cfg->mru = cfg->mtu;
  
         /* Allocate and initialize a new session context. */
-       error = -ENOMEM;
         session = l2tp_session_create(sizeof(struct pppol2tp_session),
                                       tunnel, session_id,
                                       peer_session_id, cfg);
-       if (session == NULL)
+       if (IS_ERR(session)) {
+               error = PTR_ERR(session);
                 goto out;
+       }
  
         ps = l2tp_session_priv(session);
         ps->tunnel_sock = tunnel->sock;
@@ -1140,11 +1139,18 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
                 if (stats.session_id != 0) {
                         /* resend to session ioctl handler */
                         struct l2tp_session *session =
-                               l2tp_session_find(sock_net(sk), tunnel, stats.session_id);
-                       if (session != NULL)
-                               err = pppol2tp_session_ioctl(session, cmd, arg);
-                       else
+                               l2tp_session_get(sock_net(sk), tunnel,
+                                                stats.session_id, true);
+
+                       if (session) {
+                               err = pppol2tp_session_ioctl(session, cmd,
+                                                            arg);
+                               if (session->deref)
+                                       session->deref(session);
+                               l2tp_session_dec_refcount(session);
+                       } else {
                                 err = -EBADR;
+                       }
                         break;
                 }
  #ifdef CONFIG_XFRM
@@ -1377,8 +1383,6 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
         } else
                 err = pppol2tp_session_setsockopt(sk, session, optname, val);
  
-       err = 0;
-
  end_put_sess:
         sock_put(sk);
  end:
@@ -1501,8 +1505,13 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
  
                 err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val);
                 sock_put(ps->tunnel_sock);
-       } else
+               if (err)
+                       goto end_put_sess;
+       } else {
                 err = pppol2tp_session_getsockopt(sk, session, optname, &val);
+               if (err)
+                       goto end_put_sess;
+       }
  
         err = -EFAULT;
         if (put_user(len, optlen))
@@ -1554,7 +1563,7 @@ static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd)
  
  static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd)
  {
-       pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx);
+       pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true);
         pd->session_idx++;
  
         if (pd->session == NULL) {
@@ -1681,10 +1690,14 @@ static int pppol2tp_seq_show(struct seq_file *m, void *v)
  
         /* Show the tunnel or session context.
          */
-       if (pd->session == NULL)
+       if (!pd->session) {
                 pppol2tp_seq_tunnel_show(m, pd->tunnel);
-       else
+       } else {
                 pppol2tp_seq_session_show(m, pd->session);
+               if (pd->session->deref)
+                       pd->session->deref(pd->session);
+               l2tp_session_dec_refcount(pd->session);
+       }
  
  out:
         return 0;
@@ -1843,4 +1856,4 @@ MODULE_DESCRIPTION("PPP over L2TP over UDP");
  MODULE_LICENSE("GPL");
  MODULE_VERSION(PPPOL2TP_DRV_VERSION);
  MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_OL2TP);
-MODULE_ALIAS_L2TP_PWTYPE(11);
+MODULE_ALIAS_L2TP_PWTYPE(7);
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c

index 06186d608a274eb46cd768610c67e8a5a8e84c15..cb4fff785cbf5aaad520442dc243ae62dc5750ea 100644 (file)
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -641,11 +641,13 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb)
   *     @sock: Socket which connections arrive on.
   *     @newsock: Socket to move incoming connection to.
   *     @flags: User specified operational flags.
+ *     @kern: If the socket is kernel internal
   *
   *     Accept a new incoming connection.
   *     Returns 0 upon success, negative otherwise.
   */
-static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags)
+static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags,
+                        bool kern)
  {
         struct sock *sk = sock->sk, *newsk;
         struct llc_sock *llc, *newllc;
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c

index 40813dd3301c600978374e259953ca5d661022ce..5bb0c501281954dfe656c5e886c9032b958061be 100644 (file)
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -718,7 +718,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
         ieee80211_recalc_ps(local);
  
         if (sdata->vif.type == NL80211_IFTYPE_MONITOR ||
-           sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
+           sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
+           local->ops->wake_tx_queue) {
                 /* XXX: for AP_VLAN, actually track AP queues */
                 netif_tx_start_all_queues(dev);
         } else if (dev) {
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c

index e48724a6725e3266c1d5559d268339a7d2cd7f10..4d7543d1a62cce8d70e2d6894ffb86920c80c241 100644 (file)
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -208,6 +208,51 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local,
         return len;
  }
  
+static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata,
+                                        struct sk_buff *skb,
+                                        int rtap_vendor_space)
+{
+       struct {
+               struct ieee80211_hdr_3addr hdr;
+               u8 category;
+               u8 action_code;
+       } __packed action;
+
+       if (!sdata)
+               return;
+
+       BUILD_BUG_ON(sizeof(action) != IEEE80211_MIN_ACTION_SIZE + 1);
+
+       if (skb->len < rtap_vendor_space + sizeof(action) +
+                      VHT_MUMIMO_GROUPS_DATA_LEN)
+               return;
+
+       if (!is_valid_ether_addr(sdata->u.mntr.mu_follow_addr))
+               return;
+
+       skb_copy_bits(skb, rtap_vendor_space, &action, sizeof(action));
+
+       if (!ieee80211_is_action(action.hdr.frame_control))
+               return;
+
+       if (action.category != WLAN_CATEGORY_VHT)
+               return;
+
+       if (action.action_code != WLAN_VHT_ACTION_GROUPID_MGMT)
+               return;
+
+       if (!ether_addr_equal(action.hdr.addr1, sdata->u.mntr.mu_follow_addr))
+               return;
+
+       skb = skb_copy(skb, GFP_ATOMIC);
+       if (!skb)
+               return;
+
+       skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
+       skb_queue_tail(&sdata->skb_queue, skb);
+       ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+}
+
  /*
   * ieee80211_add_rx_radiotap_header - add radiotap header
   *
@@ -515,7 +560,6 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
         struct net_device *prev_dev = NULL;
         int present_fcs_len = 0;
         unsigned int rtap_vendor_space = 0;
-       struct ieee80211_mgmt *mgmt;
         struct ieee80211_sub_if_data *monitor_sdata =
                 rcu_dereference(local->monitor_sdata);
  
@@ -553,6 +597,8 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
                 return remove_monitor_info(local, origskb, rtap_vendor_space);
         }
  
+       ieee80211_handle_mu_mimo_mon(monitor_sdata, origskb, rtap_vendor_space);
+
         /* room for the radiotap header based on driver features */
         rt_hdrlen = ieee80211_rx_radiotap_hdrlen(local, status, origskb);
         needed_headroom = rt_hdrlen - rtap_vendor_space;
@@ -618,23 +664,6 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
                 ieee80211_rx_stats(sdata->dev, skb->len);
         }
  
-       mgmt = (void *)skb->data;
-       if (monitor_sdata &&
-           skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 + VHT_MUMIMO_GROUPS_DATA_LEN &&
-           ieee80211_is_action(mgmt->frame_control) &&
-           mgmt->u.action.category == WLAN_CATEGORY_VHT &&
-           mgmt->u.action.u.vht_group_notif.action_code == WLAN_VHT_ACTION_GROUPID_MGMT &&
-           is_valid_ether_addr(monitor_sdata->u.mntr.mu_follow_addr) &&
-           ether_addr_equal(mgmt->da, monitor_sdata->u.mntr.mu_follow_addr)) {
-               struct sk_buff *mu_skb = skb_copy(skb, GFP_ATOMIC);
-
-               if (mu_skb) {
-                       mu_skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
-                       skb_queue_tail(&monitor_sdata->skb_queue, mu_skb);
-                       ieee80211_queue_work(&local->hw, &monitor_sdata->work);
-               }
-       }
-
         if (prev_dev) {
                 skb->dev = prev_dev;
                 netif_receive_skb(skb);
@@ -3610,6 +3639,27 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
                             !ether_addr_equal(bssid, hdr->addr1))
                                 return false;
                 }
+
+               /*
+                * 802.11-2016 Table 9-26 says that for data frames, A1 must be
+                * the BSSID - we've checked that already but may have accepted
+                * the wildcard (ff:ff:ff:ff:ff:ff).
+                *
+                * It also says:
+                *      The BSSID of the Data frame is determined as follows:
+                *      a) If the STA is contained within an AP or is associated
+                *         with an AP, the BSSID is the address currently in use
+                *         by the STA contained in the AP.
+                *
+                * So we should not accept data frames with an address that's
+                * multicast.
+                *
+                * Accepting it also opens a security problem because stations
+                * could encrypt it with the GTK and inject traffic that way.
+                */
+               if (ieee80211_is_data(hdr->frame_control) && multicast)
+                       return false;
+
                 return true;
         case NL80211_IFTYPE_WDS:
                 if (bssid || !ieee80211_is_data(hdr->frame_control))
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c

index 3818686182b210be11025ff69d82f58e4e08e401..6414079aa7297eee8fbd6320fb4392c6d8865e34 100644 (file)
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1269,6 +1269,8 @@ static void mpls_ifdown(struct net_device *dev, int event)
  {
         struct mpls_route __rcu **platform_label;
         struct net *net = dev_net(dev);
+       unsigned int nh_flags = RTNH_F_DEAD | RTNH_F_LINKDOWN;
+       unsigned int alive;
         unsigned index;
  
         platform_label = rtnl_dereference(net->mpls.platform_label);
@@ -1278,9 +1280,11 @@ static void mpls_ifdown(struct net_device *dev, int event)
                 if (!rt)
                         continue;
  
+               alive = 0;
                 change_nexthops(rt) {
                         if (rtnl_dereference(nh->nh_dev) != dev)
-                               continue;
+                               goto next;
+
                         switch (event) {
                         case NETDEV_DOWN:
                         case NETDEV_UNREGISTER:
@@ -1288,12 +1292,16 @@ static void mpls_ifdown(struct net_device *dev, int event)
                                 /* fall through */
                         case NETDEV_CHANGE:
                                 nh->nh_flags |= RTNH_F_LINKDOWN;
-                               ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1;
                                 break;
                         }
                         if (event == NETDEV_UNREGISTER)
                                 RCU_INIT_POINTER(nh->nh_dev, NULL);
+next:
+                       if (!(nh->nh_flags & nh_flags))
+                               alive++;
                 } endfor_nexthops(rt);
+
+               WRITE_ONCE(rt->rt_nhn_alive, alive);
         }
  }
  
@@ -2028,6 +2036,7 @@ static void mpls_net_exit(struct net *net)
         for (index = 0; index < platform_labels; index++) {
                 struct mpls_route *rt = rtnl_dereference(platform_label[index]);
                 RCU_INIT_POINTER(platform_label[index], NULL);
+               mpls_notify_route(net, index, rt, NULL, NULL);
                 mpls_rt_free(rt);
         }
         rtnl_unlock();
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c

index 071b97fcbefb083ded417e06e739a4622b237fe8..ffb78e5f7b70912a2bba608a7f32f0a2bc486adc 100644 (file)
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -181,7 +181,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
  unsigned int nf_conntrack_max __read_mostly;
  seqcount_t nf_conntrack_generation __read_mostly;
  
-DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
+/* nf_conn must be 8 bytes aligned, as the 3 LSB bits are used
+ * for the nfctinfo. We cheat by (ab)using the PER CPU cache line
+ * alignment to enforce this.
+ */
+DEFINE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked);
  EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
  
  static unsigned int nf_conntrack_hash_rnd __read_mostly;
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c

index da9df2d56e669ed33d8126a484788e600dcaaabd..22fc32143e9c4ae17bc48edc68eb0decf11d931c 100644 (file)
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -290,6 +290,7 @@ void nf_conntrack_unregister_notifier(struct net *net,
         BUG_ON(notify != new);
         RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);
         mutex_unlock(&nf_ct_ecache_mutex);
+       /* synchronize_rcu() is called from ctnetlink_exit. */
  }
  EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
  
@@ -326,6 +327,7 @@ void nf_ct_expect_unregister_notifier(struct net *net,
         BUG_ON(notify != new);
         RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL);
         mutex_unlock(&nf_ct_ecache_mutex);
+       /* synchronize_rcu() is called from ctnetlink_exit. */
  }
  EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
  
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c

index 4b2e1fb28bb438d695715fc492f52bf7809ade5d..d80073037856db9081e57b7c88e482a61b94a45f 100644 (file)
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -57,7 +57,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
         hlist_del_rcu(&exp->hnode);
         net->ct.expect_count--;
  
-       hlist_del(&exp->lnode);
+       hlist_del_rcu(&exp->lnode);
         master_help->expecting[exp->class]--;
  
         nf_ct_expect_event_report(IPEXP_DESTROY, exp, portid, report);
@@ -363,7 +363,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
         /* two references : one for hash insert, one for the timer */
         atomic_add(2, &exp->use);
  
-       hlist_add_head(&exp->lnode, &master_help->expectations);
+       hlist_add_head_rcu(&exp->lnode, &master_help->expectations);
         master_help->expecting[exp->class]++;
  
         hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c

index 02bcf00c24920b332401cd7c82ab6ced951659f5..008299b7f78fe3754946cf0a58029090234ad905 100644 (file)
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -53,7 +53,11 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id,
  
         rcu_read_lock();
         t = rcu_dereference(nf_ct_ext_types[id]);
-       BUG_ON(t == NULL);
+       if (!t) {
+               rcu_read_unlock();
+               return NULL;
+       }
+
         off = ALIGN(sizeof(struct nf_ct_ext), t->align);
         len = off + t->len + var_alloc_len;
         alloc_size = t->alloc_size + var_alloc_len;
@@ -88,7 +92,10 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id,
  
         rcu_read_lock();
         t = rcu_dereference(nf_ct_ext_types[id]);
-       BUG_ON(t == NULL);
+       if (!t) {
+               rcu_read_unlock();
+               return NULL;
+       }
  
         newoff = ALIGN(old->len, t->align);
         newlen = newoff + t->len + var_alloc_len;
@@ -175,6 +182,6 @@ void nf_ct_extend_unregister(struct nf_ct_ext_type *type)
         RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL);
         update_alloc_size(type);
         mutex_unlock(&nf_ct_ext_type_mutex);
-       rcu_barrier(); /* Wait for completion of call_rcu()'s */
+       synchronize_rcu();
  }
  EXPORT_SYMBOL_GPL(nf_ct_extend_unregister);
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c

index 6dc44d9b41900bea12f487e5a044259e92a47f7e..4eeb3418366ad5473945395346b6e4e5fc213fbc 100644 (file)
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -158,16 +158,25 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum)
  {
         struct nf_conntrack_helper *h;
  
+       rcu_read_lock();
+
         h = __nf_conntrack_helper_find(name, l3num, protonum);
  #ifdef CONFIG_MODULES
         if (h == NULL) {
-               if (request_module("nfct-helper-%s", name) == 0)
+               rcu_read_unlock();
+               if (request_module("nfct-helper-%s", name) == 0) {
+                       rcu_read_lock();
                         h = __nf_conntrack_helper_find(name, l3num, protonum);
+               } else {
+                       return h;
+               }
         }
  #endif
         if (h != NULL && !try_module_get(h->me))
                 h = NULL;
  
+       rcu_read_unlock();
+
         return h;
  }
  EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get);
@@ -311,38 +320,36 @@ void nf_ct_helper_expectfn_unregister(struct nf_ct_helper_expectfn *n)
  }
  EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_unregister);
  
+/* Caller should hold the rcu lock */
  struct nf_ct_helper_expectfn *
  nf_ct_helper_expectfn_find_by_name(const char *name)
  {
         struct nf_ct_helper_expectfn *cur;
         bool found = false;
  
-       rcu_read_lock();
         list_for_each_entry_rcu(cur, &nf_ct_helper_expectfn_list, head) {
                 if (!strcmp(cur->name, name)) {
                         found = true;
                         break;
                 }
         }
-       rcu_read_unlock();
         return found ? cur : NULL;
  }
  EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_name);
  
+/* Caller should hold the rcu lock */
  struct nf_ct_helper_expectfn *
  nf_ct_helper_expectfn_find_by_symbol(const void *symbol)
  {
         struct nf_ct_helper_expectfn *cur;
         bool found = false;
  
-       rcu_read_lock();
         list_for_each_entry_rcu(cur, &nf_ct_helper_expectfn_list, head) {
                 if (cur->expectfn == symbol) {
                         found = true;
                         break;
                 }
         }
-       rcu_read_unlock();
         return found ? cur : NULL;
  }
  EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_symbol);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c

index 6806b5e73567bb0220b248682abed3e5e34f780e..dc7dfd68fafe5d8db341488ac7d9ad5bf8f80b46 100644 (file)
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1488,11 +1488,16 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
                  * treat the second attempt as a no-op instead of returning
                  * an error.
                  */
-               if (help && help->helper &&
-                   !strcmp(help->helper->name, helpname))
-                       return 0;
-               else
-                       return -EBUSY;
+               err = -EBUSY;
+               if (help) {
+                       rcu_read_lock();
+                       helper = rcu_dereference(help->helper);
+                       if (helper && !strcmp(helper->name, helpname))
+                               err = 0;
+                       rcu_read_unlock();
+               }
+
+               return err;
         }
  
         if (!strcmp(helpname, "")) {
@@ -1929,9 +1934,9 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
  
                         err = 0;
                         if (test_bit(IPS_EXPECTED_BIT, &ct->status))
-                               events = IPCT_RELATED;
+                               events = 1 << IPCT_RELATED;
                         else
-                               events = IPCT_NEW;
+                               events = 1 << IPCT_NEW;
  
                         if (cda[CTA_LABELS] &&
                             ctnetlink_attach_labels(ct, cda) == 0)
@@ -2675,8 +2680,8 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
         last = (struct nf_conntrack_expect *)cb->args[1];
         for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) {
  restart:
-               hlist_for_each_entry(exp, &nf_ct_expect_hash[cb->args[0]],
-                                    hnode) {
+               hlist_for_each_entry_rcu(exp, &nf_ct_expect_hash[cb->args[0]],
+                                        hnode) {
                         if (l3proto && exp->tuple.src.l3num != l3proto)
                                 continue;
  
@@ -2727,7 +2732,7 @@ ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
         rcu_read_lock();
         last = (struct nf_conntrack_expect *)cb->args[1];
  restart:
-       hlist_for_each_entry(exp, &help->expectations, lnode) {
+       hlist_for_each_entry_rcu(exp, &help->expectations, lnode) {
                 if (l3proto && exp->tuple.src.l3num != l3proto)
                         continue;
                 if (cb->args[1]) {
@@ -2789,6 +2794,12 @@ static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl,
                 return -ENOENT;
  
         ct = nf_ct_tuplehash_to_ctrack(h);
+       /* No expectation linked to this connection tracking. */
+       if (!nfct_help(ct)) {
+               nf_ct_put(ct);
+               return 0;
+       }
+
         c.data = ct;
  
         err = netlink_dump_start(ctnl, skb, nlh, &c);
@@ -3133,23 +3144,27 @@ ctnetlink_create_expect(struct net *net,
                 return -ENOENT;
         ct = nf_ct_tuplehash_to_ctrack(h);
  
+       rcu_read_lock();
         if (cda[CTA_EXPECT_HELP_NAME]) {
                 const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
  
                 helper = __nf_conntrack_helper_find(helpname, u3,
                                                     nf_ct_protonum(ct));
                 if (helper == NULL) {
+                       rcu_read_unlock();
  #ifdef CONFIG_MODULES
                         if (request_module("nfct-helper-%s", helpname) < 0) {
                                 err = -EOPNOTSUPP;
                                 goto err_ct;
                         }
+                       rcu_read_lock();
                         helper = __nf_conntrack_helper_find(helpname, u3,
                                                             nf_ct_protonum(ct));
                         if (helper) {
                                 err = -EAGAIN;
-                               goto err_ct;
+                               goto err_rcu;
                         }
+                       rcu_read_unlock();
  #endif
                         err = -EOPNOTSUPP;
                         goto err_ct;
@@ -3159,11 +3174,13 @@ ctnetlink_create_expect(struct net *net,
         exp = ctnetlink_alloc_expect(cda, ct, helper, &tuple, &mask);
         if (IS_ERR(exp)) {
                 err = PTR_ERR(exp);
-               goto err_ct;
+               goto err_rcu;
         }
  
         err = nf_ct_expect_related_report(exp, portid, report);
         nf_ct_expect_put(exp);
+err_rcu:
+       rcu_read_unlock();
  err_ct:
         nf_ct_put(ct);
         return err;
@@ -3442,6 +3459,7 @@ static void __exit ctnetlink_exit(void)
  #ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT
         RCU_INIT_POINTER(nfnl_ct_hook, NULL);
  #endif
+       synchronize_rcu();
  }
  
  module_init(ctnetlink_init);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c

index 94b14c5a8b177277e218790da32eafebff3be963..82802e4a6640817e64eb3f3a6ffcb875ad14a747 100644 (file)
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -903,6 +903,8 @@ static void __exit nf_nat_cleanup(void)
  #ifdef CONFIG_XFRM
         RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL);
  #endif
+       synchronize_rcu();
+
         for (i = 0; i < NFPROTO_NUMPROTO; i++)
                 kfree(nf_nat_l4protos[i]);
  
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c

index 31d358691af0963c664c742d180e79c664590005..804e8a0ab36ef56b120ea89be1994b39eca5bc36 100644 (file)
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -33,8 +33,16 @@ sctp_manip_pkt(struct sk_buff *skb,
                enum nf_nat_manip_type maniptype)
  {
         sctp_sctphdr_t *hdr;
+       int hdrsize = 8;
  
-       if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+       /* This could be an inner header returned in icmp packet; in such
+        * cases we cannot update the checksum field since it is outside
+        * of the 8 bytes of transport layer headers we are guaranteed.
+        */
+       if (skb->len >= hdroff + sizeof(*hdr))
+               hdrsize = sizeof(*hdr);
+
+       if (!skb_make_writable(skb, hdroff + hdrsize))
                 return false;
  
         hdr = (struct sctphdr *)(skb->data + hdroff);
@@ -47,6 +55,9 @@ sctp_manip_pkt(struct sk_buff *skb,
                 hdr->dest = tuple->dst.u.sctp.port;
         }
  
+       if (hdrsize < sizeof(*hdr))
+               return true;
+
         if (skb->ip_summed != CHECKSUM_PARTIAL) {
                 hdr->checksum = sctp_compute_cksum(skb, hdroff);
                 skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c

index d43869879fcfcea6ca23e3a579bd21f8aa855b10..86067560a3184f26c521e35b8f63e4952c95955e 100644 (file)
--- a/net/netfilter/nf_nat_redirect.c
+++ b/net/netfilter/nf_nat_redirect.c
@@ -101,11 +101,13 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
                 rcu_read_lock();
                 idev = __in6_dev_get(skb->dev);
                 if (idev != NULL) {
+                       read_lock_bh(&idev->lock);
                         list_for_each_entry(ifa, &idev->addr_list, if_list) {
                                 newdst = ifa->addr;
                                 addr = true;
                                 break;
                         }
+                       read_unlock_bh(&idev->lock);
                 }
                 rcu_read_unlock();
  
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c

index 5e0ccfd5bb37d1cbebb7e03b0998b7c24cca024d..434c739dfecaa8727dc193f8ad86e18133932660 100644 (file)
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3145,7 +3145,6 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
                 iter.count      = 0;
                 iter.err        = 0;
                 iter.fn         = nf_tables_bind_check_setelem;
-               iter.flush      = false;
  
                 set->ops->walk(ctx, set, &iter);
                 if (iter.err < 0)
@@ -3399,7 +3398,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
         args.iter.count         = 0;
         args.iter.err           = 0;
         args.iter.fn            = nf_tables_dump_setelem;
-       args.iter.flush         = false;
         set->ops->walk(&ctx, set, &args.iter);
  
         nla_nest_end(skb, nest);
@@ -3963,7 +3961,6 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
                 struct nft_set_iter iter = {
                         .genmask        = genmask,
                         .fn             = nft_flush_set,
-                       .flush          = true,
                 };
                 set->ops->walk(&ctx, set, &iter);
  
@@ -5114,7 +5111,6 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
                         iter.count      = 0;
                         iter.err        = 0;
                         iter.fn         = nf_tables_loop_check_setelem;
-                       iter.flush      = false;
  
                         set->ops->walk(ctx, set, &iter);
                         if (iter.err < 0)
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c

index de8782345c863777c8cedf95a5ccf60504e9586f..d45558178da5b62a8ad7c896e096c1862c512091 100644 (file)
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -32,6 +32,13 @@ MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
  MODULE_DESCRIPTION("nfnl_cthelper: User-space connection tracking helpers");
  
+struct nfnl_cthelper {
+       struct list_head                list;   /* link in nfnl_cthelper_list */
+       struct nf_conntrack_helper      helper; /* embedded helper object */
+};
+
+static LIST_HEAD(nfnl_cthelper_list);
+
  static int
  nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff,
                         struct nf_conn *ct, enum ip_conntrack_info ctinfo)
@@ -161,6 +168,7 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper,
         int i, ret;
         struct nf_conntrack_expect_policy *expect_policy;
         struct nlattr *tb[NFCTH_POLICY_SET_MAX+1];
+       unsigned int class_max;
  
         ret = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr,
                                nfnl_cthelper_expect_policy_set);
@@ -170,19 +178,18 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper,
         if (!tb[NFCTH_POLICY_SET_NUM])
                 return -EINVAL;
  
-       helper->expect_class_max =
-               ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM]));
-
-       if (helper->expect_class_max != 0 &&
-           helper->expect_class_max > NF_CT_MAX_EXPECT_CLASSES)
+       class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM]));
+       if (class_max == 0)
+               return -EINVAL;
+       if (class_max > NF_CT_MAX_EXPECT_CLASSES)
                 return -EOVERFLOW;
  
         expect_policy = kzalloc(sizeof(struct nf_conntrack_expect_policy) *
-                               helper->expect_class_max, GFP_KERNEL);
+                               class_max, GFP_KERNEL);
         if (expect_policy == NULL)
                 return -ENOMEM;
  
-       for (i=0; i<helper->expect_class_max; i++) {
+       for (i = 0; i < class_max; i++) {
                 if (!tb[NFCTH_POLICY_SET+i])
                         goto err;
  
@@ -191,6 +198,8 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper,
                 if (ret < 0)
                         goto err;
         }
+
+       helper->expect_class_max = class_max - 1;
         helper->expect_policy = expect_policy;
         return 0;
  err:
@@ -203,18 +212,20 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
                      struct nf_conntrack_tuple *tuple)
  {
         struct nf_conntrack_helper *helper;
+       struct nfnl_cthelper *nfcth;
         int ret;
  
         if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN])
                 return -EINVAL;
  
-       helper = kzalloc(sizeof(struct nf_conntrack_helper), GFP_KERNEL);
-       if (helper == NULL)
+       nfcth = kzalloc(sizeof(*nfcth), GFP_KERNEL);
+       if (nfcth == NULL)
                 return -ENOMEM;
+       helper = &nfcth->helper;
  
         ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]);
         if (ret < 0)
-               goto err;
+               goto err1;
  
         strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN);
         helper->data_len = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
@@ -245,14 +256,100 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
  
         ret = nf_conntrack_helper_register(helper);
         if (ret < 0)
-               goto err;
+               goto err2;
  
+       list_add_tail(&nfcth->list, &nfnl_cthelper_list);
         return 0;
-err:
-       kfree(helper);
+err2:
+       kfree(helper->expect_policy);
+err1:
+       kfree(nfcth);
         return ret;
  }
  
+static int
+nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy,
+                               struct nf_conntrack_expect_policy *new_policy,
+                               const struct nlattr *attr)
+{
+       struct nlattr *tb[NFCTH_POLICY_MAX + 1];
+       int err;
+
+       err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr,
+                              nfnl_cthelper_expect_pol);
+       if (err < 0)
+               return err;
+       /* Name, max and timeout attributes are all mandatory here. */
+       if (!tb[NFCTH_POLICY_NAME] ||
+           !tb[NFCTH_POLICY_EXPECT_MAX] ||
+           !tb[NFCTH_POLICY_EXPECT_TIMEOUT])
+               return -EINVAL;
+       /* A name that differs from the current policy's is refused. */
+       if (nla_strcmp(tb[NFCTH_POLICY_NAME], policy->name))
+               return -EBUSY;
+       /* Parsed values go to @new_policy only; @policy is not written. */
+       new_policy->max_expected =
+               ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX]));
+       new_policy->timeout =
+               ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT]));
+
+       return 0;
+}
+
+static int nfnl_cthelper_update_policy_all(struct nlattr *tb[],
+                                          struct nf_conntrack_helper *helper)
+{
+       struct nf_conntrack_expect_policy new_policy[helper->expect_class_max + 1];
+       struct nf_conntrack_expect_policy *policy;
+       int i, err;
+
+       /* Parse every class into new_policy[] before touching @helper, so a
+        * malformed attribute cannot leave the policies half-updated.
+        */
+       for (i = 0; i < helper->expect_class_max + 1; i++) {
+               /* Each class needs its own NFCTH_POLICY_SET + i attribute. */
+               if (!tb[NFCTH_POLICY_SET + i])
+                       return -EINVAL;
+
+               err = nfnl_cthelper_update_policy_one(&helper->expect_policy[i],
+                                                     &new_policy[i],
+                                                     tb[NFCTH_POLICY_SET + i]);
+               if (err < 0)
+                       return err;
+       }
+       /* All validated: commit new_policy[i] to class i (not element 0). */
+       for (i = 0; i < helper->expect_class_max + 1; i++) {
+               policy = (struct nf_conntrack_expect_policy *)
+                               &helper->expect_policy[i];
+               policy->max_expected = new_policy[i].max_expected;
+               policy->timeout = new_policy[i].timeout;
+       }
+
+       return 0;
+}
+
+static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper,
+                                      const struct nlattr *attr)
+{
+       struct nlattr *tb[NFCTH_POLICY_SET_MAX + 1];
+       unsigned int class_max;
+       int err;
+
+       err = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr,
+                              nfnl_cthelper_expect_policy_set);
+       if (err < 0)
+               return err;
+
+       if (!tb[NFCTH_POLICY_SET_NUM])
+               return -EINVAL;
+       /* The class count sent must equal the helper's expect_class_max + 1. */
+       class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM]));
+       if (helper->expect_class_max + 1 != class_max)
+               return -EBUSY;
+       /* Count matches: hand the per-class attributes on for the update. */
+       return nfnl_cthelper_update_policy_all(tb, helper);
+}
+
  static int
  nfnl_cthelper_update(const struct nlattr * const tb[],
                      struct nf_conntrack_helper *helper)
@@ -263,8 +360,7 @@ nfnl_cthelper_update(const struct nlattr * const tb[],
                 return -EBUSY;
  
         if (tb[NFCTH_POLICY]) {
-               ret = nfnl_cthelper_parse_expect_policy(helper,
-                                                       tb[NFCTH_POLICY]);
+               ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]);
                 if (ret < 0)
                         return ret;
         }
@@ -293,7 +389,8 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
         const char *helper_name;
         struct nf_conntrack_helper *cur, *helper = NULL;
         struct nf_conntrack_tuple tuple;
-       int ret = 0, i;
+       struct nfnl_cthelper *nlcth;
+       int ret = 0;
  
         if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE])
                 return -EINVAL;
@@ -304,31 +401,22 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
         if (ret < 0)
                 return ret;
  
-       rcu_read_lock();
-       for (i = 0; i < nf_ct_helper_hsize && !helper; i++) {
-               hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) {
+       list_for_each_entry(nlcth, &nfnl_cthelper_list, list) {
+               cur = &nlcth->helper;
  
-                       /* skip non-userspace conntrack helpers. */
-                       if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
-                               continue;
+               if (strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
+                       continue;
  
-                       if (strncmp(cur->name, helper_name,
-                                       NF_CT_HELPER_NAME_LEN) != 0)
-                               continue;
+               if ((tuple.src.l3num != cur->tuple.src.l3num ||
+                    tuple.dst.protonum != cur->tuple.dst.protonum))
+                       continue;
  
-                       if ((tuple.src.l3num != cur->tuple.src.l3num ||
-                            tuple.dst.protonum != cur->tuple.dst.protonum))
-                               continue;
+               if (nlh->nlmsg_flags & NLM_F_EXCL)
+                       return -EEXIST;
  
-                       if (nlh->nlmsg_flags & NLM_F_EXCL) {
-                               ret = -EEXIST;
-                               goto err;
-                       }
-                       helper = cur;
-                       break;
-               }
+               helper = cur;
+               break;
         }
-       rcu_read_unlock();
  
         if (helper == NULL)
                 ret = nfnl_cthelper_create(tb, &tuple);
@@ -336,9 +424,6 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
                 ret = nfnl_cthelper_update(tb, helper);
  
         return ret;
-err:
-       rcu_read_unlock();
-       return ret;
  }
  
  static int
@@ -377,10 +462,10 @@ nfnl_cthelper_dump_policy(struct sk_buff *skb,
                 goto nla_put_failure;
  
         if (nla_put_be32(skb, NFCTH_POLICY_SET_NUM,
-                        htonl(helper->expect_class_max)))
+                        htonl(helper->expect_class_max + 1)))
                 goto nla_put_failure;
  
-       for (i=0; i<helper->expect_class_max; i++) {
+       for (i = 0; i < helper->expect_class_max + 1; i++) {
                 nest_parms2 = nla_nest_start(skb,
                                 (NFCTH_POLICY_SET+i) | NLA_F_NESTED);
                 if (nest_parms2 == NULL)
@@ -502,11 +587,12 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
                              struct sk_buff *skb, const struct nlmsghdr *nlh,
                              const struct nlattr * const tb[])
  {
-       int ret = -ENOENT, i;
+       int ret = -ENOENT;
         struct nf_conntrack_helper *cur;
         struct sk_buff *skb2;
         char *helper_name = NULL;
         struct nf_conntrack_tuple tuple;
+       struct nfnl_cthelper *nlcth;
         bool tuple_set = false;
  
         if (nlh->nlmsg_flags & NLM_F_DUMP) {
@@ -527,45 +613,39 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
                 tuple_set = true;
         }
  
-       for (i = 0; i < nf_ct_helper_hsize; i++) {
-               hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) {
+       list_for_each_entry(nlcth, &nfnl_cthelper_list, list) {
+               cur = &nlcth->helper;
+               if (helper_name &&
+                   strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
+                       continue;
  
-                       /* skip non-userspace conntrack helpers. */
-                       if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
-                               continue;
+               if (tuple_set &&
+                   (tuple.src.l3num != cur->tuple.src.l3num ||
+                    tuple.dst.protonum != cur->tuple.dst.protonum))
+                       continue;
  
-                       if (helper_name && strncmp(cur->name, helper_name,
-                                               NF_CT_HELPER_NAME_LEN) != 0) {
-                               continue;
-                       }
-                       if (tuple_set &&
-                           (tuple.src.l3num != cur->tuple.src.l3num ||
-                            tuple.dst.protonum != cur->tuple.dst.protonum))
-                               continue;
-
-                       skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-                       if (skb2 == NULL) {
-                               ret = -ENOMEM;
-                               break;
-                       }
+               skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+               if (skb2 == NULL) {
+                       ret = -ENOMEM;
+                       break;
+               }
  
-                       ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid,
-                                               nlh->nlmsg_seq,
-                                               NFNL_MSG_TYPE(nlh->nlmsg_type),
-                                               NFNL_MSG_CTHELPER_NEW, cur);
-                       if (ret <= 0) {
-                               kfree_skb(skb2);
-                               break;
-                       }
+               ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid,
+                                             nlh->nlmsg_seq,
+                                             NFNL_MSG_TYPE(nlh->nlmsg_type),
+                                             NFNL_MSG_CTHELPER_NEW, cur);
+               if (ret <= 0) {
+                       kfree_skb(skb2);
+                       break;
+               }
  
-                       ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
-                                               MSG_DONTWAIT);
-                       if (ret > 0)
-                               ret = 0;
+               ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
+                                     MSG_DONTWAIT);
+               if (ret > 0)
+                       ret = 0;
  
-                       /* this avoids a loop in nfnetlink. */
-                       return ret == -EAGAIN ? -ENOBUFS : ret;
-               }
+               /* this avoids a loop in nfnetlink. */
+               return ret == -EAGAIN ? -ENOBUFS : ret;
         }
         return ret;
  }
@@ -576,10 +656,10 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
  {
         char *helper_name = NULL;
         struct nf_conntrack_helper *cur;
-       struct hlist_node *tmp;
         struct nf_conntrack_tuple tuple;
         bool tuple_set = false, found = false;
-       int i, j = 0, ret;
+       struct nfnl_cthelper *nlcth, *n;
+       int j = 0, ret;
  
         if (tb[NFCTH_NAME])
                 helper_name = nla_data(tb[NFCTH_NAME]);
@@ -592,28 +672,27 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
                 tuple_set = true;
         }
  
-       for (i = 0; i < nf_ct_helper_hsize; i++) {
-               hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i],
-                                                               hnode) {
-                       /* skip non-userspace conntrack helpers. */
-                       if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
-                               continue;
+       list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) {
+               cur = &nlcth->helper;
+               j++;
  
-                       j++;
+               if (helper_name &&
+                   strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
+                       continue;
  
-                       if (helper_name && strncmp(cur->name, helper_name,
-                                               NF_CT_HELPER_NAME_LEN) != 0) {
-                               continue;
-                       }
-                       if (tuple_set &&
-                           (tuple.src.l3num != cur->tuple.src.l3num ||
-                            tuple.dst.protonum != cur->tuple.dst.protonum))
-                               continue;
+               if (tuple_set &&
+                   (tuple.src.l3num != cur->tuple.src.l3num ||
+                    tuple.dst.protonum != cur->tuple.dst.protonum))
+                       continue;
  
-                       found = true;
-                       nf_conntrack_helper_unregister(cur);
-               }
+               found = true;
+               nf_conntrack_helper_unregister(cur);
+               kfree(cur->expect_policy);
+
+               list_del(&nlcth->list);
+               kfree(nlcth);
         }
+
         /* Make sure we return success if we flush and there is no helpers */
         return (found || j == 0) ? 0 : -ENOENT;
  }
@@ -662,20 +741,16 @@ err_out:
  static void __exit nfnl_cthelper_exit(void)
  {
         struct nf_conntrack_helper *cur;
-       struct hlist_node *tmp;
-       int i;
+       struct nfnl_cthelper *nlcth, *n;
  
         nfnetlink_subsys_unregister(&nfnl_cthelper_subsys);
  
-       for (i=0; i<nf_ct_helper_hsize; i++) {
-               hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i],
-                                                                       hnode) {
-                       /* skip non-userspace conntrack helpers. */
-                       if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
-                               continue;
+       list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) {
+               cur = &nlcth->helper;
  
-                       nf_conntrack_helper_unregister(cur);
-               }
+               nf_conntrack_helper_unregister(cur);
+               kfree(cur->expect_policy);
+               kfree(nlcth);
         }
  }
  
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c

index 139e0867e56e9e606942c98e75148eb17b2ec7eb..47d6656c9119fd5c75fb1fb5f0bfd4bb49f1c38b 100644 (file)
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -646,8 +646,8 @@ static void __exit cttimeout_exit(void)
  #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
         RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL);
         RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL);
+       synchronize_rcu();
  #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-       rcu_barrier();
  }
  
  module_init(cttimeout_init);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c

index 3ee0b8a000a41ec901faeb239e752a126428dc4d..933509ebf3d3e2e84aecd55fd7f19f21f69e46d4 100644 (file)
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -443,7 +443,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
         skb = alloc_skb(size, GFP_ATOMIC);
         if (!skb) {
                 skb_tx_error(entskb);
-               return NULL;
+               goto nlmsg_failure;
         }
  
         nlh = nlmsg_put(skb, 0, 0,
@@ -452,7 +452,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
         if (!nlh) {
                 skb_tx_error(entskb);
                 kfree_skb(skb);
-               return NULL;
+               goto nlmsg_failure;
         }
         nfmsg = nlmsg_data(nlh);
         nfmsg->nfgen_family = entry->state.pf;
@@ -598,12 +598,17 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
         }
  
         nlh->nlmsg_len = skb->len;
+       if (seclen)
+               security_release_secctx(secdata, seclen);
         return skb;
  
  nla_put_failure:
         skb_tx_error(entskb);
         kfree_skb(skb);
         net_err_ratelimited("nf_queue: error creating packet message\n");
+nlmsg_failure:
+       if (seclen)
+               security_release_secctx(secdata, seclen);
         return NULL;
  }
  
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c

index bf548a7a71ec9b49cf308af041811d2eb5f33c8c..0264258c46feb5071a8eebcf9299b4b717fd0a32 100644 (file)
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -83,7 +83,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
  
         switch (priv->key) {
         case NFT_CT_DIRECTION:
-               *dest = CTINFO2DIR(ctinfo);
+               nft_reg_store8(dest, CTINFO2DIR(ctinfo));
                 return;
         case NFT_CT_STATUS:
                 *dest = ct->status;
@@ -151,20 +151,22 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
                 return;
         }
         case NFT_CT_L3PROTOCOL:
-               *dest = nf_ct_l3num(ct);
+               nft_reg_store8(dest, nf_ct_l3num(ct));
                 return;
         case NFT_CT_PROTOCOL:
-               *dest = nf_ct_protonum(ct);
+               nft_reg_store8(dest, nf_ct_protonum(ct));
                 return;
  #ifdef CONFIG_NF_CONNTRACK_ZONES
         case NFT_CT_ZONE: {
                 const struct nf_conntrack_zone *zone = nf_ct_zone(ct);
+               u16 zoneid;
  
                 if (priv->dir < IP_CT_DIR_MAX)
-                       *dest = nf_ct_zone_id(zone, priv->dir);
+                       zoneid = nf_ct_zone_id(zone, priv->dir);
                 else
-                       *dest = zone->id;
+                       zoneid = zone->id;
  
+               nft_reg_store16(dest, zoneid);
                 return;
         }
  #endif
@@ -183,10 +185,10 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
                        nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
                 return;
         case NFT_CT_PROTO_SRC:
-               *dest = (__force __u16)tuple->src.u.all;
+               nft_reg_store16(dest, (__force u16)tuple->src.u.all);
                 return;
         case NFT_CT_PROTO_DST:
-               *dest = (__force __u16)tuple->dst.u.all;
+               nft_reg_store16(dest, (__force u16)tuple->dst.u.all);
                 return;
         default:
                 break;
@@ -205,7 +207,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
         const struct nft_ct *priv = nft_expr_priv(expr);
         struct sk_buff *skb = pkt->skb;
         enum ip_conntrack_info ctinfo;
-       u16 value = regs->data[priv->sreg];
+       u16 value = nft_reg_load16(&regs->data[priv->sreg]);
         struct nf_conn *ct;
  
         ct = nf_ct_get(skb, &ctinfo);
@@ -542,7 +544,8 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
                 case IP_CT_DIR_REPLY:
                         break;
                 default:
-                       return -EINVAL;
+                       err = -EINVAL;
+                       goto err1;
                 }
         }
  
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c

index eb2721af898dbb54ab099f4878d7b2673cbb9522..c4dad1254ead01818fb5b2c0474b26f74762fee2 100644 (file)
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -21,6 +21,7 @@ struct nft_hash {
         enum nft_registers      sreg:8;
         enum nft_registers      dreg:8;
         u8                      len;
+       bool                    autogen_seed:1;
         u32                     modulus;
         u32                     seed;
         u32                     offset;
@@ -82,10 +83,12 @@ static int nft_hash_init(const struct nft_ctx *ctx,
         if (priv->offset + priv->modulus - 1 < priv->offset)
                 return -EOVERFLOW;
  
-       if (tb[NFTA_HASH_SEED])
+       if (tb[NFTA_HASH_SEED]) {
                 priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED]));
-       else
+       } else {
+               priv->autogen_seed = true;
                 get_random_bytes(&priv->seed, sizeof(priv->seed));
+       }
  
         return nft_validate_register_load(priv->sreg, len) &&
                nft_validate_register_store(ctx, priv->dreg, NULL,
@@ -105,7 +108,8 @@ static int nft_hash_dump(struct sk_buff *skb,
                 goto nla_put_failure;
         if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus)))
                 goto nla_put_failure;
-       if (nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed)))
+       if (!priv->autogen_seed &&
+           nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed)))
                 goto nla_put_failure;
         if (priv->offset != 0)
                 if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset)))
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c

index e1f5ca9b423b5ffda43ec5519d4c8832ce695899..7b60e01f38ff9f2f9fa7d28f6f99b4f889d190d7 100644 (file)
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -45,16 +45,15 @@ void nft_meta_get_eval(const struct nft_expr *expr,
                 *dest = skb->len;
                 break;
         case NFT_META_PROTOCOL:
-               *dest = 0;
-               *(__be16 *)dest = skb->protocol;
+               nft_reg_store16(dest, (__force u16)skb->protocol);
                 break;
         case NFT_META_NFPROTO:
-               *dest = nft_pf(pkt);
+               nft_reg_store8(dest, nft_pf(pkt));
                 break;
         case NFT_META_L4PROTO:
                 if (!pkt->tprot_set)
                         goto err;
-               *dest = pkt->tprot;
+               nft_reg_store8(dest, pkt->tprot);
                 break;
         case NFT_META_PRIORITY:
                 *dest = skb->priority;
@@ -85,14 +84,12 @@ void nft_meta_get_eval(const struct nft_expr *expr,
         case NFT_META_IIFTYPE:
                 if (in == NULL)
                         goto err;
-               *dest = 0;
-               *(u16 *)dest = in->type;
+               nft_reg_store16(dest, in->type);
                 break;
         case NFT_META_OIFTYPE:
                 if (out == NULL)
                         goto err;
-               *dest = 0;
-               *(u16 *)dest = out->type;
+               nft_reg_store16(dest, out->type);
                 break;
         case NFT_META_SKUID:
                 sk = skb_to_full_sk(skb);
@@ -142,19 +139,19 @@ void nft_meta_get_eval(const struct nft_expr *expr,
  #endif
         case NFT_META_PKTTYPE:
                 if (skb->pkt_type != PACKET_LOOPBACK) {
-                       *dest = skb->pkt_type;
+                       nft_reg_store8(dest, skb->pkt_type);
                         break;
                 }
  
                 switch (nft_pf(pkt)) {
                 case NFPROTO_IPV4:
                         if (ipv4_is_multicast(ip_hdr(skb)->daddr))
-                               *dest = PACKET_MULTICAST;
+                               nft_reg_store8(dest, PACKET_MULTICAST);
                         else
-                               *dest = PACKET_BROADCAST;
+                               nft_reg_store8(dest, PACKET_BROADCAST);
                         break;
                 case NFPROTO_IPV6:
-                       *dest = PACKET_MULTICAST;
+                       nft_reg_store8(dest, PACKET_MULTICAST);
                         break;
                 case NFPROTO_NETDEV:
                         switch (skb->protocol) {
@@ -168,14 +165,14 @@ void nft_meta_get_eval(const struct nft_expr *expr,
                                         goto err;
  
                                 if (ipv4_is_multicast(iph->daddr))
-                                       *dest = PACKET_MULTICAST;
+                                       nft_reg_store8(dest, PACKET_MULTICAST);
                                 else
-                                       *dest = PACKET_BROADCAST;
+                                       nft_reg_store8(dest, PACKET_BROADCAST);
  
                                 break;
                         }
                         case htons(ETH_P_IPV6):
-                               *dest = PACKET_MULTICAST;
+                               nft_reg_store8(dest, PACKET_MULTICAST);
                                 break;
                         default:
                                 WARN_ON_ONCE(1);
@@ -230,7 +227,9 @@ void nft_meta_set_eval(const struct nft_expr *expr,
  {
         const struct nft_meta *meta = nft_expr_priv(expr);
         struct sk_buff *skb = pkt->skb;
-       u32 value = regs->data[meta->sreg];
+       u32 *sreg = &regs->data[meta->sreg];
+       u32 value = *sreg;
+       u8 pkt_type;
  
         switch (meta->key) {
         case NFT_META_MARK:
@@ -240,9 +239,12 @@ void nft_meta_set_eval(const struct nft_expr *expr,
                 skb->priority = value;
                 break;
         case NFT_META_PKTTYPE:
-               if (skb->pkt_type != value &&
-                   skb_pkt_type_ok(value) && skb_pkt_type_ok(skb->pkt_type))
-                       skb->pkt_type = value;
+               pkt_type = nft_reg_load8(sreg);
+
+               if (skb->pkt_type != pkt_type &&
+                   skb_pkt_type_ok(pkt_type) &&
+                   skb_pkt_type_ok(skb->pkt_type))
+                       skb->pkt_type = pkt_type;
                 break;
         case NFT_META_NFTRACE:
                 skb->nf_trace = !!value;
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c

index 19a7bf3236f968725a29e827012af301781802df..439e0bd152a004c98664a19ae6c920458fa6160a 100644 (file)
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -65,10 +65,10 @@ static void nft_nat_eval(const struct nft_expr *expr,
         }
  
         if (priv->sreg_proto_min) {
-               range.min_proto.all =
-                       *(__be16 *)&regs->data[priv->sreg_proto_min];
-               range.max_proto.all =
-                       *(__be16 *)&regs->data[priv->sreg_proto_max];
+               range.min_proto.all = (__force __be16)nft_reg_load16(
+                       &regs->data[priv->sreg_proto_min]);
+               range.max_proto.all = (__force __be16)nft_reg_load16(
+                       &regs->data[priv->sreg_proto_max]);
                 range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
         }
  
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c

index 152d226552c174929fd8973f023eaac888e4b0a9..8ebbc2940f4c593d393c65bd5674d90feb585d98 100644 (file)
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -15,6 +15,11 @@
  #include <linux/netfilter/nf_tables.h>
  #include <net/netfilter/nf_tables.h>
  
+struct nft_bitmap_elem {
+       struct list_head        head;
+       struct nft_set_ext      ext;
+};
+
  /* This bitmap uses two bits to represent one element. These two bits determine
   * the element state in the current and the future generation.
   *
@@ -41,13 +46,22 @@
   *      restore its previous state.
   */
  struct nft_bitmap {
-       u16     bitmap_size;
-       u8      bitmap[];
+       struct  list_head       list;
+       u16                     bitmap_size;
+       u8                      bitmap[];
  };
  
-static inline void nft_bitmap_location(u32 key, u32 *idx, u32 *off)
+static inline void nft_bitmap_location(const struct nft_set *set,
+                                      const void *key,
+                                      u32 *idx, u32 *off)
  {
-       u32 k = (key << 1);
+       u32 k;
+
+       if (set->klen == 2)
+               k = *(u16 *)key;
+       else
+               k = *(u8 *)key;
+       k <<= 1;
  
         *idx = k / BITS_PER_BYTE;
         *off = k % BITS_PER_BYTE;
@@ -69,26 +83,48 @@ static bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
         u8 genmask = nft_genmask_cur(net);
         u32 idx, off;
  
-       nft_bitmap_location(*key, &idx, &off);
+       nft_bitmap_location(set, key, &idx, &off);
  
         return nft_bitmap_active(priv->bitmap, idx, off, genmask);
  }
  
+static struct nft_bitmap_elem *
+nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this,
+                    u8 genmask)
+{
+       const struct nft_bitmap *priv = nft_set_priv(set);
+       struct nft_bitmap_elem *be;
+
+       list_for_each_entry_rcu(be, &priv->list, head) {
+               if (memcmp(nft_set_ext_key(&be->ext),
+                          nft_set_ext_key(&this->ext), set->klen) ||
+                   !nft_set_elem_active(&be->ext, genmask))
+                       continue;
+
+               return be;
+       }
+       return NULL;
+}
+
  static int nft_bitmap_insert(const struct net *net, const struct nft_set *set,
                              const struct nft_set_elem *elem,
-                            struct nft_set_ext **_ext)
+                            struct nft_set_ext **ext)
  {
         struct nft_bitmap *priv = nft_set_priv(set);
-       struct nft_set_ext *ext = elem->priv;
+       struct nft_bitmap_elem *new = elem->priv, *be;
         u8 genmask = nft_genmask_next(net);
         u32 idx, off;
  
-       nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off);
-       if (nft_bitmap_active(priv->bitmap, idx, off, genmask))
+       be = nft_bitmap_elem_find(set, new, genmask);
+       if (be) {
+               *ext = &be->ext;
                 return -EEXIST;
+       }
  
+       nft_bitmap_location(set, nft_set_ext_key(&new->ext), &idx, &off);
         /* Enter 01 state. */
         priv->bitmap[idx] |= (genmask << off);
+       list_add_tail_rcu(&new->head, &priv->list);
  
         return 0;
  }
@@ -98,13 +134,14 @@ static void nft_bitmap_remove(const struct net *net,
                               const struct nft_set_elem *elem)
  {
         struct nft_bitmap *priv = nft_set_priv(set);
-       struct nft_set_ext *ext = elem->priv;
+       struct nft_bitmap_elem *be = elem->priv;
         u8 genmask = nft_genmask_next(net);
         u32 idx, off;
  
-       nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off);
+       nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off);
         /* Enter 00 state. */
         priv->bitmap[idx] &= ~(genmask << off);
+       list_del_rcu(&be->head);
  }
  
  static void nft_bitmap_activate(const struct net *net,
@@ -112,74 +149,52 @@ static void nft_bitmap_activate(const struct net *net,
                                 const struct nft_set_elem *elem)
  {
         struct nft_bitmap *priv = nft_set_priv(set);
-       struct nft_set_ext *ext = elem->priv;
+       struct nft_bitmap_elem *be = elem->priv;
         u8 genmask = nft_genmask_next(net);
         u32 idx, off;
  
-       nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off);
+       nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off);
         /* Enter 11 state. */
         priv->bitmap[idx] |= (genmask << off);
+       nft_set_elem_change_active(net, set, &be->ext);
  }
  
  static bool nft_bitmap_flush(const struct net *net,
-                            const struct nft_set *set, void *ext)
+                            const struct nft_set *set, void *_be)
  {
         struct nft_bitmap *priv = nft_set_priv(set);
         u8 genmask = nft_genmask_next(net);
+       struct nft_bitmap_elem *be = _be;
         u32 idx, off;
  
-       nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off);
+       nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off);
         /* Enter 10 state, similar to deactivation. */
         priv->bitmap[idx] &= ~(genmask << off);
+       nft_set_elem_change_active(net, set, &be->ext);
  
         return true;
  }
  
-static struct nft_set_ext *nft_bitmap_ext_alloc(const struct nft_set *set,
-                                               const struct nft_set_elem *elem)
-{
-       struct nft_set_ext_tmpl tmpl;
-       struct nft_set_ext *ext;
-
-       nft_set_ext_prepare(&tmpl);
-       nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
-
-       ext = kzalloc(tmpl.len, GFP_KERNEL);
-       if (!ext)
-               return NULL;
-
-       nft_set_ext_init(ext, &tmpl);
-       memcpy(nft_set_ext_key(ext), elem->key.val.data, set->klen);
-
-       return ext;
-}
-
  static void *nft_bitmap_deactivate(const struct net *net,
                                    const struct nft_set *set,
                                    const struct nft_set_elem *elem)
  {
         struct nft_bitmap *priv = nft_set_priv(set);
+       struct nft_bitmap_elem *this = elem->priv, *be;
         u8 genmask = nft_genmask_next(net);
-       struct nft_set_ext *ext;
-       u32 idx, off, key = 0;
-
-       memcpy(&key, elem->key.val.data, set->klen);
-       nft_bitmap_location(key, &idx, &off);
+       u32 idx, off;
  
-       if (!nft_bitmap_active(priv->bitmap, idx, off, genmask))
-               return NULL;
+       nft_bitmap_location(set, elem->key.val.data, &idx, &off);
  
-       /* We have no real set extension since this is a bitmap, allocate this
-        * dummy object that is released from the commit/abort path.
-        */
-       ext = nft_bitmap_ext_alloc(set, elem);
-       if (!ext)
+       be = nft_bitmap_elem_find(set, this, genmask);
+       if (!be)
                 return NULL;
  
         /* Enter 10 state. */
         priv->bitmap[idx] &= ~(genmask << off);
+       nft_set_elem_change_active(net, set, &be->ext);
  
-       return ext;
+       return be;
  }
  
  static void nft_bitmap_walk(const struct nft_ctx *ctx,
@@ -187,47 +202,23 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx,
                             struct nft_set_iter *iter)
  {
         const struct nft_bitmap *priv = nft_set_priv(set);
-       struct nft_set_ext_tmpl tmpl;
+       struct nft_bitmap_elem *be;
         struct nft_set_elem elem;
-       struct nft_set_ext *ext;
-       int idx, off;
-       u16 key;
-
-       nft_set_ext_prepare(&tmpl);
-       nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
-
-       for (idx = 0; idx < priv->bitmap_size; idx++) {
-               for (off = 0; off < BITS_PER_BYTE; off += 2) {
-                       if (iter->count < iter->skip)
-                               goto cont;
-
-                       if (!nft_bitmap_active(priv->bitmap, idx, off,
-                                              iter->genmask))
-                               goto cont;
-
-                       ext = kzalloc(tmpl.len, GFP_KERNEL);
-                       if (!ext) {
-                               iter->err = -ENOMEM;
-                               return;
-                       }
-                       nft_set_ext_init(ext, &tmpl);
-                       key = ((idx * BITS_PER_BYTE) + off) >> 1;
-                       memcpy(nft_set_ext_key(ext), &key, set->klen);
-
-                       elem.priv = ext;
-                       iter->err = iter->fn(ctx, set, iter, &elem);
-
-                       /* On set flush, this dummy extension object is released
-                        * from the commit/abort path.
-                        */
-                       if (!iter->flush)
-                               kfree(ext);
-
-                       if (iter->err < 0)
-                               return;
+
+       list_for_each_entry_rcu(be, &priv->list, head) {
+               if (iter->count < iter->skip)
+                       goto cont;
+               if (!nft_set_elem_active(&be->ext, iter->genmask))
+                       goto cont;
+
+               elem.priv = be;
+
+               iter->err = iter->fn(ctx, set, iter, &elem);
+
+               if (iter->err < 0)
+                       return;
  cont:
-                       iter->count++;
-               }
+               iter->count++;
         }
  }
  
@@ -258,6 +249,7 @@ static int nft_bitmap_init(const struct nft_set *set,
  {
         struct nft_bitmap *priv = nft_set_priv(set);
  
+       INIT_LIST_HEAD(&priv->list);
         priv->bitmap_size = nft_bitmap_size(set->klen);
  
         return 0;
@@ -283,6 +275,7 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
  
  static struct nft_set_ops nft_bitmap_ops __read_mostly = {
         .privsize       = nft_bitmap_privsize,
+       .elemsize       = offsetof(struct nft_bitmap_elem, ext),
         .estimate       = nft_bitmap_estimate,
         .init           = nft_bitmap_init,
         .destroy        = nft_bitmap_destroy,
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c

index 27241a767f17b4b27d24095a31e5e9a2d3e29ce4..c64aca611ac5c5f81ad7c925652bbb90554763ac 100644 (file)
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -104,7 +104,7 @@ tcpmss_mangle_packet(struct sk_buff *skb,
         tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
         tcp_hdrlen = tcph->doff * 4;
  
-       if (len < tcp_hdrlen)
+       if (len < tcp_hdrlen || tcp_hdrlen < sizeof(struct tcphdr))
                 return -1;
  
         if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
@@ -152,6 +152,10 @@ tcpmss_mangle_packet(struct sk_buff *skb,
         if (len > tcp_hdrlen)
                 return 0;
  
+       /* tcph->doff has 4 bits, do not wrap it to 0 */
+       if (tcp_hdrlen >= 15 * 4)
+               return 0;
+
         /*
          * MSS Option not found ?! add it..
          */
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c

index 80cb7babeb6427d5768f9e636d5d9633d46f8413..df7f1df0033090c0cd76f3afda43e5159a509791 100644 (file)
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -393,7 +393,8 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
  
         rcu_read_lock();
         indev = __in6_dev_get(skb->dev);
-       if (indev)
+       if (indev) {
+               read_lock_bh(&indev->lock);
                 list_for_each_entry(ifa, &indev->addr_list, if_list) {
                         if (ifa->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED))
                                 continue;
@@ -401,6 +402,8 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
                         laddr = &ifa->addr;
                         break;
                 }
+               read_unlock_bh(&indev->lock);
+       }
         rcu_read_unlock();
  
         return laddr ? laddr : daddr;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c

index 7b73c7c161a9680b8691a712c31073b7789620f7..596eaff66649e5955d6c0f349f062b6d8360dc2d 100644 (file)
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -96,6 +96,44 @@ EXPORT_SYMBOL_GPL(nl_table);
  
  static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
  
+static struct lock_class_key nlk_cb_mutex_keys[MAX_LINKS];
+
+static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
+       "nlk_cb_mutex-ROUTE",
+       "nlk_cb_mutex-1",
+       "nlk_cb_mutex-USERSOCK",
+       "nlk_cb_mutex-FIREWALL",
+       "nlk_cb_mutex-SOCK_DIAG",
+       "nlk_cb_mutex-NFLOG",
+       "nlk_cb_mutex-XFRM",
+       "nlk_cb_mutex-SELINUX",
+       "nlk_cb_mutex-ISCSI",
+       "nlk_cb_mutex-AUDIT",
+       "nlk_cb_mutex-FIB_LOOKUP",
+       "nlk_cb_mutex-CONNECTOR",
+       "nlk_cb_mutex-NETFILTER",
+       "nlk_cb_mutex-IP6_FW",
+       "nlk_cb_mutex-DNRTMSG",
+       "nlk_cb_mutex-KOBJECT_UEVENT",
+       "nlk_cb_mutex-GENERIC",
+       "nlk_cb_mutex-17",
+       "nlk_cb_mutex-SCSITRANSPORT",
+       "nlk_cb_mutex-ECRYPTFS",
+       "nlk_cb_mutex-RDMA",
+       "nlk_cb_mutex-CRYPTO",
+       "nlk_cb_mutex-SMC",
+       "nlk_cb_mutex-23",
+       "nlk_cb_mutex-24",
+       "nlk_cb_mutex-25",
+       "nlk_cb_mutex-26",
+       "nlk_cb_mutex-27",
+       "nlk_cb_mutex-28",
+       "nlk_cb_mutex-29",
+       "nlk_cb_mutex-30",
+       "nlk_cb_mutex-31",
+       "nlk_cb_mutex-MAX_LINKS"
+};
+
  static int netlink_dump(struct sock *sk);
  static void netlink_skb_destructor(struct sk_buff *skb);
  
@@ -585,6 +623,9 @@ static int __netlink_create(struct net *net, struct socket *sock,
         } else {
                 nlk->cb_mutex = &nlk->cb_def_mutex;
                 mutex_init(nlk->cb_mutex);
+               lockdep_set_class_and_name(nlk->cb_mutex,
+                                          nlk_cb_mutex_keys + protocol,
+                                          nlk_cb_mutex_key_strings[protocol]);
         }
         init_waitqueue_head(&nlk->wait);
  
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c

index fb6e10fdb2174320c96608aea63d3c484d3625a0..92e0981f74040d7029b65863167b459322612024 100644 (file)
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -783,8 +783,10 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
  
                 if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid,
                                    cb->nlh->nlmsg_seq, NLM_F_MULTI,
-                                  skb, CTRL_CMD_NEWFAMILY) < 0)
+                                  skb, CTRL_CMD_NEWFAMILY) < 0) {
+                       n--;
                         break;
+               }
         }
  
         cb->args[0] = n;
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c

index 4bbf4526b88566d7c3f14e602f279b7e2570113c..ebf16f7f90892dd3029e643835859459baec4507 100644 (file)
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -765,7 +765,8 @@ out_release:
         return err;
  }
  
-static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
+static int nr_accept(struct socket *sock, struct socket *newsock, int flags,
+                    bool kern)
  {
         struct sk_buff *skb;
         struct sock *newsk;
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c

index 879885b31cce5ff2461c3a1524612527b9383bbd..2ffb18e73df6c03072fffeb68b660fb2f884eb45 100644 (file)
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -441,7 +441,7 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent,
  }
  
  static int llcp_sock_accept(struct socket *sock, struct socket *newsock,
-                           int flags)
+                           int flags, bool kern)
  {
         DECLARE_WAITQUEUE(wait, current);
         struct sock *sk = sock->sk, *new_sk;
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c

index e0a87776a010a3be352c0b2b71859e56c75a6b6f..7b2c2fce408a02d4251f03a2e3f0b4d9e7fccb80 100644 (file)
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -643,8 +643,8 @@ static bool skb_nfct_cached(struct net *net,
                  */
                 if (nf_ct_is_confirmed(ct))
                         nf_ct_delete(ct, 0, 0);
-               else
-                       nf_conntrack_put(&ct->ct_general);
+
+               nf_conntrack_put(&ct->ct_general);
                 nf_ct_set(skb, NULL, 0);
                 return false;
         }
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c

index 9d4bb8eb63f25c2e9e9e5f4190e6c943a32be547..3f76cb765e5bb71d18c3e9a4c220ed9fa3906186 100644 (file)
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -527,7 +527,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
  
         /* Link layer. */
         clear_vlan(key);
-       if (key->mac_proto == MAC_PROTO_NONE) {
+       if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
                 if (unlikely(eth_type_vlan(skb->protocol)))
                         return -EINVAL;
  
@@ -745,7 +745,13 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
  
  int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
  {
-       return key_extract(skb, key);
+       int res;
+
+       res = key_extract(skb, key);
+       if (!res)
+               key->mac_proto &= ~SW_FLOW_KEY_INVALID;
+
+       return res;
  }
  
  static int key_extract_mac_proto(struct sk_buff *skb)
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c

index 6f5fa50f716d066333b30edde43e5165b9fe94be..1105a838bab83f9fe647423060d08e4a415bab61 100644 (file)
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -604,7 +604,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
                         ipv4 = true;
                         break;
                 case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
-                       SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
+                       SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src,
                                         nla_get_in6_addr(a), is_mask);
                         ipv6 = true;
                         break;
@@ -665,6 +665,8 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
                         tun_flags |= TUNNEL_VXLAN_OPT;
                         opts_type = type;
                         break;
+               case OVS_TUNNEL_KEY_ATTR_PAD:
+                       break;
                 default:
                         OVS_NLERR(log, "Unknown IP tunnel attribute %d",
                                   type);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c

index a0dbe7ca8f724cd33b675ea15fb263d82041994c..ea81ccf3c7d6a53095b4922329c25b566d5b5940 100644 (file)
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3665,6 +3665,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
                         return -EBUSY;
                 if (copy_from_user(&val, optval, sizeof(val)))
                         return -EFAULT;
+               if (val > INT_MAX)
+                       return -EINVAL;
                 po->tp_reserve = val;
                 return 0;
         }
@@ -3834,6 +3836,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
         case PACKET_HDRLEN:
                 if (len > sizeof(int))
                         len = sizeof(int);
+               if (len < sizeof(int))
+                       return -EINVAL;
                 if (copy_from_user(&val, optval, len))
                         return -EFAULT;
                 switch (val) {
@@ -4193,8 +4197,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
                 if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
                         goto out;
                 if (po->tp_version >= TPACKET_V3 &&
-                   (int)(req->tp_block_size -
-                         BLK_PLUS_PRIV(req_u->req3.tp_sizeof_priv)) <= 0)
+                   req->tp_block_size <=
+                         BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv))
                         goto out;
                 if (unlikely(req->tp_frame_size < po->tp_hdrlen +
                                         po->tp_reserve))
@@ -4205,6 +4209,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
                 rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
                 if (unlikely(rb->frames_per_block == 0))
                         goto out;
+               if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr))
+                       goto out;
                 if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
                                         req->tp_frame_nr))
                         goto out;
diff --git a/net/phonet/pep.c b/net/phonet/pep.c

index 222bedcd95754c80644748daba365dc00b10fd8c..e81537991ddf0d67e6eca19fc9eb6f442d3c06a4 100644 (file)
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -772,7 +772,8 @@ static void pep_sock_close(struct sock *sk, long timeout)
         sock_put(sk);
  }
  
-static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
+static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
+                                   bool kern)
  {
         struct pep_sock *pn = pep_sk(sk), *newpn;
         struct sock *newsk = NULL;
@@ -846,7 +847,8 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
         }
  
         /* Create a new to-be-accepted sock */
-       newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot, 0);
+       newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot,
+                        kern);
         if (!newsk) {
                 pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL);
                 err = -ENOBUFS;
diff --git a/net/phonet/socket.c b/net/phonet/socket.c

index a6c8da3ee89349989a9f23e095b98293050da73a..64634e3ec2fc78ebb84ad8873f6e446d06844493 100644 (file)
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -305,7 +305,7 @@ out:
  }
  
  static int pn_socket_accept(struct socket *sock, struct socket *newsock,
-                               int flags)
+                           int flags, bool kern)
  {
         struct sock *sk = sock->sk;
         struct sock *newsk;
@@ -314,7 +314,7 @@ static int pn_socket_accept(struct socket *sock, struct socket *newsock,
         if (unlikely(sk->sk_state != TCP_LISTEN))
                 return -EINVAL;
  
-       newsk = sk->sk_prot->accept(sk, flags, &err);
+       newsk = sk->sk_prot->accept(sk, flags, &err, kern);
         if (!newsk)
                 return err;
  
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c

index ae5ac175b2bef96ffa614bc799db5cd90a7bdc08..9da7368b0140f9e4a96794e21d66f487881987ba 100644 (file)
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -658,7 +658,9 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
         }
  
         if (plen != len) {
-               skb_pad(skb, plen - len);
+               rc = skb_pad(skb, plen - len);
+               if (rc)
+                       goto out_node;
                 skb_put(skb, plen - len);
         }
  
diff --git a/net/rds/connection.c b/net/rds/connection.c

index 0e04dcceb1d416438be8bb40fc68253f336f631d..1fa75ab7b733230585666abcb7279ba691365256 100644 (file)
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -429,6 +429,7 @@ void rds_conn_destroy(struct rds_connection *conn)
          */
         rds_cong_remove_conn(conn);
  
+       put_net(conn->c_net);
         kmem_cache_free(rds_conn_slab, conn);
  
         spin_lock_irqsave(&rds_conn_lock, flags);
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c

index ce3775abc6e7a1d30e335aaea749a9840e949786..1c38d2c7caa8e955585b45f0c9218a0775013b4d 100644 (file)
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -442,7 +442,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
                 ic->i_send_cq = NULL;
                 ibdev_put_vector(rds_ibdev, ic->i_scq_vector);
                 rdsdebug("ib_create_cq send failed: %d\n", ret);
-               goto out;
+               goto rds_ibdev_out;
         }
  
         ic->i_rcq_vector = ibdev_get_unused_vector(rds_ibdev);
@@ -456,19 +456,19 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
                 ic->i_recv_cq = NULL;
                 ibdev_put_vector(rds_ibdev, ic->i_rcq_vector);
                 rdsdebug("ib_create_cq recv failed: %d\n", ret);
-               goto out;
+               goto send_cq_out;
         }
  
         ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP);
         if (ret) {
                 rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
-               goto out;
+               goto recv_cq_out;
         }
  
         ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
         if (ret) {
                 rdsdebug("ib_req_notify_cq recv failed: %d\n", ret);
-               goto out;
+               goto recv_cq_out;
         }
  
         /* XXX negotiate max send/recv with remote? */
@@ -494,7 +494,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
         ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
         if (ret) {
                 rdsdebug("rdma_create_qp failed: %d\n", ret);
-               goto out;
+               goto recv_cq_out;
         }
  
         ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
@@ -504,7 +504,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
         if (!ic->i_send_hdrs) {
                 ret = -ENOMEM;
                 rdsdebug("ib_dma_alloc_coherent send failed\n");
-               goto out;
+               goto qp_out;
         }
  
         ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
@@ -514,7 +514,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
         if (!ic->i_recv_hdrs) {
                 ret = -ENOMEM;
                 rdsdebug("ib_dma_alloc_coherent recv failed\n");
-               goto out;
+               goto send_hdrs_dma_out;
         }
  
         ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
@@ -522,7 +522,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
         if (!ic->i_ack) {
                 ret = -ENOMEM;
                 rdsdebug("ib_dma_alloc_coherent ack failed\n");
-               goto out;
+               goto recv_hdrs_dma_out;
         }
  
         ic->i_sends = vzalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work),
@@ -530,7 +530,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
         if (!ic->i_sends) {
                 ret = -ENOMEM;
                 rdsdebug("send allocation failed\n");
-               goto out;
+               goto ack_dma_out;
         }
  
         ic->i_recvs = vzalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work),
@@ -538,7 +538,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
         if (!ic->i_recvs) {
                 ret = -ENOMEM;
                 rdsdebug("recv allocation failed\n");
-               goto out;
+               goto sends_out;
         }
  
         rds_ib_recv_init_ack(ic);
@@ -546,8 +546,33 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
         rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd,
                  ic->i_send_cq, ic->i_recv_cq);
  
-out:
+       return ret;
+
+sends_out:
+       vfree(ic->i_sends);
+ack_dma_out:
+       ib_dma_free_coherent(dev, sizeof(struct rds_header),
+                            ic->i_ack, ic->i_ack_dma);
+recv_hdrs_dma_out:
+       ib_dma_free_coherent(dev, ic->i_recv_ring.w_nr *
+                                       sizeof(struct rds_header),
+                                       ic->i_recv_hdrs, ic->i_recv_hdrs_dma);
+send_hdrs_dma_out:
+       ib_dma_free_coherent(dev, ic->i_send_ring.w_nr *
+                                       sizeof(struct rds_header),
+                                       ic->i_send_hdrs, ic->i_send_hdrs_dma);
+qp_out:
+       rdma_destroy_qp(ic->i_cm_id);
+recv_cq_out:
+       if (!ib_destroy_cq(ic->i_recv_cq))
+               ic->i_recv_cq = NULL;
+send_cq_out:
+       if (!ib_destroy_cq(ic->i_send_cq))
+               ic->i_send_cq = NULL;
+rds_ibdev_out:
+       rds_ib_remove_conn(rds_ibdev, conn);
         rds_ib_dev_put(rds_ibdev);
+
         return ret;
  }
  
diff --git a/net/rds/rds.h b/net/rds/rds.h

index 39518ef7af4dfbada74af4a685cd8fe8dbaf9e40..82d38ccf5e8bcf99eefd20934744cbeb7410406b 100644 (file)
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -147,7 +147,7 @@ struct rds_connection {
  
         /* Protocol version */
         unsigned int            c_version;
-       possible_net_t          c_net;
+       struct net              *c_net;
  
         struct list_head        c_map_item;
         unsigned long           c_map_queued;
@@ -162,13 +162,13 @@ struct rds_connection {
  static inline
  struct net *rds_conn_net(struct rds_connection *conn)
  {
-       return read_pnet(&conn->c_net);
+       return conn->c_net;
  }
  
  static inline
  void rds_conn_net_set(struct rds_connection *conn, struct net *net)
  {
-       write_pnet(&conn->c_net, net);
+       conn->c_net = get_net(net);
  }
  
  #define RDS_FLAG_CONG_BITMAP   0x01
diff --git a/net/rds/tcp.c b/net/rds/tcp.c

index a973d3b4dff0b2216bf3698cfbfeeb4b227dfc37..22569007677357ba40347ee46e6584c683de2597 100644 (file)
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -484,9 +484,10 @@ static void __net_exit rds_tcp_exit_net(struct net *net)
          * we do need to clean up the listen socket here.
          */
         if (rtn->rds_tcp_listen_sock) {
-               rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
+               struct socket *lsock = rtn->rds_tcp_listen_sock;
+
                 rtn->rds_tcp_listen_sock = NULL;
-               flush_work(&rtn->rds_tcp_accept_w);
+               rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
         }
  }
  
@@ -523,13 +524,13 @@ static void rds_tcp_kill_sock(struct net *net)
         struct rds_tcp_connection *tc, *_tc;
         LIST_HEAD(tmp_list);
         struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+       struct socket *lsock = rtn->rds_tcp_listen_sock;
  
-       rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
         rtn->rds_tcp_listen_sock = NULL;
-       flush_work(&rtn->rds_tcp_accept_w);
+       rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
         spin_lock_irq(&rds_tcp_conn_lock);
         list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
-               struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
+               struct net *c_net = tc->t_cpath->cp_conn->c_net;
  
                 if (net != c_net || !tc->t_sock)
                         continue;
@@ -546,8 +547,12 @@ static void rds_tcp_kill_sock(struct net *net)
  void *rds_tcp_listen_sock_def_readable(struct net *net)
  {
         struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+       struct socket *lsock = rtn->rds_tcp_listen_sock;
+
+       if (!lsock)
+               return NULL;
  
-       return rtn->rds_tcp_listen_sock->sk->sk_user_data;
+       return lsock->sk->sk_user_data;
  }
  
  static int rds_tcp_dev_event(struct notifier_block *this,
@@ -584,7 +589,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
  
         spin_lock_irq(&rds_tcp_conn_lock);
         list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
-               struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
+               struct net *c_net = tc->t_cpath->cp_conn->c_net;
  
                 if (net != c_net || !tc->t_sock)
                         continue;
@@ -638,19 +643,19 @@ static int rds_tcp_init(void)
                 goto out;
         }
  
-       ret = register_netdevice_notifier(&rds_tcp_dev_notifier);
-       if (ret) {
-               pr_warn("could not register rds_tcp_dev_notifier\n");
+       ret = rds_tcp_recv_init();
+       if (ret)
                 goto out_slab;
-       }
  
         ret = register_pernet_subsys(&rds_tcp_net_ops);
         if (ret)
-               goto out_notifier;
+               goto out_recv;
  
-       ret = rds_tcp_recv_init();
-       if (ret)
+       ret = register_netdevice_notifier(&rds_tcp_dev_notifier);
+       if (ret) {
+               pr_warn("could not register rds_tcp_dev_notifier\n");
                 goto out_pernet;
+       }
  
         rds_trans_register(&rds_tcp_transport);
  
@@ -660,9 +665,8 @@ static int rds_tcp_init(void)
  
  out_pernet:
         unregister_pernet_subsys(&rds_tcp_net_ops);
-out_notifier:
-       if (unregister_netdevice_notifier(&rds_tcp_dev_notifier))
-               pr_warn("could not unregister rds_tcp_dev_notifier\n");
+out_recv:
+       rds_tcp_recv_exit();
  out_slab:
         kmem_cache_destroy(rds_tcp_conn_slab);
  out:
diff --git a/net/rds/tcp.h b/net/rds/tcp.h

index 9a1cc890657679798cf58888c42d5bb2372f0fef..56ea6620fcf97ce40d0926089b5e5b188ea1a1fe 100644 (file)
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -66,7 +66,7 @@ void rds_tcp_state_change(struct sock *sk);
  
  /* tcp_listen.c */
  struct socket *rds_tcp_listen_init(struct net *);
-void rds_tcp_listen_stop(struct socket *);
+void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor);
  void rds_tcp_listen_data_ready(struct sock *sk);
  int rds_tcp_accept_one(struct socket *sock);
  int rds_tcp_keepalive(struct socket *sock);
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c

index 67d0929c7d3d0c97ed209af9a67b4d83343c3de1..507678853e6cb3bb769711d22d75f5099228faab 100644 (file)
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -133,7 +133,7 @@ int rds_tcp_accept_one(struct socket *sock)
  
         new_sock->type = sock->type;
         new_sock->ops = sock->ops;
-       ret = sock->ops->accept(sock, new_sock, O_NONBLOCK);
+       ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, true);
         if (ret < 0)
                 goto out;
  
@@ -223,6 +223,9 @@ void rds_tcp_listen_data_ready(struct sock *sk)
          * before it has been accepted and the accepter has set up their
          * data_ready.. we only want to queue listen work for our listening
          * socket
+        *
+        * (*ready)() may be null if we are racing with netns delete, and
+        * the listen socket is being torn down.
          */
         if (sk->sk_state == TCP_LISTEN)
                 rds_tcp_accept_work(sk);
@@ -231,7 +234,8 @@ void rds_tcp_listen_data_ready(struct sock *sk)
  
  out:
         read_unlock_bh(&sk->sk_callback_lock);
-       ready(sk);
+       if (ready)
+               ready(sk);
  }
  
  struct socket *rds_tcp_listen_init(struct net *net)
@@ -271,7 +275,7 @@ out:
         return NULL;
  }
  
-void rds_tcp_listen_stop(struct socket *sock)
+void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor)
  {
         struct sock *sk;
  
@@ -292,5 +296,6 @@ void rds_tcp_listen_stop(struct socket *sock)
  
         /* wait for accepts to stop and close the socket */
         flush_workqueue(rds_wq);
+       flush_work(acceptor);
         sock_release(sock);
  }
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c

index b8a1df2c97853246b2485d9d30caa0e19b61278a..4a9729257023676565a0ff8c140ef56823b6d374 100644 (file)
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -871,7 +871,8 @@ out_release:
         return err;
  }
  
-static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
+static int rose_accept(struct socket *sock, struct socket *newsock, int flags,
+                      bool kern)
  {
         struct sk_buff *skb;
         struct sock *newsk;
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c

index 3f9d8d7ec6323a95de3e08d01098abdfcf33ff4f..b099b64366f356c27dea0a4dd215cc1034e61b55 100644 (file)
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -275,6 +275,10 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
                 rxrpc_conn_retransmit_call(conn, skb);
                 return 0;
  
+       case RXRPC_PACKET_TYPE_BUSY:
+               /* Just ignore BUSY packets for now. */
+               return 0;
+
         case RXRPC_PACKET_TYPE_ABORT:
                 if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
                                   &wtmp, sizeof(wtmp)) < 0)
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c

index 9f4cfa25af7c92c406e81d8003b8aa07c7892a04..18b2ad8be8e2b57dd57ef846287add68b027b08e 100644 (file)
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -420,6 +420,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
                              u16 skew)
  {
         struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+       enum rxrpc_call_state state;
         unsigned int offset = sizeof(struct rxrpc_wire_header);
         unsigned int ix;
         rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0;
@@ -434,14 +435,15 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
         _proto("Rx DATA %%%u { #%u f=%02x }",
                sp->hdr.serial, seq, sp->hdr.flags);
  
-       if (call->state >= RXRPC_CALL_COMPLETE)
+       state = READ_ONCE(call->state);
+       if (state >= RXRPC_CALL_COMPLETE)
                 return;
  
         /* Received data implicitly ACKs all of the request packets we sent
          * when we're acting as a client.
          */
-       if ((call->state == RXRPC_CALL_CLIENT_SEND_REQUEST ||
-            call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) &&
+       if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST ||
+            state == RXRPC_CALL_CLIENT_AWAIT_REPLY) &&
             !rxrpc_receiving_reply(call))
                 return;
  
@@ -650,6 +652,7 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
         struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
         struct rxrpc_peer *peer;
         unsigned int mtu;
+       bool wake = false;
         u32 rwind = ntohl(ackinfo->rwind);
  
         _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }",
@@ -657,9 +660,14 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
                ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU),
                rwind, ntohl(ackinfo->jumbo_max));
  
-       if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
-               rwind = RXRPC_RXTX_BUFF_SIZE - 1;
-       call->tx_winsize = rwind;
+       if (call->tx_winsize != rwind) {
+               if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
+                       rwind = RXRPC_RXTX_BUFF_SIZE - 1;
+               if (rwind > call->tx_winsize)
+                       wake = true;
+               call->tx_winsize = rwind;
+       }
+
         if (call->cong_ssthresh > rwind)
                 call->cong_ssthresh = rwind;
  
@@ -673,6 +681,9 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
                 spin_unlock_bh(&peer->lock);
                 _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata);
         }
+
+       if (wake)
+               wake_up(&call->waitq);
  }
  
  /*
@@ -799,7 +810,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
                 return rxrpc_proto_abort("AK0", call, 0);
  
         /* Ignore ACKs unless we are or have just been transmitting. */
-       switch (call->state) {
+       switch (READ_ONCE(call->state)) {
         case RXRPC_CALL_CLIENT_SEND_REQUEST:
         case RXRPC_CALL_CLIENT_AWAIT_REPLY:
         case RXRPC_CALL_SERVER_SEND_REPLY:
@@ -940,7 +951,7 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call,
  static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn,
                                           struct rxrpc_call *call)
  {
-       switch (call->state) {
+       switch (READ_ONCE(call->state)) {
         case RXRPC_CALL_SERVER_AWAIT_ACK:
                 rxrpc_call_completed(call);
                 break;
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c

index 6491ca46a03fda6dc66e02e887ad08012acca14b..3e2f1a8e9c5b51bf90ce8679c06b6ec8a8958ea9 100644 (file)
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -527,7 +527,7 @@ try_again:
                 msg->msg_namelen = len;
         }
  
-       switch (call->state) {
+       switch (READ_ONCE(call->state)) {
         case RXRPC_CALL_SERVER_ACCEPTING:
                 ret = rxrpc_recvmsg_new_call(rx, call, msg, flags);
                 break;
@@ -640,7 +640,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
  
         mutex_lock(&call->user_mutex);
  
-       switch (call->state) {
+       switch (READ_ONCE(call->state)) {
         case RXRPC_CALL_CLIENT_RECV_REPLY:
         case RXRPC_CALL_SERVER_RECV_REQUEST:
         case RXRPC_CALL_SERVER_ACK_REQUEST:
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c

index bc2d3dcff9de76fcc42a20a3aeaec2305ebd2d6c..97ab214ca4118d7a451a4e56a916bd5809ae81f3 100644 (file)
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -488,6 +488,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
  int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
         __releases(&rx->sk.sk_lock.slock)
  {
+       enum rxrpc_call_state state;
         enum rxrpc_command cmd;
         struct rxrpc_call *call;
         unsigned long user_call_ID = 0;
@@ -526,13 +527,17 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
                         return PTR_ERR(call);
                 /* ... and we have the call lock. */
         } else {
-               ret = -EBUSY;
-               if (call->state == RXRPC_CALL_UNINITIALISED ||
-                   call->state == RXRPC_CALL_CLIENT_AWAIT_CONN ||
-                   call->state == RXRPC_CALL_SERVER_PREALLOC ||
-                   call->state == RXRPC_CALL_SERVER_SECURING ||
-                   call->state == RXRPC_CALL_SERVER_ACCEPTING)
+               switch (READ_ONCE(call->state)) {
+               case RXRPC_CALL_UNINITIALISED:
+               case RXRPC_CALL_CLIENT_AWAIT_CONN:
+               case RXRPC_CALL_SERVER_PREALLOC:
+               case RXRPC_CALL_SERVER_SECURING:
+               case RXRPC_CALL_SERVER_ACCEPTING:
+                       ret = -EBUSY;
                         goto error_release_sock;
+               default:
+                       break;
+               }
  
                 ret = mutex_lock_interruptible(&call->user_mutex);
                 release_sock(&rx->sk);
@@ -542,10 +547,11 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
                 }
         }
  
+       state = READ_ONCE(call->state);
         _debug("CALL %d USR %lx ST %d on CONN %p",
-              call->debug_id, call->user_call_ID, call->state, call->conn);
+              call->debug_id, call->user_call_ID, state, call->conn);
  
-       if (call->state >= RXRPC_CALL_COMPLETE) {
+       if (state >= RXRPC_CALL_COMPLETE) {
                 /* it's too late for this call */
                 ret = -ESHUTDOWN;
         } else if (cmd == RXRPC_CMD_SEND_ABORT) {
@@ -555,12 +561,12 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
         } else if (cmd != RXRPC_CMD_SEND_DATA) {
                 ret = -EINVAL;
         } else if (rxrpc_is_client_call(call) &&
-                  call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
+                  state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
                 /* request phase complete for this client call */
                 ret = -EPROTO;
         } else if (rxrpc_is_service_call(call) &&
-                  call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
-                  call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
+                  state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+                  state != RXRPC_CALL_SERVER_SEND_REPLY) {
                 /* Reply phase not begun or not complete for service call. */
                 ret = -EPROTO;
         } else {
@@ -605,14 +611,21 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
         _debug("CALL %d USR %lx ST %d on CONN %p",
                call->debug_id, call->user_call_ID, call->state, call->conn);
  
-       if (call->state >= RXRPC_CALL_COMPLETE) {
-               ret = -ESHUTDOWN; /* it's too late for this call */
-       } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
-                  call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
-                  call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
-               ret = -EPROTO; /* request phase complete for this client call */
-       } else {
+       switch (READ_ONCE(call->state)) {
+       case RXRPC_CALL_CLIENT_SEND_REQUEST:
+       case RXRPC_CALL_SERVER_ACK_REQUEST:
+       case RXRPC_CALL_SERVER_SEND_REPLY:
                 ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len);
+               break;
+       case RXRPC_CALL_COMPLETE:
+               read_lock_bh(&call->state_lock);
+               ret = -call->error;
+               read_unlock_bh(&call->state_lock);
+               break;
+       default:
+               /* Call is not in a state in which data may be sent */
+               ret = -EPROTO;
+               break;
         }
  
         mutex_unlock(&call->user_mutex);
diff --git a/net/sched/act_api.c b/net/sched/act_api.c

index b70aa57319ea3233395dc7ae349b8b7aab5dfd03..e05b924618a03e6a58655873700ee66e0688b7ad 100644 (file)
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -529,20 +529,20 @@ errout:
         return err;
  }
  
-static int nla_memdup_cookie(struct tc_action *a, struct nlattr **tb)
+static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
  {
-       a->act_cookie = kzalloc(sizeof(*a->act_cookie), GFP_KERNEL);
-       if (!a->act_cookie)
-               return -ENOMEM;
+       struct tc_cookie *c = kzalloc(sizeof(*c), GFP_KERNEL);
+       if (!c)
+               return NULL;
  
-       a->act_cookie->data = nla_memdup(tb[TCA_ACT_COOKIE], GFP_KERNEL);
-       if (!a->act_cookie->data) {
-               kfree(a->act_cookie);
-               return -ENOMEM;
+       c->data = nla_memdup(tb[TCA_ACT_COOKIE], GFP_KERNEL);
+       if (!c->data) {
+               kfree(c);
+               return NULL;
         }
-       a->act_cookie->len = nla_len(tb[TCA_ACT_COOKIE]);
+       c->len = nla_len(tb[TCA_ACT_COOKIE]);
  
-       return 0;
+       return c;
  }
  
  struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
@@ -551,6 +551,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
  {
         struct tc_action *a;
         struct tc_action_ops *a_o;
+       struct tc_cookie *cookie = NULL;
         char act_name[IFNAMSIZ];
         struct nlattr *tb[TCA_ACT_MAX + 1];
         struct nlattr *kind;
@@ -566,6 +567,18 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
                         goto err_out;
                 if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ)
                         goto err_out;
+               if (tb[TCA_ACT_COOKIE]) {
+                       int cklen = nla_len(tb[TCA_ACT_COOKIE]);
+
+                       if (cklen > TC_COOKIE_MAX_SIZE)
+                               goto err_out;
+
+                       cookie = nla_memdup_cookie(tb);
+                       if (!cookie) {
+                               err = -ENOMEM;
+                               goto err_out;
+                       }
+               }
         } else {
                 err = -EINVAL;
                 if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ)
@@ -604,20 +617,12 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
         if (err < 0)
                 goto err_mod;
  
-       if (tb[TCA_ACT_COOKIE]) {
-               int cklen = nla_len(tb[TCA_ACT_COOKIE]);
-
-               if (cklen > TC_COOKIE_MAX_SIZE) {
-                       err = -EINVAL;
-                       tcf_hash_release(a, bind);
-                       goto err_mod;
-               }
-
-               if (nla_memdup_cookie(a, tb) < 0) {
-                       err = -ENOMEM;
-                       tcf_hash_release(a, bind);
-                       goto err_mod;
+       if (name == NULL && tb[TCA_ACT_COOKIE]) {
+               if (a->act_cookie) {
+                       kfree(a->act_cookie->data);
+                       kfree(a->act_cookie);
                 }
+               a->act_cookie = cookie;
         }
  
         /* module count goes up only when brand new policy is created
@@ -632,6 +637,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
  err_mod:
         module_put(a_o->owner);
  err_out:
+       if (cookie) {
+               kfree(cookie->data);
+               kfree(cookie);
+       }
         return ERR_PTR(err);
  }
  
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c

index ab80629099622c47933efb36662a323f98f66773..f9bb43c25697e70d18fe9bbba90f6e98dfe05759 100644 (file)
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -113,6 +113,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
         if (ret < 0)
                 return ret;
  
+       if (!tb[TCA_CONNMARK_PARMS])
+               return -EINVAL;
+
         parm = nla_data(tb[TCA_CONNMARK_PARMS]);
  
         if (!tcf_hash_check(tn, parm->index, a, bind)) {
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c

index 3b7074e2302487808dc1d16b01143d0b292ebe4e..c736627f8f4a0e0ff86db535ec95459a417e4ada 100644 (file)
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -228,7 +228,6 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
  
         return skb->len;
  nla_put_failure:
-       rcu_read_unlock();
         nlmsg_trim(skb, b);
         return -1;
  }
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c

index 802ac7c2e5e87eed1341ba4c09d3e5d70bc75876..5334e309f17f0ef4416dddcdc9278c14ecb5585d 100644 (file)
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -201,9 +201,13 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
         pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p);
  
         if (p->set_tc_index) {
+               int wlen = skb_network_offset(skb);
+
                 switch (tc_skb_protocol(skb)) {
                 case htons(ETH_P_IP):
-                       if (skb_cow_head(skb, sizeof(struct iphdr)))
+                       wlen += sizeof(struct iphdr);
+                       if (!pskb_may_pull(skb, wlen) ||
+                           skb_try_make_writable(skb, wlen))
                                 goto drop;
  
                         skb->tc_index = ipv4_get_dsfield(ip_hdr(skb))
@@ -211,7 +215,9 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                         break;
  
                 case htons(ETH_P_IPV6):
-                       if (skb_cow_head(skb, sizeof(struct ipv6hdr)))
+                       wlen += sizeof(struct ipv6hdr);
+                       if (!pskb_may_pull(skb, wlen) ||
+                           skb_try_make_writable(skb, wlen))
                                 goto drop;
  
                         skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb))
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c

index b052b27a984e39c244c94132f1162a7033e5cc63..1a2f9e964330a5cd0c0b9a5cac91807221b0ffd9 100644 (file)
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -794,7 +794,7 @@ static void attach_default_qdiscs(struct net_device *dev)
                 }
         }
  #ifdef CONFIG_NET_SCHED
-       if (dev->qdisc)
+       if (dev->qdisc != &noop_qdisc)
                 qdisc_hash_add(dev->qdisc);
  #endif
  }
diff --git a/net/sctp/associola.c b/net/sctp/associola.c

index 2a6835b4562b61cff52425a530524f1c48bc7919..a9708da28eb53ff2987264c6c7d7ca6ec2ff09e9 100644 (file)
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -71,9 +71,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
  {
         struct net *net = sock_net(sk);
         struct sctp_sock *sp;
-       int i;
         sctp_paramhdr_t *p;
-       int err;
+       int i;
  
         /* Retrieve the SCTP per socket area.  */
         sp = sctp_sk((struct sock *)sk);
@@ -247,6 +246,9 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
         if (!sctp_ulpq_init(&asoc->ulpq, asoc))
                 goto fail_init;
  
+       if (sctp_stream_new(asoc, gfp))
+               goto fail_init;
+
         /* Assume that peer would support both address types unless we are
          * told otherwise.
          */
@@ -264,9 +266,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
  
         /* AUTH related initializations */
         INIT_LIST_HEAD(&asoc->endpoint_shared_keys);
-       err = sctp_auth_asoc_copy_shkeys(ep, asoc, gfp);
-       if (err)
-               goto fail_init;
+       if (sctp_auth_asoc_copy_shkeys(ep, asoc, gfp))
+               goto stream_free;
  
         asoc->active_key_id = ep->active_key_id;
         asoc->prsctp_enable = ep->prsctp_enable;
@@ -289,6 +290,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
  
         return asoc;
  
+stream_free:
+       sctp_stream_free(asoc->stream);
  fail_init:
         sock_put(asoc->base.sk);
         sctp_endpoint_put(asoc->ep);
@@ -1409,7 +1412,7 @@ sctp_assoc_choose_alter_transport(struct sctp_association *asoc,
  /* Update the association's pmtu and frag_point by going through all the
   * transports. This routine is called when a transport's PMTU has changed.
   */
-void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc)
+void sctp_assoc_sync_pmtu(struct sctp_association *asoc)
  {
         struct sctp_transport *t;
         __u32 pmtu = 0;
@@ -1421,8 +1424,8 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc)
         list_for_each_entry(t, &asoc->peer.transport_addr_list,
                                 transports) {
                 if (t->pmtu_pending && t->dst) {
-                       sctp_transport_update_pmtu(sk, t,
-                                                  SCTP_TRUNC4(dst_mtu(t->dst)));
+                       sctp_transport_update_pmtu(
+                                       t, SCTP_TRUNC4(dst_mtu(t->dst)));
                         t->pmtu_pending = 0;
                 }
                 if (!pmtu || (t->pathmtu < pmtu))
diff --git a/net/sctp/input.c b/net/sctp/input.c

index 2a28ab20487f03f61ed8d74cb511bce2973ce242..0e06a278d2a911e2360e75e983b623e453284b7b 100644 (file)
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -401,10 +401,10 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
  
         if (t->param_flags & SPP_PMTUD_ENABLE) {
                 /* Update transports view of the MTU */
-               sctp_transport_update_pmtu(sk, t, pmtu);
+               sctp_transport_update_pmtu(t, pmtu);
  
                 /* Update association pmtu. */
-               sctp_assoc_sync_pmtu(sk, asoc);
+               sctp_assoc_sync_pmtu(asoc);
         }
  
         /* Retransmit with the new pmtu setting.
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c

index 063baac5b9fe4048e9d7b41e848a33f0f73c61d4..961ee59f696a0b0a8b6c2bade0031a073dff53ad 100644 (file)
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -640,14 +640,15 @@ static sctp_scope_t sctp_v6_scope(union sctp_addr *addr)
  
  /* Create and initialize a new sk for the socket to be returned by accept(). */
  static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
-                                            struct sctp_association *asoc)
+                                            struct sctp_association *asoc,
+                                            bool kern)
  {
         struct sock *newsk;
         struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
         struct sctp6_sock *newsctp6sk;
         struct ipv6_txoptions *opt;
  
-       newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, 0);
+       newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, kern);
         if (!newsk)
                 goto out;
  
diff --git a/net/sctp/output.c b/net/sctp/output.c

index 71ce6b945dcb54d831425bdb02e315a14dae69ef..1409a875ad8e22172a4b6ec08ce339da3c8b80ab 100644 (file)
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -86,43 +86,53 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
  {
         struct sctp_transport *tp = packet->transport;
         struct sctp_association *asoc = tp->asoc;
+       struct sock *sk;
  
         pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag);
-
         packet->vtag = vtag;
  
-       if (asoc && tp->dst) {
-               struct sock *sk = asoc->base.sk;
-
-               rcu_read_lock();
-               if (__sk_dst_get(sk) != tp->dst) {
-                       dst_hold(tp->dst);
-                       sk_setup_caps(sk, tp->dst);
-               }
-
-               if (sk_can_gso(sk)) {
-                       struct net_device *dev = tp->dst->dev;
+       /* do the following jobs only once for a flush schedule */
+       if (!sctp_packet_empty(packet))
+               return;
  
-                       packet->max_size = dev->gso_max_size;
-               } else {
-                       packet->max_size = asoc->pathmtu;
-               }
-               rcu_read_unlock();
+       /* set packet max_size with pathmtu */
+       packet->max_size = tp->pathmtu;
+       if (!asoc)
+               return;
  
-       } else {
-               packet->max_size = tp->pathmtu;
+       /* update dst or transport pathmtu if needed */
+       sk = asoc->base.sk;
+       if (!sctp_transport_dst_check(tp)) {
+               sctp_transport_route(tp, NULL, sctp_sk(sk));
+               if (asoc->param_flags & SPP_PMTUD_ENABLE)
+                       sctp_assoc_sync_pmtu(asoc);
+       } else if (!sctp_transport_pmtu_check(tp)) {
+               if (asoc->param_flags & SPP_PMTUD_ENABLE)
+                       sctp_assoc_sync_pmtu(asoc);
         }
  
-       if (ecn_capable && sctp_packet_empty(packet)) {
-               struct sctp_chunk *chunk;
+       /* If there is a prepend chunk, stick it on the list before
+        * any other chunks get appended.
+        */
+       if (ecn_capable) {
+               struct sctp_chunk *chunk = sctp_get_ecne_prepend(asoc);
  
-               /* If there a is a prepend chunk stick it on the list before
-                * any other chunks get appended.
-                */
-               chunk = sctp_get_ecne_prepend(asoc);
                 if (chunk)
                         sctp_packet_append_chunk(packet, chunk);
         }
+
+       if (!tp->dst)
+               return;
+
+       /* set packet max_size with gso_max_size if gso is enabled */
+       rcu_read_lock();
+       if (__sk_dst_get(sk) != tp->dst) {
+               dst_hold(tp->dst);
+               sk_setup_caps(sk, tp->dst);
+       }
+       packet->max_size = sk_can_gso(sk) ? tp->dst->dev->gso_max_size
+                                         : asoc->pathmtu;
+       rcu_read_unlock();
  }
  
  /* Initialize the packet structure. */
@@ -546,7 +556,6 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
         struct sctp_association *asoc = tp->asoc;
         struct sctp_chunk *chunk, *tmp;
         int pkt_count, gso = 0;
-       int confirm;
         struct dst_entry *dst;
         struct sk_buff *head;
         struct sctphdr *sh;
@@ -583,12 +592,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
         sh->vtag = htonl(packet->vtag);
         sh->checksum = 0;
  
-       /* update dst if in need */
-       if (!sctp_transport_dst_check(tp)) {
-               sctp_transport_route(tp, NULL, sctp_sk(sk));
-               if (asoc && asoc->param_flags & SPP_PMTUD_ENABLE)
-                       sctp_assoc_sync_pmtu(sk, asoc);
-       }
+       /* drop packet if no dst */
         dst = dst_clone(tp->dst);
         if (!dst) {
                 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
@@ -625,13 +629,13 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
                         asoc->peer.last_sent_to = tp;
         }
         head->ignore_df = packet->ipfragok;
-       confirm = tp->dst_pending_confirm;
-       if (confirm)
+       if (tp->dst_pending_confirm)
                 skb_set_dst_pending_confirm(head, 1);
         /* neighbour should be confirmed on successful transmission or
          * positive error
          */
-       if (tp->af_specific->sctp_xmit(head, tp) >= 0 && confirm)
+       if (tp->af_specific->sctp_xmit(head, tp) >= 0 &&
+           tp->dst_pending_confirm)
                 tp->dst_pending_confirm = 0;
  
  out:
@@ -705,7 +709,7 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
          */
  
         if ((sctp_sk(asoc->base.sk)->nodelay || inflight == 0) &&
-           !chunk->msg->force_delay)
+           !asoc->force_delay)
                 /* Nothing unacked */
                 return SCTP_XMIT_OK;
  
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c

index db352e5d61f8980dc461a162959643d872997217..8081476ed313cca8ad8f9876ec5ef5dc8fd68207 100644 (file)
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -382,17 +382,18 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
  }
  
  static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
-                                   struct sctp_sndrcvinfo *sinfo,
-                                   struct list_head *queue, int msg_len)
+                                   struct sctp_sndrcvinfo *sinfo, int msg_len)
  {
+       struct sctp_outq *q = &asoc->outqueue;
         struct sctp_chunk *chk, *temp;
  
-       list_for_each_entry_safe(chk, temp, queue, list) {
+       list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) {
                 if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
                     chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
                         continue;
  
                 list_del_init(&chk->list);
+               q->out_qlen -= chk->skb->len;
                 asoc->sent_cnt_removable--;
                 asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
  
@@ -431,9 +432,7 @@ void sctp_prsctp_prune(struct sctp_association *asoc,
                         return;
         }
  
-       sctp_prsctp_prune_unsent(asoc, sinfo,
-                                &asoc->outqueue.out_chunk_list,
-                                msg_len);
+       sctp_prsctp_prune_unsent(asoc, sinfo, msg_len);
  }
  
  /* Mark all the eligible packets on a transport for retransmission.  */
@@ -1027,8 +1026,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
                         /* RFC 2960 6.5 Every DATA chunk MUST carry a valid
                          * stream identifier.
                          */
-                       if (chunk->sinfo.sinfo_stream >=
-                           asoc->c.sinit_num_ostreams) {
+                       if (chunk->sinfo.sinfo_stream >= asoc->stream->outcnt) {
  
                                 /* Mark as failed send. */
                                 sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM);
diff --git a/net/sctp/proc.c b/net/sctp/proc.c

index 206377fe91ec4db4a59b24bf45daa0e42be0015b..a0b29d43627f48425e83d7d3c9698d99315dd869 100644 (file)
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -361,8 +361,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
         sctp_seq_dump_remote_addrs(seq, assoc);
         seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d "
                    "%8d %8d %8d %8d",
-               assoc->hbinterval, assoc->c.sinit_max_instreams,
-               assoc->c.sinit_num_ostreams, assoc->max_retrans,
+               assoc->hbinterval, assoc->stream->incnt,
+               assoc->stream->outcnt, assoc->max_retrans,
                 assoc->init_retries, assoc->shutdown_retries,
                 assoc->rtx_data_chunks,
                 atomic_read(&sk->sk_wmem_alloc),
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c

index 1b6d4574d2b02a2877caba604bb549352a0f0470..989a900383b57c57590bff37e3aee7426fb0b156 100644 (file)
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -575,10 +575,11 @@ static int sctp_v4_is_ce(const struct sk_buff *skb)
  
  /* Create and initialize a new sk for the socket returned by accept(). */
  static struct sock *sctp_v4_create_accept_sk(struct sock *sk,
-                                            struct sctp_association *asoc)
+                                            struct sctp_association *asoc,
+                                            bool kern)
  {
         struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL,
-                       sk->sk_prot, 0);
+                       sk->sk_prot, kern);
         struct inet_sock *newinet;
  
         if (!newsk)
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c

index 969a30c7bb5431530b293c8ed51f2fdea61dd8bf..118faff6a332ee24caf3d772b6f00641128ef104 100644 (file)
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2460,15 +2460,10 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
          * association.
          */
         if (!asoc->temp) {
-               int error;
-
-               asoc->stream = sctp_stream_new(asoc->c.sinit_max_instreams,
-                                              asoc->c.sinit_num_ostreams, gfp);
-               if (!asoc->stream)
+               if (sctp_stream_init(asoc, gfp))
                         goto clean_up;
  
-               error = sctp_assoc_set_id(asoc, gfp);
-               if (error)
+               if (sctp_assoc_set_id(asoc, gfp))
                         goto clean_up;
         }
  
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c

index e03bb1aab4d095b65259c33f4fba6990e90f586b..24c6ccce7539097728f3733ae5ecc037562cefcf 100644 (file)
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3946,7 +3946,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net,
  
         /* Silently discard the chunk if stream-id is not valid */
         sctp_walk_fwdtsn(skip, chunk) {
-               if (ntohs(skip->stream) >= asoc->c.sinit_max_instreams)
+               if (ntohs(skip->stream) >= asoc->stream->incnt)
                         goto discard_noforce;
         }
  
@@ -4017,7 +4017,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast(
  
         /* Silently discard the chunk if stream-id is not valid */
         sctp_walk_fwdtsn(skip, chunk) {
-               if (ntohs(skip->stream) >= asoc->c.sinit_max_instreams)
+               if (ntohs(skip->stream) >= asoc->stream->incnt)
                         goto gen_shutdown;
         }
  
@@ -6353,7 +6353,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
          * and discard the DATA chunk.
          */
         sid = ntohs(data_hdr->stream);
-       if (sid >= asoc->c.sinit_max_instreams) {
+       if (sid >= asoc->stream->incnt) {
                 /* Mark tsn as received even though we drop it */
                 sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn));
  
diff --git a/net/sctp/socket.c b/net/sctp/socket.c

index 6f0a9be50f5055fd7efa29bb8b183cc37b23b25f..d9d4c92e06b312e6c300afd8f3d5db33161fd9f7 100644 (file)
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1907,7 +1907,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
         }
  
         if (asoc->pmtu_pending)
-               sctp_assoc_pending_pmtu(sk, asoc);
+               sctp_assoc_pending_pmtu(asoc);
  
         /* If fragmentation is disabled and the message length exceeds the
          * association fragmentation point, return EMSGSIZE.  The I-D
@@ -1920,7 +1920,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
         }
  
         /* Check for invalid stream. */
-       if (sinfo->sinfo_stream >= asoc->c.sinit_num_ostreams) {
+       if (sinfo->sinfo_stream >= asoc->stream->outcnt) {
                 err = -EINVAL;
                 goto out_free;
         }
@@ -1965,7 +1965,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
                 err = PTR_ERR(datamsg);
                 goto out_free;
         }
-       datamsg->force_delay = !!(msg->msg_flags & MSG_MORE);
+       asoc->force_delay = !!(msg->msg_flags & MSG_MORE);
  
         /* Now send the (possibly) fragmented message. */
         list_for_each_entry(chunk, &datamsg->chunks, frag_list) {
@@ -2435,7 +2435,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
         if ((params->spp_flags & SPP_PMTUD_DISABLE) && params->spp_pathmtu) {
                 if (trans) {
                         trans->pathmtu = params->spp_pathmtu;
-                       sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc);
+                       sctp_assoc_sync_pmtu(asoc);
                 } else if (asoc) {
                         asoc->pathmtu = params->spp_pathmtu;
                 } else {
@@ -2451,7 +2451,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
                                 (trans->param_flags & ~SPP_PMTUD) | pmtud_change;
                         if (update) {
                                 sctp_transport_pmtu(trans, sctp_opt2sk(sp));
-                               sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc);
+                               sctp_assoc_sync_pmtu(asoc);
                         }
                 } else if (asoc) {
                         asoc->param_flags =
@@ -4116,7 +4116,7 @@ static int sctp_disconnect(struct sock *sk, int flags)
   * descriptor will be returned from accept() to represent the newly
   * formed association.
   */
-static struct sock *sctp_accept(struct sock *sk, int flags, int *err)
+static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern)
  {
         struct sctp_sock *sp;
         struct sctp_endpoint *ep;
@@ -4151,7 +4151,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err)
          */
         asoc = list_entry(ep->asocs.next, struct sctp_association, asocs);
  
-       newsk = sp->pf->create_accept_sk(sk, asoc);
+       newsk = sp->pf->create_accept_sk(sk, asoc, kern);
         if (!newsk) {
                 error = -ENOMEM;
                 goto out;
@@ -4461,8 +4461,8 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc,
         info->sctpi_rwnd = asoc->a_rwnd;
         info->sctpi_unackdata = asoc->unack_data;
         info->sctpi_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map);
-       info->sctpi_instrms = asoc->c.sinit_max_instreams;
-       info->sctpi_outstrms = asoc->c.sinit_num_ostreams;
+       info->sctpi_instrms = asoc->stream->incnt;
+       info->sctpi_outstrms = asoc->stream->outcnt;
         list_for_each(pos, &asoc->base.inqueue.in_chunk_list)
                 info->sctpi_inqueue++;
         list_for_each(pos, &asoc->outqueue.out_chunk_list)
@@ -4691,8 +4691,8 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len,
         status.sstat_unackdata = asoc->unack_data;
  
         status.sstat_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map);
-       status.sstat_instrms = asoc->c.sinit_max_instreams;
-       status.sstat_outstrms = asoc->c.sinit_num_ostreams;
+       status.sstat_instrms = asoc->stream->incnt;
+       status.sstat_outstrms = asoc->stream->outcnt;
         status.sstat_fragmentation_point = asoc->frag_point;
         status.sstat_primary.spinfo_assoc_id = sctp_assoc2id(transport->asoc);
         memcpy(&status.sstat_primary.spinfo_address, &transport->ipaddr,
@@ -7034,6 +7034,9 @@ int sctp_inet_listen(struct socket *sock, int backlog)
         if (sock->state != SS_UNCONNECTED)
                 goto out;
  
+       if (!sctp_sstate(sk, LISTENING) && !sctp_sstate(sk, CLOSED))
+               goto out;
+
         /* If backlog is zero, disable listening. */
         if (!backlog) {
                 if (sctp_sstate(sk, CLOSED))
diff --git a/net/sctp/stream.c b/net/sctp/stream.c

index 1c6cc04fa3a41f7266597f9cd80420c228094a2b..bbed997e1c5f01d4401adcd4664be4b6d16a93fa 100644 (file)
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -35,33 +35,60 @@
  #include <net/sctp/sctp.h>
  #include <net/sctp/sm.h>
  
-struct sctp_stream *sctp_stream_new(__u16 incnt, __u16 outcnt, gfp_t gfp)
+int sctp_stream_new(struct sctp_association *asoc, gfp_t gfp)
  {
         struct sctp_stream *stream;
         int i;
  
         stream = kzalloc(sizeof(*stream), gfp);
         if (!stream)
-               return NULL;
+               return -ENOMEM;
  
-       stream->outcnt = outcnt;
+       stream->outcnt = asoc->c.sinit_num_ostreams;
         stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp);
         if (!stream->out) {
                 kfree(stream);
-               return NULL;
+               return -ENOMEM;
         }
         for (i = 0; i < stream->outcnt; i++)
                 stream->out[i].state = SCTP_STREAM_OPEN;
  
-       stream->incnt = incnt;
+       asoc->stream = stream;
+
+       return 0;
+}
+
+int sctp_stream_init(struct sctp_association *asoc, gfp_t gfp)
+{
+       struct sctp_stream *stream = asoc->stream;
+       int i;
+
+       /* Initial stream->out size may be very big, so free it and alloc
+        * a new one with new outcnt to save memory.
+        */
+       kfree(stream->out);
+       stream->outcnt = asoc->c.sinit_num_ostreams;
+       stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp);
+       if (!stream->out)
+               goto nomem;
+
+       for (i = 0; i < stream->outcnt; i++)
+               stream->out[i].state = SCTP_STREAM_OPEN;
+
+       stream->incnt = asoc->c.sinit_max_instreams;
         stream->in = kcalloc(stream->incnt, sizeof(*stream->in), gfp);
         if (!stream->in) {
                 kfree(stream->out);
-               kfree(stream);
-               return NULL;
+               goto nomem;
         }
  
-       return stream;
+       return 0;
+
+nomem:
+       asoc->stream = NULL;
+       kfree(stream);
+
+       return -ENOMEM;
  }
  
  void sctp_stream_free(struct sctp_stream *stream)
diff --git a/net/sctp/transport.c b/net/sctp/transport.c

index 3379668af3686de2ec14db980b1ef527a6d1045f..721eeebfcd8a50609877db61ede41575e012606a 100644 (file)
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -251,14 +251,13 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
                 transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
  }
  
-void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 pmtu)
+void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
  {
-       struct dst_entry *dst;
+       struct dst_entry *dst = sctp_transport_dst_check(t);
  
         if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
                 pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n",
-                       __func__, pmtu,
-                       SCTP_DEFAULT_MINSEGMENT);
+                       __func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
                 /* Use default minimum segment size and disable
                  * pmtu discovery on this transport.
                  */
@@ -267,17 +266,13 @@ void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 p
                 t->pathmtu = pmtu;
         }
  
-       dst = sctp_transport_dst_check(t);
-       if (!dst)
-               t->af_specific->get_dst(t, &t->saddr, &t->fl, sk);
-
         if (dst) {
-               dst->ops->update_pmtu(dst, sk, NULL, pmtu);
-
+               dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu);
                 dst = sctp_transport_dst_check(t);
-               if (!dst)
-                       t->af_specific->get_dst(t, &t->saddr, &t->fl, sk);
         }
+
+       if (!dst)
+               t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk);
  }
  
  /* Caches the dst entry and source address for a transport's destination
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c

index 85837ab90e8916e612d5dd0a21ef48c5e2c9e544..093803786eacf3388dc470a04c02293f60e4102e 100644 (file)
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -944,7 +944,7 @@ out:
  }
  
  static int smc_accept(struct socket *sock, struct socket *new_sock,
-                     int flags)
+                     int flags, bool kern)
  {
         struct sock *sk = sock->sk, *nsk;
         DECLARE_WAITQUEUE(wait, current);
diff --git a/net/socket.c b/net/socket.c

index 2c1e8677ff2d4fdb2f29eaa6e06a7c323d27d981..985ef06792d6e54c69d296f3e15baf89be972f9c 100644 (file)
--- a/net/socket.c
+++ b/net/socket.c
@@ -652,6 +652,16 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
  }
  EXPORT_SYMBOL(kernel_sendmsg);
  
+static bool skb_is_err_queue(const struct sk_buff *skb)
+{
+       /* pkt_type of skbs enqueued on the error queue are set to
+        * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
+        * in recvmsg, since skbs received on a local socket will never
+        * have a pkt_type of PACKET_OUTGOING.
+        */
+       return skb->pkt_type == PACKET_OUTGOING;
+}
+
  /*
   * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
   */
@@ -695,7 +705,8 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
                 put_cmsg(msg, SOL_SOCKET,
                          SCM_TIMESTAMPING, sizeof(tss), &tss);
  
-               if (skb->len && (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS))
+               if (skb_is_err_queue(skb) && skb->len &&
+                   SKB_EXT_ERR(skb)->opt_stats)
                         put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
                                  skb->len, skb->data);
         }
@@ -1506,7 +1517,7 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
         if (err)
                 goto out_fd;
  
-       err = sock->ops->accept(sock, newsock, sock->file->f_flags);
+       err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
         if (err < 0)
                 goto out_fd;
  
@@ -1731,6 +1742,7 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
         /* We assume all kernel code knows the size of sockaddr_storage */
         msg.msg_namelen = 0;
         msg.msg_iocb = NULL;
+       msg.msg_flags = 0;
         if (sock->file->f_flags & O_NONBLOCK)
                 flags |= MSG_DONTWAIT;
         err = sock_recvmsg(sock, &msg, flags);
@@ -3238,7 +3250,7 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
         if (err < 0)
                 goto done;
  
-       err = sock->ops->accept(sock, *newsock, flags);
+       err = sock->ops->accept(sock, *newsock, flags, true);
         if (err < 0) {
                 sock_release(*newsock);
                 *newsock = NULL;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c

index 8931e33b65412d7b8bbe8b3872e5f7d7b27d92d5..2b720fa35c4ff7c2ae906e9e76d13d27a2b2f008 100644 (file)
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1635,6 +1635,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv,
  
         xprt = &svsk->sk_xprt;
         svc_xprt_init(net, &svc_tcp_bc_class, xprt, serv);
+       set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags);
  
         serv->sv_bc_xprt = xprt;
  
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c

index c13a5c35ce14d992515fa99e456976ed0cd1c382..fc8f14c7bfec60dc5828340861a747e49f06193e 100644 (file)
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -127,6 +127,7 @@ static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *serv,
         xprt = &cma_xprt->sc_xprt;
  
         svc_xprt_init(net, &svc_rdma_bc_class, xprt, serv);
+       set_bit(XPT_CONG_CTRL, &xprt->xpt_flags);
         serv->sv_bc_xprt = xprt;
  
         dprintk("svcrdma: %s(%p)\n", __func__, xprt);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c

index 81cd31acf690f41573e5fedd9b837376543f5ce9..3b332b395045b5b0ad07bc13a30db1420d7f7082 100644 (file)
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -503,7 +503,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
         struct ib_cq *sendcq, *recvcq;
         int rc;
  
-       max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES);
+       max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge,
+                       RPCRDMA_MAX_SEND_SGES);
         if (max_sge < RPCRDMA_MIN_SEND_SGES) {
                 pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
                 return -ENOMEM;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c

index 43e4045e72bc00cfbc9db6c1bf987a46e272969b..bdce99f9407affaae8ef524d3bd65bca5847d62b 100644 (file)
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -115,7 +115,8 @@ static void tipc_data_ready(struct sock *sk);
  static void tipc_write_space(struct sock *sk);
  static void tipc_sock_destruct(struct sock *sk);
  static int tipc_release(struct socket *sock);
-static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags);
+static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
+                      bool kern);
  static void tipc_sk_timeout(unsigned long data);
  static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
                            struct tipc_name_seq const *seq);
@@ -865,6 +866,14 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
         if (!tsk_peer_msg(tsk, hdr))
                 goto exit;
  
+       if (unlikely(msg_errcode(hdr))) {
+               tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+               tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
+                                     tsk_peer_port(tsk));
+               sk->sk_state_change(sk);
+               goto exit;
+       }
+
         tsk->probe_unacked = false;
  
         if (mtyp == CONN_PROBE) {
@@ -1082,7 +1091,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
                 }
         } while (sent < dlen && !rc);
  
-       return rc ? rc : sent;
+       return sent ? sent : rc;
  }
  
  /**
@@ -1258,7 +1267,10 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
         struct sock *sk = sock->sk;
         DEFINE_WAIT(wait);
         long timeo = *timeop;
-       int err;
+       int err = sock_error(sk);
+
+       if (err)
+               return err;
  
         for (;;) {
                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
@@ -1280,6 +1292,10 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
                 err = sock_intr_errno(timeo);
                 if (signal_pending(current))
                         break;
+
+               err = sock_error(sk);
+               if (err)
+                       break;
         }
         finish_wait(sk_sleep(sk), &wait);
         *timeop = timeo;
@@ -1483,7 +1499,7 @@ restart:
         if (unlikely(flags & MSG_PEEK))
                 goto exit;
  
-       tsk->rcv_unacked += tsk_inc(tsk, hlen + sz);
+       tsk->rcv_unacked += tsk_inc(tsk, hlen + msg_data_sz(msg));
         if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
                 tipc_sk_send_ack(tsk);
         tsk_advance_rx_queue(sk);
@@ -1550,6 +1566,8 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
         struct sock *sk = &tsk->sk;
         struct net *net = sock_net(sk);
         struct tipc_msg *hdr = buf_msg(skb);
+       u32 pport = msg_origport(hdr);
+       u32 pnode = msg_orignode(hdr);
  
         if (unlikely(msg_mcast(hdr)))
                 return false;
@@ -1557,18 +1575,28 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
         switch (sk->sk_state) {
         case TIPC_CONNECTING:
                 /* Accept only ACK or NACK message */
-               if (unlikely(!msg_connected(hdr)))
-                       return false;
+               if (unlikely(!msg_connected(hdr))) {
+                       if (pport != tsk_peer_port(tsk) ||
+                           pnode != tsk_peer_node(tsk))
+                               return false;
+
+                       tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+                       sk->sk_err = ECONNREFUSED;
+                       sk->sk_state_change(sk);
+                       return true;
+               }
  
                 if (unlikely(msg_errcode(hdr))) {
                         tipc_set_sk_state(sk, TIPC_DISCONNECTING);
                         sk->sk_err = ECONNREFUSED;
+                       sk->sk_state_change(sk);
                         return true;
                 }
  
                 if (unlikely(!msg_isdata(hdr))) {
                         tipc_set_sk_state(sk, TIPC_DISCONNECTING);
                         sk->sk_err = EINVAL;
+                       sk->sk_state_change(sk);
                         return true;
                 }
  
@@ -1580,8 +1608,7 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
                         return true;
  
                 /* If empty 'ACK-' message, wake up sleeping connect() */
-               if (waitqueue_active(sk_sleep(sk)))
-                       wake_up_interruptible(sk_sleep(sk));
+               sk->sk_data_ready(sk);
  
                 /* 'ACK-' message is neither accepted nor rejected: */
                 msg_set_dest_droppable(hdr, 1);
@@ -2029,7 +2056,8 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
   *
   * Returns 0 on success, errno otherwise
   */
-static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
+static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
+                      bool kern)
  {
         struct sock *new_sk, *sk = sock->sk;
         struct sk_buff *buf;
@@ -2051,7 +2079,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
  
         buf = skb_peek(&sk->sk_receive_queue);
  
-       res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 0);
+       res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, kern);
         if (res)
                 goto exit;
         security_sk_clone(sock->sk, new_sock->sk);
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c

index 9d94e65d0894183b4af94ed24e84b94c0478b551..271cd66e4b3b66534d8686bec94132bc9737314e 100644 (file)
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -141,6 +141,11 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
  static void tipc_subscrp_timeout(unsigned long data)
  {
         struct tipc_subscription *sub = (struct tipc_subscription *)data;
+       struct tipc_subscriber *subscriber = sub->subscriber;
+
+       spin_lock_bh(&subscriber->lock);
+       tipc_nametbl_unsubscribe(sub);
+       spin_unlock_bh(&subscriber->lock);
  
         /* Notify subscriber of timeout */
         tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper,
@@ -173,7 +178,6 @@ static void tipc_subscrp_kref_release(struct kref *kref)
         struct tipc_subscriber *subscriber = sub->subscriber;
  
         spin_lock_bh(&subscriber->lock);
-       tipc_nametbl_unsubscribe(sub);
         list_del(&sub->subscrp_list);
         atomic_dec(&tn->subscription_count);
         spin_unlock_bh(&subscriber->lock);
@@ -205,6 +209,7 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber,
                 if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr)))
                         continue;
  
+               tipc_nametbl_unsubscribe(sub);
                 tipc_subscrp_get(sub);
                 spin_unlock_bh(&subscriber->lock);
                 tipc_subscrp_delete(sub);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c

index ee37b390260a62f026f08e3da827ae45666bc2a6..928691c434087e8ff72b84fc6515539115c0509b 100644 (file)
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -636,7 +636,7 @@ static int unix_bind(struct socket *, struct sockaddr *, int);
  static int unix_stream_connect(struct socket *, struct sockaddr *,
                                int addr_len, int flags);
  static int unix_socketpair(struct socket *, struct socket *);
-static int unix_accept(struct socket *, struct socket *, int);
+static int unix_accept(struct socket *, struct socket *, int, bool);
  static int unix_getname(struct socket *, struct sockaddr *, int *, int);
  static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
  static unsigned int unix_dgram_poll(struct file *, struct socket *,
@@ -1402,7 +1402,8 @@ static void unix_sock_inherit_flags(const struct socket *old,
                 set_bit(SOCK_PASSSEC, &new->flags);
  }
  
-static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
+static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
+                      bool kern)
  {
         struct sock *sk = sock->sk;
         struct sock *tsk;
diff --git a/net/unix/garbage.c b/net/unix/garbage.c

index 6a0d48525fcf9a71f54bb43495b200b300f5341e..c36757e728442bb936c17f8a975b420f5f8a5972 100644 (file)
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -146,6 +146,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp)
         if (s) {
                 struct unix_sock *u = unix_sk(s);
  
+               BUG_ON(!atomic_long_read(&u->inflight));
                 BUG_ON(list_empty(&u->link));
  
                 if (atomic_long_dec_and_test(&u->inflight))
@@ -341,6 +342,14 @@ void unix_gc(void)
         }
         list_del(&cursor);
  
+       /* Now gc_candidates contains only garbage.  Restore original
+        * inflight counters for these as well, and remove the skbuffs
+        * which are creating the cycle(s).
+        */
+       skb_queue_head_init(&hitlist);
+       list_for_each_entry(u, &gc_candidates, link)
+               scan_children(&u->sk, inc_inflight, &hitlist);
+
         /* not_cycle_list contains those sockets which do not make up a
          * cycle.  Restore these to the inflight list.
          */
@@ -350,14 +359,6 @@ void unix_gc(void)
                 list_move_tail(&u->link, &gc_inflight_list);
         }
  
-       /* Now gc_candidates contains only garbage.  Restore original
-        * inflight counters for these as well, and remove the skbuffs
-        * which are creating the cycle(s).
-        */
-       skb_queue_head_init(&hitlist);
-       list_for_each_entry(u, &gc_candidates, link)
-       scan_children(&u->sk, inc_inflight, &hitlist);
-
         spin_unlock(&unix_gc_lock);
  
         /* Here we are. Hitlist is filled. Die. */
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c

index 9192ead6675114128817267926befe23f7cc1111..6f7f6757ceefb500551fafbf40c462835c4baf88 100644 (file)
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1102,10 +1102,19 @@ static const struct proto_ops vsock_dgram_ops = {
         .sendpage = sock_no_sendpage,
  };
  
+static int vsock_transport_cancel_pkt(struct vsock_sock *vsk)
+{
+       if (!transport->cancel_pkt)
+               return -EOPNOTSUPP;
+
+       return transport->cancel_pkt(vsk);
+}
+
  static void vsock_connect_timeout(struct work_struct *work)
  {
         struct sock *sk;
         struct vsock_sock *vsk;
+       int cancel = 0;
  
         vsk = container_of(work, struct vsock_sock, dwork.work);
         sk = sk_vsock(vsk);
@@ -1116,8 +1125,11 @@ static void vsock_connect_timeout(struct work_struct *work)
                 sk->sk_state = SS_UNCONNECTED;
                 sk->sk_err = ETIMEDOUT;
                 sk->sk_error_report(sk);
+               cancel = 1;
         }
         release_sock(sk);
+       if (cancel)
+               vsock_transport_cancel_pkt(vsk);
  
         sock_put(sk);
  }
@@ -1224,11 +1236,13 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
                         err = sock_intr_errno(timeout);
                         sk->sk_state = SS_UNCONNECTED;
                         sock->state = SS_UNCONNECTED;
+                       vsock_transport_cancel_pkt(vsk);
                         goto out_wait;
                 } else if (timeout == 0) {
                         err = -ETIMEDOUT;
                         sk->sk_state = SS_UNCONNECTED;
                         sock->state = SS_UNCONNECTED;
+                       vsock_transport_cancel_pkt(vsk);
                         goto out_wait;
                 }
  
@@ -1250,7 +1264,8 @@ out:
         return err;
  }
  
-static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
+static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
+                       bool kern)
  {
         struct sock *listener;
         int err;
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c

index 9d24c0e958b18e614e30b24c0fcfbbe2152941f3..68675a151f22b8b63c02b25a67b833d9a6046d84 100644 (file)
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -213,6 +213,47 @@ virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt)
         return len;
  }
  
+static int
+virtio_transport_cancel_pkt(struct vsock_sock *vsk)
+{
+       struct virtio_vsock *vsock;
+       struct virtio_vsock_pkt *pkt, *n;
+       int cnt = 0;
+       LIST_HEAD(freeme);
+
+       vsock = virtio_vsock_get();
+       if (!vsock) {
+               return -ENODEV;
+       }
+
+       spin_lock_bh(&vsock->send_pkt_list_lock);
+       list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) {
+               if (pkt->vsk != vsk)
+                       continue;
+               list_move(&pkt->list, &freeme);
+       }
+       spin_unlock_bh(&vsock->send_pkt_list_lock);
+
+       list_for_each_entry_safe(pkt, n, &freeme, list) {
+               if (pkt->reply)
+                       cnt++;
+               list_del(&pkt->list);
+               virtio_transport_free_pkt(pkt);
+       }
+
+       if (cnt) {
+               struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX];
+               int new_cnt;
+
+               new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
+               if (new_cnt + cnt >= virtqueue_get_vring_size(rx_vq) &&
+                   new_cnt < virtqueue_get_vring_size(rx_vq))
+                       queue_work(virtio_vsock_workqueue, &vsock->rx_work);
+       }
+
+       return 0;
+}
+
  static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
  {
         int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
@@ -462,6 +503,7 @@ static struct virtio_transport virtio_transport = {
                 .release                  = virtio_transport_release,
                 .connect                  = virtio_transport_connect,
                 .shutdown                 = virtio_transport_shutdown,
+               .cancel_pkt               = virtio_transport_cancel_pkt,
  
                 .dgram_bind               = virtio_transport_dgram_bind,
                 .dgram_dequeue            = virtio_transport_dgram_dequeue,
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c

index 8d592a45b59786746d186e12d0c362d07c30bdac..af087b44ceea2311e53060e2442b4af2024bb037 100644 (file)
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -58,6 +58,7 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
         pkt->len                = len;
         pkt->hdr.len            = cpu_to_le32(len);
         pkt->reply              = info->reply;
+       pkt->vsk                = info->vsk;
  
         if (info->msg && len > 0) {
                 pkt->buf = kmalloc(len, GFP_KERNEL);
@@ -180,6 +181,7 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk,
         struct virtio_vsock_pkt_info info = {
                 .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE,
                 .type = type,
+               .vsk = vsk,
         };
  
         return virtio_transport_send_pkt_info(vsk, &info);
@@ -519,6 +521,7 @@ int virtio_transport_connect(struct vsock_sock *vsk)
         struct virtio_vsock_pkt_info info = {
                 .op = VIRTIO_VSOCK_OP_REQUEST,
                 .type = VIRTIO_VSOCK_TYPE_STREAM,
+               .vsk = vsk,
         };
  
         return virtio_transport_send_pkt_info(vsk, &info);
@@ -534,6 +537,7 @@ int virtio_transport_shutdown(struct vsock_sock *vsk, int mode)
                           VIRTIO_VSOCK_SHUTDOWN_RCV : 0) |
                          (mode & SEND_SHUTDOWN ?
                           VIRTIO_VSOCK_SHUTDOWN_SEND : 0),
+               .vsk = vsk,
         };
  
         return virtio_transport_send_pkt_info(vsk, &info);
@@ -560,6 +564,7 @@ virtio_transport_stream_enqueue(struct vsock_sock *vsk,
                 .type = VIRTIO_VSOCK_TYPE_STREAM,
                 .msg = msg,
                 .pkt_len = len,
+               .vsk = vsk,
         };
  
         return virtio_transport_send_pkt_info(vsk, &info);
@@ -581,6 +586,7 @@ static int virtio_transport_reset(struct vsock_sock *vsk,
                 .op = VIRTIO_VSOCK_OP_RST,
                 .type = VIRTIO_VSOCK_TYPE_STREAM,
                 .reply = !!pkt,
+               .vsk = vsk,
         };
  
         /* Send RST only if the original pkt is not a RST pkt */
@@ -826,6 +832,7 @@ virtio_transport_send_response(struct vsock_sock *vsk,
                 .remote_cid = le64_to_cpu(pkt->hdr.src_cid),
                 .remote_port = le32_to_cpu(pkt->hdr.src_port),
                 .reply = true,
+               .vsk = vsk,
         };
  
         return virtio_transport_send_pkt_info(vsk, &info);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c

index d7f8be4e321a32eba3a615aa69a860c212511625..2312dc2ffdb98b37b2909274c57eed68935267d7 100644 (file)
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -545,22 +545,18 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
  {
         int err;
  
-       rtnl_lock();
-
         if (!cb->args[0]) {
                 err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
                                   genl_family_attrbuf(&nl80211_fam),
                                   nl80211_fam.maxattr, nl80211_policy);
                 if (err)
-                       goto out_unlock;
+                       return err;
  
                 *wdev = __cfg80211_wdev_from_attrs(
                                         sock_net(skb->sk),
                                         genl_family_attrbuf(&nl80211_fam));
-               if (IS_ERR(*wdev)) {
-                       err = PTR_ERR(*wdev);
-                       goto out_unlock;
-               }
+               if (IS_ERR(*wdev))
+                       return PTR_ERR(*wdev);
                 *rdev = wiphy_to_rdev((*wdev)->wiphy);
                 /* 0 is the first index - add 1 to parse only once */
                 cb->args[0] = (*rdev)->wiphy_idx + 1;
@@ -570,10 +566,8 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
                 struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1);
                 struct wireless_dev *tmp;
  
-               if (!wiphy) {
-                       err = -ENODEV;
-                       goto out_unlock;
-               }
+               if (!wiphy)
+                       return -ENODEV;
                 *rdev = wiphy_to_rdev(wiphy);
                 *wdev = NULL;
  
@@ -584,21 +578,11 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
                         }
                 }
  
-               if (!*wdev) {
-                       err = -ENODEV;
-                       goto out_unlock;
-               }
+               if (!*wdev)
+                       return -ENODEV;
         }
  
         return 0;
- out_unlock:
-       rtnl_unlock();
-       return err;
-}
-
-static void nl80211_finish_wdev_dump(struct cfg80211_registered_device *rdev)
-{
-       rtnl_unlock();
  }
  
  /* IE validation */
@@ -2608,17 +2592,17 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
         int filter_wiphy = -1;
         struct cfg80211_registered_device *rdev;
         struct wireless_dev *wdev;
+       int ret;
  
         rtnl_lock();
         if (!cb->args[2]) {
                 struct nl80211_dump_wiphy_state state = {
                         .filter_wiphy = -1,
                 };
-               int ret;
  
                 ret = nl80211_dump_wiphy_parse(skb, cb, &state);
                 if (ret)
-                       return ret;
+                       goto out_unlock;
  
                 filter_wiphy = state.filter_wiphy;
  
@@ -2663,12 +2647,14 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
                 wp_idx++;
         }
   out:
-       rtnl_unlock();
-
         cb->args[0] = wp_idx;
         cb->args[1] = if_idx;
  
-       return skb->len;
+       ret = skb->len;
+ out_unlock:
+       rtnl_unlock();
+
+       return ret;
  }
  
  static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info)
@@ -4452,9 +4438,10 @@ static int nl80211_dump_station(struct sk_buff *skb,
         int sta_idx = cb->args[2];
         int err;
  
+       rtnl_lock();
         err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
         if (err)
-               return err;
+               goto out_err;
  
         if (!wdev->netdev) {
                 err = -EINVAL;
@@ -4489,7 +4476,7 @@ static int nl80211_dump_station(struct sk_buff *skb,
         cb->args[2] = sta_idx;
         err = skb->len;
   out_err:
-       nl80211_finish_wdev_dump(rdev);
+       rtnl_unlock();
  
         return err;
  }
@@ -5275,9 +5262,10 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
         int path_idx = cb->args[2];
         int err;
  
+       rtnl_lock();
         err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
         if (err)
-               return err;
+               goto out_err;
  
         if (!rdev->ops->dump_mpath) {
                 err = -EOPNOTSUPP;
@@ -5310,7 +5298,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
         cb->args[2] = path_idx;
         err = skb->len;
   out_err:
-       nl80211_finish_wdev_dump(rdev);
+       rtnl_unlock();
         return err;
  }
  
@@ -5470,9 +5458,10 @@ static int nl80211_dump_mpp(struct sk_buff *skb,
         int path_idx = cb->args[2];
         int err;
  
+       rtnl_lock();
         err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
         if (err)
-               return err;
+               goto out_err;
  
         if (!rdev->ops->dump_mpp) {
                 err = -EOPNOTSUPP;
@@ -5505,7 +5494,7 @@ static int nl80211_dump_mpp(struct sk_buff *skb,
         cb->args[2] = path_idx;
         err = skb->len;
   out_err:
-       nl80211_finish_wdev_dump(rdev);
+       rtnl_unlock();
         return err;
  }
  
@@ -7674,9 +7663,12 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb)
         int start = cb->args[2], idx = 0;
         int err;
  
+       rtnl_lock();
         err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
-       if (err)
+       if (err) {
+               rtnl_unlock();
                 return err;
+       }
  
         wdev_lock(wdev);
         spin_lock_bh(&rdev->bss_lock);
@@ -7699,7 +7691,7 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb)
         wdev_unlock(wdev);
  
         cb->args[2] = idx;
-       nl80211_finish_wdev_dump(rdev);
+       rtnl_unlock();
  
         return skb->len;
  }
@@ -7784,9 +7776,10 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
         int res;
         bool radio_stats;
  
+       rtnl_lock();
         res = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
         if (res)
-               return res;
+               goto out_err;
  
         /* prepare_wdev_dump parsed the attributes */
         radio_stats = attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS];
@@ -7827,7 +7820,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
         cb->args[2] = survey_idx;
         res = skb->len;
   out_err:
-       nl80211_finish_wdev_dump(rdev);
+       rtnl_unlock();
         return res;
  }
  
@@ -11508,17 +11501,13 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
         void *data = NULL;
         unsigned int data_len = 0;
  
-       rtnl_lock();
-
         if (cb->args[0]) {
                 /* subtract the 1 again here */
                 struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1);
                 struct wireless_dev *tmp;
  
-               if (!wiphy) {
-                       err = -ENODEV;
-                       goto out_unlock;
-               }
+               if (!wiphy)
+                       return -ENODEV;
                 *rdev = wiphy_to_rdev(wiphy);
                 *wdev = NULL;
  
@@ -11538,23 +11527,19 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
         err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
                           attrbuf, nl80211_fam.maxattr, nl80211_policy);
         if (err)
-               goto out_unlock;
+               return err;
  
         if (!attrbuf[NL80211_ATTR_VENDOR_ID] ||
-           !attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) {
-               err = -EINVAL;
-               goto out_unlock;
-       }
+           !attrbuf[NL80211_ATTR_VENDOR_SUBCMD])
+               return -EINVAL;
  
         *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), attrbuf);
         if (IS_ERR(*wdev))
                 *wdev = NULL;
  
         *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf);
-       if (IS_ERR(*rdev)) {
-               err = PTR_ERR(*rdev);
-               goto out_unlock;
-       }
+       if (IS_ERR(*rdev))
+               return PTR_ERR(*rdev);
  
         vid = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_ID]);
         subcmd = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_SUBCMD]);
@@ -11567,19 +11552,15 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
                 if (vcmd->info.vendor_id != vid || vcmd->info.subcmd != subcmd)
                         continue;
  
-               if (!vcmd->dumpit) {
-                       err = -EOPNOTSUPP;
-                       goto out_unlock;
-               }
+               if (!vcmd->dumpit)
+                       return -EOPNOTSUPP;
  
                 vcmd_idx = i;
                 break;
         }
  
-       if (vcmd_idx < 0) {
-               err = -EOPNOTSUPP;
-               goto out_unlock;
-       }
+       if (vcmd_idx < 0)
+               return -EOPNOTSUPP;
  
         if (attrbuf[NL80211_ATTR_VENDOR_DATA]) {
                 data = nla_data(attrbuf[NL80211_ATTR_VENDOR_DATA]);
@@ -11596,9 +11577,6 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
  
         /* keep rtnl locked in successful case */
         return 0;
- out_unlock:
-       rtnl_unlock();
-       return err;
  }
  
  static int nl80211_vendor_cmd_dump(struct sk_buff *skb,
@@ -11613,9 +11591,10 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb,
         int err;
         struct nlattr *vendor_data;
  
+       rtnl_lock();
         err = nl80211_prepare_vendor_dump(skb, cb, &rdev, &wdev);
         if (err)
-               return err;
+               goto out;
  
         vcmd_idx = cb->args[2];
         data = (void *)cb->args[3];
@@ -11624,15 +11603,21 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb,
  
         if (vcmd->flags & (WIPHY_VENDOR_CMD_NEED_WDEV |
                            WIPHY_VENDOR_CMD_NEED_NETDEV)) {
-               if (!wdev)
-                       return -EINVAL;
+               if (!wdev) {
+                       err = -EINVAL;
+                       goto out;
+               }
                 if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_NETDEV &&
-                   !wdev->netdev)
-                       return -EINVAL;
+                   !wdev->netdev) {
+                       err = -EINVAL;
+                       goto out;
+               }
  
                 if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) {
-                       if (!wdev_running(wdev))
-                               return -ENETDOWN;
+                       if (!wdev_running(wdev)) {
+                               err = -ENETDOWN;
+                               goto out;
+                       }
                 }
         }
  
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c

index 16b6b5988be969299c34a9881f258a300b366e2c..570a2b67ca1036796cc5021a0f0ce546811a4e6f 100644 (file)
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -132,12 +132,10 @@ static int wiphy_resume(struct device *dev)
         /* Age scan results with time spent in suspend */
         cfg80211_bss_age(rdev, get_seconds() - rdev->suspend_at);
  
-       if (rdev->ops->resume) {
-               rtnl_lock();
-               if (rdev->wiphy.registered)
-                       ret = rdev_resume(rdev);
-               rtnl_unlock();
-       }
+       rtnl_lock();
+       if (rdev->wiphy.registered && rdev->ops->resume)
+               ret = rdev_resume(rdev);
+       rtnl_unlock();
  
         return ret;
  }
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c

index fd28a49dbe8f0c99bb798acec314c63084fc22c6..8b911c29860e79f21b0ac8e1d3a80ed373fd537e 100644 (file)
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -852,7 +852,8 @@ static int x25_wait_for_data(struct sock *sk, long timeout)
         return rc;
  }
  
-static int x25_accept(struct socket *sock, struct socket *newsock, int flags)
+static int x25_accept(struct socket *sock, struct socket *newsock, int flags,
+                     bool kern)
  {
         struct sock *sk = sock->sk;
         struct sock *newsk;
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c

index 46bdb4fbed0bb34a5d6ae40991b3fda6e5dff82c..e23570b647ae721516997130d7abebeaf3f8bb03 100644 (file)
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -395,7 +395,7 @@ resume:
                 if (xo)
                         xfrm_gro = xo->flags & XFRM_GRO;
  
-               err = x->inner_mode->afinfo->transport_finish(skb, async);
+               err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async);
                 if (xfrm_gro) {
                         skb_dst_drop(skb);
                         gro_cells_receive(&gro_cells, skb);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c

index 0806dccdf5078451e0dd9c5b5573d040ab21c831..dfc77b9c5e5a8dd2b31440be47fb4480513680e1 100644 (file)
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1006,6 +1006,10 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
                 err = -ESRCH;
  out:
         spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+
+       if (cnt)
+               xfrm_garbage_collect(net);
+
         return err;
  }
  EXPORT_SYMBOL(xfrm_policy_flush);
@@ -1243,7 +1247,7 @@ static inline int policy_to_flow_dir(int dir)
  }
  
  static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
-                                                const struct flowi *fl)
+                                                const struct flowi *fl, u16 family)
  {
         struct xfrm_policy *pol;
  
@@ -1251,8 +1255,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
   again:
         pol = rcu_dereference(sk->sk_policy[dir]);
         if (pol != NULL) {
-               bool match = xfrm_selector_match(&pol->selector, fl,
-                                                sk->sk_family);
+               bool match = xfrm_selector_match(&pol->selector, fl, family);
                 int err = 0;
  
                 if (match) {
@@ -2239,7 +2242,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
         sk = sk_const_to_full_sk(sk);
         if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
                 num_pols = 1;
-               pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
+               pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family);
                 err = xfrm_expand_policies(fl, family, pols,
                                            &num_pols, &num_xfrms);
                 if (err < 0)
@@ -2518,7 +2521,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
         pol = NULL;
         sk = sk_to_full_sk(sk);
         if (sk && sk->sk_policy[dir]) {
-               pol = xfrm_sk_policy_lookup(sk, dir, &fl);
+               pol = xfrm_sk_policy_lookup(sk, dir, &fl, family);
                 if (IS_ERR(pol)) {
                         XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
                         return 0;
@@ -3069,6 +3072,11 @@ static int __net_init xfrm_net_init(struct net *net)
  {
         int rv;
  
+       /* Initialize the per-net locks here */
+       spin_lock_init(&net->xfrm.xfrm_state_lock);
+       spin_lock_init(&net->xfrm.xfrm_policy_lock);
+       mutex_init(&net->xfrm.xfrm_cfg_mutex);
+
         rv = xfrm_statistics_init(net);
         if (rv < 0)
                 goto out_statistics;
@@ -3085,11 +3093,6 @@ static int __net_init xfrm_net_init(struct net *net)
         if (rv < 0)
                 goto out;
  
-       /* Initialize the per-net locks here */
-       spin_lock_init(&net->xfrm.xfrm_state_lock);
-       spin_lock_init(&net->xfrm.xfrm_policy_lock);
-       mutex_init(&net->xfrm.xfrm_cfg_mutex);
-
         return 0;
  
  out:
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c

index 9705c279494b248b759155d671cfbc778fa25058..40a8aa39220d67d349c36f80ab1628d0e90c3fa7 100644 (file)
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -412,7 +412,14 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es
         up = nla_data(rp);
         ulen = xfrm_replay_state_esn_len(up);
  
-       if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen)
+       /* Check the overall length and the internal bitmap length to avoid
+        * potential overflow. */
+       if (nla_len(rp) < ulen ||
+           xfrm_replay_state_esn_len(replay_esn) != ulen ||
+           replay_esn->bmp_len != up->bmp_len)
+               return -EINVAL;
+
+       if (up->replay_window > up->bmp_len * sizeof(__u32) * 8)
                 return -EINVAL;
  
         return 0;
diff --git a/samples/statx/test-statx.c b/samples/statx/test-statx.c

index 8571d766331dd1a513a3f772f3a7ce050909598b..d4d77b09412c416fbe96e648e179a3789347d5ea 100644 (file)
--- a/samples/statx/test-statx.c
+++ b/samples/statx/test-statx.c
@@ -141,8 +141,8 @@ static void dump_statx(struct statx *stx)
         if (stx->stx_mask & STATX_BTIME)
                 print_time(" Birth: ", &stx->stx_btime);
  
-       if (stx->stx_attributes) {
-               unsigned char bits;
+       if (stx->stx_attributes_mask) {
+               unsigned char bits, mbits;
                 int loop, byte;
  
                 static char attr_representation[64 + 1] =
@@ -160,14 +160,18 @@ static void dump_statx(struct statx *stx)
                 printf("Attributes: %016llx (", stx->stx_attributes);
                 for (byte = 64 - 8; byte >= 0; byte -= 8) {
                         bits = stx->stx_attributes >> byte;
+                       mbits = stx->stx_attributes_mask >> byte;
                         for (loop = 7; loop >= 0; loop--) {
                                 int bit = byte + loop;
  
-                               if (bits & 0x80)
+                               if (!(mbits & 0x80))
+                                       putchar('.');   /* Not supported */
+                               else if (bits & 0x80)
                                         putchar(attr_representation[63 - bit]);
                                 else
-                                       putchar('-');
+                                       putchar('-');   /* Not set */
                                 bits <<= 1;
+                               mbits <<= 1;
                         }
                         if (byte)
                                 putchar(' ');
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include

index d6ca649cb0e96d4d91a4980048912544b8183e06..afe3fd3af1e40616857b3e6c425be632c1fa2667 100644 (file)
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -148,6 +148,10 @@ cc-fullversion = $(shell $(CONFIG_SHELL) \
  # Usage:  EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1)
  cc-ifversion = $(shell [ $(cc-version) $(1) $(2) ] && echo $(3) || echo $(4))
  
+# cc-if-fullversion
+# Usage:  EXTRA_CFLAGS += $(call cc-if-fullversion, -lt, 040502, -O1)
+cc-if-fullversion = $(shell [ $(cc-fullversion) $(1) $(2) ] && echo $(3) || echo $(4))
+
  # cc-ldoption
  # Usage: ldflags += $(call cc-ldoption, -Wl$(comma)--hash-style=both)
  cc-ldoption = $(call try-run,\
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib

index 0a07f9014944ed92a8e2e42983ae43be60b3e471..7234e61e7ce370a775ec6981b391b6d102a01770 100644 (file)
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -155,7 +155,7 @@ else
  # $(call addtree,-I$(obj)) locates .h files in srctree, from generated .c files
  #   and locates generated .h files
  # FIXME: Replace both with specific CFLAGS* statements in the makefiles
-__c_flags      = $(if $(obj),-I$(srctree)/$(src) -I$(obj)) \
+__c_flags      = $(if $(obj),$(call addtree,-I$(src)) -I$(obj)) \
                   $(call flags,_c_flags)
  __a_flags      = $(call flags,_a_flags)
  __cpp_flags     = $(call flags,_cpp_flags)
diff --git a/scripts/gcc-plugins/sancov_plugin.c b/scripts/gcc-plugins/sancov_plugin.c

index 9b0b5cbc5b899be4ddbafe2ce5f3ec5ab0743b6c..0f98634c20a097697cec9849dc9e4b338cd5e5c9 100644 (file)
--- a/scripts/gcc-plugins/sancov_plugin.c
+++ b/scripts/gcc-plugins/sancov_plugin.c
@@ -133,7 +133,7 @@ __visible int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gc
  #if BUILDING_GCC_VERSION < 6000
         register_callback(plugin_name, PLUGIN_START_UNIT, &sancov_start_unit, NULL);
         register_callback(plugin_name, PLUGIN_REGISTER_GGC_ROOTS, NULL, (void *)&gt_ggc_r_gt_sancov);
-       register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &sancov_plugin_pass_info);
+       register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &sancov_pass_info);
  #endif
  
         return 0;
diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c

index 26d208b435a0d347b8f11df13bcb79cf3618770c..cfddddb9c9d722b63c522450a5b277a4fd22d318 100644 (file)
--- a/scripts/kconfig/gconf.c
+++ b/scripts/kconfig/gconf.c
@@ -914,7 +914,7 @@ on_treeview2_button_press_event(GtkWidget * widget,
                         current = menu;
                         display_tree_part();
                         gtk_widget_set_sensitive(back_btn, TRUE);
-               } else if ((col == COL_OPTION)) {
+               } else if (col == COL_OPTION) {
                         toggle_sym_value(menu);
                         gtk_tree_view_expand_row(view, path, TRUE);
                 }
diff --git a/scripts/module-common.lds b/scripts/module-common.lds

index cf7e52e4781b9b193f28dcbe1f6de9ed415eb50d..9b6e246a45d09f530b3527b81946a30b1256697f 100644 (file)
--- a/scripts/module-common.lds
+++ b/scripts/module-common.lds
@@ -22,4 +22,6 @@ SECTIONS {
  
         . = ALIGN(8);
         .init_array             0 : { *(SORT(.init_array.*)) *(.init_array) }
+
+       __jump_table            0 : ALIGN(8) { KEEP(*(__jump_table)) }
  }
diff --git a/scripts/spelling.txt b/scripts/spelling.txt

index 0458b037c8a137daa0f0fc205cabc188b18ae513..0545f5a8cabed76cb2c49cfd8c2d08f567bc4980 100644 (file)
--- a/scripts/spelling.txt
+++ b/scripts/spelling.txt
@@ -372,6 +372,8 @@ disassocation||disassociation
  disapear||disappear
  disapeared||disappeared
  disappared||disappeared
+disble||disable
+disbled||disabled
  disconnet||disconnect
  discontinous||discontinuous
  dispertion||dispersion
@@ -732,6 +734,7 @@ oustanding||outstanding
  overaall||overall
  overhread||overhead
  overlaping||overlapping
+overide||override
  overrided||overridden
  overriden||overridden
  overun||overrun
diff --git a/security/keys/gc.c b/security/keys/gc.c

index addf060399e09547307d9c023f36d8dbf869a931..9cb4fe4478a137e7bb3352ae958830657f6b5b7c 100644 (file)
--- a/security/keys/gc.c
+++ b/security/keys/gc.c
@@ -46,7 +46,7 @@ static unsigned long key_gc_flags;
   * immediately unlinked.
   */
  struct key_type key_type_dead = {
-       .name = "dead",
+       .name = ".dead",
  };
  
  /*
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c

index 52c34532c78562643fce84832a5b536baf84988b..4ad3212adebe8becc152f22d2448f8db4716146b 100644 (file)
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -273,7 +273,8 @@ error:
   * Create and join an anonymous session keyring or join a named session
   * keyring, creating it if necessary.  A named session keyring must have Search
   * permission for it to be joined.  Session keyrings without this permit will
- * be skipped over.
+ * be skipped over.  It is not permitted for userspace to create or join
+ * keyrings whose name begin with a dot.
   *
   * If successful, the ID of the joined session keyring will be returned.
   */
@@ -290,12 +291,16 @@ long keyctl_join_session_keyring(const char __user *_name)
                         ret = PTR_ERR(name);
                         goto error;
                 }
+
+               ret = -EPERM;
+               if (name[0] == '.')
+                       goto error_name;
         }
  
         /* join the session */
         ret = join_session_keyring(name);
+error_name:
         kfree(name);
-
  error:
         return ret;
  }
@@ -1253,8 +1258,8 @@ error:
   * Read or set the default keyring in which request_key() will cache keys and
   * return the old setting.
   *
- * If a process keyring is specified then this will be created if it doesn't
- * yet exist.  The old setting will be returned if successful.
+ * If a thread or process keyring is specified then it will be created if it
+ * doesn't yet exist.  The old setting will be returned if successful.
   */
  long keyctl_set_reqkey_keyring(int reqkey_defl)
  {
@@ -1279,11 +1284,8 @@ long keyctl_set_reqkey_keyring(int reqkey_defl)
  
         case KEY_REQKEY_DEFL_PROCESS_KEYRING:
                 ret = install_process_keyring_to_cred(new);
-               if (ret < 0) {
-                       if (ret != -EEXIST)
-                               goto error;
-                       ret = 0;
-               }
+               if (ret < 0)
+                       goto error;
                 goto set;
  
         case KEY_REQKEY_DEFL_DEFAULT:
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c

index b6fdd22205b169b663cdb00aecd5d214c7a376dd..9139b18fc863eb36d62d7777f1553dda63236dfe 100644 (file)
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -128,13 +128,18 @@ error:
  }
  
  /*
- * Install a fresh thread keyring directly to new credentials.  This keyring is
- * allowed to overrun the quota.
+ * Install a thread keyring to the given credentials struct if it didn't have
+ * one already.  This is allowed to overrun the quota.
+ *
+ * Return: 0 if a thread keyring is now present; -errno on failure.
   */
  int install_thread_keyring_to_cred(struct cred *new)
  {
         struct key *keyring;
  
+       if (new->thread_keyring)
+               return 0;
+
         keyring = keyring_alloc("_tid", new->uid, new->gid, new,
                                 KEY_POS_ALL | KEY_USR_VIEW,
                                 KEY_ALLOC_QUOTA_OVERRUN,
@@ -147,7 +152,9 @@ int install_thread_keyring_to_cred(struct cred *new)
  }
  
  /*
- * Install a fresh thread keyring, discarding the old one.
+ * Install a thread keyring to the current task if it didn't have one already.
+ *
+ * Return: 0 if a thread keyring is now present; -errno on failure.
   */
  static int install_thread_keyring(void)
  {
@@ -158,8 +165,6 @@ static int install_thread_keyring(void)
         if (!new)
                 return -ENOMEM;
  
-       BUG_ON(new->thread_keyring);
-
         ret = install_thread_keyring_to_cred(new);
         if (ret < 0) {
                 abort_creds(new);
@@ -170,17 +175,17 @@ static int install_thread_keyring(void)
  }
  
  /*
- * Install a process keyring directly to a credentials struct.
+ * Install a process keyring to the given credentials struct if it didn't have
+ * one already.  This is allowed to overrun the quota.
   *
- * Returns -EEXIST if there was already a process keyring, 0 if one installed,
- * and other value on any other error
+ * Return: 0 if a process keyring is now present; -errno on failure.
   */
  int install_process_keyring_to_cred(struct cred *new)
  {
         struct key *keyring;
  
         if (new->process_keyring)
-               return -EEXIST;
+               return 0;
  
         keyring = keyring_alloc("_pid", new->uid, new->gid, new,
                                 KEY_POS_ALL | KEY_USR_VIEW,
@@ -194,11 +199,9 @@ int install_process_keyring_to_cred(struct cred *new)
  }
  
  /*
- * Make sure a process keyring is installed for the current process.  The
- * existing process keyring is not replaced.
+ * Install a process keyring to the current task if it didn't have one already.
   *
- * Returns 0 if there is a process keyring by the end of this function, some
- * error otherwise.
+ * Return: 0 if a process keyring is now present; -errno on failure.
   */
  static int install_process_keyring(void)
  {
@@ -212,14 +215,18 @@ static int install_process_keyring(void)
         ret = install_process_keyring_to_cred(new);
         if (ret < 0) {
                 abort_creds(new);
-               return ret != -EEXIST ? ret : 0;
+               return ret;
         }
  
         return commit_creds(new);
  }
  
  /*
- * Install a session keyring directly to a credentials struct.
+ * Install the given keyring as the session keyring of the given credentials
+ * struct, replacing the existing one if any.  If the given keyring is NULL,
+ * then install a new anonymous session keyring.
+ *
+ * Return: 0 on success; -errno on failure.
   */
  int install_session_keyring_to_cred(struct cred *cred, struct key *keyring)
  {
@@ -254,8 +261,11 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring)
  }
  
  /*
- * Install a session keyring, discarding the old one.  If a keyring is not
- * supplied, an empty one is invented.
+ * Install the given keyring as the session keyring of the current task,
+ * replacing the existing one if any.  If the given keyring is NULL, then
+ * install a new anonymous session keyring.
+ *
+ * Return: 0 on success; -errno on failure.
   */
  static int install_session_keyring(struct key *keyring)
  {
diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c

index 4c935202ce23be4fc57c9d79c3fe0a160d9b8a89..f3b1d7f50b81156d4c29c5c0958a884e9292bbae 100644 (file)
--- a/sound/core/seq/seq_clientmgr.c
+++ b/sound/core/seq/seq_clientmgr.c
@@ -1832,6 +1832,7 @@ static int snd_seq_ioctl_set_client_pool(struct snd_seq_client *client,
              info->output_pool != client->pool->size)) {
                 if (snd_seq_write_pool_allocated(client)) {
                         /* remove all existing cells */
+                       snd_seq_pool_mark_closing(client->pool);
                         snd_seq_queue_client_leave_cells(client->number);
                         snd_seq_pool_done(client->pool);
                 }
diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c

index 448efd4e980edf97138b43b6263a9909d07c076a..01c4cfe30c9feffd4fa24c7223e58d9a604cb026 100644 (file)
--- a/sound/core/seq/seq_fifo.c
+++ b/sound/core/seq/seq_fifo.c
@@ -72,6 +72,9 @@ void snd_seq_fifo_delete(struct snd_seq_fifo **fifo)
                 return;
         *fifo = NULL;
  
+       if (f->pool)
+               snd_seq_pool_mark_closing(f->pool);
+
         snd_seq_fifo_clear(f);
  
         /* wake up clients if any */
@@ -264,6 +267,10 @@ int snd_seq_fifo_resize(struct snd_seq_fifo *f, int poolsize)
         /* NOTE: overflow flag is not cleared */
         spin_unlock_irqrestore(&f->lock, flags);
  
+       /* close the old pool and wait until all users are gone */
+       snd_seq_pool_mark_closing(oldpool);
+       snd_use_lock_sync(&f->use_lock);
+
         /* release cells in old pool */
         for (cell = oldhead; cell; cell = next) {
                 next = cell->next;
diff --git a/sound/core/seq/seq_lock.c b/sound/core/seq/seq_lock.c

index 3b693e924db745c0ec8be74171bb189f17dfc53d..12ba83367b1bc882f6d6fbab9329185d58125090 100644 (file)
--- a/sound/core/seq/seq_lock.c
+++ b/sound/core/seq/seq_lock.c
@@ -28,19 +28,16 @@
  /* wait until all locks are released */
  void snd_use_lock_sync_helper(snd_use_lock_t *lockp, const char *file, int line)
  {
-       int max_count = 5 * HZ;
+       int warn_count = 5 * HZ;
  
         if (atomic_read(lockp) < 0) {
                 pr_warn("ALSA: seq_lock: lock trouble [counter = %d] in %s:%d\n", atomic_read(lockp), file, line);
                 return;
         }
         while (atomic_read(lockp) > 0) {
-               if (max_count == 0) {
-                       pr_warn("ALSA: seq_lock: timeout [%d left] in %s:%d\n", atomic_read(lockp), file, line);
-                       break;
-               }
+               if (warn_count-- == 0)
+                       pr_warn("ALSA: seq_lock: waiting [%d left] in %s:%d\n", atomic_read(lockp), file, line);
                 schedule_timeout_uninterruptible(1);
-               max_count--;
         }
  }
  
diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c

index 1a1acf3ddda4c9aeb022548b9438498e0f036762..d4c61ec9be13d7389addd27bc70acf58bda2eecc 100644 (file)
--- a/sound/core/seq/seq_memory.c
+++ b/sound/core/seq/seq_memory.c
@@ -415,6 +415,18 @@ int snd_seq_pool_init(struct snd_seq_pool *pool)
         return 0;
  }
  
+/* refuse any further insertion into the pool */
+void snd_seq_pool_mark_closing(struct snd_seq_pool *pool)
+{
+       unsigned long flags;
+
+       if (snd_BUG_ON(!pool))
+               return;
+       spin_lock_irqsave(&pool->lock, flags);
+       pool->closing = 1;
+       spin_unlock_irqrestore(&pool->lock, flags);
+}
+
  /* remove events */
  int snd_seq_pool_done(struct snd_seq_pool *pool)
  {
@@ -425,10 +437,6 @@ int snd_seq_pool_done(struct snd_seq_pool *pool)
                 return -EINVAL;
  
         /* wait for closing all threads */
-       spin_lock_irqsave(&pool->lock, flags);
-       pool->closing = 1;
-       spin_unlock_irqrestore(&pool->lock, flags);
-
         if (waitqueue_active(&pool->output_sleep))
                 wake_up(&pool->output_sleep);
  
@@ -485,6 +493,7 @@ int snd_seq_pool_delete(struct snd_seq_pool **ppool)
         *ppool = NULL;
         if (pool == NULL)
                 return 0;
+       snd_seq_pool_mark_closing(pool);
         snd_seq_pool_done(pool);
         kfree(pool);
         return 0;
diff --git a/sound/core/seq/seq_memory.h b/sound/core/seq/seq_memory.h

index 4a2ec779b8a701b1aba2402e9de583878f7a39ca..32f959c17786d9ac8c071ba0e6fd070dc06da78b 100644 (file)
--- a/sound/core/seq/seq_memory.h
+++ b/sound/core/seq/seq_memory.h
@@ -84,6 +84,7 @@ static inline int snd_seq_total_cells(struct snd_seq_pool *pool)
  int snd_seq_pool_init(struct snd_seq_pool *pool);
  
  /* done pool - free events */
+void snd_seq_pool_mark_closing(struct snd_seq_pool *pool);
  int snd_seq_pool_done(struct snd_seq_pool *pool);
  
  /* create pool */
diff --git a/sound/firewire/lib.h b/sound/firewire/lib.h

index f6769312ebfccbe473ac291e81b386b48b45488f..c3768cd494a5f3fe3bb9059bd8e588c73b806c86 100644 (file)
--- a/sound/firewire/lib.h
+++ b/sound/firewire/lib.h
@@ -45,7 +45,7 @@ struct snd_fw_async_midi_port {
  
         struct snd_rawmidi_substream *substream;
         snd_fw_async_midi_port_fill fill;
-       unsigned int consume_bytes;
+       int consume_bytes;
  };
  
  int snd_fw_async_midi_port_init(struct snd_fw_async_midi_port *port,
diff --git a/sound/firewire/oxfw/oxfw.c b/sound/firewire/oxfw/oxfw.c

index 74d7fb6efce6ca8258ece95a83c3460588a3e99e..413ab6313bb66515c284734ca92c87bef014c5fd 100644 (file)
--- a/sound/firewire/oxfw/oxfw.c
+++ b/sound/firewire/oxfw/oxfw.c
@@ -227,11 +227,11 @@ static void do_registration(struct work_struct *work)
         if (err < 0)
                 goto error;
  
-       err = detect_quirks(oxfw);
+       err = snd_oxfw_stream_discover(oxfw);
         if (err < 0)
                 goto error;
  
-       err = snd_oxfw_stream_discover(oxfw);
+       err = detect_quirks(oxfw);
         if (err < 0)
                 goto error;
  
diff --git a/sound/pci/ctxfi/cthw20k1.c b/sound/pci/ctxfi/cthw20k1.c

index ab4cdab5cfa57abf3db2a8da806d0bf7031fed67..79edd88d5cd08398afb86c63b3d32b025162164e 100644 (file)
--- a/sound/pci/ctxfi/cthw20k1.c
+++ b/sound/pci/ctxfi/cthw20k1.c
@@ -1905,7 +1905,7 @@ static int hw_card_start(struct hw *hw)
                 return err;
  
         /* Set DMA transfer mask */
-       if (dma_set_mask(&pci->dev, DMA_BIT_MASK(dma_bits))) {
+       if (!dma_set_mask(&pci->dev, DMA_BIT_MASK(dma_bits))) {
                 dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(dma_bits));
         } else {
                 dma_set_mask(&pci->dev, DMA_BIT_MASK(32));
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c

index c15c51bea26d0afdcc6d8c806993754eaaa2e031..69266b8ea2ad7b498097c4bc231fbad6e55ff37e 100644 (file)
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -261,6 +261,7 @@ enum {
         CXT_FIXUP_HP_530,
         CXT_FIXUP_CAP_MIX_AMP_5047,
         CXT_FIXUP_MUTE_LED_EAPD,
+       CXT_FIXUP_HP_DOCK,
         CXT_FIXUP_HP_SPECTRE,
         CXT_FIXUP_HP_GATE_MIC,
  };
@@ -778,6 +779,14 @@ static const struct hda_fixup cxt_fixups[] = {
                 .type = HDA_FIXUP_FUNC,
                 .v.func = cxt_fixup_mute_led_eapd,
         },
+       [CXT_FIXUP_HP_DOCK] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x16, 0x21011020 }, /* line-out */
+                       { 0x18, 0x2181103f }, /* line-in */
+                       { }
+               }
+       },
         [CXT_FIXUP_HP_SPECTRE] = {
                 .type = HDA_FIXUP_PINS,
                 .v.pins = (const struct hda_pintbl[]) {
@@ -839,6 +848,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
         SND_PCI_QUIRK(0x1025, 0x0543, "Acer Aspire One 522", CXT_FIXUP_STEREO_DMIC),
         SND_PCI_QUIRK(0x1025, 0x054c, "Acer Aspire 3830TG", CXT_FIXUP_ASPIRE_DMIC),
         SND_PCI_QUIRK(0x1025, 0x054f, "Acer Aspire 4830T", CXT_FIXUP_ASPIRE_DMIC),
+       SND_PCI_QUIRK(0x103c, 0x8079, "HP EliteBook 840 G3", CXT_FIXUP_HP_DOCK),
         SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE),
         SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC),
         SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN),
@@ -871,6 +881,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = {
         { .id = CXT_PINCFG_LEMOTE_A1205, .name = "lemote-a1205" },
         { .id = CXT_FIXUP_OLPC_XO, .name = "olpc-xo" },
         { .id = CXT_FIXUP_MUTE_LED_EAPD, .name = "mute-led-eapd" },
+       { .id = CXT_FIXUP_HP_DOCK, .name = "hp-dock" },
         {}
  };
  
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c

index 4e112221d825462ef7e1ae38c092e40e58667f38..299835d1fbaadb5f312ee86502deacd4f7643603 100644 (file)
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -4847,6 +4847,7 @@ enum {
         ALC286_FIXUP_HP_GPIO_LED,
         ALC280_FIXUP_HP_GPIO2_MIC_HOTKEY,
         ALC280_FIXUP_HP_DOCK_PINS,
+       ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED,
         ALC280_FIXUP_HP_9480M,
         ALC288_FIXUP_DELL_HEADSET_MODE,
         ALC288_FIXUP_DELL1_MIC_NO_PRESENCE,
@@ -4857,6 +4858,7 @@ enum {
         ALC292_FIXUP_DISABLE_AAMIX,
         ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK,
         ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
+       ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE,
         ALC275_FIXUP_DELL_XPS,
         ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE,
         ALC293_FIXUP_LENOVO_SPK_NOISE,
@@ -5388,6 +5390,16 @@ static const struct hda_fixup alc269_fixups[] = {
                 .chained = true,
                 .chain_id = ALC280_FIXUP_HP_GPIO4
         },
+       [ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x1b, 0x21011020 }, /* line-out */
+                       { 0x18, 0x2181103f }, /* line-in */
+                       { },
+               },
+               .chained = true,
+               .chain_id = ALC269_FIXUP_HP_GPIO_MIC1_LED
+       },
         [ALC280_FIXUP_HP_9480M] = {
                 .type = HDA_FIXUP_FUNC,
                 .v.func = alc280_fixup_hp_9480m,
@@ -5459,6 +5471,15 @@ static const struct hda_fixup alc269_fixups[] = {
                 .chained = true,
                 .chain_id = ALC269_FIXUP_HEADSET_MODE
         },
+       [ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x18, 0x01a1913c }, /* use as headset mic, without its own jack detect */
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC269_FIXUP_HEADSET_MODE
+       },
         [ALC275_FIXUP_DELL_XPS] = {
                 .type = HDA_FIXUP_VERBS,
                 .v.verbs = (const struct hda_verb[]) {
@@ -5531,7 +5552,7 @@ static const struct hda_fixup alc269_fixups[] = {
                 .type = HDA_FIXUP_FUNC,
                 .v.func = alc298_fixup_speaker_volume,
                 .chained = true,
-               .chain_id = ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
+               .chain_id = ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE,
         },
         [ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER] = {
                 .type = HDA_FIXUP_PINS,
@@ -5647,7 +5668,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
         SND_PCI_QUIRK(0x103c, 0x2256, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED),
         SND_PCI_QUIRK(0x103c, 0x2257, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED),
         SND_PCI_QUIRK(0x103c, 0x2259, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED),
-       SND_PCI_QUIRK(0x103c, 0x225a, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED),
+       SND_PCI_QUIRK(0x103c, 0x225a, "HP", ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED),
         SND_PCI_QUIRK(0x103c, 0x2260, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
         SND_PCI_QUIRK(0x103c, 0x2263, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
         SND_PCI_QUIRK(0x103c, 0x2264, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
@@ -5816,6 +5837,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
         {.id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC, .name = "headset-mode-no-hp-mic"},
         {.id = ALC269_FIXUP_LENOVO_DOCK, .name = "lenovo-dock"},
         {.id = ALC269_FIXUP_HP_GPIO_LED, .name = "hp-gpio-led"},
+       {.id = ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED, .name = "hp-dock-gpio-mic1-led"},
         {.id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, .name = "dell-headset-multi"},
         {.id = ALC269_FIXUP_DELL2_MIC_NO_PRESENCE, .name = "dell-headset-dock"},
         {.id = ALC283_FIXUP_CHROME_BOOK, .name = "alc283-dac-wcaps"},
@@ -6090,6 +6112,8 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                 ALC295_STANDARD_PINS,
                 {0x17, 0x21014040},
                 {0x18, 0x21a19050}),
+       SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+               ALC295_STANDARD_PINS),
         SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
                 ALC298_STANDARD_PINS,
                 {0x17, 0x90170110}),
diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c

index ec1067a679da406019bd4c98e6b6cf22fd5a4432..08b1399d1da2b818b997b752555532ebdf45312e 100644 (file)
--- a/sound/soc/amd/acp-pcm-dma.c
+++ b/sound/soc/amd/acp-pcm-dma.c
@@ -89,7 +89,7 @@ static void acp_reg_write(u32 val, void __iomem *acp_mmio, u32 reg)
         writel(val, acp_mmio + (reg * 4));
  }
  
-/* Configure a given dma channel parameters - enable/disble,
+/* Configure a given dma channel parameters - enable/disable,
   * number of descriptors, priority
   */
  static void config_acp_dma_channel(void __iomem *acp_mmio, u8 ch_num,
diff --git a/sound/soc/atmel/atmel-classd.c b/sound/soc/atmel/atmel-classd.c

index 89ac5f5a93eb31f510d8ab189badc781fd319e6f..7ae46c2647d453bcad1176b6877fcbbae110416b 100644 (file)
--- a/sound/soc/atmel/atmel-classd.c
+++ b/sound/soc/atmel/atmel-classd.c
@@ -349,7 +349,7 @@ static int atmel_classd_codec_dai_digital_mute(struct snd_soc_dai *codec_dai,
  }
  
  #define CLASSD_ACLK_RATE_11M2896_MPY_8 (112896 * 100 * 8)
-#define CLASSD_ACLK_RATE_12M288_MPY_8  (12228 * 1000 * 8)
+#define CLASSD_ACLK_RATE_12M288_MPY_8  (12288 * 1000 * 8)
  
  static struct {
         int rate;
diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c

index 78fca8acd3ec0a2209876bf5b20a1363a03c6336..fd272a40485b077039c68f786671218a7f4204e2 100644 (file)
--- a/sound/soc/codecs/hdac_hdmi.c
+++ b/sound/soc/codecs/hdac_hdmi.c
@@ -1534,21 +1534,20 @@ static void hdac_hdmi_eld_notify_cb(void *aptr, int port, int pipe)
                         pin->mst_capable = false;
                         /* if not MST, default is port[0] */
                         hport = &pin->ports[0];
-                       goto out;
                 } else {
                         for (i = 0; i < pin->num_ports; i++) {
                                 pin->mst_capable = true;
                                 if (pin->ports[i].id == pipe) {
                                         hport = &pin->ports[i];
-                                       goto out;
+                                       break;
                                 }
                         }
                 }
+
+               if (hport)
+                       hdac_hdmi_present_sense(pin, hport);
         }
  
-out:
-       if (pin && hport)
-               hdac_hdmi_present_sense(pin, hport);
  }
  
  static struct i915_audio_component_audio_ops aops = {
@@ -1998,7 +1997,7 @@ static int hdac_hdmi_dev_remove(struct hdac_ext_device *edev)
         struct hdac_hdmi_pin *pin, *pin_next;
         struct hdac_hdmi_cvt *cvt, *cvt_next;
         struct hdac_hdmi_pcm *pcm, *pcm_next;
-       struct hdac_hdmi_port *port;
+       struct hdac_hdmi_port *port, *port_next;
         int i;
  
         snd_soc_unregister_codec(&edev->hdac.dev);
@@ -2008,8 +2007,9 @@ static int hdac_hdmi_dev_remove(struct hdac_ext_device *edev)
                 if (list_empty(&pcm->port_list))
                         continue;
  
-               list_for_each_entry(port, &pcm->port_list, head)
-                       port = NULL;
+               list_for_each_entry_safe(port, port_next,
+                                       &pcm->port_list, head)
+                       list_del(&port->head);
  
                 list_del(&pcm->head);
                 kfree(pcm);
diff --git a/sound/soc/codecs/rt5665.c b/sound/soc/codecs/rt5665.c

index 324461e985b3918211ad29052a5142a6560e26af..476135ec57268cf6863e9792d5fb027c6383e190 100644 (file)
--- a/sound/soc/codecs/rt5665.c
+++ b/sound/soc/codecs/rt5665.c
@@ -1241,7 +1241,7 @@ static irqreturn_t rt5665_irq(int irq, void *data)
  static void rt5665_jd_check_handler(struct work_struct *work)
  {
         struct rt5665_priv *rt5665 = container_of(work, struct rt5665_priv,
-               calibrate_work.work);
+               jd_check_work.work);
  
         if (snd_soc_read(rt5665->codec, RT5665_AJD1_CTRL) & 0x0010) {
                 /* jack out */
@@ -2252,7 +2252,7 @@ static const char * const rt5665_if2_1_adc_in_src[] = {
  
  static const SOC_ENUM_SINGLE_DECL(
         rt5665_if2_1_adc_in_enum, RT5665_DIG_INF2_DATA,
-       RT5665_IF3_ADC_IN_SFT, rt5665_if2_1_adc_in_src);
+       RT5665_IF2_1_ADC_IN_SFT, rt5665_if2_1_adc_in_src);
  
  static const struct snd_kcontrol_new rt5665_if2_1_adc_in_mux =
         SOC_DAPM_ENUM("IF2_1 ADC IN Source", rt5665_if2_1_adc_in_enum);
@@ -3178,6 +3178,9 @@ static const struct snd_soc_dapm_route rt5665_dapm_routes[] = {
         {"DAC Mono Right Filter", NULL, "DAC Mono R ASRC", is_using_asrc},
         {"DAC Stereo1 Filter", NULL, "DAC STO1 ASRC", is_using_asrc},
         {"DAC Stereo2 Filter", NULL, "DAC STO2 ASRC", is_using_asrc},
+       {"I2S1 ASRC", NULL, "CLKDET"},
+       {"I2S2 ASRC", NULL, "CLKDET"},
+       {"I2S3 ASRC", NULL, "CLKDET"},
  
         /*Vref*/
         {"Mic Det Power", NULL, "Vref2"},
@@ -3912,6 +3915,7 @@ static const struct snd_soc_dapm_route rt5665_dapm_routes[] = {
         {"Mono MIX", "MONOVOL Switch", "MONOVOL"},
         {"Mono Amp", NULL, "Mono MIX"},
         {"Mono Amp", NULL, "Vref2"},
+       {"Mono Amp", NULL, "Vref3"},
         {"Mono Amp", NULL, "CLKDET SYS"},
         {"Mono Amp", NULL, "CLKDET MONO"},
         {"Mono Playback", "Switch", "Mono Amp"},
@@ -4798,7 +4802,7 @@ static int rt5665_i2c_probe(struct i2c_client *i2c,
         /* Enhance performance*/
         regmap_update_bits(rt5665->regmap, RT5665_PWR_ANLG_1,
                 RT5665_HP_DRIVER_MASK | RT5665_LDO1_DVO_MASK,
-               RT5665_HP_DRIVER_5X | RT5665_LDO1_DVO_09);
+               RT5665_HP_DRIVER_5X | RT5665_LDO1_DVO_12);
  
         INIT_DELAYED_WORK(&rt5665->jack_detect_work,
                                 rt5665_jack_detect_handler);
diff --git a/sound/soc/codecs/rt5665.h b/sound/soc/codecs/rt5665.h

index 12f7080a0d3c3f1eeab2e3083020cffe98599891..a30f5e6d062882724230e2bf2f2d1157ee823a9b 100644 (file)
--- a/sound/soc/codecs/rt5665.h
+++ b/sound/soc/codecs/rt5665.h
@@ -1106,7 +1106,7 @@
  #define RT5665_HP_DRIVER_MASK                  (0x3 << 2)
  #define RT5665_HP_DRIVER_1X                    (0x0 << 2)
  #define RT5665_HP_DRIVER_3X                    (0x1 << 2)
-#define RT5665_HP_DRIVER_5X                    (0x2 << 2)
+#define RT5665_HP_DRIVER_5X                    (0x3 << 2)
  #define RT5665_LDO1_DVO_MASK                   (0x3)
  #define RT5665_LDO1_DVO_09                     (0x0)
  #define RT5665_LDO1_DVO_10                     (0x1)
diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c

index d151224ffcca411a5685b2076c43748fd2134790..bbdb72f73df19ddf954daab19d3deb222208dc7c 100644 (file)
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -899,7 +899,10 @@ static int wm_coeff_put(struct snd_kcontrol *kctl,
  
         mutex_lock(&ctl->dsp->pwr_lock);
  
-       memcpy(ctl->cache, p, ctl->len);
+       if (ctl->flags & WMFW_CTL_FLAG_VOLATILE)
+               ret = -EPERM;
+       else
+               memcpy(ctl->cache, p, ctl->len);
  
         ctl->set = 1;
         if (ctl->enabled && ctl->dsp->running)
@@ -926,6 +929,8 @@ static int wm_coeff_tlv_put(struct snd_kcontrol *kctl,
                 ctl->set = 1;
                 if (ctl->enabled && ctl->dsp->running)
                         ret = wm_coeff_write_control(ctl, ctl->cache, size);
+               else if (ctl->flags & WMFW_CTL_FLAG_VOLATILE)
+                       ret = -EPERM;
         }
  
         mutex_unlock(&ctl->dsp->pwr_lock);
@@ -947,7 +952,7 @@ static int wm_coeff_put_acked(struct snd_kcontrol *kctl,
  
         mutex_lock(&ctl->dsp->pwr_lock);
  
-       if (ctl->enabled)
+       if (ctl->enabled && ctl->dsp->running)
                 ret = wm_coeff_write_acked_control(ctl, val);
         else
                 ret = -EPERM;
diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c

index 4924575d2e95d3d49c0bdad57bc8691c472da5b2..343b291fc3725f46b4d5270b65d5100ca083ef61 100644 (file)
--- a/sound/soc/generic/simple-card-utils.c
+++ b/sound/soc/generic/simple-card-utils.c
@@ -115,6 +115,7 @@ int asoc_simple_card_parse_clk(struct device *dev,
         clk = devm_get_clk_from_child(dev, node, NULL);
         if (!IS_ERR(clk)) {
                 simple_dai->sysclk = clk_get_rate(clk);
+               simple_dai->clk = clk;
         } else if (!of_property_read_u32(node, "system-clock-frequency", &val)) {
                 simple_dai->sysclk = val;
         } else {
diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c

index 5c7219fb3aa86738a49cff98bf6f16693f7a0192..9e2a3404a836bf919f68b6cbd33c8f569b3c8843 100644 (file)
--- a/sound/soc/intel/boards/bytcr_rt5640.c
+++ b/sound/soc/intel/boards/bytcr_rt5640.c
@@ -621,7 +621,7 @@ static struct snd_soc_dai_link byt_rt5640_dais[] = {
                 .codec_dai_name = "snd-soc-dummy-dai",
                 .codec_name = "snd-soc-dummy",
                 .platform_name = "sst-mfld-platform",
-               .ignore_suspend = 1,
+               .nonatomic = true,
                 .dynamic = 1,
                 .dpcm_playback = 1,
                 .dpcm_capture = 1,
@@ -634,7 +634,6 @@ static struct snd_soc_dai_link byt_rt5640_dais[] = {
                 .codec_dai_name = "snd-soc-dummy-dai",
                 .codec_name = "snd-soc-dummy",
                 .platform_name = "sst-mfld-platform",
-               .ignore_suspend = 1,
                 .nonatomic = true,
                 .dynamic = 1,
                 .dpcm_playback = 1,
@@ -661,6 +660,7 @@ static struct snd_soc_dai_link byt_rt5640_dais[] = {
                                                 | SND_SOC_DAIFMT_CBS_CFS,
                 .be_hw_params_fixup = byt_rt5640_codec_fixup,
                 .ignore_suspend = 1,
+               .nonatomic = true,
                 .dpcm_playback = 1,
                 .dpcm_capture = 1,
                 .init = byt_rt5640_init,
diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c

index 3186f015939fb5fce3e8393fd40463362464dbf8..8164bec63bf15b874da49a4406ae9f6124cd776c 100644 (file)
--- a/sound/soc/intel/boards/bytcr_rt5651.c
+++ b/sound/soc/intel/boards/bytcr_rt5651.c
@@ -235,7 +235,6 @@ static struct snd_soc_dai_link byt_rt5651_dais[] = {
                 .codec_dai_name = "snd-soc-dummy-dai",
                 .codec_name = "snd-soc-dummy",
                 .platform_name = "sst-mfld-platform",
-               .ignore_suspend = 1,
                 .nonatomic = true,
                 .dynamic = 1,
                 .dpcm_playback = 1,
@@ -249,7 +248,6 @@ static struct snd_soc_dai_link byt_rt5651_dais[] = {
                 .codec_dai_name = "snd-soc-dummy-dai",
                 .codec_name = "snd-soc-dummy",
                 .platform_name = "sst-mfld-platform",
-               .ignore_suspend = 1,
                 .nonatomic = true,
                 .dynamic = 1,
                 .dpcm_playback = 1,
diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c

index ed58b5b3555a869ff91772689b761369873d6c89..2dbfb1b24ef4a629ecdac8004630fc66aeb529fc 100644 (file)
--- a/sound/soc/intel/skylake/skl-topology.c
+++ b/sound/soc/intel/skylake/skl-topology.c
@@ -512,7 +512,7 @@ static int skl_tplg_set_module_init_data(struct snd_soc_dapm_widget *w)
                         if (bc->set_params != SKL_PARAM_INIT)
                                 continue;
  
-                       mconfig->formats_config.caps = (u32 *)&bc->params;
+                       mconfig->formats_config.caps = (u32 *)bc->params;
                         mconfig->formats_config.caps_size = bc->size;
  
                         break;
diff --git a/sound/soc/mediatek/Kconfig b/sound/soc/mediatek/Kconfig

index 05cf809cf9e1467dca19e4f02433f6e6b34532a3..d7013bde6f45fc7ed82db8ea0e92ae81226705f1 100644 (file)
--- a/sound/soc/mediatek/Kconfig
+++ b/sound/soc/mediatek/Kconfig
@@ -13,7 +13,7 @@ config SND_SOC_MT2701
  
  config SND_SOC_MT2701_CS42448
         tristate "ASoc Audio driver for MT2701 with CS42448 codec"
-       depends on SND_SOC_MT2701
+       depends on SND_SOC_MT2701 && I2C
         select SND_SOC_CS42XX8_I2C
         select SND_SOC_BT_SCO
         help
diff --git a/sound/soc/sh/rcar/cmd.c b/sound/soc/sh/rcar/cmd.c

index abb5eaac854a9b9c47027e278cd5204c18aceace..7d92a24b7cfa558afbb8331401c974c59d5f1ae5 100644 (file)
--- a/sound/soc/sh/rcar/cmd.c
+++ b/sound/soc/sh/rcar/cmd.c
@@ -31,23 +31,24 @@ static int rsnd_cmd_init(struct rsnd_mod *mod,
         struct rsnd_mod *mix = rsnd_io_to_mod_mix(io);
         struct device *dev = rsnd_priv_to_dev(priv);
         u32 data;
+       u32 path[] = {
+               [1] = 1 << 0,
+               [5] = 1 << 8,
+               [6] = 1 << 12,
+               [9] = 1 << 15,
+       };
  
         if (!mix && !dvc)
                 return 0;
  
+       if (ARRAY_SIZE(path) < rsnd_mod_id(mod) + 1)
+               return -ENXIO;
+
         if (mix) {
                 struct rsnd_dai *rdai;
                 struct rsnd_mod *src;
                 struct rsnd_dai_stream *tio;
                 int i;
-               u32 path[] = {
-                       [0] = 0,
-                       [1] = 1 << 0,
-                       [2] = 0,
-                       [3] = 0,
-                       [4] = 0,
-                       [5] = 1 << 8
-               };
  
                 /*
                  * it is assuming that integrater is well understanding about
@@ -70,16 +71,19 @@ static int rsnd_cmd_init(struct rsnd_mod *mod,
         } else {
                 struct rsnd_mod *src = rsnd_io_to_mod_src(io);
  
-               u32 path[] = {
-                       [0] = 0x30000,
-                       [1] = 0x30001,
-                       [2] = 0x40000,
-                       [3] = 0x10000,
-                       [4] = 0x20000,
-                       [5] = 0x40100
+               u8 cmd_case[] = {
+                       [0] = 0x3,
+                       [1] = 0x3,
+                       [2] = 0x4,
+                       [3] = 0x1,
+                       [4] = 0x2,
+                       [5] = 0x4,
+                       [6] = 0x1,
+                       [9] = 0x2,
                 };
  
-               data = path[rsnd_mod_id(src)];
+               data = path[rsnd_mod_id(src)] |
+                       cmd_case[rsnd_mod_id(src)] << 16;
         }
  
         dev_dbg(dev, "ctu/mix path = 0x%08x", data);
diff --git a/sound/soc/sh/rcar/dma.c b/sound/soc/sh/rcar/dma.c

index 1f405c83386759a1bfc7dbfd3ff51dba2ee33614..241cb3b08a0755dc93c06034cca814e9c509c090 100644 (file)
--- a/sound/soc/sh/rcar/dma.c
+++ b/sound/soc/sh/rcar/dma.c
@@ -454,6 +454,20 @@ static u32 rsnd_dmapp_read(struct rsnd_dma *dma, u32 reg)
         return ioread32(rsnd_dmapp_addr(dmac, dma, reg));
  }
  
+static void rsnd_dmapp_bset(struct rsnd_dma *dma, u32 data, u32 mask, u32 reg)
+{
+       struct rsnd_mod *mod = rsnd_mod_get(dma);
+       struct rsnd_priv *priv = rsnd_mod_to_priv(mod);
+       struct rsnd_dma_ctrl *dmac = rsnd_priv_to_dmac(priv);
+       void __iomem *addr = rsnd_dmapp_addr(dmac, dma, reg);
+       u32 val = ioread32(addr);
+
+       val &= ~mask;
+       val |= (data & mask);
+
+       iowrite32(val, addr);
+}
+
  static int rsnd_dmapp_stop(struct rsnd_mod *mod,
                            struct rsnd_dai_stream *io,
                            struct rsnd_priv *priv)
@@ -461,10 +475,10 @@ static int rsnd_dmapp_stop(struct rsnd_mod *mod,
         struct rsnd_dma *dma = rsnd_mod_to_dma(mod);
         int i;
  
-       rsnd_dmapp_write(dma, 0, PDMACHCR);
+       rsnd_dmapp_bset(dma, 0,  PDMACHCR_DE, PDMACHCR);
  
         for (i = 0; i < 1024; i++) {
-               if (0 == rsnd_dmapp_read(dma, PDMACHCR))
+               if (0 == (rsnd_dmapp_read(dma, PDMACHCR) & PDMACHCR_DE))
                         return 0;
                 udelay(1);
         }
diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c

index 4e817c8a18c0bbe899374028ae56ef0754042d59..14fafdaf1395f9737191df18599ee58fc4f858fd 100644 (file)
--- a/sound/soc/sh/rcar/ssiu.c
+++ b/sound/soc/sh/rcar/ssiu.c
@@ -64,7 +64,11 @@ static int rsnd_ssiu_init(struct rsnd_mod *mod,
         mask1 = (1 << 4) | (1 << 20);   /* mask sync bit */
         mask2 = (1 << 4);               /* mask sync bit */
         val1  = val2  = 0;
-       if (rsnd_ssi_is_pin_sharing(io)) {
+       if (id == 8) {
+               /*
+                * SSI8 pin is shared with SSI7, nothing to do.
+                */
+       } else if (rsnd_ssi_is_pin_sharing(io)) {
                 int shift = -1;
  
                 switch (id) {
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c

index 6dca408faae334d223494c33e14d503518b497d7..2722bb0c557310d97816cfa7857b24d7c9bd4948 100644 (file)
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -3326,7 +3326,10 @@ static int snd_soc_platform_drv_pcm_new(struct snd_soc_pcm_runtime *rtd)
  {
         struct snd_soc_platform *platform = rtd->platform;
  
-       return platform->driver->pcm_new(rtd);
+       if (platform->driver->pcm_new)
+               return platform->driver->pcm_new(rtd);
+       else
+               return 0;
  }
  
  static void snd_soc_platform_drv_pcm_free(struct snd_pcm *pcm)
@@ -3334,7 +3337,8 @@ static void snd_soc_platform_drv_pcm_free(struct snd_pcm *pcm)
         struct snd_soc_pcm_runtime *rtd = pcm->private_data;
         struct snd_soc_platform *platform = rtd->platform;
  
-       platform->driver->pcm_free(pcm);
+       if (platform->driver->pcm_free)
+               platform->driver->pcm_free(pcm);
  }
  
  /**
diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c

index 3e9b1c0bb1ce3cb1864e1825ade56720803df289..058bc99c6c3479e4d7a92c97a885f634b168105d 100644 (file)
--- a/sound/soc/soc-topology.c
+++ b/sound/soc/soc-topology.c
@@ -933,6 +933,7 @@ static int soc_tplg_denum_create_texts(struct soc_enum *se,
                 }
         }
  
+       se->texts = (const char * const *)se->dobj.control.dtexts;
         return 0;
  
  err:
diff --git a/sound/soc/sti/uniperif.h b/sound/soc/sti/uniperif.h

index d487dd2ef016fee0f25d6a5705feb98a0f26356a..cfcb0ea9d99d8981abc2425d15f3e7148e0dc7a6 100644 (file)
--- a/sound/soc/sti/uniperif.h
+++ b/sound/soc/sti/uniperif.h
@@ -1299,6 +1299,7 @@ struct uniperif {
         int ver; /* IP version, used by register access macros */
         struct regmap_field *clk_sel;
         struct regmap_field *valid_sel;
+       spinlock_t irq_lock; /* use to prevent race condition with IRQ */
  
         /* capabilities */
         const struct snd_pcm_hardware *hw;
diff --git a/sound/soc/sti/uniperif_player.c b/sound/soc/sti/uniperif_player.c

index 60ae31a303ab001e5724518248f59ef12033570b..d7e8dd46d2cc40ba2c937a4ea627f4859fe1f478 100644 (file)
--- a/sound/soc/sti/uniperif_player.c
+++ b/sound/soc/sti/uniperif_player.c
@@ -65,10 +65,13 @@ static irqreturn_t uni_player_irq_handler(int irq, void *dev_id)
         unsigned int status;
         unsigned int tmp;
  
-       if (player->state == UNIPERIF_STATE_STOPPED) {
-               /* Unexpected IRQ: do nothing */
-               return IRQ_NONE;
-       }
+       spin_lock(&player->irq_lock);
+       if (!player->substream)
+               goto irq_spin_unlock;
+
+       snd_pcm_stream_lock(player->substream);
+       if (player->state == UNIPERIF_STATE_STOPPED)
+               goto stream_unlock;
  
         /* Get interrupt status & clear them immediately */
         status = GET_UNIPERIF_ITS(player);
@@ -88,9 +91,7 @@ static irqreturn_t uni_player_irq_handler(int irq, void *dev_id)
                         SET_UNIPERIF_ITM_BCLR_FIFO_ERROR(player);
  
                         /* Stop the player */
-                       snd_pcm_stream_lock(player->substream);
                         snd_pcm_stop(player->substream, SNDRV_PCM_STATE_XRUN);
-                       snd_pcm_stream_unlock(player->substream);
                 }
  
                 ret = IRQ_HANDLED;
@@ -104,9 +105,7 @@ static irqreturn_t uni_player_irq_handler(int irq, void *dev_id)
                 SET_UNIPERIF_ITM_BCLR_DMA_ERROR(player);
  
                 /* Stop the player */
-               snd_pcm_stream_lock(player->substream);
                 snd_pcm_stop(player->substream, SNDRV_PCM_STATE_XRUN);
-               snd_pcm_stream_unlock(player->substream);
  
                 ret = IRQ_HANDLED;
         }
@@ -116,7 +115,8 @@ static irqreturn_t uni_player_irq_handler(int irq, void *dev_id)
                 if (!player->underflow_enabled) {
                         dev_err(player->dev,
                                 "unexpected Underflow recovering\n");
-                       return -EPERM;
+                       ret = -EPERM;
+                       goto stream_unlock;
                 }
                 /* Read the underflow recovery duration */
                 tmp = GET_UNIPERIF_STATUS_1_UNDERFLOW_DURATION(player);
@@ -138,13 +138,16 @@ static irqreturn_t uni_player_irq_handler(int irq, void *dev_id)
                 dev_err(player->dev, "Underflow recovery failed\n");
  
                 /* Stop the player */
-               snd_pcm_stream_lock(player->substream);
                 snd_pcm_stop(player->substream, SNDRV_PCM_STATE_XRUN);
-               snd_pcm_stream_unlock(player->substream);
  
                 ret = IRQ_HANDLED;
         }
  
+stream_unlock:
+       snd_pcm_stream_unlock(player->substream);
+irq_spin_unlock:
+       spin_unlock(&player->irq_lock);
+
         return ret;
  }
  
@@ -588,6 +591,7 @@ static int uni_player_ctl_iec958_put(struct snd_kcontrol *kcontrol,
         struct sti_uniperiph_data *priv = snd_soc_dai_get_drvdata(dai);
         struct uniperif *player = priv->dai_data.uni;
         struct snd_aes_iec958 *iec958 =  &player->stream_settings.iec958;
+       unsigned long flags;
  
         mutex_lock(&player->ctrl_lock);
         iec958->status[0] = ucontrol->value.iec958.status[0];
@@ -596,12 +600,14 @@ static int uni_player_ctl_iec958_put(struct snd_kcontrol *kcontrol,
         iec958->status[3] = ucontrol->value.iec958.status[3];
         mutex_unlock(&player->ctrl_lock);
  
+       spin_lock_irqsave(&player->irq_lock, flags);
         if (player->substream && player->substream->runtime)
                 uni_player_set_channel_status(player,
                                               player->substream->runtime);
         else
                 uni_player_set_channel_status(player, NULL);
  
+       spin_unlock_irqrestore(&player->irq_lock, flags);
         return 0;
  }
  
@@ -686,9 +692,12 @@ static int uni_player_startup(struct snd_pcm_substream *substream,
  {
         struct sti_uniperiph_data *priv = snd_soc_dai_get_drvdata(dai);
         struct uniperif *player = priv->dai_data.uni;
+       unsigned long flags;
         int ret;
  
+       spin_lock_irqsave(&player->irq_lock, flags);
         player->substream = substream;
+       spin_unlock_irqrestore(&player->irq_lock, flags);
  
         player->clk_adj = 0;
  
@@ -986,12 +995,15 @@ static void uni_player_shutdown(struct snd_pcm_substream *substream,
  {
         struct sti_uniperiph_data *priv = snd_soc_dai_get_drvdata(dai);
         struct uniperif *player = priv->dai_data.uni;
+       unsigned long flags;
  
+       spin_lock_irqsave(&player->irq_lock, flags);
         if (player->state != UNIPERIF_STATE_STOPPED)
                 /* Stop the player */
                 uni_player_stop(player);
  
         player->substream = NULL;
+       spin_unlock_irqrestore(&player->irq_lock, flags);
  }
  
  static int uni_player_parse_dt_audio_glue(struct platform_device *pdev,
@@ -1096,6 +1108,7 @@ int uni_player_init(struct platform_device *pdev,
         }
  
         mutex_init(&player->ctrl_lock);
+       spin_lock_init(&player->irq_lock);
  
         /* Ensure that disabled by default */
         SET_UNIPERIF_CONFIG_BACK_STALL_REQ_DISABLE(player);
diff --git a/sound/soc/sti/uniperif_reader.c b/sound/soc/sti/uniperif_reader.c

index 5992c6ab3833ef60c15e21656fd7f5731b43c2d3..ee0055e608529195fda9b76a4226ebf752ead6e3 100644 (file)
--- a/sound/soc/sti/uniperif_reader.c
+++ b/sound/soc/sti/uniperif_reader.c
@@ -46,10 +46,15 @@ static irqreturn_t uni_reader_irq_handler(int irq, void *dev_id)
         struct uniperif *reader = dev_id;
         unsigned int status;
  
+       spin_lock(&reader->irq_lock);
+       if (!reader->substream)
+               goto irq_spin_unlock;
+
+       snd_pcm_stream_lock(reader->substream);
         if (reader->state == UNIPERIF_STATE_STOPPED) {
                 /* Unexpected IRQ: do nothing */
                 dev_warn(reader->dev, "unexpected IRQ\n");
-               return IRQ_HANDLED;
+               goto stream_unlock;
         }
  
         /* Get interrupt status & clear them immediately */
@@ -60,13 +65,16 @@ static irqreturn_t uni_reader_irq_handler(int irq, void *dev_id)
         if (unlikely(status & UNIPERIF_ITS_FIFO_ERROR_MASK(reader))) {
                 dev_err(reader->dev, "FIFO error detected\n");
  
-               snd_pcm_stream_lock(reader->substream);
                 snd_pcm_stop(reader->substream, SNDRV_PCM_STATE_XRUN);
-               snd_pcm_stream_unlock(reader->substream);
  
-               return IRQ_HANDLED;
+               ret = IRQ_HANDLED;
         }
  
+stream_unlock:
+       snd_pcm_stream_unlock(reader->substream);
+irq_spin_unlock:
+       spin_unlock(&reader->irq_lock);
+
         return ret;
  }
  
@@ -347,8 +355,13 @@ static int uni_reader_startup(struct snd_pcm_substream *substream,
  {
         struct sti_uniperiph_data *priv = snd_soc_dai_get_drvdata(dai);
         struct uniperif *reader = priv->dai_data.uni;
+       unsigned long flags;
         int ret;
  
+       spin_lock_irqsave(&reader->irq_lock, flags);
+       reader->substream = substream;
+       spin_unlock_irqrestore(&reader->irq_lock, flags);
+
         if (!UNIPERIF_TYPE_IS_TDM(reader))
                 return 0;
  
@@ -373,11 +386,15 @@ static void uni_reader_shutdown(struct snd_pcm_substream *substream,
  {
         struct sti_uniperiph_data *priv = snd_soc_dai_get_drvdata(dai);
         struct uniperif *reader = priv->dai_data.uni;
+       unsigned long flags;
  
+       spin_lock_irqsave(&reader->irq_lock, flags);
         if (reader->state != UNIPERIF_STATE_STOPPED) {
                 /* Stop the reader */
                 uni_reader_stop(reader);
         }
+       reader->substream = NULL;
+       spin_unlock_irqrestore(&reader->irq_lock, flags);
  }
  
  static const struct snd_soc_dai_ops uni_reader_dai_ops = {
@@ -412,6 +429,8 @@ int uni_reader_init(struct platform_device *pdev,
                 return -EBUSY;
         }
  
+       spin_lock_init(&reader->irq_lock);
+
         return 0;
  }
  EXPORT_SYMBOL_GPL(uni_reader_init);
diff --git a/sound/soc/sunxi/sun8i-codec.c b/sound/soc/sunxi/sun8i-codec.c

index b92bdc8361af3a118b0d944585e3076ae1e8c947..7527ba29a5a0ea6eb9c6498d8d0293ad4aead18a 100644 (file)
--- a/sound/soc/sunxi/sun8i-codec.c
+++ b/sound/soc/sunxi/sun8i-codec.c
@@ -259,25 +259,20 @@ static int sun8i_codec_hw_params(struct snd_pcm_substream *substream,
         return 0;
  }
  
-static const struct snd_kcontrol_new sun8i_output_left_mixer_controls[] = {
-       SOC_DAPM_SINGLE("LSlot 0", SUN8I_DAC_MXR_SRC,
-                       SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_AIF1DA0L, 1, 0),
-       SOC_DAPM_SINGLE("LSlot 1", SUN8I_DAC_MXR_SRC,
-                       SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_AIF1DA1L, 1, 0),
-       SOC_DAPM_SINGLE("DACL", SUN8I_DAC_MXR_SRC,
-                       SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_AIF2DACL, 1, 0),
-       SOC_DAPM_SINGLE("ADCL", SUN8I_DAC_MXR_SRC,
-                       SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_ADCL, 1, 0),
-};
-
-static const struct snd_kcontrol_new sun8i_output_right_mixer_controls[] = {
-       SOC_DAPM_SINGLE("RSlot 0", SUN8I_DAC_MXR_SRC,
+static const struct snd_kcontrol_new sun8i_dac_mixer_controls[] = {
+       SOC_DAPM_DOUBLE("AIF1 Slot 0 Digital DAC Playback Switch",
+                       SUN8I_DAC_MXR_SRC,
+                       SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_AIF1DA0L,
                         SUN8I_DAC_MXR_SRC_DACR_MXR_SRC_AIF1DA0R, 1, 0),
-       SOC_DAPM_SINGLE("RSlot 1", SUN8I_DAC_MXR_SRC,
+       SOC_DAPM_DOUBLE("AIF1 Slot 1 Digital DAC Playback Switch",
+                       SUN8I_DAC_MXR_SRC,
+                       SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_AIF1DA1L,
                         SUN8I_DAC_MXR_SRC_DACR_MXR_SRC_AIF1DA1R, 1, 0),
-       SOC_DAPM_SINGLE("DACR", SUN8I_DAC_MXR_SRC,
+       SOC_DAPM_DOUBLE("AIF2 Digital DAC Playback Switch", SUN8I_DAC_MXR_SRC,
+                       SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_AIF2DACL,
                         SUN8I_DAC_MXR_SRC_DACR_MXR_SRC_AIF2DACR, 1, 0),
-       SOC_DAPM_SINGLE("ADCR", SUN8I_DAC_MXR_SRC,
+       SOC_DAPM_DOUBLE("ADC Digital DAC Playback Switch", SUN8I_DAC_MXR_SRC,
+                       SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_ADCL,
                         SUN8I_DAC_MXR_SRC_DACR_MXR_SRC_ADCR, 1, 0),
  };
  
@@ -286,19 +281,21 @@ static const struct snd_soc_dapm_widget sun8i_codec_dapm_widgets[] = {
         SND_SOC_DAPM_SUPPLY("DAC", SUN8I_DAC_DIG_CTRL, SUN8I_DAC_DIG_CTRL_ENDA,
                             0, NULL, 0),
  
-       /* Analog DAC */
-       SND_SOC_DAPM_DAC("Digital Left DAC", "Playback", SUN8I_AIF1_DACDAT_CTRL,
-                        SUN8I_AIF1_DACDAT_CTRL_AIF1_DA0L_ENA, 0),
-       SND_SOC_DAPM_DAC("Digital Right DAC", "Playback", SUN8I_AIF1_DACDAT_CTRL,
-                        SUN8I_AIF1_DACDAT_CTRL_AIF1_DA0R_ENA, 0),
+       /* Analog DAC AIF */
+       SND_SOC_DAPM_AIF_IN("AIF1 Slot 0 Left", "Playback", 0,
+                           SUN8I_AIF1_DACDAT_CTRL,
+                           SUN8I_AIF1_DACDAT_CTRL_AIF1_DA0L_ENA, 0),
+       SND_SOC_DAPM_AIF_IN("AIF1 Slot 0 Right", "Playback", 0,
+                           SUN8I_AIF1_DACDAT_CTRL,
+                           SUN8I_AIF1_DACDAT_CTRL_AIF1_DA0R_ENA, 0),
  
         /* DAC Mixers */
-       SND_SOC_DAPM_MIXER("Left DAC Mixer", SND_SOC_NOPM, 0, 0,
-                          sun8i_output_left_mixer_controls,
-                          ARRAY_SIZE(sun8i_output_left_mixer_controls)),
-       SND_SOC_DAPM_MIXER("Right DAC Mixer", SND_SOC_NOPM, 0, 0,
-                          sun8i_output_right_mixer_controls,
-                          ARRAY_SIZE(sun8i_output_right_mixer_controls)),
+       SND_SOC_DAPM_MIXER("Left Digital DAC Mixer", SND_SOC_NOPM, 0, 0,
+                          sun8i_dac_mixer_controls,
+                          ARRAY_SIZE(sun8i_dac_mixer_controls)),
+       SND_SOC_DAPM_MIXER("Right Digital DAC Mixer", SND_SOC_NOPM, 0, 0,
+                          sun8i_dac_mixer_controls,
+                          ARRAY_SIZE(sun8i_dac_mixer_controls)),
  
         /* Clocks */
         SND_SOC_DAPM_SUPPLY("MODCLK AFI1", SUN8I_MOD_CLK_ENA,
@@ -321,8 +318,6 @@ static const struct snd_soc_dapm_widget sun8i_codec_dapm_widgets[] = {
                             SUN8I_MOD_RST_CTL_AIF1, 0, NULL, 0),
         SND_SOC_DAPM_SUPPLY("RST DAC", SUN8I_MOD_RST_CTL,
                             SUN8I_MOD_RST_CTL_DAC, 0, NULL, 0),
-
-       SND_SOC_DAPM_OUTPUT("HP"),
  };
  
  static const struct snd_soc_dapm_route sun8i_codec_dapm_routes[] = {
@@ -338,16 +333,14 @@ static const struct snd_soc_dapm_route sun8i_codec_dapm_routes[] = {
         { "DAC", NULL, "MODCLK DAC" },
  
         /* DAC Routes */
-       { "Digital Left DAC", NULL, "DAC" },
-       { "Digital Right DAC", NULL, "DAC" },
+       { "AIF1 Slot 0 Right", NULL, "DAC" },
+       { "AIF1 Slot 0 Left", NULL, "DAC" },
  
         /* DAC Mixer Routes */
-       { "Left DAC Mixer", "LSlot 0", "Digital Left DAC"},
-       { "Right DAC Mixer", "RSlot 0", "Digital Right DAC"},
-
-       /* End of route : HP out */
-       { "HP", NULL, "Left DAC Mixer" },
-       { "HP", NULL, "Right DAC Mixer" },
+       { "Left Digital DAC Mixer", "AIF1 Slot 0 Digital DAC Playback Switch",
+         "AIF1 Slot 0 Left"},
+       { "Right Digital DAC Mixer", "AIF1 Slot 0 Digital DAC Playback Switch",
+         "AIF1 Slot 0 Right"},
  };
  
  static struct snd_soc_dai_ops sun8i_codec_dai_ops = {
diff --git a/sound/x86/Kconfig b/sound/x86/Kconfig

index 84c8f8fc597cd6046d17ee27839024b98329e33d..8adf4d1bd46e71237e6827f0eda8fc89e7b40ea1 100644 (file)
--- a/sound/x86/Kconfig
+++ b/sound/x86/Kconfig
@@ -1,6 +1,7 @@
  menuconfig SND_X86
-       tristate "X86 sound devices"
+       bool "X86 sound devices"
         depends on X86
+       default y
         ---help---
           X86 sound devices that don't fall under SoC or PCI categories
  
diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h

index 122153b16ea4eeba1e84bf30a71a3dda199ae534..390d7c9685fd6107c83be2296ead9cb198b571a3 100644 (file)
--- a/tools/include/linux/filter.h
+++ b/tools/include/linux/filter.h
@@ -168,6 +168,16 @@
                 .off   = OFF,                                   \
                 .imm   = 0 })
  
+/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */
+
+#define BPF_STX_XADD(SIZE, DST, SRC, OFF)                      \
+       ((struct bpf_insn) {                                    \
+               .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD,   \
+               .dst_reg = DST,                                 \
+               .src_reg = SRC,                                 \
+               .off   = OFF,                                   \
+               .imm   = 0 })
+
  /* Memory store, *(uint *) (dst_reg + off16) = imm32 */
  
  #define BPF_ST_MEM(SIZE, DST, OFF, IMM)                                \
diff --git a/tools/include/uapi/linux/bpf_perf_event.h b/tools/include/uapi/linux/bpf_perf_event.h

new file mode 100644 (file)

index 0000000..0674272
--- /dev/null
+++ b/tools/include/uapi/linux/bpf_perf_event.h
@@ -0,0 +1,18 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__
+#define _UAPI__LINUX_BPF_PERF_EVENT_H__
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+
+struct bpf_perf_event_data {
+       struct pt_regs regs;
+       __u64 sample_period;
+};
+
+#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */
diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c

index 11c8d9bc762ef0c4bde99dec4292e84ff00d3477..5d19fdf80292c226769a91ccef519a47b3788b2b 100644 (file)
--- a/tools/lguest/lguest.c
+++ b/tools/lguest/lguest.c
@@ -1387,7 +1387,7 @@ static bool pci_data_iowrite(u16 port, u32 mask, u32 val)
                 /* Allow writing to any other BAR, or expansion ROM */
                 iowrite(portoff, val, mask, &d->config_words[reg]);
                 return true;
-               /* We let them overide latency timer and cacheline size */
+               /* We let them override latency timer and cacheline size */
         } else if (&d->config_words[reg] == (void *)&d->config.cacheline_size) {
                 /* Only let them change the first two fields. */
                 if (mask == 0xFFFFFFFF)
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile

index e2efddf1023177c202d626257c8466f3cb8c40c3..1f5300e56b44dc7bca0b269261d5f7987eb564b6 100644 (file)
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -132,7 +132,7 @@ else
    Q = @
  endif
  
-# Disable command line variables (CFLAGS) overide from top
+# Disable command line variables (CFLAGS) override from top
  # level Makefile (perf), otherwise build Makefile will get
  # the same command line setup.
  MAKEOVERRIDES=
diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile

index 47076b15eebeaa5b54583761130b10ecef2fc0aa..9b8555ea3459c85bef282dad5166700771f0e5ed 100644 (file)
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -135,7 +135,7 @@ else
    Q = @
  endif
  
-# Disable command line variables (CFLAGS) overide from top
+# Disable command line variables (CFLAGS) override from top
  # level Makefile (perf), otherwise build Makefile will get
  # the same command line setup.
  MAKEOVERRIDES=
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h

index 66342804161c80ea611b3dfa554a602fadc4213e..0c03538df74c01a1ecedc353e21b6c81083ee1e1 100644 (file)
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -140,7 +140,7 @@ struct pevent_plugin_option {
   *   struct pevent_plugin_option PEVENT_PLUGIN_OPTIONS[] = {
   *     {
   *             .name = "option-name",
- *             .plugin_alias = "overide-file-name", (optional)
+ *             .plugin_alias = "override-file-name", (optional)
   *             .description = "description of option to show users",
   *     },
   *     {
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c

index 4cfdbb5b696783cbeb097f04220c180e7e87e82a..066086dd59a8017e293993a50d2f432d47441cfe 100644 (file)
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -805,11 +805,20 @@ static struct rela *find_switch_table(struct objtool_file *file,
                      insn->jump_dest->offset > orig_insn->offset))
                     break;
  
+               /* look for a relocation which references .rodata */
                 text_rela = find_rela_by_dest_range(insn->sec, insn->offset,
                                                     insn->len);
-               if (text_rela && text_rela->sym == file->rodata->sym)
-                       return find_rela_by_dest(file->rodata,
-                                                text_rela->addend);
+               if (!text_rela || text_rela->sym != file->rodata->sym)
+                       continue;
+
+               /*
+                * Make sure the .rodata address isn't associated with a
+                * symbol.  gcc jump tables are anonymous data.
+                */
+               if (find_symbol_containing(file->rodata, text_rela->addend))
+                       continue;
+
+               return find_rela_by_dest(file->rodata, text_rela->addend);
         }
  
         return NULL;
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c

index 0d7983ac63ef9e300110d9a6ec6771a75378784e..d897702ce7427804da2c09387f674077f22accc5 100644 (file)
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -85,6 +85,18 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset)
         return NULL;
  }
  
+struct symbol *find_symbol_containing(struct section *sec, unsigned long offset)
+{
+       struct symbol *sym;
+
+       list_for_each_entry(sym, &sec->symbol_list, list)
+               if (sym->type != STT_SECTION &&
+                   offset >= sym->offset && offset < sym->offset + sym->len)
+                       return sym;
+
+       return NULL;
+}
+
  struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset,
                                      unsigned int len)
  {
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h

index aa1ff6596684f9304d0dd4bd3165f819b4dcdaf7..731973e1a3f5eb6bb1d6e67890c54c1440f237f3 100644 (file)
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -79,6 +79,7 @@ struct elf {
  struct elf *elf_open(const char *name);
  struct section *find_section_by_name(struct elf *elf, const char *name);
  struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
+struct symbol *find_symbol_containing(struct section *sec, unsigned long offset);
  struct rela *find_rela_by_dest(struct section *sec, unsigned long offset);
  struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset,
                                      unsigned int len);
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c

index 273f21fa32b55999ab1271e6d3cc316c3a257a41..7aa57225cbf7971b08db04c27151494c071b163b 100644 (file)
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -130,6 +130,12 @@ static struct arch architectures[] = {
                 .name = "powerpc",
                 .init = powerpc__annotate_init,
         },
+       {
+               .name = "s390",
+               .objdump =  {
+                       .comment_char = '#',
+               },
+       },
  };
  
  static void ins__delete(struct ins_operands *ops)
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c

index 7913363bde5c0407fded864f62a839c8b28056ea..4f3c758d875d6ce6855db7fa0731436f9bb06671 100644 (file)
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -31,7 +31,7 @@
  #error Instruction buffer size too small
  #endif
  
-/* Based on branch_type() from perf_event_intel_lbr.c */
+/* Based on branch_type() from arch/x86/events/intel/lbr.c */
  static void intel_pt_insn_decoder(struct insn *insn,
                                   struct intel_pt_insn *intel_pt_insn)
  {
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c

index 70e389bc4af71aa8f18ae67507fb65b5093a7f98..9b4d8ba22fed85f1f2bef6f5d47dc88cbb4df5d1 100644 (file)
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -202,7 +202,7 @@ void symbols__fixup_end(struct rb_root *symbols)
  
         /* Last entry */
         if (curr->end == curr->start)
-               curr->end = roundup(curr->start, 4096);
+               curr->end = roundup(curr->start, 4096) + 4096;
  }
  
  void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
diff --git a/tools/power/cpupower/utils/helpers/cpuid.c b/tools/power/cpupower/utils/helpers/cpuid.c

index 93b0aa74ca03bada7db24ae827b902ce09591bef..39c2c7d067bba55cae5cf19983128369af1bb81c 100644 (file)
--- a/tools/power/cpupower/utils/helpers/cpuid.c
+++ b/tools/power/cpupower/utils/helpers/cpuid.c
@@ -156,6 +156,7 @@ out:
                                          */
                         case 0x2C:      /* Westmere EP - Gulftown */
                                 cpu_info->caps |= CPUPOWER_CAP_HAS_TURBO_RATIO;
+                               break;
                         case 0x2A:      /* SNB */
                         case 0x2D:      /* SNB Xeon */
                         case 0x3A:      /* IVB */
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8

index fedca32853262152cb7e1028139728c4aa677b11..ccf2a69365ccbb2a7cd27fa03089be54ce506c1a 100644 (file)
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -100,6 +100,8 @@ The system configuration dump (if --quiet is not used) is followed by statistics
  \fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states.  These numbers are from hardware residency counters.
  \fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor.
  \fBPkgTtmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
+\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms.
+\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz.
  \fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states.  These numbers are from hardware residency counters.
  \fBPkgWatt\fP Watts consumed by the whole package.
  \fBCorWatt\fP Watts consumed by the core part of the package.
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c

index 828dccd3f01eaf324bf2d3d2c674a585417cd07a..b11294730771bed6766f77138460813f8abbfce8 100644 (file)
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1142,7 +1142,7 @@ delta_thread(struct thread_data *new, struct thread_data *old,
                  * it is possible for mperf's non-halted cycles + idle states
                  * to exceed TSC's all cycles: show c1 = 0% in that case.
                  */
-               if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
+               if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
                         old->c1 = 0;
                 else {
                         /* normal case, derive c1 */
@@ -2485,8 +2485,10 @@ int snapshot_gfx_mhz(void)
  
         if (fp == NULL)
                 fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
-       else
+       else {
                 rewind(fp);
+               fflush(fp);
+       }
  
         retval = fscanf(fp, "%d", &gfx_cur_mhz);
         if (retval != 1)
@@ -3111,7 +3113,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
                 return 0;
  
         fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
-                       "(high 0x%x guar 0x%x eff 0x%x low 0x%x)\n",
+                       "(high %d guar %d eff %d low %d)\n",
                         cpu, msr,
                         (unsigned int)HWP_HIGHEST_PERF(msr),
                         (unsigned int)HWP_GUARANTEED_PERF(msr),
@@ -3122,7 +3124,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
                 return 0;
  
         fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
-                       "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x pkg 0x%x)\n",
+                       "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
                         cpu, msr,
                         (unsigned int)(((msr) >> 0) & 0xff),
                         (unsigned int)(((msr) >> 8) & 0xff),
@@ -3136,7 +3138,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
                         return 0;
  
                 fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
-                       "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x)\n",
+                       "(min %d max %d des %d epp 0x%x window 0x%x)\n",
                         cpu, msr,
                         (unsigned int)(((msr) >> 0) & 0xff),
                         (unsigned int)(((msr) >> 8) & 0xff),
@@ -3353,17 +3355,19 @@ void rapl_probe(unsigned int family, unsigned int model)
         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
-               do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
+               do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
                 BIC_PRESENT(BIC_PKG__);
                 BIC_PRESENT(BIC_RAM__);
                 if (rapl_joules) {
                         BIC_PRESENT(BIC_Pkg_J);
                         BIC_PRESENT(BIC_Cor_J);
                         BIC_PRESENT(BIC_RAM_J);
+                       BIC_PRESENT(BIC_GFX_J);
                 } else {
                         BIC_PRESENT(BIC_PkgWatt);
                         BIC_PRESENT(BIC_CorWatt);
                         BIC_PRESENT(BIC_RAMWatt);
+                       BIC_PRESENT(BIC_GFXWatt);
                 }
                 break;
         case INTEL_FAM6_HASWELL_X:      /* HSX */
@@ -3478,7 +3482,7 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model)
  int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
  {
         unsigned long long msr;
-       unsigned int dts;
+       unsigned int dts, dts2;
         int cpu;
  
         if (!(do_dts || do_ptm))
@@ -3503,7 +3507,6 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
                 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
                         cpu, msr, tcc_activation_temp - dts);
  
-#ifdef THERM_DEBUG
                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
                         return 0;
  
@@ -3511,11 +3514,10 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
                 dts2 = (msr >> 8) & 0x7F;
                 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
                         cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
-#endif
         }
  
  
-       if (do_dts) {
+       if (do_dts && debug) {
                 unsigned int resolution;
  
                 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
@@ -3526,7 +3528,6 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
                 fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
                         cpu, msr, tcc_activation_temp - dts, resolution);
  
-#ifdef THERM_DEBUG
                 if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
                         return 0;
  
@@ -3534,7 +3535,6 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
                 dts2 = (msr >> 8) & 0x7F;
                 fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
                         cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
-#endif
         }
  
         return 0;
@@ -4578,7 +4578,7 @@ int get_and_dump_counters(void)
  }
  
  void print_version() {
-       fprintf(outf, "turbostat version 17.02.24"
+       fprintf(outf, "turbostat version 17.04.12"
                 " - Len Brown <lenb@kernel.org>\n");
  }
  
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl

index 6e4eb2fc2d1e78edc356692dcadfe3bfaebd65ec..0c8b61f8398edace8b4d7be42e50b8638679520d 100755 (executable)
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -1880,6 +1880,7 @@ sub get_grub_index {
  sub wait_for_input
  {
      my ($fp, $time) = @_;
+    my $start_time;
      my $rin;
      my $rout;
      my $nr;
@@ -1895,17 +1896,22 @@ sub wait_for_input
      vec($rin, fileno($fp), 1) = 1;
      vec($rin, fileno(\*STDIN), 1) = 1;
  
+    $start_time = time;
+
      while (1) {
         $nr = select($rout=$rin, undef, undef, $time);
  
-       if ($nr <= 0) {
-           return undef;
-       }
+       last if ($nr <= 0);
  
         # copy data from stdin to the console
         if (vec($rout, fileno(\*STDIN), 1) == 1) {
-           sysread(\*STDIN, $buf, 1000);
-           syswrite($fp, $buf, 1000);
+           $nr = sysread(\*STDIN, $buf, 1000);
+           syswrite($fp, $buf, $nr) if ($nr > 0);
+       }
+
+       # The timeout is based on time waiting for the fp data
+       if (vec($rout, fileno($fp), 1) != 1) {
+           last if (defined($time) && (time - $start_time > $time));
             next;
         }
  
@@ -1917,12 +1923,11 @@ sub wait_for_input
             last if ($ch eq "\n");
         }
  
-       if (!length($line)) {
-           return undef;
-       }
+       last if (!length($line));
  
         return $line;
      }
+    return undef;
  }
  
  sub reboot_to {
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile

index f11315bedefc3d68152bef22da0f8ab60f8c5be6..6a9480c03cbdfce0ae5e29e32f2cedcc342d66e1 100644 (file)
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -1,6 +1,7 @@
  
  CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address
-LDFLAGS += -lpthread -lurcu
+LDFLAGS += -fsanitize=address
+LDLIBS+= -lpthread -lurcu
  TARGETS = main idr-test multiorder
  CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o
  OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
@@ -10,23 +11,25 @@ ifndef SHIFT
         SHIFT=3
  endif
  
+ifeq ($(BUILD), 32)
+       CFLAGS += -m32
+       LDFLAGS += -m32
+endif
+
  targets: mapshift $(TARGETS)
  
  main:  $(OFILES)
-       $(CC) $(CFLAGS) $(LDFLAGS) $^ -o main
  
  idr-test: idr-test.o $(CORE_OFILES)
-       $(CC) $(CFLAGS) $(LDFLAGS) $^ -o idr-test
  
  multiorder: multiorder.o $(CORE_OFILES)
-       $(CC) $(CFLAGS) $(LDFLAGS) $^ -o multiorder
  
  clean:
         $(RM) $(TARGETS) *.o radix-tree.c idr.c generated/map-shift.h
  
  vpath %.c ../../lib
  
-$(OFILES): *.h */*.h generated/map-shift.h \
+$(OFILES): Makefile *.h */*.h generated/map-shift.h \
         ../../include/linux/*.h \
         ../../include/asm/*.h \
         ../../../include/linux/radix-tree.h \
@@ -41,7 +44,7 @@ idr.c: ../../../lib/idr.c
  .PHONY: mapshift
  
  mapshift:
-       @if ! grep -qw $(SHIFT) generated/map-shift.h; then             \
+       @if ! grep -qws $(SHIFT) generated/map-shift.h; then            \
                 echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" >          \
                                 generated/map-shift.h;                  \
         fi
diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c

index 9b09ddfe462fd3b2ea782805560c349e720a4637..99c40f3ed1337f5e1bd6e6b2fd4d88dea8ec8064 100644 (file)
--- a/tools/testing/radix-tree/benchmark.c
+++ b/tools/testing/radix-tree/benchmark.c
@@ -17,6 +17,9 @@
  #include <time.h>
  #include "test.h"
  
+#define for_each_index(i, base, order) \
+               for (i = base; i < base + (1 << order); i++)
+
  #define NSEC_PER_SEC   1000000000L
  
  static long long benchmark_iter(struct radix_tree_root *root, bool tagged)
@@ -57,27 +60,176 @@ again:
         return nsec;
  }
  
+static void benchmark_insert(struct radix_tree_root *root,
+                            unsigned long size, unsigned long step, int order)
+{
+       struct timespec start, finish;
+       unsigned long index;
+       long long nsec;
+
+       clock_gettime(CLOCK_MONOTONIC, &start);
+
+       for (index = 0 ; index < size ; index += step)
+               item_insert_order(root, index, order);
+
+       clock_gettime(CLOCK_MONOTONIC, &finish);
+
+       nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+              (finish.tv_nsec - start.tv_nsec);
+
+       printv(2, "Size: %8ld, step: %8ld, order: %d, insertion: %15lld ns\n",
+               size, step, order, nsec);
+}
+
+static void benchmark_tagging(struct radix_tree_root *root,
+                            unsigned long size, unsigned long step, int order)
+{
+       struct timespec start, finish;
+       unsigned long index;
+       long long nsec;
+
+       clock_gettime(CLOCK_MONOTONIC, &start);
+
+       for (index = 0 ; index < size ; index += step)
+               radix_tree_tag_set(root, index, 0);
+
+       clock_gettime(CLOCK_MONOTONIC, &finish);
+
+       nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+              (finish.tv_nsec - start.tv_nsec);
+
+       printv(2, "Size: %8ld, step: %8ld, order: %d, tagging: %17lld ns\n",
+               size, step, order, nsec);
+}
+
+static void benchmark_delete(struct radix_tree_root *root,
+                            unsigned long size, unsigned long step, int order)
+{
+       struct timespec start, finish;
+       unsigned long index, i;
+       long long nsec;
+
+       clock_gettime(CLOCK_MONOTONIC, &start);
+
+       for (index = 0 ; index < size ; index += step)
+               for_each_index(i, index, order)
+                       item_delete(root, i);
+
+       clock_gettime(CLOCK_MONOTONIC, &finish);
+
+       nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+              (finish.tv_nsec - start.tv_nsec);
+
+       printv(2, "Size: %8ld, step: %8ld, order: %d, deletion: %16lld ns\n",
+               size, step, order, nsec);
+}
+
  static void benchmark_size(unsigned long size, unsigned long step, int order)
  {
         RADIX_TREE(tree, GFP_KERNEL);
         long long normal, tagged;
-       unsigned long index;
  
-       for (index = 0 ; index < size ; index += step) {
-               item_insert_order(&tree, index, order);
-               radix_tree_tag_set(&tree, index, 0);
-       }
+       benchmark_insert(&tree, size, step, order);
+       benchmark_tagging(&tree, size, step, order);
  
         tagged = benchmark_iter(&tree, true);
         normal = benchmark_iter(&tree, false);
  
-       printv(2, "Size %ld, step %6ld, order %d tagged %10lld ns, normal %10lld ns\n",
-               size, step, order, tagged, normal);
+       printv(2, "Size: %8ld, step: %8ld, order: %d, tagged iteration: %8lld ns\n",
+               size, step, order, tagged);
+       printv(2, "Size: %8ld, step: %8ld, order: %d, normal iteration: %8lld ns\n",
+               size, step, order, normal);
+
+       benchmark_delete(&tree, size, step, order);
  
         item_kill_tree(&tree);
         rcu_barrier();
  }
  
+static long long  __benchmark_split(unsigned long index,
+                                   int old_order, int new_order)
+{
+       struct timespec start, finish;
+       long long nsec;
+       RADIX_TREE(tree, GFP_ATOMIC);
+
+       item_insert_order(&tree, index, old_order);
+
+       clock_gettime(CLOCK_MONOTONIC, &start);
+       radix_tree_split(&tree, index, new_order);
+       clock_gettime(CLOCK_MONOTONIC, &finish);
+       nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+              (finish.tv_nsec - start.tv_nsec);
+
+       item_kill_tree(&tree);
+
+       return nsec;
+
+}
+
+static void benchmark_split(unsigned long size, unsigned long step)
+{
+       int i, j, idx;
+       long long nsec = 0;
+
+
+       for (idx = 0; idx < size; idx += step) {
+               for (i = 3; i < 11; i++) {
+                       for (j = 0; j < i; j++) {
+                               nsec += __benchmark_split(idx, i, j);
+                       }
+               }
+       }
+
+       printv(2, "Size %8ld, step %8ld, split time %10lld ns\n",
+                       size, step, nsec);
+
+}
+
+static long long  __benchmark_join(unsigned long index,
+                            unsigned order1, unsigned order2)
+{
+       unsigned long loc;
+       struct timespec start, finish;
+       long long nsec;
+       void *item, *item2 = item_create(index + 1, order1);
+       RADIX_TREE(tree, GFP_KERNEL);
+
+       item_insert_order(&tree, index, order2);
+       item = radix_tree_lookup(&tree, index);
+
+       clock_gettime(CLOCK_MONOTONIC, &start);
+       radix_tree_join(&tree, index + 1, order1, item2);
+       clock_gettime(CLOCK_MONOTONIC, &finish);
+       nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+               (finish.tv_nsec - start.tv_nsec);
+
+       loc = find_item(&tree, item);
+       if (loc == -1)
+               free(item);
+
+       item_kill_tree(&tree);
+
+       return nsec;
+}
+
+static void benchmark_join(unsigned long step)
+{
+       int i, j, idx;
+       long long nsec = 0;
+
+       for (idx = 0; idx < 1 << 10; idx += step) {
+               for (i = 1; i < 15; i++) {
+                       for (j = 0; j < i; j++) {
+                               nsec += __benchmark_join(idx, i, j);
+                       }
+               }
+       }
+
+       printv(2, "Size %8d, step %8ld, join time %10lld ns\n",
+                       1 << 10, step, nsec);
+}
+
  void benchmark(void)
  {
         unsigned long size[] = {1 << 10, 1 << 20, 0};
@@ -95,4 +247,11 @@ void benchmark(void)
         for (c = 0; size[c]; c++)
                 for (s = 0; step[s]; s++)
                         benchmark_size(size[c], step[s] << 9, 9);
+
+       for (c = 0; size[c]; c++)
+               for (s = 0; step[s]; s++)
+                       benchmark_split(size[c], step[s]);
+
+       for (s = 0; step[s]; s++)
+               benchmark_join(step[s]);
  }
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c

index a26098c6123d1cf99ce2b6669a0e186bd22cedbe..30cd0b296f1a76847122f009c2cd0f2d5b9109f4 100644 (file)
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -153,6 +153,30 @@ void idr_nowait_test(void)
         idr_destroy(&idr);
  }
  
+void idr_get_next_test(void)
+{
+       unsigned long i;
+       int nextid;
+       DEFINE_IDR(idr);
+
+       int indices[] = {4, 7, 9, 15, 65, 128, 1000, 99999, 0};
+
+       for(i = 0; indices[i]; i++) {
+               struct item *item = item_create(indices[i], 0);
+               assert(idr_alloc(&idr, item, indices[i], indices[i+1],
+                                GFP_KERNEL) == indices[i]);
+       }
+
+       for(i = 0, nextid = 0; indices[i]; i++) {
+               idr_get_next(&idr, &nextid);
+               assert(nextid == indices[i]);
+               nextid++;
+       }
+
+       idr_for_each(&idr, item_idr_free, &idr);
+       idr_destroy(&idr);
+}
+
  void idr_checks(void)
  {
         unsigned long i;
@@ -202,6 +226,7 @@ void idr_checks(void)
         idr_alloc_test();
         idr_null_test();
         idr_nowait_test();
+       idr_get_next_test();
  }
  
  /*
@@ -338,7 +363,7 @@ void ida_check_random(void)
  {
         DEFINE_IDA(ida);
         DECLARE_BITMAP(bitmap, 2048);
-       int id;
+       int id, err;
         unsigned int i;
         time_t s = time(NULL);
  
@@ -352,8 +377,11 @@ void ida_check_random(void)
                         ida_remove(&ida, bit);
                 } else {
                         __set_bit(bit, bitmap);
-                       ida_pre_get(&ida, GFP_KERNEL);
-                       assert(!ida_get_new_above(&ida, bit, &id));
+                       do {
+                               ida_pre_get(&ida, GFP_KERNEL);
+                               err = ida_get_new_above(&ida, bit, &id);
+                       } while (err == -ENOMEM);
+                       assert(!err);
                         assert(id == bit);
                 }
         }
@@ -362,6 +390,24 @@ void ida_check_random(void)
                 goto repeat;
  }
  
+void ida_simple_get_remove_test(void)
+{
+       DEFINE_IDA(ida);
+       unsigned long i;
+
+       for (i = 0; i < 10000; i++) {
+               assert(ida_simple_get(&ida, 0, 20000, GFP_KERNEL) == i);
+       }
+       assert(ida_simple_get(&ida, 5, 30, GFP_KERNEL) < 0);
+
+       for (i = 0; i < 10000; i++) {
+               ida_simple_remove(&ida, i);
+       }
+       assert(ida_is_empty(&ida));
+
+       ida_destroy(&ida);
+}
+
  void ida_checks(void)
  {
         DEFINE_IDA(ida);
@@ -428,15 +474,41 @@ void ida_checks(void)
         ida_check_max();
         ida_check_conv();
         ida_check_random();
+       ida_simple_get_remove_test();
  
         radix_tree_cpu_dead(1);
  }
  
+static void *ida_random_fn(void *arg)
+{
+       rcu_register_thread();
+       ida_check_random();
+       rcu_unregister_thread();
+       return NULL;
+}
+
+void ida_thread_tests(void)
+{
+       pthread_t threads[10];
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(threads); i++)
+               if (pthread_create(&threads[i], NULL, ida_random_fn, NULL)) {
+                       perror("creating ida thread");
+                       exit(1);
+               }
+
+       while (i--)
+               pthread_join(threads[i], NULL);
+}
+
  int __weak main(void)
  {
         radix_tree_init();
         idr_checks();
         ida_checks();
+       ida_thread_tests();
+       radix_tree_cpu_dead(1);
         rcu_barrier();
         if (nr_allocated)
                 printf("nr_allocated = %d\n", nr_allocated);
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c

index b829127d56705747a0a74c73e8b11b8cae8ecc27..bc9a78449572f10331a8bbc35070801f307b6caa 100644 (file)
--- a/tools/testing/radix-tree/main.c
+++ b/tools/testing/radix-tree/main.c
@@ -368,6 +368,7 @@ int main(int argc, char **argv)
         iteration_test(0, 10 + 90 * long_run);
         iteration_test(7, 10 + 90 * long_run);
         single_thread_tests(long_run);
+       ida_thread_tests();
  
         /* Free any remaining preallocated nodes */
         radix_tree_cpu_dead(0);
diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c

index d4ff009892456a3b588df788488027da45209ecf..36dcf7d6945dc631ce7b2bd1c95fb0d14f662167 100644 (file)
--- a/tools/testing/radix-tree/tag_check.c
+++ b/tools/testing/radix-tree/tag_check.c
@@ -330,6 +330,34 @@ static void single_check(void)
         item_kill_tree(&tree);
  }
  
+void radix_tree_clear_tags_test(void)
+{
+       unsigned long index;
+       struct radix_tree_node *node;
+       struct radix_tree_iter iter;
+       void **slot;
+
+       RADIX_TREE(tree, GFP_KERNEL);
+
+       item_insert(&tree, 0);
+       item_tag_set(&tree, 0, 0);
+       __radix_tree_lookup(&tree, 0, &node, &slot);
+       radix_tree_clear_tags(&tree, node, slot);
+       assert(item_tag_get(&tree, 0, 0) == 0);
+
+       for (index = 0; index < 1000; index++) {
+               item_insert(&tree, index);
+               item_tag_set(&tree, index, 0);
+       }
+
+       radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+               radix_tree_clear_tags(&tree, iter.node, slot);
+               assert(item_tag_get(&tree, iter.index, 0) == 0);
+       }
+
+       item_kill_tree(&tree);
+}
+
  void tag_check(void)
  {
         single_check();
@@ -347,4 +375,5 @@ void tag_check(void)
         thrash_tags();
         rcu_barrier();
         printv(2, "after thrash_tags: %d allocated\n", nr_allocated);
+       radix_tree_clear_tags_test();
  }
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h

index b30e11d9d271c39ccb284019938876ae2785f7ca..0f8220cc61663ffa2a872db42c96b4e5433ed0a7 100644 (file)
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -36,6 +36,7 @@ void iteration_test(unsigned order, unsigned duration);
  void benchmark(void);
  void idr_checks(void);
  void ida_checks(void);
+void ida_thread_tests(void);
  
  struct item *
  item_tag_set(struct radix_tree_root *root, unsigned long index, int tag);
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile

index 4b498265dae6dc3b52b35818453f2c895809b323..9af09e8099c0aae9fd6acd5d42cc5afb949871e9 100644 (file)
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -1,20 +1,30 @@
  LIBDIR := ../../../lib
-BPFOBJ := $(LIBDIR)/bpf/bpf.o
+BPFDIR := $(LIBDIR)/bpf
+APIDIR := ../../../include/uapi
+GENDIR := ../../../../include/generated
+GENHDR := $(GENDIR)/autoconf.h
  
-CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR)
+ifneq ($(wildcard $(GENHDR)),)
+  GENFLAGS := -DHAVE_GENHDR
+endif
+
+CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS)
+LDLIBS += -lcap
  
  TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map
  
  TEST_PROGS := test_kmod.sh
  
-.PHONY: all clean force
+include ../lib.mk
+
+BPFOBJ := $(OUTPUT)/bpf.o
+
+$(TEST_GEN_PROGS): $(BPFOBJ)
+
+.PHONY: force
  
  # force a rebuild of BPFOBJ when its dependencies are updated
  force:
  
  $(BPFOBJ): force
-       $(MAKE) -C $(dir $(BPFOBJ))
-
-$(test_objs): $(BPFOBJ)
-
-include ../lib.mk
+       $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c

index cada17ac00b8e6b5af37554ea8489be6ffc873a2..20f1871874df54a7d567797c753e379a760e492c 100644 (file)
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -80,8 +80,9 @@ static void test_hashmap(int task, void *data)
         assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == 0);
         key = 2;
         assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
-       key = 1;
-       assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+       key = 3;
+       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+              errno == E2BIG);
  
         /* Check that key = 0 doesn't exist. */
         key = 0;
@@ -110,6 +111,24 @@ static void test_hashmap(int task, void *data)
         close(fd);
  }
  
+static void test_hashmap_sizes(int task, void *data)
+{
+       int fd, i, j;
+
+       for (i = 1; i <= 512; i <<= 1)
+               for (j = 1; j <= 1 << 18; j <<= 1) {
+                       fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j,
+                                           2, map_flags);
+                       if (fd < 0) {
+                               printf("Failed to create hashmap key=%d value=%d '%s'\n",
+                                      i, j, strerror(errno));
+                               exit(1);
+                       }
+                       close(fd);
+                       usleep(10); /* give kernel time to destroy */
+               }
+}
+
  static void test_hashmap_percpu(int task, void *data)
  {
         unsigned int nr_cpus = bpf_num_possible_cpus();
@@ -263,7 +282,7 @@ static void test_arraymap_percpu(int task, void *data)
  {
         unsigned int nr_cpus = bpf_num_possible_cpus();
         int key, next_key, fd, i;
-       long values[nr_cpus];
+       long long values[nr_cpus];
  
         fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
                             sizeof(values[0]), 2, 0);
@@ -317,8 +336,11 @@ static void test_arraymap_percpu(int task, void *data)
  static void test_arraymap_percpu_many_keys(void)
  {
         unsigned int nr_cpus = bpf_num_possible_cpus();
-       unsigned int nr_keys = 20000;
-       long values[nr_cpus];
+       /* nr_keys is not too large otherwise the test stresses percpu
+        * allocator more than anything else
+        */
+       unsigned int nr_keys = 2000;
+       long long values[nr_cpus];
         int key, fd, i;
  
         fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
@@ -419,6 +441,7 @@ static void test_map_stress(void)
  {
         run_parallel(100, test_hashmap, NULL);
         run_parallel(100, test_hashmap_percpu, NULL);
+       run_parallel(100, test_hashmap_sizes, NULL);
  
         run_parallel(100, test_arraymap, NULL);
         run_parallel(100, test_arraymap_percpu, NULL);
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c

index e1f5b9eea1e874ab7f4698a1e20abc5588fe00bb..c848e90b64213128a248c9669a2a2796692c5776 100644 (file)
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -8,6 +8,8 @@
   * License as published by the Free Software Foundation.
   */
  
+#include <asm/types.h>
+#include <linux/types.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>
@@ -28,6 +30,14 @@
  
  #include <bpf/bpf.h>
  
+#ifdef HAVE_GENHDR
+# include "autoconf.h"
+#else
+# if defined(__i386) || defined(__x86_64) || defined(__s390x__) || defined(__aarch64__)
+#  define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
+# endif
+#endif
+
  #include "../../../include/linux/filter.h"
  
  #ifndef ARRAY_SIZE
@@ -37,6 +47,8 @@
  #define MAX_INSNS      512
  #define MAX_FIXUPS     8
  
+#define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS     (1 << 0)
+
  struct bpf_test {
         const char *descr;
         struct bpf_insn insns[MAX_INSNS];
@@ -51,6 +63,7 @@ struct bpf_test {
                 REJECT
         } result, result_unpriv;
         enum bpf_prog_type prog_type;
+       uint8_t flags;
  };
  
  /* Note we want this to be 64 bit aligned so that the end of our array is
@@ -2429,6 +2442,30 @@ static struct bpf_test tests[] = {
                 .result = ACCEPT,
                 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
         },
+       {
+               "direct packet access: test15 (spill with xadd)",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data)),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data_end)),
+                       BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8),
+                       BPF_MOV64_IMM(BPF_REG_5, 4096),
+                       BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+                       BPF_STX_XADD(BPF_DW, BPF_REG_4, BPF_REG_5, 0),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
+                       BPF_STX_MEM(BPF_W, BPF_REG_2, BPF_REG_5, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .errstr = "R2 invalid mem access 'inv'",
+               .result = REJECT,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       },
         {
                 "helper access to packet: test1, valid packet_ptr range",
                 .insns = {
@@ -2932,6 +2969,7 @@ static struct bpf_test tests[] = {
                 .errstr_unpriv = "R0 pointer arithmetic prohibited",
                 .result_unpriv = REJECT,
                 .result = ACCEPT,
+               .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
         },
         {
                 "valid map access into an array with a variable",
@@ -2955,6 +2993,7 @@ static struct bpf_test tests[] = {
                 .errstr_unpriv = "R0 pointer arithmetic prohibited",
                 .result_unpriv = REJECT,
                 .result = ACCEPT,
+               .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
         },
         {
                 "valid map access into an array with a signed variable",
@@ -2982,6 +3021,7 @@ static struct bpf_test tests[] = {
                 .errstr_unpriv = "R0 pointer arithmetic prohibited",
                 .result_unpriv = REJECT,
                 .result = ACCEPT,
+               .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
         },
         {
                 "invalid map access into an array with a constant",
@@ -3023,6 +3063,7 @@ static struct bpf_test tests[] = {
                 .errstr = "R0 min value is outside of the array range",
                 .result_unpriv = REJECT,
                 .result = REJECT,
+               .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
         },
         {
                 "invalid map access into an array with a variable",
@@ -3046,6 +3087,7 @@ static struct bpf_test tests[] = {
                 .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
                 .result_unpriv = REJECT,
                 .result = REJECT,
+               .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
         },
         {
                 "invalid map access into an array with no floor check",
@@ -3072,6 +3114,7 @@ static struct bpf_test tests[] = {
                 .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
                 .result_unpriv = REJECT,
                 .result = REJECT,
+               .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
         },
         {
                 "invalid map access into an array with a invalid max check",
@@ -3098,6 +3141,7 @@ static struct bpf_test tests[] = {
                 .errstr = "invalid access to map value, value_size=48 off=44 size=8",
                 .result_unpriv = REJECT,
                 .result = REJECT,
+               .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
         },
         {
                 "invalid map access into an array with a invalid max check",
@@ -3127,6 +3171,7 @@ static struct bpf_test tests[] = {
                 .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
                 .result_unpriv = REJECT,
                 .result = REJECT,
+               .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
         },
         {
                 "multiple registers share map_lookup_elem result",
@@ -3250,6 +3295,7 @@ static struct bpf_test tests[] = {
                 .result = REJECT,
                 .errstr_unpriv = "R0 pointer arithmetic prohibited",
                 .result_unpriv = REJECT,
+               .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
         },
         {
                 "constant register |= constant should keep constant type",
@@ -3415,6 +3461,26 @@ static struct bpf_test tests[] = {
                 .result = ACCEPT,
                 .prog_type = BPF_PROG_TYPE_LWT_XMIT,
         },
+       {
+               "overlapping checks for direct packet access",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data)),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data_end)),
+                       BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4),
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+                       BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .prog_type = BPF_PROG_TYPE_LWT_XMIT,
+       },
         {
                 "invalid access of tc_classid for LWT_IN",
                 .insns = {
@@ -3959,7 +4025,208 @@ static struct bpf_test tests[] = {
                 .result_unpriv = REJECT,
         },
         {
-               "map element value (adjusted) is preserved across register spilling",
+               "map element value or null is marked on register spilling",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -152),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map2 = { 3 },
+               .errstr_unpriv = "R0 leaks addr",
+               .result = ACCEPT,
+               .result_unpriv = REJECT,
+       },
+       {
+               "map element value store of cleared call register",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map2 = { 3 },
+               .errstr_unpriv = "R1 !read_ok",
+               .errstr = "R1 !read_ok",
+               .result = REJECT,
+               .result_unpriv = REJECT,
+       },
+       {
+               "map element value with unaligned store",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 17),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 43),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_0, -2, 44),
+                       BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 32),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_8, 2, 33),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_8, -2, 34),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 5),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 22),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_8, 4, 23),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_8, -7, 24),
+                       BPF_MOV64_REG(BPF_REG_7, BPF_REG_8),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 3),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 22),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_7, 4, 23),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_7, -4, 24),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map2 = { 3 },
+               .errstr_unpriv = "R0 pointer arithmetic prohibited",
+               .result = ACCEPT,
+               .result_unpriv = REJECT,
+               .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+       },
+       {
+               "map element value with unaligned load",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 9),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 2),
+                       BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 2),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 5),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map2 = { 3 },
+               .errstr_unpriv = "R0 pointer arithmetic prohibited",
+               .result = ACCEPT,
+               .result_unpriv = REJECT,
+               .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+       },
+       {
+               "map element value illegal alu op, 1",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 8),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map2 = { 3 },
+               .errstr_unpriv = "R0 pointer arithmetic prohibited",
+               .errstr = "invalid mem access 'inv'",
+               .result = REJECT,
+               .result_unpriv = REJECT,
+       },
+       {
+               "map element value illegal alu op, 2",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+                       BPF_ALU32_IMM(BPF_ADD, BPF_REG_0, 0),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map2 = { 3 },
+               .errstr_unpriv = "R0 pointer arithmetic prohibited",
+               .errstr = "invalid mem access 'inv'",
+               .result = REJECT,
+               .result_unpriv = REJECT,
+       },
+       {
+               "map element value illegal alu op, 3",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+                       BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, 42),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map2 = { 3 },
+               .errstr_unpriv = "R0 pointer arithmetic prohibited",
+               .errstr = "invalid mem access 'inv'",
+               .result = REJECT,
+               .result_unpriv = REJECT,
+       },
+       {
+               "map element value illegal alu op, 4",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+                       BPF_ENDIAN(BPF_FROM_BE, BPF_REG_0, 64),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map2 = { 3 },
+               .errstr_unpriv = "R0 pointer arithmetic prohibited",
+               .errstr = "invalid mem access 'inv'",
+               .result = REJECT,
+               .result_unpriv = REJECT,
+       },
+       {
+               "map element value illegal alu op, 5",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+                       BPF_MOV64_IMM(BPF_REG_3, 4096),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+                       BPF_STX_XADD(BPF_DW, BPF_REG_2, BPF_REG_3, 0),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map2 = { 3 },
+               .errstr_unpriv = "R0 invalid mem access 'inv'",
+               .errstr = "R0 invalid mem access 'inv'",
+               .result = REJECT,
+               .result_unpriv = REJECT,
+       },
+       {
+               "map element value is preserved across register spilling",
                 .insns = {
                         BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
                         BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
@@ -3981,6 +4248,7 @@ static struct bpf_test tests[] = {
                 .errstr_unpriv = "R0 pointer arithmetic prohibited",
                 .result = ACCEPT,
                 .result_unpriv = REJECT,
+               .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
         },
         {
                 "helper access to variable memory: stack, bitwise AND + JMP, correct bounds",
@@ -4419,6 +4687,7 @@ static struct bpf_test tests[] = {
                 .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
                 .result = REJECT,
                 .result_unpriv = REJECT,
+               .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
         },
         {
                 "invalid range check",
@@ -4450,6 +4719,7 @@ static struct bpf_test tests[] = {
                 .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
                 .result = REJECT,
                 .result_unpriv = REJECT,
+               .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
         }
  };
  
@@ -4528,11 +4798,11 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
  static void do_test_single(struct bpf_test *test, bool unpriv,
                            int *passes, int *errors)
  {
+       int fd_prog, expected_ret, reject_from_alignment;
         struct bpf_insn *prog = test->insns;
         int prog_len = probe_filter_length(prog);
         int prog_type = test->prog_type;
         int fd_f1 = -1, fd_f2 = -1, fd_f3 = -1;
-       int fd_prog, expected_ret;
         const char *expected_err;
  
         do_test_fixup(test, prog, &fd_f1, &fd_f2, &fd_f3);
@@ -4545,8 +4815,19 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
                        test->result_unpriv : test->result;
         expected_err = unpriv && test->errstr_unpriv ?
                        test->errstr_unpriv : test->errstr;
+
+       reject_from_alignment = fd_prog < 0 &&
+                               (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) &&
+                               strstr(bpf_vlog, "Unknown alignment.");
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+       if (reject_from_alignment) {
+               printf("FAIL\nFailed due to alignment despite having efficient unaligned access: '%s'!\n",
+                      strerror(errno));
+               goto fail_log;
+       }
+#endif
         if (expected_ret == ACCEPT) {
-               if (fd_prog < 0) {
+               if (fd_prog < 0 && !reject_from_alignment) {
                         printf("FAIL\nFailed to load prog '%s'!\n",
                                strerror(errno));
                         goto fail_log;
@@ -4556,14 +4837,15 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
                         printf("FAIL\nUnexpected success to load!\n");
                         goto fail_log;
                 }
-               if (!strstr(bpf_vlog, expected_err)) {
+               if (!strstr(bpf_vlog, expected_err) && !reject_from_alignment) {
                         printf("FAIL\nUnexpected error message!\n");
                         goto fail_log;
                 }
         }
  
         (*passes)++;
-       printf("OK\n");
+       printf("OK%s\n", reject_from_alignment ?
+              " (NOTE: reject due to unknown alignment)" : "");
  close_fds:
         close(fd_prog);
         close(fd_f1);
@@ -4583,10 +4865,12 @@ static bool is_admin(void)
         cap_flag_value_t sysadmin = CAP_CLEAR;
         const cap_value_t cap_val = CAP_SYS_ADMIN;
  
+#ifdef CAP_IS_SUPPORTED
         if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) {
                 perror("cap_get_flag");
                 return false;
         }
+#endif
         caps = cap_get_proc();
         if (!caps) {
                 perror("cap_get_proc");
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc

new file mode 100644 (file)

index 0000000..bab5ff7
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
@@ -0,0 +1,117 @@
+#!/bin/sh
+# description: ftrace - function pid filters
+
+# Make sure that function pid matching filter works.
+# Also test it on an instance directory
+
+if ! grep -q function available_tracers; then
+    echo "no function tracer configured"
+    exit_unsupported
+fi
+
+if [ ! -f set_ftrace_pid ]; then
+    echo "set_ftrace_pid not found? Is function tracer not set?"
+    exit_unsupported
+fi
+
+if [ ! -f set_ftrace_filter ]; then
+    echo "set_ftrace_filter not found? Is function tracer not set?"
+    exit_unsupported
+fi
+
+do_function_fork=1
+
+if [ ! -f options/function-fork ]; then
+    do_function_fork=0
+    echo "no option for function-fork found. Option will not be tested."
+fi
+
+read PID _ < /proc/self/stat
+
+if [ $do_function_fork -eq 1 ]; then
+    # default value of function-fork option
+    orig_value=`grep function-fork trace_options`
+fi
+
+do_reset() {
+    reset_tracer
+    clear_trace
+    enable_tracing
+    echo > set_ftrace_filter
+    echo > set_ftrace_pid
+
+    if [ $do_function_fork -eq 0 ]; then
+       return
+    fi
+
+    echo $orig_value > trace_options
+}
+
+fail() { # msg
+    do_reset
+    echo $1
+    exit $FAIL
+}
+
+yield() {
+    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
+}
+
+do_test() {
+    disable_tracing
+
+    echo do_execve* > set_ftrace_filter
+    echo *do_fork >> set_ftrace_filter
+
+    echo $PID > set_ftrace_pid
+    echo function > current_tracer
+
+    if [ $do_function_fork -eq 1 ]; then
+       # don't allow children to be traced
+       echo nofunction-fork > trace_options
+    fi
+
+    enable_tracing
+    yield
+
+    count_pid=`cat trace | grep -v ^# | grep $PID | wc -l`
+    count_other=`cat trace | grep -v ^# | grep -v $PID | wc -l`
+
+    # count_other should be 0
+    if [ $count_pid -eq 0 -o $count_other -ne 0 ]; then
+       fail "PID filtering not working?"
+    fi
+
+    disable_tracing
+    clear_trace
+
+    if [ $do_function_fork -eq 0 ]; then
+       return
+    fi
+
+    # allow children to be traced
+    echo function-fork > trace_options
+
+    enable_tracing
+    yield
+
+    count_pid=`cat trace | grep -v ^# | grep $PID | wc -l`
+    count_other=`cat trace | grep -v ^# | grep -v $PID | wc -l`
+
+    # count_other should NOT be 0
+    if [ $count_pid -eq 0 -o $count_other -eq 0 ]; then
+       fail "PID filtering not following fork?"
+    fi
+}
+
+do_test
+
+mkdir instances/foo
+cd instances/foo
+do_test
+cd ../../
+rmdir instances/foo
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c

index 4124593696862fcb370fee15bcf7e34e11cdf9e0..e62bb354820cacdc653a19db385e3bd497931d0b 100644 (file)
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -75,7 +75,7 @@ static int sock_fanout_open(uint16_t typeflags, int num_packets)
  {
         int fd, val;
  
-       fd = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_IP));
+       fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_IP));
         if (fd < 0) {
                 perror("socket packet");
                 exit(1);
@@ -95,6 +95,24 @@ static int sock_fanout_open(uint16_t typeflags, int num_packets)
         return fd;
  }
  
+static void sock_fanout_set_cbpf(int fd)
+{
+       struct sock_filter bpf_filter[] = {
+               BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 80),           /* ldb [80]: A = byte at absolute offset 80 */
+               BPF_STMT(BPF_RET+BPF_A, 0),                   /* ret A: return the value held in A */
+       };
+       struct sock_fprog bpf_prog;
+
+       bpf_prog.filter = bpf_filter;
+       bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter);
+
+       if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &bpf_prog,
+                      sizeof(bpf_prog))) {
+               perror("fanout data cbpf");
+               exit(1);
+       }
+}
+
  static void sock_fanout_set_ebpf(int fd)
  {
         const int len_off = __builtin_offsetof(struct __sk_buff, len);
@@ -270,7 +288,7 @@ static int test_datapath(uint16_t typeflags, int port_off,
                 exit(1);
         }
         if (type == PACKET_FANOUT_CBPF)
-               sock_setfilter(fds[0], SOL_PACKET, PACKET_FANOUT_DATA);
+               sock_fanout_set_cbpf(fds[0]);
         else if (type == PACKET_FANOUT_EBPF)
                 sock_fanout_set_ebpf(fds[0]);
  
diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h

index a77da88bf9469d515713e1d1f0f6d318544ac458..7d990d6c861b5863f23fe1d29523707c48ac28d5 100644 (file)
--- a/tools/testing/selftests/net/psock_lib.h
+++ b/tools/testing/selftests/net/psock_lib.h
@@ -38,7 +38,7 @@
  # define __maybe_unused                __attribute__ ((__unused__))
  #endif
  
-static __maybe_unused void sock_setfilter(int fd, int lvl, int optnum)
+static __maybe_unused void pair_udp_setfilter(int fd)
  {
         /* the filter below checks for all of the following conditions that
          * are based on the contents of create_payload()
@@ -76,23 +76,16 @@ static __maybe_unused void sock_setfilter(int fd, int lvl, int optnum)
         };
         struct sock_fprog bpf_prog;
  
-       if (lvl == SOL_PACKET && optnum == PACKET_FANOUT_DATA)
-               bpf_filter[5].code = 0x16;   /* RET A                         */
-
         bpf_prog.filter = bpf_filter;
         bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter);
-       if (setsockopt(fd, lvl, optnum, &bpf_prog,
+
+       if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog,
                        sizeof(bpf_prog))) {
                 perror("setsockopt SO_ATTACH_FILTER");
                 exit(1);
         }
  }
  
-static __maybe_unused void pair_udp_setfilter(int fd)
-{
-       sock_setfilter(fd, SOL_SOCKET, SO_ATTACH_FILTER);
-}
-
  static __maybe_unused void pair_udp_open(int fds[], uint16_t port)
  {
         struct sockaddr_in saddr, daddr;
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile

index 1c5d0575802e47113b49b4b0facfd7bff47a6620..bf13fc2297aab48fad929415dbee9bc9f10532a2 100644 (file)
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -34,34 +34,34 @@ endif
  all: $(SUB_DIRS)
  
  $(SUB_DIRS):
-       BUILD_TARGET=$$OUTPUT/$@; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $@ all
+       BUILD_TARGET=$(OUTPUT)/$@; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $@ all
  
  include ../lib.mk
  
  override define RUN_TESTS
         @for TARGET in $(SUB_DIRS); do \
-               BUILD_TARGET=$$OUTPUT/$$TARGET; \
+               BUILD_TARGET=$(OUTPUT)/$$TARGET;        \
                 $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
         done;
  endef
  
  override define INSTALL_RULE
         @for TARGET in $(SUB_DIRS); do \
-               BUILD_TARGET=$$OUTPUT/$$TARGET; \
+               BUILD_TARGET=$(OUTPUT)/$$TARGET;        \
                 $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install;\
         done;
  endef
  
  override define EMIT_TESTS
         @for TARGET in $(SUB_DIRS); do \
-               BUILD_TARGET=$$OUTPUT/$$TARGET; \
+               BUILD_TARGET=$(OUTPUT)/$$TARGET;        \
                 $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests;\
         done;
  endef
  
  clean:
         @for TARGET in $(SUB_DIRS); do \
-               BUILD_TARGET=$$OUTPUT/$$TARGET; \
+               BUILD_TARGET=$(OUTPUT)/$$TARGET;        \
                 $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean; \
         done;
         rm -f tags
diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c

index 248a820048dfe89018697926f4f308acd786e694..66d31de60b9ae93ee53175b33983e3d86d67795f 100644 (file)
--- a/tools/testing/selftests/powerpc/harness.c
+++ b/tools/testing/selftests/powerpc/harness.c
@@ -114,9 +114,11 @@ int test_harness(int (test_function)(void), char *name)
  
         rc = run_test(test_function, name);
  
-       if (rc == MAGIC_SKIP_RETURN_VALUE)
+       if (rc == MAGIC_SKIP_RETURN_VALUE) {
                 test_skip(name);
-       else
+               /* so that skipped test is not marked as failed */
+               rc = 0;
+       } else
                 test_finish(name, rc);
  
         return rc;
diff --git a/tools/testing/selftests/powerpc/include/vsx_asm.h b/tools/testing/selftests/powerpc/include/vsx_asm.h

index d828bfb6ef2d9a55f5752352458bca0ab1958549..54064ced9e95b3c66e57748f82ddf65aca91e94b 100644 (file)
--- a/tools/testing/selftests/powerpc/include/vsx_asm.h
+++ b/tools/testing/selftests/powerpc/include/vsx_asm.h
@@ -16,56 +16,56 @@
   */
  FUNC_START(load_vsx)
         li      r5,0
-       lxvx    vs20,r5,r3
+       lxvd2x  vs20,r5,r3
         addi    r5,r5,16
-       lxvx    vs21,r5,r3
+       lxvd2x  vs21,r5,r3
         addi    r5,r5,16
-       lxvx    vs22,r5,r3
+       lxvd2x  vs22,r5,r3
         addi    r5,r5,16
-       lxvx    vs23,r5,r3
+       lxvd2x  vs23,r5,r3
         addi    r5,r5,16
-       lxvx    vs24,r5,r3
+       lxvd2x  vs24,r5,r3
         addi    r5,r5,16
-       lxvx    vs25,r5,r3
+       lxvd2x  vs25,r5,r3
         addi    r5,r5,16
-       lxvx    vs26,r5,r3
+       lxvd2x  vs26,r5,r3
         addi    r5,r5,16
-       lxvx    vs27,r5,r3
+       lxvd2x  vs27,r5,r3
         addi    r5,r5,16
-       lxvx    vs28,r5,r3
+       lxvd2x  vs28,r5,r3
         addi    r5,r5,16
-       lxvx    vs29,r5,r3
+       lxvd2x  vs29,r5,r3
         addi    r5,r5,16
-       lxvx    vs30,r5,r3
+       lxvd2x  vs30,r5,r3
         addi    r5,r5,16
-       lxvx    vs31,r5,r3
+       lxvd2x  vs31,r5,r3
         blr
  FUNC_END(load_vsx)
  
  FUNC_START(store_vsx)
         li      r5,0
-       stxvx   vs20,r5,r3
+       stxvd2x vs20,r5,r3
         addi    r5,r5,16
-       stxvx   vs21,r5,r3
+       stxvd2x vs21,r5,r3
         addi    r5,r5,16
-       stxvx   vs22,r5,r3
+       stxvd2x vs22,r5,r3
         addi    r5,r5,16
-       stxvx   vs23,r5,r3
+       stxvd2x vs23,r5,r3
         addi    r5,r5,16
-       stxvx   vs24,r5,r3
+       stxvd2x vs24,r5,r3
         addi    r5,r5,16
-       stxvx   vs25,r5,r3
+       stxvd2x vs25,r5,r3
         addi    r5,r5,16
-       stxvx   vs26,r5,r3
+       stxvd2x vs26,r5,r3
         addi    r5,r5,16
-       stxvx   vs27,r5,r3
+       stxvd2x vs27,r5,r3
         addi    r5,r5,16
-       stxvx   vs28,r5,r3
+       stxvd2x vs28,r5,r3
         addi    r5,r5,16
-       stxvx   vs29,r5,r3
+       stxvd2x vs29,r5,r3
         addi    r5,r5,16
-       stxvx   vs30,r5,r3
+       stxvd2x vs30,r5,r3
         addi    r5,r5,16
-       stxvx   vs31,r5,r3
+       stxvd2x vs31,r5,r3
         blr
  FUNC_END(store_vsx)
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile

index 4cff7e7ddcc47b80ef30a06a779ea45dae5a5f3e..41642ba5e318a153d805720e47475436817be53e 100644 (file)
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -1,5 +1,9 @@
  # Makefile for vm selftests
  
+ifndef OUTPUT
+  OUTPUT := $(shell pwd)
+endif
+
  CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
  LDLIBS = -lrt
  TEST_GEN_FILES = compaction_test
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c

index 5b2b4b3c634ca17462730a7d3740fd698540a975..b4967d8752365545149274cf9e70b7717a826866 100644 (file)
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -245,7 +245,7 @@ void do_unexpected_base(void)
                 long ret;
                 asm volatile ("int $0x80"
                               : "=a" (ret) : "a" (243), "b" (low_desc)
-                             : "flags");
+                             : "r8", "r9", "r10", "r11");
                 memcpy(&desc, low_desc, sizeof(desc));
                 munmap(low_desc, sizeof(desc));
  
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c

index 4af47079cf04305cec7e6a8d2aa0960a6fcfe352..f6121612e769f5600d1cc0920037ee4c6ee0bf92 100644 (file)
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -45,6 +45,12 @@
  #define AR_DB                  (1 << 22)
  #define AR_G                   (1 << 23)
  
+#ifdef __x86_64__
+# define INT80_CLOBBERS "r8", "r9", "r10", "r11"
+#else
+# define INT80_CLOBBERS
+#endif
+
  static int nerrs;
  
  /* Points to an array of 1024 ints, each holding its own index. */
@@ -588,7 +594,7 @@ static int invoke_set_thread_area(void)
         asm volatile ("int $0x80"
                       : "=a" (ret), "+m" (low_user_desc) :
                         "a" (243), "b" (low_user_desc)
-                     : "flags");
+                     : INT80_CLOBBERS);
         return ret;
  }
  
@@ -657,7 +663,7 @@ static void test_gdt_invalidation(void)
                         "+a" (eax)
                       : "m" (low_user_desc_clear),
                         [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
-                     : "flags");
+                     : INT80_CLOBBERS);
  
         if (sel != 0) {
                 result = "FAIL";
@@ -688,7 +694,7 @@ static void test_gdt_invalidation(void)
                         "+a" (eax)
                       : "m" (low_user_desc_clear),
                         [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
-                     : "flags");
+                     : INT80_CLOBBERS);
  
         if (sel != 0) {
                 result = "FAIL";
@@ -721,7 +727,7 @@ static void test_gdt_invalidation(void)
                         "+a" (eax)
                       : "m" (low_user_desc_clear),
                         [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
-                     : "flags");
+                     : INT80_CLOBBERS);
  
  #ifdef __x86_64__
         syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base);
@@ -774,7 +780,7 @@ static void test_gdt_invalidation(void)
                         "+a" (eax)
                       : "m" (low_user_desc_clear),
                         [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
-                     : "flags");
+                     : INT80_CLOBBERS);
  
  #ifdef __x86_64__
         syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base);
diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c

index b037ce9cf116b1da0ef57601f8f1840e45fcc775..eaea9243970840dab196cb1ddf84586e30803f0a 100644 (file)
--- a/tools/testing/selftests/x86/ptrace_syscall.c
+++ b/tools/testing/selftests/x86/ptrace_syscall.c
@@ -58,7 +58,8 @@ static void do_full_int80(struct syscall_args32 *args)
         asm volatile ("int $0x80"
                       : "+a" (args->nr),
                         "+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2),
-                       "+S" (args->arg3), "+D" (args->arg4), "+r" (bp));
+                       "+S" (args->arg3), "+D" (args->arg4), "+r" (bp)
+                       : : "r8", "r9", "r10", "r11");
         args->arg5 = bp;
  #else
         sys32_helper(args, int80_and_ret);
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c

index 50c26358e8b7ec055000ead54c2c80c69b371a6f..a48da95c18fdf1f0ea46e7cb628ff9a9caba931b 100644 (file)
--- a/tools/testing/selftests/x86/single_step_syscall.c
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -56,9 +56,11 @@ static volatile sig_atomic_t sig_traps;
  #ifdef __x86_64__
  # define REG_IP REG_RIP
  # define WIDTH "q"
+# define INT80_CLOBBERS "r8", "r9", "r10", "r11"
  #else
  # define REG_IP REG_EIP
  # define WIDTH "l"
+# define INT80_CLOBBERS
  #endif
  
  static unsigned long get_eflags(void)
@@ -140,7 +142,8 @@ int main()
  
         printf("[RUN]\tSet TF and check int80\n");
         set_eflags(get_eflags() | X86_EFLAGS_TF);
-       asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid));
+       asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)
+                       : INT80_CLOBBERS);
         check_result();
  
         /*
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c

index 276139a24e6fd097f791537c07b7c182717e0693..702f8108608d053d43a6eff18ef8be84c3589994 100644 (file)
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -391,6 +391,25 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
         return IRQ_HANDLED;
  }
  
+/**
+ * kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware
+ *
+ * For a specific CPU, initialize the GIC VE hardware; must not be preemptible.
+ */
+void kvm_vgic_init_cpu_hardware(void)
+{
+       BUG_ON(preemptible());
+
+       /*
+        * We want to make sure the list registers start out clear so that we
+        * only have to program the used registers.
+        */
+       if (kvm_vgic_global_state.type == VGIC_V2)
+               vgic_v2_init_lrs();
+       else
+               kvm_call_hyp(__vgic_v3_init_lrs);
+}
+
  /**
   * kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable
   * according to the host GIC model. Accordingly calls either
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c

index 571b64a01c509741146e5e2263d5042457a2e14c..8d1da1af4b09e47c174cf7151b37b98666f03953 100644 (file)
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -360,29 +360,6 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
         return ret;
  }
  
-static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
-                                            struct vgic_its *its,
-                                            gpa_t addr, unsigned int len)
-{
-       u32 reg = 0;
-
-       mutex_lock(&its->cmd_lock);
-       if (its->creadr == its->cwriter)
-               reg |= GITS_CTLR_QUIESCENT;
-       if (its->enabled)
-               reg |= GITS_CTLR_ENABLE;
-       mutex_unlock(&its->cmd_lock);
-
-       return reg;
-}
-
-static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
-                                    gpa_t addr, unsigned int len,
-                                    unsigned long val)
-{
-       its->enabled = !!(val & GITS_CTLR_ENABLE);
-}
-
  static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm,
                                               struct vgic_its *its,
                                               gpa_t addr, unsigned int len)
@@ -1161,33 +1138,16 @@ static void vgic_mmio_write_its_cbaser(struct kvm *kvm, struct vgic_its *its,
  #define ITS_CMD_SIZE                   32
  #define ITS_CMD_OFFSET(reg)            ((reg) & GENMASK(19, 5))
  
-/*
- * By writing to CWRITER the guest announces new commands to be processed.
- * To avoid any races in the first place, we take the its_cmd lock, which
- * protects our ring buffer variables, so that there is only one user
- * per ITS handling commands at a given time.
- */
-static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
-                                       gpa_t addr, unsigned int len,
-                                       unsigned long val)
+/* Must be called with the cmd_lock held. */
+static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its)
  {
         gpa_t cbaser;
         u64 cmd_buf[4];
-       u32 reg;
  
-       if (!its)
-               return;
-
-       mutex_lock(&its->cmd_lock);
-
-       reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
-       reg = ITS_CMD_OFFSET(reg);
-       if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
-               mutex_unlock(&its->cmd_lock);
+       /* Commands are only processed when the ITS is enabled. */
+       if (!its->enabled)
                 return;
-       }
  
-       its->cwriter = reg;
         cbaser = CBASER_ADDRESS(its->cbaser);
  
         while (its->cwriter != its->creadr) {
@@ -1207,6 +1167,34 @@ static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
                 if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser))
                         its->creadr = 0;
         }
+}
+
+/*
+ * By writing to CWRITER the guest announces new commands to be processed.
+ * To avoid any races in the first place, we take the its_cmd lock, which
+ * protects our ring buffer variables, so that there is only one user
+ * per ITS handling commands at a given time.
+ */
+static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
+                                       gpa_t addr, unsigned int len,
+                                       unsigned long val)
+{
+       u64 reg;
+
+       if (!its)
+               return;
+
+       mutex_lock(&its->cmd_lock);
+
+       reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
+       reg = ITS_CMD_OFFSET(reg);
+       if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
+               mutex_unlock(&its->cmd_lock);
+               return;
+       }
+       its->cwriter = reg;
+
+       vgic_its_process_commands(kvm, its);
  
         mutex_unlock(&its->cmd_lock);
  }
@@ -1287,6 +1275,39 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm,
         *regptr = reg;
  }
  
+static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
+                                            struct vgic_its *its,
+                                            gpa_t addr, unsigned int len)
+{
+       u32 reg = 0;
+
+       mutex_lock(&its->cmd_lock);
+       if (its->creadr == its->cwriter)
+               reg |= GITS_CTLR_QUIESCENT;
+       if (its->enabled)
+               reg |= GITS_CTLR_ENABLE;
+       mutex_unlock(&its->cmd_lock);
+
+       return reg;
+}
+
+static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
+                                    gpa_t addr, unsigned int len,
+                                    unsigned long val)
+{
+       mutex_lock(&its->cmd_lock);
+
+       its->enabled = !!(val & GITS_CTLR_ENABLE);
+
+       /*
+        * Try to process any pending commands. This function bails out early
+        * if the ITS is disabled or no commands have been queued.
+        */
+       vgic_its_process_commands(kvm, its);
+
+       mutex_unlock(&its->cmd_lock);
+}
+
  #define REGISTER_ITS_DESC(off, rd, wr, length, acc)            \
  {                                                              \
         .reg_offset = off,                                      \
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c

index a3ad7ff95c9b3ba95e80863d8e16a2f05fba8071..0a4283ed9aa735e55e476bdea0a19ed159952646 100644 (file)
--- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
@@ -229,7 +229,15 @@ static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu,
                 val = vmcr.ctlr;
                 break;
         case GIC_CPU_PRIMASK:
-               val = vmcr.pmr;
+               /*
+                * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the
+                * PMR field as GICH_VMCR.VMPriMask rather than
+                * GICC_PMR.Priority, so we expose the upper five bits of
+                * priority mask to userspace using the lower bits in the
+                * unsigned long.
+                */
+               val = (vmcr.pmr & GICV_PMR_PRIORITY_MASK) >>
+                       GICV_PMR_PRIORITY_SHIFT;
                 break;
         case GIC_CPU_BINPOINT:
                 val = vmcr.bpr;
@@ -262,7 +270,15 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu,
                 vmcr.ctlr = val;
                 break;
         case GIC_CPU_PRIMASK:
-               vmcr.pmr = val;
+               /*
+                * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the
+                * PMR field as GICH_VMCR.VMPriMask rather than
+                * GICC_PMR.Priority, so we expose the upper five bits of
+                * priority mask to userspace using the lower bits in the
+                * unsigned long.
+                */
+               vmcr.pmr = (val << GICV_PMR_PRIORITY_SHIFT) &
+                       GICV_PMR_PRIORITY_MASK;
                 break;
         case GIC_CPU_BINPOINT:
                 vmcr.bpr = val;
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c

index 3654b4c835ef733c8f1255137849b253b71c1659..2a5db135272215d5c9d4bfa544b7d3ed11a9b9c3 100644 (file)
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -180,21 +180,37 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
  static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
                                     bool new_active_state)
  {
+       struct kvm_vcpu *requester_vcpu;
         spin_lock(&irq->irq_lock);
+
+       /*
+        * The vcpu parameter here can mean multiple things depending on how
+        * this function is called; when handling a trap from the kernel it
+        * depends on the GIC version, and these functions are also called as
+        * part of save/restore from userspace.
+        *
+        * Therefore, we have to figure out the requester in a reliable way.
+        *
+        * When accessing VGIC state from user space, the requester_vcpu is
+        * NULL, which is fine, because we guarantee that no VCPUs are running
+        * when accessing VGIC state from user space so irq->vcpu->cpu is
+        * always -1.
+        */
+       requester_vcpu = kvm_arm_get_running_vcpu();
+
         /*
          * If this virtual IRQ was written into a list register, we
          * have to make sure the CPU that runs the VCPU thread has
-        * synced back LR state to the struct vgic_irq.  We can only
-        * know this for sure, when either this irq is not assigned to
-        * anyone's AP list anymore, or the VCPU thread is not
-        * running on any CPUs.
+        * synced back the LR state to the struct vgic_irq.
          *
-        * In the opposite case, we know the VCPU thread may be on its
-        * way back from the guest and still has to sync back this
-        * IRQ, so we release and re-acquire the spin_lock to let the
-        * other thread sync back the IRQ.
+        * As long as the conditions below are true, we know the VCPU thread
+        * may be on its way back from the guest (we kicked the VCPU thread in
+        * vgic_change_active_prepare)  and still has to sync back this IRQ,
+        * so we release and re-acquire the spin_lock to let the other thread
+        * sync back the IRQ.
          */
         while (irq->vcpu && /* IRQ may have state in an LR somewhere */
+              irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */
                irq->vcpu->cpu != -1) /* VCPU thread is running */
                 cond_resched_lock(&irq->irq_lock);
  
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c

index b834ecdf322503c09bffc58c7af5609cd4b71e43..b637d9c7afe3ff51b9e8dfdcd947ee37ef2df029 100644 (file)
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -36,6 +36,21 @@ static unsigned long *u64_to_bitmask(u64 *val)
         return (unsigned long *)val;
  }
  
+static inline void vgic_v2_write_lr(int lr, u32 val)
+{
+       void __iomem *base = kvm_vgic_global_state.vctrl_base;
+
+       writel_relaxed(val, base + GICH_LR0 + (lr * 4));
+}
+
+void vgic_v2_init_lrs(void)
+{
+       int i;
+
+       for (i = 0; i < kvm_vgic_global_state.nr_lr; i++)
+               vgic_v2_write_lr(i, 0);
+}
+
  void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu)
  {
         struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
@@ -191,8 +206,8 @@ void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
                 GICH_VMCR_ALIAS_BINPOINT_MASK;
         vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) &
                 GICH_VMCR_BINPOINT_MASK;
-       vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) &
-               GICH_VMCR_PRIMASK_MASK;
+       vmcr |= ((vmcrp->pmr >> GICV_PMR_PRIORITY_SHIFT) <<
+                GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK;
  
         vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
  }
@@ -207,8 +222,8 @@ void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
                         GICH_VMCR_ALIAS_BINPOINT_SHIFT;
         vmcrp->bpr  = (vmcr & GICH_VMCR_BINPOINT_MASK) >>
                         GICH_VMCR_BINPOINT_SHIFT;
-       vmcrp->pmr  = (vmcr & GICH_VMCR_PRIMASK_MASK) >>
-                       GICH_VMCR_PRIMASK_SHIFT;
+       vmcrp->pmr  = ((vmcr & GICH_VMCR_PRIMASK_MASK) >>
+                       GICH_VMCR_PRIMASK_SHIFT) << GICV_PMR_PRIORITY_SHIFT;
  }
  
  void vgic_v2_enable(struct kvm_vcpu *vcpu)
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c

index edc6ee2dc852e9fb0f425e44e741434a71983731..be0f4c3e0142e04216cb28e1f965487d52d0b4c9 100644 (file)
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -229,10 +229,13 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
         /*
          * If we are emulating a GICv3, we do it in an non-GICv2-compatible
          * way, so we force SRE to 1 to demonstrate this to the guest.
+        * Also, we don't support any form of IRQ/FIQ bypass.
          * This goes with the spec allowing the value to be RAO/WI.
          */
         if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
-               vgic_v3->vgic_sre = ICC_SRE_EL1_SRE;
+               vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB |
+                                    ICC_SRE_EL1_DFB |
+                                    ICC_SRE_EL1_SRE);
                 vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE;
         } else {
                 vgic_v3->vgic_sre = 0;
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h

index db28f7cadab28b5859ce58dcd35eb36b6632e60f..6cf557e9f71807f05017d10af8074bd4416c151c 100644 (file)
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -81,11 +81,18 @@ static inline bool irq_is_pending(struct vgic_irq *irq)
                 return irq->pending_latch || irq->line_level;
  }
  
+/*
+ * This struct provides an intermediate representation of the fields contained
+ * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC
+ * state to userspace can generate either GICv2 or GICv3 CPU interface
+ * registers regardless of the hardware backed GIC used.
+ */
  struct vgic_vmcr {
         u32     ctlr;
         u32     abpr;
         u32     bpr;
-       u32     pmr;
+       u32     pmr;  /* Priority mask field in the GICC_PMR and
+                      * ICC_PMR_EL1 priority field format */
         /* Below member variable are valid only for GICv3 */
         u32     grpen0;
         u32     grpen1;
@@ -130,6 +137,8 @@ int vgic_v2_map_resources(struct kvm *kvm);
  int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
                              enum vgic_type);
  
+void vgic_v2_init_lrs(void);
+
  static inline void vgic_get_irq_kref(struct vgic_irq *irq)
  {
         if (irq->intid < VGIC_MIN_LPI)
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c

index a29786dd95221017b141a060b031c5c899dac2e5..4d28a9ddbee01077fea01beeeae5523917822da9 100644 (file)
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -870,7 +870,8 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
                         continue;
  
                 kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
-               kvm->buses[bus_idx]->ioeventfd_count--;
+               if (kvm->buses[bus_idx])
+                       kvm->buses[bus_idx]->ioeventfd_count--;
                 ioeventfd_release(p);
                 ret = 0;
                 break;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c

index a17d78759727f352991a97b4c2bed21266657760..88257b311cb579b5b720330456f99fbec97a58ac 100644 (file)
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -727,8 +727,11 @@ static void kvm_destroy_vm(struct kvm *kvm)
         list_del(&kvm->vm_list);
         spin_unlock(&kvm_lock);
         kvm_free_irq_routing(kvm);
-       for (i = 0; i < KVM_NR_BUSES; i++)
-               kvm_io_bus_destroy(kvm->buses[i]);
+       for (i = 0; i < KVM_NR_BUSES; i++) {
+               if (kvm->buses[i])
+                       kvm_io_bus_destroy(kvm->buses[i]);
+               kvm->buses[i] = NULL;
+       }
         kvm_coalesced_mmio_free(kvm);
  #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
         mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
@@ -1062,7 +1065,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
          * changes) is disallowed above, so any other attribute changes getting
          * here can be skipped.
          */
-       if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
+       if (as_id == 0 && (change == KVM_MR_CREATE || change == KVM_MR_MOVE)) {
                 r = kvm_iommu_map_pages(kvm, &new);
                 return r;
         }
@@ -3474,6 +3477,8 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
         };
  
         bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+       if (!bus)
+               return -ENOMEM;
         r = __kvm_io_bus_write(vcpu, bus, &range, val);
         return r < 0 ? r : 0;
  }
@@ -3491,6 +3496,8 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
         };
  
         bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+       if (!bus)
+               return -ENOMEM;
  
         /* First try the device referenced by cookie. */
         if ((cookie >= 0) && (cookie < bus->dev_count) &&
@@ -3541,6 +3548,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
         };
  
         bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+       if (!bus)
+               return -ENOMEM;
         r = __kvm_io_bus_read(vcpu, bus, &range, val);
         return r < 0 ? r : 0;
  }
@@ -3553,6 +3562,9 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
         struct kvm_io_bus *new_bus, *bus;
  
         bus = kvm->buses[bus_idx];
+       if (!bus)
+               return -ENOMEM;
+
         /* exclude ioeventfd which is limited by maximum fd */
         if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1)
                 return -ENOSPC;
@@ -3572,37 +3584,41 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
  }
  
  /* Caller must hold slots_lock. */
-int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
-                             struct kvm_io_device *dev)
+void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+                              struct kvm_io_device *dev)
  {
-       int i, r;
+       int i;
         struct kvm_io_bus *new_bus, *bus;
  
         bus = kvm->buses[bus_idx];
-       r = -ENOENT;
+       if (!bus)
+               return;
+
         for (i = 0; i < bus->dev_count; i++)
                 if (bus->range[i].dev == dev) {
-                       r = 0;
                         break;
                 }
  
-       if (r)
-               return r;
+       if (i == bus->dev_count)
+               return;
  
         new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) *
                           sizeof(struct kvm_io_range)), GFP_KERNEL);
-       if (!new_bus)
-               return -ENOMEM;
+       if (!new_bus)  {
+               pr_err("kvm: failed to shrink bus, removing it completely\n");
+               goto broken;
+       }
  
         memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
         new_bus->dev_count--;
         memcpy(new_bus->range + i, bus->range + i + 1,
                (new_bus->dev_count - i) * sizeof(struct kvm_io_range));
  
+broken:
         rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
         synchronize_srcu_expedited(&kvm->srcu);
         kfree(bus);
-       return r;
+       return;
  }
  
  struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
@@ -3615,6 +3631,8 @@ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
         srcu_idx = srcu_read_lock(&kvm->srcu);
  
         bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
+       if (!bus)
+               goto out_unlock;
  
         dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1);
         if (dev_idx < 0)
author	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 1 May 2017 20:12:49 +0000 (13:12 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 1 May 2017 20:12:49 +0000 (13:12 -0700)