BLOCK LAYER
P: Jens Axboe
-M: axboe@suse.de
+M: axboe@kernel.dk
L: linux-kernel@vger.kernel.org
T: git kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git
S: Maintained
IDE/ATAPI CDROM DRIVER
P: Jens Axboe
-M: axboe@suse.de
+M: axboe@kernel.dk
L: linux-kernel@vger.kernel.org
W: http://www.kernel.dk
S: Maintained
SCSI CDROM DRIVER
P: Jens Axboe
-M: axboe@suse.de
+M: axboe@kernel.dk
L: linux-scsi@vger.kernel.org
W: http://www.kernel.dk
S: Maintained
UNIFORM CDROM DRIVER
P: Jens Axboe
-M: axboe@suse.de
+M: axboe@kernel.dk
L: linux-kernel@vger.kernel.org
W: http://www.kernel.dk
S: Maintained
#include "signal-common.h"
+extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
+
/*
 * Including <asm/unistd.h> would give us the 64-bit syscall numbers ...
*/
#endif
};
-extern void sigset_from_compat (sigset_t *set, compat_sigset_t *compat);
-
save_static_function(sysn32_rt_sigsuspend);
__attribute_used__ noinline static int
_sysn32_rt_sigsuspend(nabi_no_regargs struct pt_regs regs)
__u64 offset;
int len;
- if(req->rq_status == RQ_INACTIVE) return(1);
-
/* This should be impossible now */
if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
printk("Write attempted on readonly ubd device %s\n",
#
# Block layer core configuration
#
+config BLOCK
+ bool "Enable the block layer"
+ default y
+ help
+ This permits the block layer to be removed from the kernel if it's not
+ needed (on some embedded devices for example). If this option is
+ disabled, then blockdev files will become unusable and some
+ filesystems (such as ext3) will become unavailable.
+
+ This option will also disable SCSI character devices and USB storage
+ since they make use of various block layer definitions and
+ facilities.
+
+ Say Y here unless you know you really don't want to mount disks and
+ suchlike.
+
+if BLOCK
+
#XXX - it makes sense to enable this only for 32-bit subarchs, not for x86_64
#for instance.
config LBD
If unsure, say Y.
+endif
+
source block/Kconfig.iosched
+if BLOCK
menu "IO Schedulers"
default "noop" if DEFAULT_NOOP
endmenu
+
+endif
# Makefile for the kernel block layer
#
-obj-y := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o
+obj-$(CONFIG_BLOCK) := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
obj-$(CONFIG_IOSCHED_AS) += as-iosched.o
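/*
 * Illustrative sketch: with CONFIG_BLOCK now optional, users of block-layer
 * services are expected to be guarded the same way the Makefile rule above
 * is, either with "depends on BLOCK" in Kconfig or in C code, e.g. a
 * hypothetical driver fragment such as
 *
 *	#ifdef CONFIG_BLOCK
 *		err = register_blkdev(major, "mydev");
 *	#endif
 *
 * so that a BLOCK=n kernel still builds and links.
 */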
/*
* Anticipatory & deadline i/o scheduler.
*
- * Copyright (C) 2002 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2002 Jens Axboe <axboe@kernel.dk>
* Nick Piggin <nickpiggin@yahoo.com.au>
*
*/
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
-#include <linux/hash.h>
#include <linux/rbtree.h>
#include <linux/interrupt.h>
struct rb_root sort_list[2];
struct list_head fifo_list[2];
- struct as_rq *next_arq[2]; /* next in sort order */
+ struct request *next_rq[2]; /* next in sort order */
sector_t last_sector[2]; /* last REQ_SYNC & REQ_ASYNC sectors */
- struct hlist_head *hash; /* request hash */
unsigned long exit_prob; /* probability a task will exit while
being waited on */
int write_batch_count; /* max # of reqs in a write batch */
int current_write_count; /* how many requests left this batch */
int write_batch_idled; /* has the write batch gone idle? */
- mempool_t *arq_pool;
enum anticipation_status antic_status;
unsigned long antic_start; /* jiffies: when it started */
unsigned long antic_expire;
};
-#define list_entry_fifo(ptr) list_entry((ptr), struct as_rq, fifo)
-
/*
* per-request data.
*/
AS_RQ_POSTSCHED, /* when they shouldn't be */
};
-struct as_rq {
- /*
- * rbtree index, key is the starting offset
- */
- struct rb_node rb_node;
- sector_t rb_key;
-
- struct request *request;
-
- struct io_context *io_context; /* The submitting task */
-
- /*
- * request hash, key is the ending offset (for back merge lookup)
- */
- struct hlist_node hash;
-
- /*
- * expire fifo
- */
- struct list_head fifo;
- unsigned long expires;
+#define RQ_IOC(rq) ((struct io_context *) (rq)->elevator_private)
+#define RQ_STATE(rq) ((enum arq_state)(rq)->elevator_private2)
+#define RQ_SET_STATE(rq, state) ((rq)->elevator_private2 = (void *) state)
- unsigned int is_sync;
- enum arq_state state;
-};
-
-#define RQ_DATA(rq) ((struct as_rq *) (rq)->elevator_private)
-
-static kmem_cache_t *arq_pool;
-
-static atomic_t ioc_count = ATOMIC_INIT(0);
+static DEFINE_PER_CPU(unsigned long, ioc_count);
static struct completion *ioc_gone;
-static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq);
+static void as_move_to_dispatch(struct as_data *ad, struct request *rq);
static void as_antic_stop(struct as_data *ad);
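/*
 * A rough sketch of the per-request bookkeeping this switches to: instead of
 * allocating a struct as_rq per request, the scheduler now reuses the two
 * elevator-private pointers already present in struct request, roughly
 *
 *	rq->elevator_private = as_get_io_context(q->node);
 *	RQ_SET_STATE(rq, AS_RQ_NEW);
 *	...
 *	aic = RQ_IOC(rq)->aic;
 *
 * so no separate arq allocation, mempool or back-merge hash is needed.
 */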
/*
static void free_as_io_context(struct as_io_context *aic)
{
kfree(aic);
- if (atomic_dec_and_test(&ioc_count) && ioc_gone)
+ elv_ioc_count_dec(ioc_count);
+ if (ioc_gone && !elv_ioc_count_read(ioc_count))
complete(ioc_gone);
}
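/*
 * ioc_count is now a per-cpu counter rather than an atomic_t, so a decrement
 * can no longer atomically test for zero.  The elv_ioc_count_*() helpers are
 * assumed to update the local cpu's counter and to sum across all cpus on
 * elv_ioc_count_read(), which is why the teardown check above became
 *
 *	if (ioc_gone && !elv_ioc_count_read(ioc_count))
 *		complete(ioc_gone);
 *
 * instead of the old atomic_dec_and_test().
 */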
ret->seek_total = 0;
ret->seek_samples = 0;
ret->seek_mean = 0;
- atomic_inc(&ioc_count);
+ elv_ioc_count_inc(ioc_count);
}
return ret;
* If the current task has no AS IO context then create one and initialise it.
* Then take a ref on the task's io context and return it.
*/
-static struct io_context *as_get_io_context(void)
+static struct io_context *as_get_io_context(int node)
{
- struct io_context *ioc = get_io_context(GFP_ATOMIC);
+ struct io_context *ioc = get_io_context(GFP_ATOMIC, node);
if (ioc && !ioc->aic) {
ioc->aic = alloc_as_io_context();
if (!ioc->aic) {
return ioc;
}
-static void as_put_io_context(struct as_rq *arq)
+static void as_put_io_context(struct request *rq)
{
struct as_io_context *aic;
- if (unlikely(!arq->io_context))
+ if (unlikely(!RQ_IOC(rq)))
return;
- aic = arq->io_context->aic;
+ aic = RQ_IOC(rq)->aic;
- if (arq->is_sync == REQ_SYNC && aic) {
+ if (rq_is_sync(rq) && aic) {
spin_lock(&aic->lock);
set_bit(AS_TASK_IORUNNING, &aic->state);
aic->last_end_request = jiffies;
spin_unlock(&aic->lock);
}
- put_io_context(arq->io_context);
-}
-
-/*
- * the back merge hash support functions
- */
-static const int as_hash_shift = 6;
-#define AS_HASH_BLOCK(sec) ((sec) >> 3)
-#define AS_HASH_FN(sec) (hash_long(AS_HASH_BLOCK((sec)), as_hash_shift))
-#define AS_HASH_ENTRIES (1 << as_hash_shift)
-#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
-
-static inline void __as_del_arq_hash(struct as_rq *arq)
-{
- hlist_del_init(&arq->hash);
-}
-
-static inline void as_del_arq_hash(struct as_rq *arq)
-{
- if (!hlist_unhashed(&arq->hash))
- __as_del_arq_hash(arq);
-}
-
-static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq)
-{
- struct request *rq = arq->request;
-
- BUG_ON(!hlist_unhashed(&arq->hash));
-
- hlist_add_head(&arq->hash, &ad->hash[AS_HASH_FN(rq_hash_key(rq))]);
-}
-
-/*
- * move hot entry to front of chain
- */
-static inline void as_hot_arq_hash(struct as_data *ad, struct as_rq *arq)
-{
- struct request *rq = arq->request;
- struct hlist_head *head = &ad->hash[AS_HASH_FN(rq_hash_key(rq))];
-
- if (hlist_unhashed(&arq->hash)) {
- WARN_ON(1);
- return;
- }
-
- if (&arq->hash != head->first) {
- hlist_del(&arq->hash);
- hlist_add_head(&arq->hash, head);
- }
-}
-
-static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset)
-{
- struct hlist_head *hash_list = &ad->hash[AS_HASH_FN(offset)];
- struct hlist_node *entry, *next;
- struct as_rq *arq;
-
- hlist_for_each_entry_safe(arq, entry, next, hash_list, hash) {
- struct request *__rq = arq->request;
-
- BUG_ON(hlist_unhashed(&arq->hash));
-
- if (!rq_mergeable(__rq)) {
- as_del_arq_hash(arq);
- continue;
- }
-
- if (rq_hash_key(__rq) == offset)
- return __rq;
- }
-
- return NULL;
+ put_io_context(RQ_IOC(rq));
}
/*
* rb tree support functions
*/
-#define rb_entry_arq(node) rb_entry((node), struct as_rq, rb_node)
-#define ARQ_RB_ROOT(ad, arq) (&(ad)->sort_list[(arq)->is_sync])
-#define rq_rb_key(rq) (rq)->sector
-
-/*
- * as_find_first_arq finds the first (lowest sector numbered) request
- * for the specified data_dir. Used to sweep back to the start of the disk
- * (1-way elevator) after we process the last (highest sector) request.
- */
-static struct as_rq *as_find_first_arq(struct as_data *ad, int data_dir)
-{
- struct rb_node *n = ad->sort_list[data_dir].rb_node;
-
- if (n == NULL)
- return NULL;
-
- for (;;) {
- if (n->rb_left == NULL)
- return rb_entry_arq(n);
-
- n = n->rb_left;
- }
-}
-
-/*
- * Add the request to the rb tree if it is unique. If there is an alias (an
- * existing request against the same sector), which can happen when using
- * direct IO, then return the alias.
- */
-static struct as_rq *__as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
-{
- struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node;
- struct rb_node *parent = NULL;
- struct as_rq *__arq;
- struct request *rq = arq->request;
-
- arq->rb_key = rq_rb_key(rq);
-
- while (*p) {
- parent = *p;
- __arq = rb_entry_arq(parent);
-
- if (arq->rb_key < __arq->rb_key)
- p = &(*p)->rb_left;
- else if (arq->rb_key > __arq->rb_key)
- p = &(*p)->rb_right;
- else
- return __arq;
- }
-
- rb_link_node(&arq->rb_node, parent, p);
- rb_insert_color(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
-
- return NULL;
-}
+#define RQ_RB_ROOT(ad, rq) (&(ad)->sort_list[rq_is_sync((rq))])
-static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
+static void as_add_rq_rb(struct as_data *ad, struct request *rq)
{
- struct as_rq *alias;
+ struct request *alias;
- while ((unlikely(alias = __as_add_arq_rb(ad, arq)))) {
+ while ((unlikely(alias = elv_rb_add(RQ_RB_ROOT(ad, rq), rq)))) {
as_move_to_dispatch(ad, alias);
as_antic_stop(ad);
}
}
-static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq)
-{
- if (!RB_EMPTY_NODE(&arq->rb_node)) {
- WARN_ON(1);
- return;
- }
-
- rb_erase(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
- RB_CLEAR_NODE(&arq->rb_node);
-}
-
-static struct request *
-as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir)
+static inline void as_del_rq_rb(struct as_data *ad, struct request *rq)
{
- struct rb_node *n = ad->sort_list[data_dir].rb_node;
- struct as_rq *arq;
-
- while (n) {
- arq = rb_entry_arq(n);
-
- if (sector < arq->rb_key)
- n = n->rb_left;
- else if (sector > arq->rb_key)
- n = n->rb_right;
- else
- return arq->request;
- }
-
- return NULL;
+ elv_rb_del(RQ_RB_ROOT(ad, rq), rq);
}
/*
* as_choose_req selects the preferred one of two requests of the same data_dir
* ignoring time - eg. timeouts, which is the job of as_dispatch_request
*/
-static struct as_rq *
-as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2)
+static struct request *
+as_choose_req(struct as_data *ad, struct request *rq1, struct request *rq2)
{
int data_dir;
sector_t last, s1, s2, d1, d2;
int r1_wrap=0, r2_wrap=0; /* requests are behind the disk head */
const sector_t maxback = MAXBACK;
- if (arq1 == NULL || arq1 == arq2)
- return arq2;
- if (arq2 == NULL)
- return arq1;
+ if (rq1 == NULL || rq1 == rq2)
+ return rq2;
+ if (rq2 == NULL)
+ return rq1;
- data_dir = arq1->is_sync;
+ data_dir = rq_is_sync(rq1);
last = ad->last_sector[data_dir];
- s1 = arq1->request->sector;
- s2 = arq2->request->sector;
+ s1 = rq1->sector;
+ s2 = rq2->sector;
- BUG_ON(data_dir != arq2->is_sync);
+ BUG_ON(data_dir != rq_is_sync(rq2));
/*
* Strict one way elevator _except_ in the case where we allow
/* Found required data */
if (!r1_wrap && r2_wrap)
- return arq1;
+ return rq1;
else if (!r2_wrap && r1_wrap)
- return arq2;
+ return rq2;
else if (r1_wrap && r2_wrap) {
/* both behind the head */
if (s1 <= s2)
- return arq1;
+ return rq1;
else
- return arq2;
+ return rq2;
}
/* Both requests in front of the head */
if (d1 < d2)
- return arq1;
+ return rq1;
else if (d2 < d1)
- return arq2;
+ return rq2;
else {
if (s1 >= s2)
- return arq1;
+ return rq1;
else
- return arq2;
+ return rq2;
}
}
/*
- * as_find_next_arq finds the next request after @prev in elevator order.
+ * as_find_next_rq finds the next request after @prev in elevator order.
 * This, together with as_choose_req, forms the basis for how the scheduler chooses
* what request to process next. Anticipation works on top of this.
*/
-static struct as_rq *as_find_next_arq(struct as_data *ad, struct as_rq *last)
+static struct request *
+as_find_next_rq(struct as_data *ad, struct request *last)
{
- const int data_dir = last->is_sync;
- struct as_rq *ret;
struct rb_node *rbnext = rb_next(&last->rb_node);
struct rb_node *rbprev = rb_prev(&last->rb_node);
- struct as_rq *arq_next, *arq_prev;
+ struct request *next = NULL, *prev = NULL;
- BUG_ON(!RB_EMPTY_NODE(&last->rb_node));
+ BUG_ON(RB_EMPTY_NODE(&last->rb_node));
if (rbprev)
- arq_prev = rb_entry_arq(rbprev);
- else
- arq_prev = NULL;
+ prev = rb_entry_rq(rbprev);
if (rbnext)
- arq_next = rb_entry_arq(rbnext);
+ next = rb_entry_rq(rbnext);
else {
- arq_next = as_find_first_arq(ad, data_dir);
- if (arq_next == last)
- arq_next = NULL;
- }
+ const int data_dir = rq_is_sync(last);
- ret = as_choose_req(ad, arq_next, arq_prev);
+ rbnext = rb_first(&ad->sort_list[data_dir]);
+ if (rbnext && rbnext != &last->rb_node)
+ next = rb_entry_rq(rbnext);
+ }
- return ret;
+ return as_choose_req(ad, next, prev);
}
/*
static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
struct request *rq)
{
- struct as_rq *arq = RQ_DATA(rq);
- int data_dir = arq->is_sync;
+ int data_dir = rq_is_sync(rq);
unsigned long thinktime = 0;
sector_t seek_dist;
* previous one issued.
*/
static int as_close_req(struct as_data *ad, struct as_io_context *aic,
- struct as_rq *arq)
+ struct request *rq)
{
unsigned long delay; /* milliseconds */
sector_t last = ad->last_sector[ad->batch_data_dir];
- sector_t next = arq->request->sector;
+ sector_t next = rq->sector;
sector_t delta; /* acceptable close offset (in sectors) */
sector_t s;
*
 * If this task has queued some other IO, do not enter anticipation.
*/
-static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
+static int as_can_break_anticipation(struct as_data *ad, struct request *rq)
{
struct io_context *ioc;
struct as_io_context *aic;
ioc = ad->io_context;
BUG_ON(!ioc);
- if (arq && ioc == arq->io_context) {
+ if (rq && ioc == RQ_IOC(rq)) {
/* request from same process */
return 1;
}
return 1;
}
- if (arq && arq->is_sync == REQ_SYNC && as_close_req(ad, aic, arq)) {
+ if (rq && rq_is_sync(rq) && as_close_req(ad, aic, rq)) {
/*
* Found a close request that is not one of ours.
*
ad->exit_no_coop = (7*ad->exit_no_coop)/8;
}
- as_update_iohist(ad, aic, arq->request);
+ as_update_iohist(ad, aic, rq);
return 1;
}
}
/*
- * as_can_anticipate indicates whether we should either run arq
+ * as_can_anticipate indicates whether we should either run rq
* or keep anticipating a better request.
*/
-static int as_can_anticipate(struct as_data *ad, struct as_rq *arq)
+static int as_can_anticipate(struct as_data *ad, struct request *rq)
{
if (!ad->io_context)
/*
*/
return 0;
- if (as_can_break_anticipation(ad, arq))
+ if (as_can_break_anticipation(ad, rq))
/*
* This request is a good candidate. Don't keep anticipating,
* run it.
}
/*
- * as_update_arq must be called whenever a request (arq) is added to
+ * as_update_rq must be called whenever a request (rq) is added to
* the sort_list. This function keeps caches up to date, and checks if the
* request might be one we are "anticipating"
*/
-static void as_update_arq(struct as_data *ad, struct as_rq *arq)
+static void as_update_rq(struct as_data *ad, struct request *rq)
{
- const int data_dir = arq->is_sync;
+ const int data_dir = rq_is_sync(rq);
- /* keep the next_arq cache up to date */
- ad->next_arq[data_dir] = as_choose_req(ad, arq, ad->next_arq[data_dir]);
+ /* keep the next_rq cache up to date */
+ ad->next_rq[data_dir] = as_choose_req(ad, rq, ad->next_rq[data_dir]);
/*
* have we been anticipating this request?
*/
if (ad->antic_status == ANTIC_WAIT_REQ
|| ad->antic_status == ANTIC_WAIT_NEXT) {
- if (as_can_break_anticipation(ad, arq))
+ if (as_can_break_anticipation(ad, rq))
as_antic_stop(ad);
}
}
static void as_completed_request(request_queue_t *q, struct request *rq)
{
struct as_data *ad = q->elevator->elevator_data;
- struct as_rq *arq = RQ_DATA(rq);
WARN_ON(!list_empty(&rq->queuelist));
- if (arq->state != AS_RQ_REMOVED) {
- printk("arq->state %d\n", arq->state);
+ if (RQ_STATE(rq) != AS_RQ_REMOVED) {
+ printk("rq->state %d\n", RQ_STATE(rq));
WARN_ON(1);
goto out;
}
* actually serviced. This should help devices with big TCQ windows
* and writeback caches
*/
- if (ad->new_batch && ad->batch_data_dir == arq->is_sync) {
+ if (ad->new_batch && ad->batch_data_dir == rq_is_sync(rq)) {
update_write_batch(ad);
ad->current_batch_expires = jiffies +
ad->batch_expire[REQ_SYNC];
ad->new_batch = 0;
}
- if (ad->io_context == arq->io_context && ad->io_context) {
+ if (ad->io_context == RQ_IOC(rq) && ad->io_context) {
ad->antic_start = jiffies;
ad->ioc_finished = 1;
if (ad->antic_status == ANTIC_WAIT_REQ) {
}
}
- as_put_io_context(arq);
+ as_put_io_context(rq);
out:
- arq->state = AS_RQ_POSTSCHED;
+ RQ_SET_STATE(rq, AS_RQ_POSTSCHED);
}
/*
*/
static void as_remove_queued_request(request_queue_t *q, struct request *rq)
{
- struct as_rq *arq = RQ_DATA(rq);
- const int data_dir = arq->is_sync;
+ const int data_dir = rq_is_sync(rq);
struct as_data *ad = q->elevator->elevator_data;
+ struct io_context *ioc;
- WARN_ON(arq->state != AS_RQ_QUEUED);
+ WARN_ON(RQ_STATE(rq) != AS_RQ_QUEUED);
- if (arq->io_context && arq->io_context->aic) {
- BUG_ON(!atomic_read(&arq->io_context->aic->nr_queued));
- atomic_dec(&arq->io_context->aic->nr_queued);
+ ioc = RQ_IOC(rq);
+ if (ioc && ioc->aic) {
+ BUG_ON(!atomic_read(&ioc->aic->nr_queued));
+ atomic_dec(&ioc->aic->nr_queued);
}
/*
- * Update the "next_arq" cache if we are about to remove its
+ * Update the "next_rq" cache if we are about to remove its
* entry
*/
- if (ad->next_arq[data_dir] == arq)
- ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
+ if (ad->next_rq[data_dir] == rq)
+ ad->next_rq[data_dir] = as_find_next_rq(ad, rq);
- list_del_init(&arq->fifo);
- as_del_arq_hash(arq);
- as_del_arq_rb(ad, arq);
+ rq_fifo_clear(rq);
+ as_del_rq_rb(ad, rq);
}
/*
*/
static int as_fifo_expired(struct as_data *ad, int adir)
{
- struct as_rq *arq;
+ struct request *rq;
long delta_jif;
delta_jif = jiffies - ad->last_check_fifo[adir];
if (list_empty(&ad->fifo_list[adir]))
return 0;
- arq = list_entry_fifo(ad->fifo_list[adir].next);
+ rq = rq_entry_fifo(ad->fifo_list[adir].next);
- return time_after(jiffies, arq->expires);
+ return time_after(jiffies, rq_fifo_time(rq));
}
/*
/*
* move an entry to dispatch queue
*/
-static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
+static void as_move_to_dispatch(struct as_data *ad, struct request *rq)
{
- struct request *rq = arq->request;
- const int data_dir = arq->is_sync;
+ const int data_dir = rq_is_sync(rq);
- BUG_ON(!RB_EMPTY_NODE(&arq->rb_node));
+ BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
as_antic_stop(ad);
ad->antic_status = ANTIC_OFF;
/*
* This has to be set in order to be correctly updated by
- * as_find_next_arq
+ * as_find_next_rq
*/
ad->last_sector[data_dir] = rq->sector + rq->nr_sectors;
if (data_dir == REQ_SYNC) {
+ struct io_context *ioc = RQ_IOC(rq);
/* In case we have to anticipate after this */
- copy_io_context(&ad->io_context, &arq->io_context);
+ copy_io_context(&ad->io_context, &ioc);
} else {
if (ad->io_context) {
put_io_context(ad->io_context);
}
ad->ioc_finished = 0;
- ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
+ ad->next_rq[data_dir] = as_find_next_rq(ad, rq);
/*
* take it off the sort and fifo list, add to dispatch queue
*/
as_remove_queued_request(ad->q, rq);
- WARN_ON(arq->state != AS_RQ_QUEUED);
+ WARN_ON(RQ_STATE(rq) != AS_RQ_QUEUED);
elv_dispatch_sort(ad->q, rq);
- arq->state = AS_RQ_DISPATCHED;
- if (arq->io_context && arq->io_context->aic)
- atomic_inc(&arq->io_context->aic->nr_dispatched);
+ RQ_SET_STATE(rq, AS_RQ_DISPATCHED);
+ if (RQ_IOC(rq) && RQ_IOC(rq)->aic)
+ atomic_inc(&RQ_IOC(rq)->aic->nr_dispatched);
ad->nr_dispatched++;
}
static int as_dispatch_request(request_queue_t *q, int force)
{
struct as_data *ad = q->elevator->elevator_data;
- struct as_rq *arq;
const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]);
const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]);
+ struct request *rq;
if (unlikely(force)) {
/*
ad->changed_batch = 0;
ad->new_batch = 0;
- while (ad->next_arq[REQ_SYNC]) {
- as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]);
+ while (ad->next_rq[REQ_SYNC]) {
+ as_move_to_dispatch(ad, ad->next_rq[REQ_SYNC]);
dispatched++;
}
ad->last_check_fifo[REQ_SYNC] = jiffies;
- while (ad->next_arq[REQ_ASYNC]) {
- as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]);
+ while (ad->next_rq[REQ_ASYNC]) {
+ as_move_to_dispatch(ad, ad->next_rq[REQ_ASYNC]);
dispatched++;
}
ad->last_check_fifo[REQ_ASYNC] = jiffies;
/*
* batch is still running or no reads or no writes
*/
- arq = ad->next_arq[ad->batch_data_dir];
+ rq = ad->next_rq[ad->batch_data_dir];
if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) {
if (as_fifo_expired(ad, REQ_SYNC))
goto fifo_expired;
- if (as_can_anticipate(ad, arq)) {
+ if (as_can_anticipate(ad, rq)) {
as_antic_waitreq(ad);
return 0;
}
}
- if (arq) {
+ if (rq) {
/* we have a "next request" */
if (reads && !writes)
ad->current_batch_expires =
ad->changed_batch = 1;
}
ad->batch_data_dir = REQ_SYNC;
- arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next);
+ rq = rq_entry_fifo(ad->fifo_list[REQ_SYNC].next);
ad->last_check_fifo[ad->batch_data_dir] = jiffies;
goto dispatch_request;
}
ad->batch_data_dir = REQ_ASYNC;
ad->current_write_count = ad->write_batch_count;
ad->write_batch_idled = 0;
- arq = ad->next_arq[ad->batch_data_dir];
+ rq = ad->next_rq[ad->batch_data_dir];
goto dispatch_request;
}
if (as_fifo_expired(ad, ad->batch_data_dir)) {
fifo_expired:
- arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next);
- BUG_ON(arq == NULL);
+ rq = rq_entry_fifo(ad->fifo_list[ad->batch_data_dir].next);
}
if (ad->changed_batch) {
}
/*
- * arq is the selected appropriate request.
+ * rq is the selected appropriate request.
*/
- as_move_to_dispatch(ad, arq);
+ as_move_to_dispatch(ad, rq);
return 1;
}
/*
- * add arq to rbtree and fifo
+ * add rq to rbtree and fifo
*/
static void as_add_request(request_queue_t *q, struct request *rq)
{
struct as_data *ad = q->elevator->elevator_data;
- struct as_rq *arq = RQ_DATA(rq);
int data_dir;
- arq->state = AS_RQ_NEW;
+ RQ_SET_STATE(rq, AS_RQ_NEW);
- if (rq_data_dir(arq->request) == READ
- || (arq->request->flags & REQ_RW_SYNC))
- arq->is_sync = 1;
- else
- arq->is_sync = 0;
- data_dir = arq->is_sync;
+ data_dir = rq_is_sync(rq);
- arq->io_context = as_get_io_context();
+ rq->elevator_private = as_get_io_context(q->node);
- if (arq->io_context) {
- as_update_iohist(ad, arq->io_context->aic, arq->request);
- atomic_inc(&arq->io_context->aic->nr_queued);
+ if (RQ_IOC(rq)) {
+ as_update_iohist(ad, RQ_IOC(rq)->aic, rq);
+ atomic_inc(&RQ_IOC(rq)->aic->nr_queued);
}
- as_add_arq_rb(ad, arq);
- if (rq_mergeable(arq->request))
- as_add_arq_hash(ad, arq);
+ as_add_rq_rb(ad, rq);
/*
* set expire time (only used for reads) and add to fifo list
*/
- arq->expires = jiffies + ad->fifo_expire[data_dir];
- list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
+ rq_set_fifo_time(rq, jiffies + ad->fifo_expire[data_dir]);
+ list_add_tail(&rq->queuelist, &ad->fifo_list[data_dir]);
- as_update_arq(ad, arq); /* keep state machine up to date */
- arq->state = AS_RQ_QUEUED;
+ as_update_rq(ad, rq); /* keep state machine up to date */
+ RQ_SET_STATE(rq, AS_RQ_QUEUED);
}
static void as_activate_request(request_queue_t *q, struct request *rq)
{
- struct as_rq *arq = RQ_DATA(rq);
-
- WARN_ON(arq->state != AS_RQ_DISPATCHED);
- arq->state = AS_RQ_REMOVED;
- if (arq->io_context && arq->io_context->aic)
- atomic_dec(&arq->io_context->aic->nr_dispatched);
+ WARN_ON(RQ_STATE(rq) != AS_RQ_DISPATCHED);
+ RQ_SET_STATE(rq, AS_RQ_REMOVED);
+ if (RQ_IOC(rq) && RQ_IOC(rq)->aic)
+ atomic_dec(&RQ_IOC(rq)->aic->nr_dispatched);
}
static void as_deactivate_request(request_queue_t *q, struct request *rq)
{
- struct as_rq *arq = RQ_DATA(rq);
-
- WARN_ON(arq->state != AS_RQ_REMOVED);
- arq->state = AS_RQ_DISPATCHED;
- if (arq->io_context && arq->io_context->aic)
- atomic_inc(&arq->io_context->aic->nr_dispatched);
+ WARN_ON(RQ_STATE(rq) != AS_RQ_REMOVED);
+ RQ_SET_STATE(rq, AS_RQ_DISPATCHED);
+ if (RQ_IOC(rq) && RQ_IOC(rq)->aic)
+ atomic_inc(&RQ_IOC(rq)->aic->nr_dispatched);
}
/*
&& list_empty(&ad->fifo_list[REQ_SYNC]);
}
-static struct request *as_former_request(request_queue_t *q,
- struct request *rq)
-{
- struct as_rq *arq = RQ_DATA(rq);
- struct rb_node *rbprev = rb_prev(&arq->rb_node);
- struct request *ret = NULL;
-
- if (rbprev)
- ret = rb_entry_arq(rbprev)->request;
-
- return ret;
-}
-
-static struct request *as_latter_request(request_queue_t *q,
- struct request *rq)
-{
- struct as_rq *arq = RQ_DATA(rq);
- struct rb_node *rbnext = rb_next(&arq->rb_node);
- struct request *ret = NULL;
-
- if (rbnext)
- ret = rb_entry_arq(rbnext)->request;
-
- return ret;
-}
-
static int
as_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
struct as_data *ad = q->elevator->elevator_data;
sector_t rb_key = bio->bi_sector + bio_sectors(bio);
struct request *__rq;
- int ret;
-
- /*
- * see if the merge hash can satisfy a back merge
- */
- __rq = as_find_arq_hash(ad, bio->bi_sector);
- if (__rq) {
- BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
-
- if (elv_rq_merge_ok(__rq, bio)) {
- ret = ELEVATOR_BACK_MERGE;
- goto out;
- }
- }
/*
* check for front merge
*/
- __rq = as_find_arq_rb(ad, rb_key, bio_data_dir(bio));
- if (__rq) {
- BUG_ON(rb_key != rq_rb_key(__rq));
-
- if (elv_rq_merge_ok(__rq, bio)) {
- ret = ELEVATOR_FRONT_MERGE;
- goto out;
- }
+ __rq = elv_rb_find(&ad->sort_list[bio_data_dir(bio)], rb_key);
+ if (__rq && elv_rq_merge_ok(__rq, bio)) {
+ *req = __rq;
+ return ELEVATOR_FRONT_MERGE;
}
return ELEVATOR_NO_MERGE;
-out:
- if (ret) {
- if (rq_mergeable(__rq))
- as_hot_arq_hash(ad, RQ_DATA(__rq));
- }
- *req = __rq;
- return ret;
}
-static void as_merged_request(request_queue_t *q, struct request *req)
+static void as_merged_request(request_queue_t *q, struct request *req, int type)
{
struct as_data *ad = q->elevator->elevator_data;
- struct as_rq *arq = RQ_DATA(req);
-
- /*
- * hash always needs to be repositioned, key is end sector
- */
- as_del_arq_hash(arq);
- as_add_arq_hash(ad, arq);
/*
* if the merge was a front merge, we need to reposition request
*/
- if (rq_rb_key(req) != arq->rb_key) {
- as_del_arq_rb(ad, arq);
- as_add_arq_rb(ad, arq);
+ if (type == ELEVATOR_FRONT_MERGE) {
+ as_del_rq_rb(ad, req);
+ as_add_rq_rb(ad, req);
/*
* Note! At this stage of this and the next function, our next
* request may not be optimal - eg the request may have "grown"
static void as_merged_requests(request_queue_t *q, struct request *req,
struct request *next)
{
- struct as_data *ad = q->elevator->elevator_data;
- struct as_rq *arq = RQ_DATA(req);
- struct as_rq *anext = RQ_DATA(next);
-
- BUG_ON(!arq);
- BUG_ON(!anext);
-
/*
- * reposition arq (this is the merged request) in hash, and in rbtree
- * in case of a front merge
+	 * if next expires before rq, assign its expire time to rq
+ * and move into next position (next will be deleted) in fifo
*/
- as_del_arq_hash(arq);
- as_add_arq_hash(ad, arq);
-
- if (rq_rb_key(req) != arq->rb_key) {
- as_del_arq_rb(ad, arq);
- as_add_arq_rb(ad, arq);
- }
+ if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
+ if (time_before(rq_fifo_time(next), rq_fifo_time(req))) {
+ struct io_context *rioc = RQ_IOC(req);
+ struct io_context *nioc = RQ_IOC(next);
- /*
- * if anext expires before arq, assign its expire time to arq
- * and move into anext position (anext will be deleted) in fifo
- */
- if (!list_empty(&arq->fifo) && !list_empty(&anext->fifo)) {
- if (time_before(anext->expires, arq->expires)) {
- list_move(&arq->fifo, &anext->fifo);
- arq->expires = anext->expires;
+ list_move(&req->queuelist, &next->queuelist);
+ rq_set_fifo_time(req, rq_fifo_time(next));
/*
			 * Don't copy here but swap, because when next is
* removed below, it must contain the unused context
*/
- swap_io_context(&arq->io_context, &anext->io_context);
+ swap_io_context(&rioc, &nioc);
}
}
* kill knowledge of next, this one is a goner
*/
as_remove_queued_request(q, next);
- as_put_io_context(anext);
+ as_put_io_context(next);
- anext->state = AS_RQ_MERGED;
+ RQ_SET_STATE(next, AS_RQ_MERGED);
}
/*
unsigned long flags;
spin_lock_irqsave(q->queue_lock, flags);
- if (!as_queue_empty(q))
- q->request_fn(q);
+ blk_start_queueing(q);
spin_unlock_irqrestore(q->queue_lock, flags);
}
-static void as_put_request(request_queue_t *q, struct request *rq)
-{
- struct as_data *ad = q->elevator->elevator_data;
- struct as_rq *arq = RQ_DATA(rq);
-
- if (!arq) {
- WARN_ON(1);
- return;
- }
-
- if (unlikely(arq->state != AS_RQ_POSTSCHED &&
- arq->state != AS_RQ_PRESCHED &&
- arq->state != AS_RQ_MERGED)) {
- printk("arq->state %d\n", arq->state);
- WARN_ON(1);
- }
-
- mempool_free(arq, ad->arq_pool);
- rq->elevator_private = NULL;
-}
-
-static int as_set_request(request_queue_t *q, struct request *rq,
- struct bio *bio, gfp_t gfp_mask)
-{
- struct as_data *ad = q->elevator->elevator_data;
- struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask);
-
- if (arq) {
- memset(arq, 0, sizeof(*arq));
- RB_CLEAR_NODE(&arq->rb_node);
- arq->request = rq;
- arq->state = AS_RQ_PRESCHED;
- arq->io_context = NULL;
- INIT_HLIST_NODE(&arq->hash);
- INIT_LIST_HEAD(&arq->fifo);
- rq->elevator_private = arq;
- return 0;
- }
-
- return 1;
-}
-
-static int as_may_queue(request_queue_t *q, int rw, struct bio *bio)
+static int as_may_queue(request_queue_t *q, int rw)
{
int ret = ELV_MQUEUE_MAY;
struct as_data *ad = q->elevator->elevator_data;
struct io_context *ioc;
if (ad->antic_status == ANTIC_WAIT_REQ ||
ad->antic_status == ANTIC_WAIT_NEXT) {
- ioc = as_get_io_context();
+ ioc = as_get_io_context(q->node);
if (ad->io_context == ioc)
ret = ELV_MQUEUE_MUST;
put_io_context(ioc);
BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC]));
BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC]));
- mempool_destroy(ad->arq_pool);
put_io_context(ad->io_context);
- kfree(ad->hash);
kfree(ad);
}
/*
- * initialize elevator private data (as_data), and alloc a arq for
- * each request on the free lists
+ * initialize elevator private data (as_data).
*/
static void *as_init_queue(request_queue_t *q, elevator_t *e)
{
struct as_data *ad;
- int i;
-
- if (!arq_pool)
- return NULL;
ad = kmalloc_node(sizeof(*ad), GFP_KERNEL, q->node);
if (!ad)
ad->q = q; /* Identify what queue the data belongs to */
- ad->hash = kmalloc_node(sizeof(struct hlist_head)*AS_HASH_ENTRIES,
- GFP_KERNEL, q->node);
- if (!ad->hash) {
- kfree(ad);
- return NULL;
- }
-
- ad->arq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
- mempool_free_slab, arq_pool, q->node);
- if (!ad->arq_pool) {
- kfree(ad->hash);
- kfree(ad);
- return NULL;
- }
-
/* anticipatory scheduling helpers */
ad->antic_timer.function = as_antic_timeout;
ad->antic_timer.data = (unsigned long)q;
init_timer(&ad->antic_timer);
INIT_WORK(&ad->antic_work, as_work_handler, q);
- for (i = 0; i < AS_HASH_ENTRIES; i++)
- INIT_HLIST_HEAD(&ad->hash[i]);
-
INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]);
INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
ad->sort_list[REQ_SYNC] = RB_ROOT;
.elevator_deactivate_req_fn = as_deactivate_request,
.elevator_queue_empty_fn = as_queue_empty,
.elevator_completed_req_fn = as_completed_request,
- .elevator_former_req_fn = as_former_request,
- .elevator_latter_req_fn = as_latter_request,
- .elevator_set_req_fn = as_set_request,
- .elevator_put_req_fn = as_put_request,
+ .elevator_former_req_fn = elv_rb_former_request,
+ .elevator_latter_req_fn = elv_rb_latter_request,
.elevator_may_queue_fn = as_may_queue,
.elevator_init_fn = as_init_queue,
.elevator_exit_fn = as_exit_queue,
{
int ret;
- arq_pool = kmem_cache_create("as_arq", sizeof(struct as_rq),
- 0, 0, NULL, NULL);
- if (!arq_pool)
- return -ENOMEM;
-
ret = elv_register(&iosched_as);
if (!ret) {
/*
return 0;
}
- kmem_cache_destroy(arq_pool);
return ret;
}
ioc_gone = &all_gone;
/* ioc_gone's update must be visible before reading ioc_count */
smp_wmb();
- if (atomic_read(&ioc_count))
+ if (elv_ioc_count_read(ioc_count))
wait_for_completion(ioc_gone);
synchronize_rcu();
- kmem_cache_destroy(arq_pool);
}
module_init(as_init);
/*
- * Copyright (C) 2006 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
/*
* Bio action bits of interest
*/
-static u32 bio_act[5] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD) };
+static u32 bio_act[9] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD), 0, 0, 0, BLK_TC_ACT(BLK_TC_META) };
/*
* More could be added as needed, taking care to increment the decrementer
(((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1))
#define trace_ahead_bit(rw) \
(((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD))
+#define trace_meta_bit(rw) \
+ (((rw) & (1 << BIO_RW_META)) >> (BIO_RW_META - 3))
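/*
 * Worked example of the index arithmetic, assuming BIO_RW_META is bit 5 as
 * in this series: a bio with the meta bit set yields
 *
 *	trace_meta_bit(rw) = (1 << 5) >> (5 - 3) = 32 >> 2 = 8
 *
 * hence bio_act[] grows to 9 slots with BLK_TC_ACT(BLK_TC_META) at index 8
 * and zero padding at indices 5-7; the barrier, sync and ahead bits map to
 * indices 1, 2 and 4 in the same fashion.
 */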
/*
* The worker for the various blk_add_trace*() types. Fills out a
what |= bio_act[trace_barrier_bit(rw)];
what |= bio_act[trace_sync_bit(rw)];
what |= bio_act[trace_ahead_bit(rw)];
+ what |= bio_act[trace_meta_bit(rw)];
pid = tsk->pid;
if (unlikely(act_log_check(bt, what, sector, pid)))
*t -= (a + b) / 2;
}
+/*
+ * calibrate our inter-CPU timings
+ */
static void blk_trace_check_cpu_time(void *data)
{
unsigned long long *t;
put_cpu();
}
-/*
- * Call blk_trace_check_cpu_time() on each CPU to calibrate our inter-CPU
- * timings
- */
-static void blk_trace_calibrate_offsets(void)
-{
- unsigned long flags;
-
- smp_call_function(blk_trace_check_cpu_time, NULL, 1, 1);
- local_irq_save(flags);
- blk_trace_check_cpu_time(NULL);
- local_irq_restore(flags);
-}
-
static void blk_trace_set_ht_offsets(void)
{
#if defined(CONFIG_SCHED_SMT)
static __init int blk_trace_init(void)
{
mutex_init(&blk_tree_mutex);
- blk_trace_calibrate_offsets();
+ on_each_cpu(blk_trace_check_cpu_time, NULL, 1, 1);
blk_trace_set_ht_offsets();
return 0;
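/*
 * on_each_cpu(func, info, 1, 1) runs the callback on every online cpu,
 * including the caller, and waits for completion; that covers both halves of
 * the removed blk_trace_calibrate_offsets() (the smp_call_function()
 * cross-call plus the local, irq-protected invocation) in a single call.
 */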
* Based on ideas from a previously unfinished io
* scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
*
- * Copyright (C) 2003 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
*/
#include <linux/module.h>
#include <linux/blkdev.h>
* tunables
*/
static const int cfq_quantum = 4; /* max queue in one round of service */
-static const int cfq_queued = 8; /* minimum rq allocate limit per-queue*/
static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
static const int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */
static const int cfq_back_penalty = 2; /* penalty of a backwards seek */
#define CFQ_KEY_ASYNC (0)
-static DEFINE_SPINLOCK(cfq_exit_lock);
-
/*
* for the hash of cfqq inside the cfqd
*/
#define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT)
#define list_entry_qhash(entry) hlist_entry((entry), struct cfq_queue, cfq_hash)
-/*
- * for the hash of crq inside the cfqq
- */
-#define CFQ_MHASH_SHIFT 6
-#define CFQ_MHASH_BLOCK(sec) ((sec) >> 3)
-#define CFQ_MHASH_ENTRIES (1 << CFQ_MHASH_SHIFT)
-#define CFQ_MHASH_FN(sec) hash_long(CFQ_MHASH_BLOCK(sec), CFQ_MHASH_SHIFT)
-#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
-#define list_entry_hash(ptr) hlist_entry((ptr), struct cfq_rq, hash)
-
#define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list)
-#define list_entry_fifo(ptr) list_entry((ptr), struct request, queuelist)
-#define RQ_DATA(rq) (rq)->elevator_private
+#define RQ_CIC(rq) ((struct cfq_io_context*)(rq)->elevator_private)
+#define RQ_CFQQ(rq) ((rq)->elevator_private2)
-/*
- * rb-tree defines
- */
-#define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node)
-#define rq_rb_key(rq) (rq)->sector
-
-static kmem_cache_t *crq_pool;
static kmem_cache_t *cfq_pool;
static kmem_cache_t *cfq_ioc_pool;
-static atomic_t ioc_count = ATOMIC_INIT(0);
+static DEFINE_PER_CPU(unsigned long, ioc_count);
static struct completion *ioc_gone;
#define CFQ_PRIO_LISTS IOPRIO_BE_NR
#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
-#define cfq_class_be(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_BE)
#define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
#define ASYNC (0)
struct list_head idle_rr;
unsigned int busy_queues;
- /*
- * non-ordered list of empty cfqq's
- */
- struct list_head empty_list;
-
/*
* cfqq lookup hash
*/
struct hlist_head *cfq_hash;
- /*
- * global crq hash for all queues
- */
- struct hlist_head *crq_hash;
-
- mempool_t *crq_pool;
-
int rq_in_driver;
int hw_tag;
- /*
- * schedule slice state info
- */
/*
* idle window management
*/
sector_t last_sector;
unsigned long last_end_request;
- unsigned int rq_starved;
-
/*
* tunables, see top of file
*/
unsigned int cfq_quantum;
- unsigned int cfq_queued;
unsigned int cfq_fifo_expire[2];
unsigned int cfq_back_penalty;
unsigned int cfq_back_max;
struct hlist_node cfq_hash;
/* hash key */
unsigned int key;
- /* on either rr or empty list of cfqd */
+ /* member of the rr/busy/cur/idle cfqd list */
struct list_head cfq_list;
/* sorted list of pending requests */
struct rb_root sort_list;
/* if fifo isn't expired, next request to serve */
- struct cfq_rq *next_crq;
+ struct request *next_rq;
/* requests queued in sort_list */
int queued[2];
/* currently allocated requests */
int allocated[2];
+ /* pending metadata requests */
+ int meta_pending;
/* fifo list of requests in sort_list */
struct list_head fifo;
unsigned long slice_start;
unsigned long slice_end;
unsigned long slice_left;
- unsigned long service_last;
/* number of requests that are on the dispatch list */
int on_dispatch[2];
unsigned int flags;
};
-struct cfq_rq {
- struct rb_node rb_node;
- sector_t rb_key;
- struct request *request;
- struct hlist_node hash;
-
- struct cfq_queue *cfq_queue;
- struct cfq_io_context *io_context;
-
- unsigned int crq_flags;
-};
-
enum cfqq_state_flags {
CFQ_CFQQ_FLAG_on_rr = 0,
CFQ_CFQQ_FLAG_wait_request,
CFQ_CFQQ_FLAG_fifo_expire,
CFQ_CFQQ_FLAG_idle_window,
CFQ_CFQQ_FLAG_prio_changed,
+ CFQ_CFQQ_FLAG_queue_new,
};
#define CFQ_CFQQ_FNS(name) \
CFQ_CFQQ_FNS(fifo_expire);
CFQ_CFQQ_FNS(idle_window);
CFQ_CFQQ_FNS(prio_changed);
+CFQ_CFQQ_FNS(queue_new);
#undef CFQ_CFQQ_FNS
-enum cfq_rq_state_flags {
- CFQ_CRQ_FLAG_is_sync = 0,
-};
-
-#define CFQ_CRQ_FNS(name) \
-static inline void cfq_mark_crq_##name(struct cfq_rq *crq) \
-{ \
- crq->crq_flags |= (1 << CFQ_CRQ_FLAG_##name); \
-} \
-static inline void cfq_clear_crq_##name(struct cfq_rq *crq) \
-{ \
- crq->crq_flags &= ~(1 << CFQ_CRQ_FLAG_##name); \
-} \
-static inline int cfq_crq_##name(const struct cfq_rq *crq) \
-{ \
- return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0; \
-}
-
-CFQ_CRQ_FNS(is_sync);
-#undef CFQ_CRQ_FNS
-
static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short);
-static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *);
+static void cfq_dispatch_insert(request_queue_t *, struct request *);
static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask);
-/*
- * lots of deadline iosched dupes, can be abstracted later...
- */
-static inline void cfq_del_crq_hash(struct cfq_rq *crq)
-{
- hlist_del_init(&crq->hash);
-}
-
-static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
-{
- const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request));
-
- hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]);
-}
-
-static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
-{
- struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)];
- struct hlist_node *entry, *next;
-
- hlist_for_each_safe(entry, next, hash_list) {
- struct cfq_rq *crq = list_entry_hash(entry);
- struct request *__rq = crq->request;
-
- if (!rq_mergeable(__rq)) {
- cfq_del_crq_hash(crq);
- continue;
- }
-
- if (rq_hash_key(__rq) == offset)
- return __rq;
- }
-
- return NULL;
-}
-
/*
* scheduler run of queue, if there are requests pending and no one in the
* driver that will restart queueing
}
/*
- * Lifted from AS - choose which of crq1 and crq2 that is best served now.
+ * Lifted from AS - choose which of rq1 and rq2 is best served now.
* We choose the request that is closest to the head right now. Distance
* behind the head is penalized and only allowed to a certain extent.
*/
-static struct cfq_rq *
-cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
+static struct request *
+cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2)
{
sector_t last, s1, s2, d1 = 0, d2 = 0;
unsigned long back_max;
#define CFQ_RQ2_WRAP 0x02 /* request 2 wraps */
unsigned wrap = 0; /* bit mask: requests behind the disk head? */
- if (crq1 == NULL || crq1 == crq2)
- return crq2;
- if (crq2 == NULL)
- return crq1;
+ if (rq1 == NULL || rq1 == rq2)
+ return rq2;
+ if (rq2 == NULL)
+ return rq1;
- if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2))
- return crq1;
- else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1))
- return crq2;
+ if (rq_is_sync(rq1) && !rq_is_sync(rq2))
+ return rq1;
+ else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
+ return rq2;
+ if (rq_is_meta(rq1) && !rq_is_meta(rq2))
+ return rq1;
+ else if (rq_is_meta(rq2) && !rq_is_meta(rq1))
+ return rq2;
- s1 = crq1->request->sector;
- s2 = crq2->request->sector;
+ s1 = rq1->sector;
+ s2 = rq2->sector;
last = cfqd->last_sector;
* check two variables for all permutations: --> faster!
*/
switch (wrap) {
- case 0: /* common case for CFQ: crq1 and crq2 not wrapped */
+ case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
if (d1 < d2)
- return crq1;
+ return rq1;
else if (d2 < d1)
- return crq2;
+ return rq2;
else {
if (s1 >= s2)
- return crq1;
+ return rq1;
else
- return crq2;
+ return rq2;
}
case CFQ_RQ2_WRAP:
- return crq1;
+ return rq1;
case CFQ_RQ1_WRAP:
- return crq2;
- case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both crqs wrapped */
+ return rq2;
+ case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both rqs wrapped */
default:
/*
* Since both rqs are wrapped,
* since back seek takes more time than forward.
*/
if (s1 <= s2)
- return crq1;
+ return rq1;
else
- return crq2;
+ return rq2;
}
}
/*
* would be nice to take fifo expire time into account as well
*/
-static struct cfq_rq *
-cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- struct cfq_rq *last)
+static struct request *
+cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+ struct request *last)
{
- struct cfq_rq *crq_next = NULL, *crq_prev = NULL;
- struct rb_node *rbnext, *rbprev;
-
- if (!(rbnext = rb_next(&last->rb_node))) {
- rbnext = rb_first(&cfqq->sort_list);
- if (rbnext == &last->rb_node)
- rbnext = NULL;
- }
+ struct rb_node *rbnext = rb_next(&last->rb_node);
+ struct rb_node *rbprev = rb_prev(&last->rb_node);
+ struct request *next = NULL, *prev = NULL;
- rbprev = rb_prev(&last->rb_node);
+ BUG_ON(RB_EMPTY_NODE(&last->rb_node));
if (rbprev)
- crq_prev = rb_entry_crq(rbprev);
- if (rbnext)
- crq_next = rb_entry_crq(rbnext);
-
- return cfq_choose_req(cfqd, crq_next, crq_prev);
-}
+ prev = rb_entry_rq(rbprev);
-static void cfq_update_next_crq(struct cfq_rq *crq)
-{
- struct cfq_queue *cfqq = crq->cfq_queue;
+ if (rbnext)
+ next = rb_entry_rq(rbnext);
+ else {
+ rbnext = rb_first(&cfqq->sort_list);
+ if (rbnext && rbnext != &last->rb_node)
+ next = rb_entry_rq(rbnext);
+ }
- if (cfqq->next_crq == crq)
- cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq);
+ return cfq_choose_req(cfqd, next, prev);
}
static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
{
struct cfq_data *cfqd = cfqq->cfqd;
- struct list_head *list, *entry;
+ struct list_head *list;
BUG_ON(!cfq_cfqq_on_rr(cfqq));
}
/*
- * if queue was preempted, just add to front to be fair. busy_rr
- * isn't sorted, but insert at the back for fairness.
+ * If this queue was preempted or is new (never been serviced), let
+	 * it be added first for fairness but behind other new queues.
+ * Otherwise, just add to the back of the list.
*/
- if (preempted || list == &cfqd->busy_rr) {
- if (preempted)
- list = list->prev;
+ if (preempted || cfq_cfqq_queue_new(cfqq)) {
+ struct list_head *n = list;
+ struct cfq_queue *__cfqq;
- list_add_tail(&cfqq->cfq_list, list);
- return;
- }
+ while (n->next != list) {
+ __cfqq = list_entry_cfqq(n->next);
+ if (!cfq_cfqq_queue_new(__cfqq))
+ break;
- /*
- * sort by when queue was last serviced
- */
- entry = list;
- while ((entry = entry->prev) != list) {
- struct cfq_queue *__cfqq = list_entry_cfqq(entry);
+ n = n->next;
+ }
- if (!__cfqq->service_last)
- break;
- if (time_before(__cfqq->service_last, cfqq->service_last))
- break;
+ list = n;
}
- list_add(&cfqq->cfq_list, entry);
+ list_add_tail(&cfqq->cfq_list, list);
}
/*
{
BUG_ON(!cfq_cfqq_on_rr(cfqq));
cfq_clear_cfqq_on_rr(cfqq);
- list_move(&cfqq->cfq_list, &cfqd->empty_list);
+ list_del_init(&cfqq->cfq_list);
BUG_ON(!cfqd->busy_queues);
cfqd->busy_queues--;
/*
* rb tree support functions
*/
-static inline void cfq_del_crq_rb(struct cfq_rq *crq)
+static inline void cfq_del_rq_rb(struct request *rq)
{
- struct cfq_queue *cfqq = crq->cfq_queue;
+ struct cfq_queue *cfqq = RQ_CFQQ(rq);
struct cfq_data *cfqd = cfqq->cfqd;
- const int sync = cfq_crq_is_sync(crq);
+ const int sync = rq_is_sync(rq);
BUG_ON(!cfqq->queued[sync]);
cfqq->queued[sync]--;
- cfq_update_next_crq(crq);
-
- rb_erase(&crq->rb_node, &cfqq->sort_list);
+ elv_rb_del(&cfqq->sort_list, rq);
if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
cfq_del_cfqq_rr(cfqd, cfqq);
}
-static struct cfq_rq *
-__cfq_add_crq_rb(struct cfq_rq *crq)
+static void cfq_add_rq_rb(struct request *rq)
{
- struct rb_node **p = &crq->cfq_queue->sort_list.rb_node;
- struct rb_node *parent = NULL;
- struct cfq_rq *__crq;
-
- while (*p) {
- parent = *p;
- __crq = rb_entry_crq(parent);
-
- if (crq->rb_key < __crq->rb_key)
- p = &(*p)->rb_left;
- else if (crq->rb_key > __crq->rb_key)
- p = &(*p)->rb_right;
- else
- return __crq;
- }
-
- rb_link_node(&crq->rb_node, parent, p);
- return NULL;
-}
-
-static void cfq_add_crq_rb(struct cfq_rq *crq)
-{
- struct cfq_queue *cfqq = crq->cfq_queue;
+ struct cfq_queue *cfqq = RQ_CFQQ(rq);
struct cfq_data *cfqd = cfqq->cfqd;
- struct request *rq = crq->request;
- struct cfq_rq *__alias;
+ struct request *__alias;
- crq->rb_key = rq_rb_key(rq);
- cfqq->queued[cfq_crq_is_sync(crq)]++;
+ cfqq->queued[rq_is_sync(rq)]++;
/*
* looks a little odd, but the first insert might return an alias.
* if that happens, put the alias on the dispatch list
*/
- while ((__alias = __cfq_add_crq_rb(crq)) != NULL)
+ while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL)
cfq_dispatch_insert(cfqd->queue, __alias);
-
- rb_insert_color(&crq->rb_node, &cfqq->sort_list);
-
- if (!cfq_cfqq_on_rr(cfqq))
- cfq_add_cfqq_rr(cfqd, cfqq);
-
- /*
- * check if this request is a better next-serve candidate
- */
- cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq);
}
static inline void
-cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
+cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
{
- rb_erase(&crq->rb_node, &cfqq->sort_list);
- cfqq->queued[cfq_crq_is_sync(crq)]--;
-
- cfq_add_crq_rb(crq);
+ elv_rb_del(&cfqq->sort_list, rq);
+ cfqq->queued[rq_is_sync(rq)]--;
+ cfq_add_rq_rb(rq);
}
static struct request *
struct task_struct *tsk = current;
pid_t key = cfq_queue_pid(tsk, bio_data_dir(bio));
struct cfq_queue *cfqq;
- struct rb_node *n;
- sector_t sector;
cfqq = cfq_find_cfq_hash(cfqd, key, tsk->ioprio);
- if (!cfqq)
- goto out;
-
- sector = bio->bi_sector + bio_sectors(bio);
- n = cfqq->sort_list.rb_node;
- while (n) {
- struct cfq_rq *crq = rb_entry_crq(n);
+ if (cfqq) {
+ sector_t sector = bio->bi_sector + bio_sectors(bio);
- if (sector < crq->rb_key)
- n = n->rb_left;
- else if (sector > crq->rb_key)
- n = n->rb_right;
- else
- return crq->request;
+ return elv_rb_find(&cfqq->sort_list, sector);
}
-out:
return NULL;
}
static void cfq_remove_request(struct request *rq)
{
- struct cfq_rq *crq = RQ_DATA(rq);
+ struct cfq_queue *cfqq = RQ_CFQQ(rq);
+
+ if (cfqq->next_rq == rq)
+ cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq);
list_del_init(&rq->queuelist);
- cfq_del_crq_rb(crq);
- cfq_del_crq_hash(crq);
+ cfq_del_rq_rb(rq);
+
+ if (rq_is_meta(rq)) {
+ WARN_ON(!cfqq->meta_pending);
+ cfqq->meta_pending--;
+ }
}
static int
{
struct cfq_data *cfqd = q->elevator->elevator_data;
struct request *__rq;
- int ret;
-
- __rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
- if (__rq && elv_rq_merge_ok(__rq, bio)) {
- ret = ELEVATOR_BACK_MERGE;
- goto out;
- }
__rq = cfq_find_rq_fmerge(cfqd, bio);
if (__rq && elv_rq_merge_ok(__rq, bio)) {
- ret = ELEVATOR_FRONT_MERGE;
- goto out;
+ *req = __rq;
+ return ELEVATOR_FRONT_MERGE;
}
return ELEVATOR_NO_MERGE;
-out:
- *req = __rq;
- return ret;
}
-static void cfq_merged_request(request_queue_t *q, struct request *req)
+static void cfq_merged_request(request_queue_t *q, struct request *req,
+ int type)
{
- struct cfq_data *cfqd = q->elevator->elevator_data;
- struct cfq_rq *crq = RQ_DATA(req);
-
- cfq_del_crq_hash(crq);
- cfq_add_crq_hash(cfqd, crq);
-
- if (rq_rb_key(req) != crq->rb_key) {
- struct cfq_queue *cfqq = crq->cfq_queue;
+ if (type == ELEVATOR_FRONT_MERGE) {
+ struct cfq_queue *cfqq = RQ_CFQQ(req);
- cfq_update_next_crq(crq);
- cfq_reposition_crq_rb(cfqq, crq);
+ cfq_reposition_rq_rb(cfqq, req);
}
}
cfq_merged_requests(request_queue_t *q, struct request *rq,
struct request *next)
{
- cfq_merged_request(q, rq);
-
/*
* reposition in fifo if next is older than rq
*/
if (cfq_cfqq_wait_request(cfqq))
del_timer(&cfqd->idle_slice_timer);
- if (!preempted && !cfq_cfqq_dispatched(cfqq)) {
- cfqq->service_last = now;
+ if (!preempted && !cfq_cfqq_dispatched(cfqq))
cfq_schedule_dispatch(cfqd);
- }
cfq_clear_cfqq_must_dispatch(cfqq);
cfq_clear_cfqq_wait_request(cfqq);
+ cfq_clear_cfqq_queue_new(cfqq);
/*
* store what was left of this slice, if the queue idled out
{
struct cfq_queue *cfqq = NULL;
- /*
- * if current list is non-empty, grab first entry. if it is empty,
- * get next prio level and grab first entry then if any are spliced
- */
- if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1)
+ if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1) {
+ /*
+ * if current list is non-empty, grab first entry. if it is
+ * empty, get next prio level and grab first entry then if any
+ * are spliced
+ */
cfqq = list_entry_cfqq(cfqd->cur_rr.next);
-
- /*
- * If no new queues are available, check if the busy list has some
- * before falling back to idle io.
- */
- if (!cfqq && !list_empty(&cfqd->busy_rr))
+ } else if (!list_empty(&cfqd->busy_rr)) {
+ /*
+ * If no new queues are available, check if the busy list has
+ * some before falling back to idle io.
+ */
cfqq = list_entry_cfqq(cfqd->busy_rr.next);
-
- /*
- * if we have idle queues and no rt or be queues had pending
- * requests, either allow immediate service if the grace period
- * has passed or arm the idle grace timer
- */
- if (!cfqq && !list_empty(&cfqd->idle_rr)) {
+ } else if (!list_empty(&cfqd->idle_rr)) {
+ /*
+ * if we have idle queues and no rt or be queues had pending
+ * requests, either allow immediate service if the grace period
+ * has passed or arm the idle grace timer
+ */
unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE;
if (time_after_eq(jiffies, end))
return 1;
}
-static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq)
+static void cfq_dispatch_insert(request_queue_t *q, struct request *rq)
{
struct cfq_data *cfqd = q->elevator->elevator_data;
- struct cfq_queue *cfqq = crq->cfq_queue;
- struct request *rq;
+ struct cfq_queue *cfqq = RQ_CFQQ(rq);
- cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq);
- cfq_remove_request(crq->request);
- cfqq->on_dispatch[cfq_crq_is_sync(crq)]++;
- elv_dispatch_sort(q, crq->request);
+ cfq_remove_request(rq);
+ cfqq->on_dispatch[rq_is_sync(rq)]++;
+ elv_dispatch_sort(q, rq);
rq = list_entry(q->queue_head.prev, struct request, queuelist);
cfqd->last_sector = rq->sector + rq->nr_sectors;
/*
* return expired entry, or NULL to just start from scratch in rbtree
*/
-static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq)
+static inline struct request *cfq_check_fifo(struct cfq_queue *cfqq)
{
struct cfq_data *cfqd = cfqq->cfqd;
struct request *rq;
- struct cfq_rq *crq;
+ int fifo;
if (cfq_cfqq_fifo_expire(cfqq))
return NULL;
+ if (list_empty(&cfqq->fifo))
+ return NULL;
- if (!list_empty(&cfqq->fifo)) {
- int fifo = cfq_cfqq_class_sync(cfqq);
+ fifo = cfq_cfqq_class_sync(cfqq);
+ rq = rq_entry_fifo(cfqq->fifo.next);
- crq = RQ_DATA(list_entry_fifo(cfqq->fifo.next));
- rq = crq->request;
- if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) {
- cfq_mark_cfqq_fifo_expire(cfqq);
- return crq;
- }
+ if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) {
+ cfq_mark_cfqq_fifo_expire(cfqq);
+ return rq;
}
return NULL;
BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));
do {
- struct cfq_rq *crq;
+ struct request *rq;
/*
* follow expired path, else get first next available
*/
- if ((crq = cfq_check_fifo(cfqq)) == NULL)
- crq = cfqq->next_crq;
+ if ((rq = cfq_check_fifo(cfqq)) == NULL)
+ rq = cfqq->next_rq;
/*
* finally, insert request into driver dispatch list
*/
- cfq_dispatch_insert(cfqd->queue, crq);
+ cfq_dispatch_insert(cfqd->queue, rq);
cfqd->dispatch_slice++;
dispatched++;
if (!cfqd->active_cic) {
- atomic_inc(&crq->io_context->ioc->refcount);
- cfqd->active_cic = crq->io_context;
+ atomic_inc(&RQ_CIC(rq)->ioc->refcount);
+ cfqd->active_cic = RQ_CIC(rq);
}
if (RB_EMPTY_ROOT(&cfqq->sort_list))
cfq_forced_dispatch_cfqqs(struct list_head *list)
{
struct cfq_queue *cfqq, *next;
- struct cfq_rq *crq;
int dispatched;
dispatched = 0;
list_for_each_entry_safe(cfqq, next, list, cfq_list) {
- while ((crq = cfqq->next_crq)) {
- cfq_dispatch_insert(cfqq->cfqd->queue, crq);
+ while (cfqq->next_rq) {
+ cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq);
dispatched++;
}
BUG_ON(!list_empty(&cfqq->fifo));
}
/*
- * task holds one reference to the queue, dropped when task exits. each crq
- * in-flight on this queue also holds a reference, dropped when crq is freed.
+ * task holds one reference to the queue, dropped when task exits. each rq
+ * in-flight on this queue also holds a reference, dropped when rq is freed.
*
* queue lock must be held here.
*/
kmem_cache_free(cfq_pool, cfqq);
}
-static inline struct cfq_queue *
+static struct cfq_queue *
__cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio,
const int hashval)
{
freed++;
}
- if (atomic_sub_and_test(freed, &ioc_count) && ioc_gone)
+ elv_ioc_count_mod(ioc_count, -freed);
+
+ if (ioc_gone && !elv_ioc_count_read(ioc_count))
complete(ioc_gone);
}
-static void cfq_trim(struct io_context *ioc)
+static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
- ioc->set_ioprio = NULL;
- cfq_free_io_context(ioc);
+ if (unlikely(cfqq == cfqd->active_queue))
+ __cfq_slice_expired(cfqd, cfqq, 0);
+
+ cfq_put_queue(cfqq);
}
-/*
- * Called with interrupts disabled
- */
-static void cfq_exit_single_io_context(struct cfq_io_context *cic)
+static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
+ struct cfq_io_context *cic)
{
- struct cfq_data *cfqd = cic->key;
- request_queue_t *q;
-
- if (!cfqd)
- return;
-
- q = cfqd->queue;
-
- WARN_ON(!irqs_disabled());
-
- spin_lock(q->queue_lock);
+ list_del_init(&cic->queue_list);
+ smp_wmb();
+ cic->key = NULL;
if (cic->cfqq[ASYNC]) {
- if (unlikely(cic->cfqq[ASYNC] == cfqd->active_queue))
- __cfq_slice_expired(cfqd, cic->cfqq[ASYNC], 0);
- cfq_put_queue(cic->cfqq[ASYNC]);
+ cfq_exit_cfqq(cfqd, cic->cfqq[ASYNC]);
cic->cfqq[ASYNC] = NULL;
}
if (cic->cfqq[SYNC]) {
- if (unlikely(cic->cfqq[SYNC] == cfqd->active_queue))
- __cfq_slice_expired(cfqd, cic->cfqq[SYNC], 0);
- cfq_put_queue(cic->cfqq[SYNC]);
+ cfq_exit_cfqq(cfqd, cic->cfqq[SYNC]);
cic->cfqq[SYNC] = NULL;
}
+}
- cic->key = NULL;
- list_del_init(&cic->queue_list);
- spin_unlock(q->queue_lock);
+
+/*
+ * Called with interrupts disabled
+ */
+static void cfq_exit_single_io_context(struct cfq_io_context *cic)
+{
+ struct cfq_data *cfqd = cic->key;
+
+ if (cfqd) {
+ request_queue_t *q = cfqd->queue;
+
+ spin_lock_irq(q->queue_lock);
+ __cfq_exit_single_io_context(cfqd, cic);
+ spin_unlock_irq(q->queue_lock);
+ }
}
static void cfq_exit_io_context(struct io_context *ioc)
{
struct cfq_io_context *__cic;
- unsigned long flags;
struct rb_node *n;
/*
* put the reference this task is holding to the various queues
*/
- spin_lock_irqsave(&cfq_exit_lock, flags);
n = rb_first(&ioc->cic_root);
while (n != NULL) {
cfq_exit_single_io_context(__cic);
n = rb_next(n);
}
-
- spin_unlock_irqrestore(&cfq_exit_lock, flags);
}
static struct cfq_io_context *
cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
{
- struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask);
+ struct cfq_io_context *cic;
+ cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask, cfqd->queue->node);
if (cic) {
memset(cic, 0, sizeof(*cic));
cic->last_end_request = jiffies;
INIT_LIST_HEAD(&cic->queue_list);
cic->dtor = cfq_free_io_context;
cic->exit = cfq_exit_io_context;
- atomic_inc(&ioc_count);
+ elv_ioc_count_inc(ioc_count);
}
return cic;
spin_unlock(cfqd->queue->queue_lock);
}
-/*
- * callback from sys_ioprio_set, irqs are disabled
- */
-static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
+static void cfq_ioc_set_ioprio(struct io_context *ioc)
{
struct cfq_io_context *cic;
struct rb_node *n;
- spin_lock(&cfq_exit_lock);
+ ioc->ioprio_changed = 0;
n = rb_first(&ioc->cic_root);
while (n != NULL) {
changed_ioprio(cic);
n = rb_next(n);
}
-
- spin_unlock(&cfq_exit_lock);
-
- return 0;
}
static struct cfq_queue *
cfqq = new_cfqq;
new_cfqq = NULL;
} else if (gfp_mask & __GFP_WAIT) {
+ /*
+ * Inform the allocator of the fact that we will
+ * just repeat this allocation if it fails, to allow
+ * the allocator to do whatever it needs to attempt to
+ * free memory.
+ */
spin_unlock_irq(cfqd->queue->queue_lock);
- new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
+ new_cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask|__GFP_NOFAIL, cfqd->queue->node);
spin_lock_irq(cfqd->queue->queue_lock);
goto retry;
} else {
- cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
+ cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask, cfqd->queue->node);
if (!cfqq)
goto out;
}
hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
atomic_set(&cfqq->ref, 0);
cfqq->cfqd = cfqd;
- cfqq->service_last = 0;
/*
* set ->slice_left to allow preemption for a new process
*/
cfqq->slice_left = 2 * cfqd->cfq_slice_idle;
cfq_mark_cfqq_idle_window(cfqq);
cfq_mark_cfqq_prio_changed(cfqq);
+ cfq_mark_cfqq_queue_new(cfqq);
cfq_init_prio_data(cfqq);
}
static void
cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic)
{
- spin_lock(&cfq_exit_lock);
+ WARN_ON(!list_empty(&cic->queue_list));
rb_erase(&cic->rb_node, &ioc->cic_root);
- list_del_init(&cic->queue_list);
- spin_unlock(&cfq_exit_lock);
kmem_cache_free(cfq_ioc_pool, cic);
- atomic_dec(&ioc_count);
+ elv_ioc_count_dec(ioc_count);
}
static struct cfq_io_context *
cic->ioc = ioc;
cic->key = cfqd;
- ioc->set_ioprio = cfq_ioc_set_ioprio;
restart:
parent = NULL;
p = &ioc->cic_root.rb_node;
BUG();
}
- spin_lock(&cfq_exit_lock);
rb_link_node(&cic->rb_node, parent, p);
rb_insert_color(&cic->rb_node, &ioc->cic_root);
+
+ spin_lock_irq(cfqd->queue->queue_lock);
list_add(&cic->queue_list, &cfqd->cic_list);
- spin_unlock(&cfq_exit_lock);
+ spin_unlock_irq(cfqd->queue->queue_lock);
}
/*
might_sleep_if(gfp_mask & __GFP_WAIT);
- ioc = get_io_context(gfp_mask);
+ ioc = get_io_context(gfp_mask, cfqd->queue->node);
if (!ioc)
return NULL;
cfq_cic_link(cfqd, ioc, cic);
out:
+ smp_read_barrier_depends();
+ if (unlikely(ioc->ioprio_changed))
+ cfq_ioc_set_ioprio(ioc);
+
return cic;
err:
put_io_context(ioc);
static void
cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic,
- struct cfq_rq *crq)
+ struct request *rq)
{
sector_t sdist;
u64 total;
- if (cic->last_request_pos < crq->request->sector)
- sdist = crq->request->sector - cic->last_request_pos;
+ if (cic->last_request_pos < rq->sector)
+ sdist = rq->sector - cic->last_request_pos;
else
- sdist = cic->last_request_pos - crq->request->sector;
+ sdist = cic->last_request_pos - rq->sector;
/*
* Don't allow the seek distance to get too large from the
*/
static int
cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
- struct cfq_rq *crq)
+ struct request *rq)
{
struct cfq_queue *cfqq = cfqd->active_queue;
*/
if (new_cfqq->slice_left < cfqd->cfq_slice_idle)
return 0;
- if (cfq_crq_is_sync(crq) && !cfq_cfqq_sync(cfqq))
+ /*
+ * if the new request is sync, but the currently running queue is
+ * not, let the sync request have priority.
+ */
+ if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq))
+ return 1;
+ /*
+ * So both queues are sync. Let the new request get disk time if
+ * it's a metadata request and the current queue is doing regular IO.
+ */
+ if (rq_is_meta(rq) && !cfqq->meta_pending)
return 1;
return 0;
*/
static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
- struct cfq_queue *__cfqq, *next;
-
- list_for_each_entry_safe(__cfqq, next, &cfqd->cur_rr, cfq_list)
- cfq_resort_rr_list(__cfqq, 1);
+ cfq_slice_expired(cfqd, 1);
if (!cfqq->slice_left)
cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2;
- cfqq->slice_end = cfqq->slice_left + jiffies;
- cfq_slice_expired(cfqd, 1);
- __cfq_set_active_queue(cfqd, cfqq);
-}
-
-/*
- * should really be a ll_rw_blk.c helper
- */
-static void cfq_start_queueing(struct cfq_data *cfqd, struct cfq_queue *cfqq)
-{
- request_queue_t *q = cfqd->queue;
+ /*
+ * Put the new queue at the front of the current list,
+ * so we know that it will be selected next.
+ */
+ BUG_ON(!cfq_cfqq_on_rr(cfqq));
+ list_move(&cfqq->cfq_list, &cfqd->cur_rr);
- if (!blk_queue_plugged(q))
- q->request_fn(q);
- else
- __generic_unplug_device(q);
+ cfqq->slice_end = cfqq->slice_left + jiffies;
}
/*
- * Called when a new fs request (crq) is added (to cfqq). Check if there's
+ * Called when a new fs request (rq) is added (to cfqq). Check if there's
* something we should do about it
*/
static void
-cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- struct cfq_rq *crq)
+cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+ struct request *rq)
{
- struct cfq_io_context *cic = crq->io_context;
+ struct cfq_io_context *cic = RQ_CIC(rq);
+
+ if (rq_is_meta(rq))
+ cfqq->meta_pending++;
+
+ /*
+ * check if this request is a better next-serve candidate
+ */
+ cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq);
+ BUG_ON(!cfqq->next_rq);
/*
* we never wait for an async request and we don't allow preemption
* of an async request. so just return early
*/
- if (!cfq_crq_is_sync(crq)) {
+ if (!rq_is_sync(rq)) {
/*
* sync process issued an async request, if it's waiting
* then expire it and kick rq handling.
if (cic == cfqd->active_cic &&
del_timer(&cfqd->idle_slice_timer)) {
cfq_slice_expired(cfqd, 0);
- cfq_start_queueing(cfqd, cfqq);
+ blk_start_queueing(cfqd->queue);
}
return;
}
cfq_update_io_thinktime(cfqd, cic);
- cfq_update_io_seektime(cfqd, cic, crq);
+ cfq_update_io_seektime(cfqd, cic, rq);
cfq_update_idle_window(cfqd, cfqq, cic);
cic->last_queue = jiffies;
- cic->last_request_pos = crq->request->sector + crq->request->nr_sectors;
+ cic->last_request_pos = rq->sector + rq->nr_sectors;
if (cfqq == cfqd->active_queue) {
/*
if (cfq_cfqq_wait_request(cfqq)) {
cfq_mark_cfqq_must_dispatch(cfqq);
del_timer(&cfqd->idle_slice_timer);
- cfq_start_queueing(cfqd, cfqq);
+ blk_start_queueing(cfqd->queue);
}
- } else if (cfq_should_preempt(cfqd, cfqq, crq)) {
+ } else if (cfq_should_preempt(cfqd, cfqq, rq)) {
/*
* not the active queue - expire current slice if it is
* idle and has expired its mean thinktime or this new queue
*/
cfq_preempt_queue(cfqd, cfqq);
cfq_mark_cfqq_must_dispatch(cfqq);
- cfq_start_queueing(cfqd, cfqq);
+ blk_start_queueing(cfqd->queue);
}
}
static void cfq_insert_request(request_queue_t *q, struct request *rq)
{
struct cfq_data *cfqd = q->elevator->elevator_data;
- struct cfq_rq *crq = RQ_DATA(rq);
- struct cfq_queue *cfqq = crq->cfq_queue;
+ struct cfq_queue *cfqq = RQ_CFQQ(rq);
cfq_init_prio_data(cfqq);
- cfq_add_crq_rb(crq);
+ cfq_add_rq_rb(rq);
- list_add_tail(&rq->queuelist, &cfqq->fifo);
+ if (!cfq_cfqq_on_rr(cfqq))
+ cfq_add_cfqq_rr(cfqd, cfqq);
- if (rq_mergeable(rq))
- cfq_add_crq_hash(cfqd, crq);
+ list_add_tail(&rq->queuelist, &cfqq->fifo);
- cfq_crq_enqueued(cfqd, cfqq, crq);
+ cfq_rq_enqueued(cfqd, cfqq, rq);
}
static void cfq_completed_request(request_queue_t *q, struct request *rq)
{
- struct cfq_rq *crq = RQ_DATA(rq);
- struct cfq_queue *cfqq = crq->cfq_queue;
+ struct cfq_queue *cfqq = RQ_CFQQ(rq);
struct cfq_data *cfqd = cfqq->cfqd;
- const int sync = cfq_crq_is_sync(crq);
+ const int sync = rq_is_sync(rq);
unsigned long now;
now = jiffies;
if (!cfq_class_idle(cfqq))
cfqd->last_end_request = now;
- if (!cfq_cfqq_dispatched(cfqq)) {
- if (cfq_cfqq_on_rr(cfqq)) {
- cfqq->service_last = now;
- cfq_resort_rr_list(cfqq, 0);
- }
- }
+ if (!cfq_cfqq_dispatched(cfqq) && cfq_cfqq_on_rr(cfqq))
+ cfq_resort_rr_list(cfqq, 0);
if (sync)
- crq->io_context->last_end_request = now;
+ RQ_CIC(rq)->last_end_request = now;
/*
* If this is the active queue, check if it needs to be expired,
}
}
-static struct request *
-cfq_former_request(request_queue_t *q, struct request *rq)
-{
- struct cfq_rq *crq = RQ_DATA(rq);
- struct rb_node *rbprev = rb_prev(&crq->rb_node);
-
- if (rbprev)
- return rb_entry_crq(rbprev)->request;
-
- return NULL;
-}
-
-static struct request *
-cfq_latter_request(request_queue_t *q, struct request *rq)
-{
- struct cfq_rq *crq = RQ_DATA(rq);
- struct rb_node *rbnext = rb_next(&crq->rb_node);
-
- if (rbnext)
- return rb_entry_crq(rbnext)->request;
-
- return NULL;
-}
-
/*
* we temporarily boost lower priority queues if they are holding fs exclusive
* resources. they are boosted to normal prio (CLASS_BE/4)
cfq_resort_rr_list(cfqq, 0);
}
-static inline int
-__cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- struct task_struct *task, int rw)
+static inline int __cfq_may_queue(struct cfq_queue *cfqq)
{
if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) &&
!cfq_cfqq_must_alloc_slice(cfqq)) {
return ELV_MQUEUE_MAY;
}
-static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio)
+static int cfq_may_queue(request_queue_t *q, int rw)
{
struct cfq_data *cfqd = q->elevator->elevator_data;
struct task_struct *tsk = current;
cfq_init_prio_data(cfqq);
cfq_prio_boost(cfqq);
- return __cfq_may_queue(cfqd, cfqq, tsk, rw);
+ return __cfq_may_queue(cfqq);
}
return ELV_MQUEUE_MAY;
}
-static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq)
-{
- struct cfq_data *cfqd = q->elevator->elevator_data;
-
- if (unlikely(cfqd->rq_starved)) {
- struct request_list *rl = &q->rq;
-
- smp_mb();
- if (waitqueue_active(&rl->wait[READ]))
- wake_up(&rl->wait[READ]);
- if (waitqueue_active(&rl->wait[WRITE]))
- wake_up(&rl->wait[WRITE]);
- }
-}
-
/*
* queue lock held here
*/
static void cfq_put_request(request_queue_t *q, struct request *rq)
{
- struct cfq_data *cfqd = q->elevator->elevator_data;
- struct cfq_rq *crq = RQ_DATA(rq);
+ struct cfq_queue *cfqq = RQ_CFQQ(rq);
- if (crq) {
- struct cfq_queue *cfqq = crq->cfq_queue;
+ if (cfqq) {
const int rw = rq_data_dir(rq);
BUG_ON(!cfqq->allocated[rw]);
cfqq->allocated[rw]--;
- put_io_context(crq->io_context->ioc);
+ put_io_context(RQ_CIC(rq)->ioc);
- mempool_free(crq, cfqd->crq_pool);
rq->elevator_private = NULL;
+ rq->elevator_private2 = NULL;
- cfq_check_waiters(q, cfqq);
cfq_put_queue(cfqq);
}
}
* Allocate cfq data structures associated with this request.
*/
static int
-cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
- gfp_t gfp_mask)
+cfq_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask)
{
struct cfq_data *cfqd = q->elevator->elevator_data;
struct task_struct *tsk = current;
const int rw = rq_data_dir(rq);
pid_t key = cfq_queue_pid(tsk, rw);
struct cfq_queue *cfqq;
- struct cfq_rq *crq;
unsigned long flags;
int is_sync = key != CFQ_KEY_ASYNC;
cfqq->allocated[rw]++;
cfq_clear_cfqq_must_alloc(cfqq);
- cfqd->rq_starved = 0;
atomic_inc(&cfqq->ref);
- spin_unlock_irqrestore(q->queue_lock, flags);
- crq = mempool_alloc(cfqd->crq_pool, gfp_mask);
- if (crq) {
- RB_CLEAR_NODE(&crq->rb_node);
- crq->rb_key = 0;
- crq->request = rq;
- INIT_HLIST_NODE(&crq->hash);
- crq->cfq_queue = cfqq;
- crq->io_context = cic;
-
- if (is_sync)
- cfq_mark_crq_is_sync(crq);
- else
- cfq_clear_crq_is_sync(crq);
+ spin_unlock_irqrestore(q->queue_lock, flags);
- rq->elevator_private = crq;
- return 0;
- }
+ rq->elevator_private = cic;
+ rq->elevator_private2 = cfqq;
+ return 0;
- spin_lock_irqsave(q->queue_lock, flags);
- cfqq->allocated[rw]--;
- if (!(cfqq->allocated[0] + cfqq->allocated[1]))
- cfq_mark_cfqq_must_alloc(cfqq);
- cfq_put_queue(cfqq);
queue_fail:
if (cic)
put_io_context(cic->ioc);
- /*
- * mark us rq allocation starved. we need to kickstart the process
- * ourselves if there are no pending requests that can do it for us.
- * that would be an extremely rare OOM situation
- */
- cfqd->rq_starved = 1;
+
cfq_schedule_dispatch(cfqd);
spin_unlock_irqrestore(q->queue_lock, flags);
return 1;
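With struct cfq_rq gone, per-request scheduler state lives directly in the request: the first private pointer carries the cfq_io_context and the new elevator_private2 the cfq_queue, which is presumably all the RQ_CIC()/RQ_CFQQ() accessors used above dereference. A minimal sketch of the same pattern for a hypothetical scheduler (struct foo_queue and the foo_* names are made up for illustration; kernel context assumed):

static int foo_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask)
{
	struct foo_queue *fooq = foo_get_queue(q, rq_data_dir(rq));	/* hypothetical lookup */

	if (!fooq)
		return 1;			/* nonzero makes the block layer fail the rq allocation */

	rq->elevator_private = fooq;		/* first per-request slot */
	rq->elevator_private2 = NULL;		/* second slot, new in this patch */
	return 0;
}

static void foo_put_request(request_queue_t *q, struct request *rq)
{
	rq->elevator_private = NULL;
	rq->elevator_private2 = NULL;
}

Whatever is stored in these slots must be dropped again in the put_req hook, since the block layer only calls it for requests flagged REQ_ELVPRIV.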
static void cfq_kick_queue(void *data)
{
request_queue_t *q = data;
- struct cfq_data *cfqd = q->elevator->elevator_data;
unsigned long flags;
spin_lock_irqsave(q->queue_lock, flags);
-
- if (cfqd->rq_starved) {
- struct request_list *rl = &q->rq;
-
- /*
- * we aren't guaranteed to get a request after this, but we
- * have to be opportunistic
- */
- smp_mb();
- if (waitqueue_active(&rl->wait[READ]))
- wake_up(&rl->wait[READ]);
- if (waitqueue_active(&rl->wait[WRITE]))
- wake_up(&rl->wait[WRITE]);
- }
-
- blk_remove_plug(q);
- q->request_fn(q);
+ blk_start_queueing(q);
spin_unlock_irqrestore(q->queue_lock, flags);
}
cfq_shutdown_timer_wq(cfqd);
- spin_lock(&cfq_exit_lock);
spin_lock_irq(q->queue_lock);
if (cfqd->active_queue)
struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
struct cfq_io_context,
queue_list);
- if (cic->cfqq[ASYNC]) {
- cfq_put_queue(cic->cfqq[ASYNC]);
- cic->cfqq[ASYNC] = NULL;
- }
- if (cic->cfqq[SYNC]) {
- cfq_put_queue(cic->cfqq[SYNC]);
- cic->cfqq[SYNC] = NULL;
- }
- cic->key = NULL;
- list_del_init(&cic->queue_list);
+
+ __cfq_exit_single_io_context(cfqd, cic);
}
spin_unlock_irq(q->queue_lock);
- spin_unlock(&cfq_exit_lock);
cfq_shutdown_timer_wq(cfqd);
- mempool_destroy(cfqd->crq_pool);
- kfree(cfqd->crq_hash);
kfree(cfqd->cfq_hash);
kfree(cfqd);
}
struct cfq_data *cfqd;
int i;
- cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL);
+ cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL, q->node);
if (!cfqd)
return NULL;
INIT_LIST_HEAD(&cfqd->busy_rr);
INIT_LIST_HEAD(&cfqd->cur_rr);
INIT_LIST_HEAD(&cfqd->idle_rr);
- INIT_LIST_HEAD(&cfqd->empty_list);
INIT_LIST_HEAD(&cfqd->cic_list);
- cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL);
- if (!cfqd->crq_hash)
- goto out_crqhash;
-
- cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL);
+ cfqd->cfq_hash = kmalloc_node(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL, q->node);
if (!cfqd->cfq_hash)
- goto out_cfqhash;
-
- cfqd->crq_pool = mempool_create_slab_pool(BLKDEV_MIN_RQ, crq_pool);
- if (!cfqd->crq_pool)
- goto out_crqpool;
+ goto out_free;
- for (i = 0; i < CFQ_MHASH_ENTRIES; i++)
- INIT_HLIST_HEAD(&cfqd->crq_hash[i]);
for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
INIT_HLIST_HEAD(&cfqd->cfq_hash[i]);
INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q);
- cfqd->cfq_queued = cfq_queued;
cfqd->cfq_quantum = cfq_quantum;
cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0];
cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1];
cfqd->cfq_slice_idle = cfq_slice_idle;
return cfqd;
-out_crqpool:
- kfree(cfqd->cfq_hash);
-out_cfqhash:
- kfree(cfqd->crq_hash);
-out_crqhash:
+out_free:
kfree(cfqd);
return NULL;
}
static void cfq_slab_kill(void)
{
- if (crq_pool)
- kmem_cache_destroy(crq_pool);
if (cfq_pool)
kmem_cache_destroy(cfq_pool);
if (cfq_ioc_pool)
static int __init cfq_slab_setup(void)
{
- crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0,
- NULL, NULL);
- if (!crq_pool)
- goto fail;
-
cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0,
NULL, NULL);
if (!cfq_pool)
return cfq_var_show(__data, (page)); \
}
SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
-SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued, 0);
SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1);
SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1);
SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0);
return ret; \
}
STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0);
-STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX, 0);
STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1);
STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1);
STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0);
static struct elv_fs_entry cfq_attrs[] = {
CFQ_ATTR(quantum),
- CFQ_ATTR(queued),
CFQ_ATTR(fifo_expire_sync),
CFQ_ATTR(fifo_expire_async),
CFQ_ATTR(back_seek_max),
.elevator_deactivate_req_fn = cfq_deactivate_request,
.elevator_queue_empty_fn = cfq_queue_empty,
.elevator_completed_req_fn = cfq_completed_request,
- .elevator_former_req_fn = cfq_former_request,
- .elevator_latter_req_fn = cfq_latter_request,
+ .elevator_former_req_fn = elv_rb_former_request,
+ .elevator_latter_req_fn = elv_rb_latter_request,
.elevator_set_req_fn = cfq_set_request,
.elevator_put_req_fn = cfq_put_request,
.elevator_may_queue_fn = cfq_may_queue,
.elevator_init_fn = cfq_init_queue,
.elevator_exit_fn = cfq_exit_queue,
- .trim = cfq_trim,
+ .trim = cfq_free_io_context,
},
.elevator_attrs = cfq_attrs,
.elevator_name = "cfq",
ioc_gone = &all_gone;
/* ioc_gone's update must be visible before reading ioc_count */
smp_wmb();
- if (atomic_read(&ioc_count))
+ if (elv_ioc_count_read(ioc_count))
wait_for_completion(ioc_gone);
synchronize_rcu();
cfq_slab_kill();
/*
* Deadline i/o scheduler.
*
- * Copyright (C) 2002 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2002 Jens Axboe <axboe@kernel.dk>
*/
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
-#include <linux/hash.h>
#include <linux/rbtree.h>
/*
static const int fifo_batch = 16; /* # of sequential requests treated as one
by the above parameters. For throughput. */
-static const int deadline_hash_shift = 5;
-#define DL_HASH_BLOCK(sec) ((sec) >> 3)
-#define DL_HASH_FN(sec) (hash_long(DL_HASH_BLOCK((sec)), deadline_hash_shift))
-#define DL_HASH_ENTRIES (1 << deadline_hash_shift)
-#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
-#define ON_HASH(drq) (!hlist_unhashed(&(drq)->hash))
-
struct deadline_data {
/*
* run time data
/*
* next in sort order. read, write or both are NULL
*/
- struct deadline_rq *next_drq[2];
- struct hlist_head *hash; /* request hash */
+ struct request *next_rq[2];
unsigned int batching; /* number of sequential requests made */
sector_t last_sector; /* head position */
unsigned int starved; /* times reads have starved writes */
int fifo_batch;
int writes_starved;
int front_merges;
-
- mempool_t *drq_pool;
};
-/*
- * pre-request data.
- */
-struct deadline_rq {
- /*
- * rbtree index, key is the starting offset
- */
- struct rb_node rb_node;
- sector_t rb_key;
-
- struct request *request;
-
- /*
- * request hash, key is the ending offset (for back merge lookup)
- */
- struct hlist_node hash;
-
- /*
- * expire fifo
- */
- struct list_head fifo;
- unsigned long expires;
-};
-
-static void deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq);
-
-static kmem_cache_t *drq_pool;
-
-#define RQ_DATA(rq) ((struct deadline_rq *) (rq)->elevator_private)
+static void deadline_move_request(struct deadline_data *, struct request *);
-/*
- * the back merge hash support functions
- */
-static inline void __deadline_del_drq_hash(struct deadline_rq *drq)
-{
- hlist_del_init(&drq->hash);
-}
-
-static inline void deadline_del_drq_hash(struct deadline_rq *drq)
-{
- if (ON_HASH(drq))
- __deadline_del_drq_hash(drq);
-}
-
-static inline void
-deadline_add_drq_hash(struct deadline_data *dd, struct deadline_rq *drq)
-{
- struct request *rq = drq->request;
-
- BUG_ON(ON_HASH(drq));
-
- hlist_add_head(&drq->hash, &dd->hash[DL_HASH_FN(rq_hash_key(rq))]);
-}
-
-/*
- * move hot entry to front of chain
- */
-static inline void
-deadline_hot_drq_hash(struct deadline_data *dd, struct deadline_rq *drq)
-{
- struct request *rq = drq->request;
- struct hlist_head *head = &dd->hash[DL_HASH_FN(rq_hash_key(rq))];
-
- if (ON_HASH(drq) && &drq->hash != head->first) {
- hlist_del(&drq->hash);
- hlist_add_head(&drq->hash, head);
- }
-}
-
-static struct request *
-deadline_find_drq_hash(struct deadline_data *dd, sector_t offset)
-{
- struct hlist_head *hash_list = &dd->hash[DL_HASH_FN(offset)];
- struct hlist_node *entry, *next;
- struct deadline_rq *drq;
-
- hlist_for_each_entry_safe(drq, entry, next, hash_list, hash) {
- struct request *__rq = drq->request;
-
- BUG_ON(!ON_HASH(drq));
-
- if (!rq_mergeable(__rq)) {
- __deadline_del_drq_hash(drq);
- continue;
- }
-
- if (rq_hash_key(__rq) == offset)
- return __rq;
- }
-
- return NULL;
-}
-
-/*
- * rb tree support functions
- */
-#define rb_entry_drq(node) rb_entry((node), struct deadline_rq, rb_node)
-#define DRQ_RB_ROOT(dd, drq) (&(dd)->sort_list[rq_data_dir((drq)->request)])
-#define rq_rb_key(rq) (rq)->sector
-
-static struct deadline_rq *
-__deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
-{
- struct rb_node **p = &DRQ_RB_ROOT(dd, drq)->rb_node;
- struct rb_node *parent = NULL;
- struct deadline_rq *__drq;
-
- while (*p) {
- parent = *p;
- __drq = rb_entry_drq(parent);
-
- if (drq->rb_key < __drq->rb_key)
- p = &(*p)->rb_left;
- else if (drq->rb_key > __drq->rb_key)
- p = &(*p)->rb_right;
- else
- return __drq;
- }
-
- rb_link_node(&drq->rb_node, parent, p);
- return NULL;
-}
+#define RQ_RB_ROOT(dd, rq) (&(dd)->sort_list[rq_data_dir((rq))])
static void
-deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
+deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
{
- struct deadline_rq *__alias;
-
- drq->rb_key = rq_rb_key(drq->request);
+ struct rb_root *root = RQ_RB_ROOT(dd, rq);
+ struct request *__alias;
retry:
- __alias = __deadline_add_drq_rb(dd, drq);
- if (!__alias) {
- rb_insert_color(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
- return;
+ __alias = elv_rb_add(root, rq);
+ if (unlikely(__alias)) {
+ deadline_move_request(dd, __alias);
+ goto retry;
}
-
- deadline_move_request(dd, __alias);
- goto retry;
}
static inline void
-deadline_del_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
+deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
{
- const int data_dir = rq_data_dir(drq->request);
+ const int data_dir = rq_data_dir(rq);
- if (dd->next_drq[data_dir] == drq) {
- struct rb_node *rbnext = rb_next(&drq->rb_node);
+ if (dd->next_rq[data_dir] == rq) {
+ struct rb_node *rbnext = rb_next(&rq->rb_node);
- dd->next_drq[data_dir] = NULL;
+ dd->next_rq[data_dir] = NULL;
if (rbnext)
- dd->next_drq[data_dir] = rb_entry_drq(rbnext);
- }
-
- BUG_ON(!RB_EMPTY_NODE(&drq->rb_node));
- rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
- RB_CLEAR_NODE(&drq->rb_node);
-}
-
-static struct request *
-deadline_find_drq_rb(struct deadline_data *dd, sector_t sector, int data_dir)
-{
- struct rb_node *n = dd->sort_list[data_dir].rb_node;
- struct deadline_rq *drq;
-
- while (n) {
- drq = rb_entry_drq(n);
-
- if (sector < drq->rb_key)
- n = n->rb_left;
- else if (sector > drq->rb_key)
- n = n->rb_right;
- else
- return drq->request;
+ dd->next_rq[data_dir] = rb_entry_rq(rbnext);
}
- return NULL;
+ elv_rb_del(RQ_RB_ROOT(dd, rq), rq);
}
/*
- * deadline_find_first_drq finds the first (lowest sector numbered) request
- * for the specified data_dir. Used to sweep back to the start of the disk
- * (1-way elevator) after we process the last (highest sector) request.
- */
-static struct deadline_rq *
-deadline_find_first_drq(struct deadline_data *dd, int data_dir)
-{
- struct rb_node *n = dd->sort_list[data_dir].rb_node;
-
- for (;;) {
- if (n->rb_left == NULL)
- return rb_entry_drq(n);
-
- n = n->rb_left;
- }
-}
-
-/*
- * add drq to rbtree and fifo
+ * add rq to rbtree and fifo
*/
static void
deadline_add_request(struct request_queue *q, struct request *rq)
{
struct deadline_data *dd = q->elevator->elevator_data;
- struct deadline_rq *drq = RQ_DATA(rq);
+ const int data_dir = rq_data_dir(rq);
- const int data_dir = rq_data_dir(drq->request);
+ deadline_add_rq_rb(dd, rq);
- deadline_add_drq_rb(dd, drq);
/*
* set expire time (only used for reads) and add to fifo list
*/
- drq->expires = jiffies + dd->fifo_expire[data_dir];
- list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]);
-
- if (rq_mergeable(rq))
- deadline_add_drq_hash(dd, drq);
+ rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]);
+ list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
}
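With struct deadline_rq gone, the FIFO threads requests directly on rq->queuelist and keeps the deadline in the request itself through the rq_set_fifo_time()/rq_fifo_time()/rq_entry_fifo() helpers used above. A minimal kernel-context sketch of the pattern for a hypothetical scheduler (struct foo_data, the foo_* names and the single fifo_list are made up for illustration):

struct foo_data {
	struct list_head fifo_list;	/* requests in arrival order */
	int fifo_expire;		/* max service delay, in jiffies */
};

static void foo_add_request(struct foo_data *fd, struct request *rq)
{
	/* stamp the request with its service deadline and queue it */
	rq_set_fifo_time(rq, jiffies + fd->fifo_expire);
	list_add_tail(&rq->queuelist, &fd->fifo_list);
}

static int foo_fifo_expired(struct foo_data *fd)
{
	struct request *rq;

	if (list_empty(&fd->fifo_list))
		return 0;

	/* the oldest request sits at the head of the FIFO */
	rq = rq_entry_fifo(fd->fifo_list.next);
	return time_after(jiffies, rq_fifo_time(rq));
}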
/*
- * remove rq from rbtree, fifo, and hash
+ * remove rq from rbtree and fifo.
*/
static void deadline_remove_request(request_queue_t *q, struct request *rq)
{
- struct deadline_rq *drq = RQ_DATA(rq);
struct deadline_data *dd = q->elevator->elevator_data;
- list_del_init(&drq->fifo);
- deadline_del_drq_rb(dd, drq);
- deadline_del_drq_hash(drq);
+ rq_fifo_clear(rq);
+ deadline_del_rq_rb(dd, rq);
}
static int
struct request *__rq;
int ret;
- /*
- * see if the merge hash can satisfy a back merge
- */
- __rq = deadline_find_drq_hash(dd, bio->bi_sector);
- if (__rq) {
- BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
-
- if (elv_rq_merge_ok(__rq, bio)) {
- ret = ELEVATOR_BACK_MERGE;
- goto out;
- }
- }
-
/*
* check for front merge
*/
if (dd->front_merges) {
- sector_t rb_key = bio->bi_sector + bio_sectors(bio);
+ sector_t sector = bio->bi_sector + bio_sectors(bio);
- __rq = deadline_find_drq_rb(dd, rb_key, bio_data_dir(bio));
+ __rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
if (__rq) {
- BUG_ON(rb_key != rq_rb_key(__rq));
+ BUG_ON(sector != __rq->sector);
if (elv_rq_merge_ok(__rq, bio)) {
ret = ELEVATOR_FRONT_MERGE;
return ELEVATOR_NO_MERGE;
out:
- if (ret)
- deadline_hot_drq_hash(dd, RQ_DATA(__rq));
*req = __rq;
return ret;
}
-static void deadline_merged_request(request_queue_t *q, struct request *req)
+static void deadline_merged_request(request_queue_t *q, struct request *req,
+ int type)
{
struct deadline_data *dd = q->elevator->elevator_data;
- struct deadline_rq *drq = RQ_DATA(req);
-
- /*
- * hash always needs to be repositioned, key is end sector
- */
- deadline_del_drq_hash(drq);
- deadline_add_drq_hash(dd, drq);
/*
* if the merge was a front merge, we need to reposition request
*/
- if (rq_rb_key(req) != drq->rb_key) {
- deadline_del_drq_rb(dd, drq);
- deadline_add_drq_rb(dd, drq);
+ if (type == ELEVATOR_FRONT_MERGE) {
+ elv_rb_del(RQ_RB_ROOT(dd, req), req);
+ deadline_add_rq_rb(dd, req);
}
}
deadline_merged_requests(request_queue_t *q, struct request *req,
struct request *next)
{
- struct deadline_data *dd = q->elevator->elevator_data;
- struct deadline_rq *drq = RQ_DATA(req);
- struct deadline_rq *dnext = RQ_DATA(next);
-
- BUG_ON(!drq);
- BUG_ON(!dnext);
-
/*
- * reposition drq (this is the merged request) in hash, and in rbtree
- * in case of a front merge
+ * if next expires before rq, assign its expire time to rq
+ * and move into next position (next will be deleted) in fifo
*/
- deadline_del_drq_hash(drq);
- deadline_add_drq_hash(dd, drq);
-
- if (rq_rb_key(req) != drq->rb_key) {
- deadline_del_drq_rb(dd, drq);
- deadline_add_drq_rb(dd, drq);
- }
-
- /*
- * if dnext expires before drq, assign its expire time to drq
- * and move into dnext position (dnext will be deleted) in fifo
- */
- if (!list_empty(&drq->fifo) && !list_empty(&dnext->fifo)) {
- if (time_before(dnext->expires, drq->expires)) {
- list_move(&drq->fifo, &dnext->fifo);
- drq->expires = dnext->expires;
+ if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
+ if (time_before(rq_fifo_time(next), rq_fifo_time(req))) {
+ list_move(&req->queuelist, &next->queuelist);
+ rq_set_fifo_time(req, rq_fifo_time(next));
}
}
* move request from sort list to dispatch queue.
*/
static inline void
-deadline_move_to_dispatch(struct deadline_data *dd, struct deadline_rq *drq)
+deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq)
{
- request_queue_t *q = drq->request->q;
+ request_queue_t *q = rq->q;
- deadline_remove_request(q, drq->request);
- elv_dispatch_add_tail(q, drq->request);
+ deadline_remove_request(q, rq);
+ elv_dispatch_add_tail(q, rq);
}
/*
* move an entry to dispatch queue
*/
static void
-deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq)
+deadline_move_request(struct deadline_data *dd, struct request *rq)
{
- const int data_dir = rq_data_dir(drq->request);
- struct rb_node *rbnext = rb_next(&drq->rb_node);
+ const int data_dir = rq_data_dir(rq);
+ struct rb_node *rbnext = rb_next(&rq->rb_node);
- dd->next_drq[READ] = NULL;
- dd->next_drq[WRITE] = NULL;
+ dd->next_rq[READ] = NULL;
+ dd->next_rq[WRITE] = NULL;
if (rbnext)
- dd->next_drq[data_dir] = rb_entry_drq(rbnext);
+ dd->next_rq[data_dir] = rb_entry_rq(rbnext);
- dd->last_sector = drq->request->sector + drq->request->nr_sectors;
+ dd->last_sector = rq->sector + rq->nr_sectors;
/*
* take it off the sort and fifo list, move
* to dispatch queue
*/
- deadline_move_to_dispatch(dd, drq);
+ deadline_move_to_dispatch(dd, rq);
}
-#define list_entry_fifo(ptr) list_entry((ptr), struct deadline_rq, fifo)
-
/*
* deadline_check_fifo returns 0 if there are no expired requests on the fifo,
* 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
*/
static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
{
- struct deadline_rq *drq = list_entry_fifo(dd->fifo_list[ddir].next);
+ struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next);
/*
- * drq is expired!
+ * rq is expired!
*/
- if (time_after(jiffies, drq->expires))
+ if (time_after(jiffies, rq_fifo_time(rq)))
return 1;
return 0;
struct deadline_data *dd = q->elevator->elevator_data;
const int reads = !list_empty(&dd->fifo_list[READ]);
const int writes = !list_empty(&dd->fifo_list[WRITE]);
- struct deadline_rq *drq;
+ struct request *rq;
int data_dir;
/*
* batches are currently reads XOR writes
*/
- if (dd->next_drq[WRITE])
- drq = dd->next_drq[WRITE];
+ if (dd->next_rq[WRITE])
+ rq = dd->next_rq[WRITE];
else
- drq = dd->next_drq[READ];
+ rq = dd->next_rq[READ];
- if (drq) {
+ if (rq) {
/* we have a "next request" */
- if (dd->last_sector != drq->request->sector)
+ if (dd->last_sector != rq->sector)
/* end the batch on a non sequential request */
dd->batching += dd->fifo_batch;
if (deadline_check_fifo(dd, data_dir)) {
/* An expired request exists - satisfy it */
dd->batching = 0;
- drq = list_entry_fifo(dd->fifo_list[data_dir].next);
+ rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
- } else if (dd->next_drq[data_dir]) {
+ } else if (dd->next_rq[data_dir]) {
/*
* The last req was the same dir and we have a next request in
* sort order. No expired requests so continue on from here.
*/
- drq = dd->next_drq[data_dir];
+ rq = dd->next_rq[data_dir];
} else {
+ struct rb_node *node;
/*
* The last req was the other direction or we have run out of
* higher-sectored requests. Go back to the lowest sectored
* request (1 way elevator) and start a new batch.
*/
dd->batching = 0;
- drq = deadline_find_first_drq(dd, data_dir);
+ node = rb_first(&dd->sort_list[data_dir]);
+ if (node)
+ rq = rb_entry_rq(node);
}
dispatch_request:
/*
- * drq is the selected appropriate request.
+ * rq is the selected appropriate request.
*/
dd->batching++;
- deadline_move_request(dd, drq);
+ deadline_move_request(dd, rq);
return 1;
}
&& list_empty(&dd->fifo_list[READ]);
}
-static struct request *
-deadline_former_request(request_queue_t *q, struct request *rq)
-{
- struct deadline_rq *drq = RQ_DATA(rq);
- struct rb_node *rbprev = rb_prev(&drq->rb_node);
-
- if (rbprev)
- return rb_entry_drq(rbprev)->request;
-
- return NULL;
-}
-
-static struct request *
-deadline_latter_request(request_queue_t *q, struct request *rq)
-{
- struct deadline_rq *drq = RQ_DATA(rq);
- struct rb_node *rbnext = rb_next(&drq->rb_node);
-
- if (rbnext)
- return rb_entry_drq(rbnext)->request;
-
- return NULL;
-}
-
static void deadline_exit_queue(elevator_t *e)
{
struct deadline_data *dd = e->elevator_data;
BUG_ON(!list_empty(&dd->fifo_list[READ]));
BUG_ON(!list_empty(&dd->fifo_list[WRITE]));
- mempool_destroy(dd->drq_pool);
- kfree(dd->hash);
kfree(dd);
}
/*
- * initialize elevator private data (deadline_data), and alloc a drq for
- * each request on the free lists
+ * initialize elevator private data (deadline_data).
*/
static void *deadline_init_queue(request_queue_t *q, elevator_t *e)
{
struct deadline_data *dd;
- int i;
-
- if (!drq_pool)
- return NULL;
dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
if (!dd)
return NULL;
memset(dd, 0, sizeof(*dd));
- dd->hash = kmalloc_node(sizeof(struct hlist_head)*DL_HASH_ENTRIES,
- GFP_KERNEL, q->node);
- if (!dd->hash) {
- kfree(dd);
- return NULL;
- }
-
- dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
- mempool_free_slab, drq_pool, q->node);
- if (!dd->drq_pool) {
- kfree(dd->hash);
- kfree(dd);
- return NULL;
- }
-
- for (i = 0; i < DL_HASH_ENTRIES; i++)
- INIT_HLIST_HEAD(&dd->hash[i]);
-
INIT_LIST_HEAD(&dd->fifo_list[READ]);
INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
dd->sort_list[READ] = RB_ROOT;
return dd;
}
-static void deadline_put_request(request_queue_t *q, struct request *rq)
-{
- struct deadline_data *dd = q->elevator->elevator_data;
- struct deadline_rq *drq = RQ_DATA(rq);
-
- mempool_free(drq, dd->drq_pool);
- rq->elevator_private = NULL;
-}
-
-static int
-deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
- gfp_t gfp_mask)
-{
- struct deadline_data *dd = q->elevator->elevator_data;
- struct deadline_rq *drq;
-
- drq = mempool_alloc(dd->drq_pool, gfp_mask);
- if (drq) {
- memset(drq, 0, sizeof(*drq));
- RB_CLEAR_NODE(&drq->rb_node);
- drq->request = rq;
-
- INIT_HLIST_NODE(&drq->hash);
-
- INIT_LIST_HEAD(&drq->fifo);
-
- rq->elevator_private = drq;
- return 0;
- }
-
- return 1;
-}
-
/*
* sysfs parts below
*/
.elevator_dispatch_fn = deadline_dispatch_requests,
.elevator_add_req_fn = deadline_add_request,
.elevator_queue_empty_fn = deadline_queue_empty,
- .elevator_former_req_fn = deadline_former_request,
- .elevator_latter_req_fn = deadline_latter_request,
- .elevator_set_req_fn = deadline_set_request,
- .elevator_put_req_fn = deadline_put_request,
+ .elevator_former_req_fn = elv_rb_former_request,
+ .elevator_latter_req_fn = elv_rb_latter_request,
.elevator_init_fn = deadline_init_queue,
.elevator_exit_fn = deadline_exit_queue,
},
static int __init deadline_init(void)
{
- int ret;
-
- drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq),
- 0, 0, NULL, NULL);
-
- if (!drq_pool)
- return -ENOMEM;
-
- ret = elv_register(&iosched_deadline);
- if (ret)
- kmem_cache_destroy(drq_pool);
-
- return ret;
+ return elv_register(&iosched_deadline);
}
static void __exit deadline_exit(void)
{
- kmem_cache_destroy(drq_pool);
elv_unregister(&iosched_deadline);
}
*
* Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
*
- * 30042000 Jens Axboe <axboe@suse.de> :
+ * 30042000 Jens Axboe <axboe@kernel.dk> :
*
* Split the elevator a bit so that it is possible to choose a different
* one or even write a new "plug in". There are three pieces:
#include <linux/compiler.h>
#include <linux/delay.h>
#include <linux/blktrace_api.h>
+#include <linux/hash.h>
#include <asm/uaccess.h>
static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);
+/*
+ * Merge hash stuff.
+ */
+static const int elv_hash_shift = 6;
+#define ELV_HASH_BLOCK(sec) ((sec) >> 3)
+#define ELV_HASH_FN(sec) (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
+#define ELV_HASH_ENTRIES (1 << elv_hash_shift)
+#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
+#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash))
+
/*
* can we safely merge with this request?
*/
/*
* same device and no special stuff set, merge is ok
*/
- if (rq->rq_disk == bio->bi_bdev->bd_disk &&
- !rq->waiting && !rq->special)
+ if (rq->rq_disk == bio->bi_bdev->bd_disk && !rq->special)
return 1;
return 0;
static struct kobj_type elv_ktype;
-static elevator_t *elevator_alloc(struct elevator_type *e)
-{
- elevator_t *eq = kmalloc(sizeof(elevator_t), GFP_KERNEL);
- if (eq) {
- memset(eq, 0, sizeof(*eq));
- eq->ops = &e->ops;
- eq->elevator_type = e;
- kobject_init(&eq->kobj);
- snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
- eq->kobj.ktype = &elv_ktype;
- mutex_init(&eq->sysfs_lock);
- } else {
- elevator_put(e);
- }
+static elevator_t *elevator_alloc(request_queue_t *q, struct elevator_type *e)
+{
+ elevator_t *eq;
+ int i;
+
+ eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL, q->node);
+ if (unlikely(!eq))
+ goto err;
+
+ memset(eq, 0, sizeof(*eq));
+ eq->ops = &e->ops;
+ eq->elevator_type = e;
+ kobject_init(&eq->kobj);
+ snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
+ eq->kobj.ktype = &elv_ktype;
+ mutex_init(&eq->sysfs_lock);
+
+ eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
+ GFP_KERNEL, q->node);
+ if (!eq->hash)
+ goto err;
+
+ for (i = 0; i < ELV_HASH_ENTRIES; i++)
+ INIT_HLIST_HEAD(&eq->hash[i]);
+
return eq;
+err:
+ kfree(eq);
+ elevator_put(e);
+ return NULL;
}
static void elevator_release(struct kobject *kobj)
{
elevator_t *e = container_of(kobj, elevator_t, kobj);
+
elevator_put(e->elevator_type);
+ kfree(e->hash);
kfree(e);
}
e = elevator_get("noop");
}
- eq = elevator_alloc(e);
+ eq = elevator_alloc(q, e);
if (!eq)
return -ENOMEM;
return ret;
}
+EXPORT_SYMBOL(elevator_init);
+
void elevator_exit(elevator_t *e)
{
mutex_lock(&e->sysfs_lock);
kobject_put(&e->kobj);
}
+EXPORT_SYMBOL(elevator_exit);
+
+static inline void __elv_rqhash_del(struct request *rq)
+{
+ hlist_del_init(&rq->hash);
+}
+
+static void elv_rqhash_del(request_queue_t *q, struct request *rq)
+{
+ if (ELV_ON_HASH(rq))
+ __elv_rqhash_del(rq);
+}
+
+static void elv_rqhash_add(request_queue_t *q, struct request *rq)
+{
+ elevator_t *e = q->elevator;
+
+ BUG_ON(ELV_ON_HASH(rq));
+ hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
+}
+
+static void elv_rqhash_reposition(request_queue_t *q, struct request *rq)
+{
+ __elv_rqhash_del(rq);
+ elv_rqhash_add(q, rq);
+}
+
+static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset)
+{
+ elevator_t *e = q->elevator;
+ struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
+ struct hlist_node *entry, *next;
+ struct request *rq;
+
+ hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
+ BUG_ON(!ELV_ON_HASH(rq));
+
+ if (unlikely(!rq_mergeable(rq))) {
+ __elv_rqhash_del(rq);
+ continue;
+ }
+
+ if (rq_hash_key(rq) == offset)
+ return rq;
+ }
+
+ return NULL;
+}
+
+/*
+ * RB-tree support functions for inserting/lookup/removal of requests
+ * in a sorted RB tree.
+ */
+struct request *elv_rb_add(struct rb_root *root, struct request *rq)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct request *__rq;
+
+ while (*p) {
+ parent = *p;
+ __rq = rb_entry(parent, struct request, rb_node);
+
+ if (rq->sector < __rq->sector)
+ p = &(*p)->rb_left;
+ else if (rq->sector > __rq->sector)
+ p = &(*p)->rb_right;
+ else
+ return __rq;
+ }
+
+ rb_link_node(&rq->rb_node, parent, p);
+ rb_insert_color(&rq->rb_node, root);
+ return NULL;
+}
+
+EXPORT_SYMBOL(elv_rb_add);
+
+void elv_rb_del(struct rb_root *root, struct request *rq)
+{
+ BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
+ rb_erase(&rq->rb_node, root);
+ RB_CLEAR_NODE(&rq->rb_node);
+}
+
+EXPORT_SYMBOL(elv_rb_del);
+
+struct request *elv_rb_find(struct rb_root *root, sector_t sector)
+{
+ struct rb_node *n = root->rb_node;
+ struct request *rq;
+
+ while (n) {
+ rq = rb_entry(n, struct request, rb_node);
+
+ if (sector < rq->sector)
+ n = n->rb_left;
+ else if (sector > rq->sector)
+ n = n->rb_right;
+ else
+ return rq;
+ }
+
+ return NULL;
+}
+
+EXPORT_SYMBOL(elv_rb_find);
+
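These exported rb tree helpers are what let deadline and cfq drop their private per-request nodes: a scheduler just keeps a struct rb_root per direction and sorts struct request directly by start sector. A minimal sketch for a hypothetical scheduler (the foo_* names are made up; kernel context assumed):

struct foo_data {
	struct rb_root sort_list[2];		/* one tree per data direction */
};

static void foo_add_rq_rb(struct foo_data *fd, struct request *rq)
{
	struct request *alias;

	alias = elv_rb_add(&fd->sort_list[rq_data_dir(rq)], rq);
	if (alias) {
		/* a request with the same start sector is already queued;
		 * a real scheduler dispatches the alias and retries, as
		 * deadline_add_rq_rb() above does */
	}
}

static void foo_del_rq_rb(struct foo_data *fd, struct request *rq)
{
	elv_rb_del(&fd->sort_list[rq_data_dir(rq)], rq);
}

static struct request *foo_find_front_merge(struct foo_data *fd, int ddir,
					    sector_t sector)
{
	/* lookup by start sector, e.g. bio->bi_sector + bio_sectors(bio) */
	return elv_rb_find(&fd->sort_list[ddir], sector);
}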
/*
* Insert rq into dispatch queue of q. Queue lock must be held on
- * entry. If sort != 0, rq is sort-inserted; otherwise, rq will be
- * appended to the dispatch queue. To be used by specific elevators.
+ * entry. rq is sort inserted into the dispatch queue. To be used by
+ * specific elevators.
*/
void elv_dispatch_sort(request_queue_t *q, struct request *rq)
{
if (q->last_merge == rq)
q->last_merge = NULL;
+
+ elv_rqhash_del(q, rq);
+
q->nr_sorted--;
boundary = q->end_sector;
list_for_each_prev(entry, &q->queue_head) {
struct request *pos = list_entry_rq(entry);
- if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
+ if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
break;
if (rq->sector >= boundary) {
if (pos->sector < boundary)
list_add(&rq->queuelist, entry);
}
+EXPORT_SYMBOL(elv_dispatch_sort);
+
+/*
+ * Insert rq into dispatch queue of q. Queue lock must be held on
+ * entry. rq is added to the back of the dispatch queue. To be used by
+ * specific elevators.
+ */
+void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
+{
+ if (q->last_merge == rq)
+ q->last_merge = NULL;
+
+ elv_rqhash_del(q, rq);
+
+ q->nr_sorted--;
+
+ q->end_sector = rq_end_sector(rq);
+ q->boundary_rq = rq;
+ list_add_tail(&rq->queuelist, &q->queue_head);
+}
+
+EXPORT_SYMBOL(elv_dispatch_add_tail);
+
int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
elevator_t *e = q->elevator;
+ struct request *__rq;
int ret;
+ /*
+ * First try one-hit cache.
+ */
if (q->last_merge) {
ret = elv_try_merge(q->last_merge, bio);
if (ret != ELEVATOR_NO_MERGE) {
}
}
+ /*
+ * See if our hash lookup can find a potential backmerge.
+ */
+ __rq = elv_rqhash_find(q, bio->bi_sector);
+ if (__rq && elv_rq_merge_ok(__rq, bio)) {
+ *req = __rq;
+ return ELEVATOR_BACK_MERGE;
+ }
+
if (e->ops->elevator_merge_fn)
return e->ops->elevator_merge_fn(q, req, bio);
return ELEVATOR_NO_MERGE;
}
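The merge type now travels with the decision: elv_merge() hands back ELEVATOR_BACK_MERGE, ELEVATOR_FRONT_MERGE or ELEVATOR_NO_MERGE, and the caller passes the same value on so the core can rehash on back merges while a scheduler only re-sorts on front merges. A condensed sketch of the caller side (hypothetical locals; __make_request() later in this patch follows the same shape):

	struct request *req;
	int el_ret;

	el_ret = elv_merge(q, &req, bio);
	if (el_ret == ELEVATOR_BACK_MERGE) {
		/* append the bio to req, update accounting, then: */
		elv_merged_request(q, req, el_ret);
	} else if (el_ret == ELEVATOR_FRONT_MERGE) {
		/* prepend the bio to req (its start sector changes), then: */
		elv_merged_request(q, req, el_ret);
	} else {
		/* ELEVATOR_NO_MERGE: fall back to allocating a new request */
	}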
-void elv_merged_request(request_queue_t *q, struct request *rq)
+void elv_merged_request(request_queue_t *q, struct request *rq, int type)
{
elevator_t *e = q->elevator;
if (e->ops->elevator_merged_fn)
- e->ops->elevator_merged_fn(q, rq);
+ e->ops->elevator_merged_fn(q, rq, type);
+
+ if (type == ELEVATOR_BACK_MERGE)
+ elv_rqhash_reposition(q, rq);
q->last_merge = rq;
}
if (e->ops->elevator_merge_req_fn)
e->ops->elevator_merge_req_fn(q, rq, next);
- q->nr_sorted--;
+ elv_rqhash_reposition(q, rq);
+ elv_rqhash_del(q, next);
+
+ q->nr_sorted--;
q->last_merge = rq;
}
e->ops->elevator_deactivate_req_fn(q, rq);
}
- rq->flags &= ~REQ_STARTED;
+ rq->cmd_flags &= ~REQ_STARTED;
elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
}
switch (where) {
case ELEVATOR_INSERT_FRONT:
- rq->flags |= REQ_SOFTBARRIER;
+ rq->cmd_flags |= REQ_SOFTBARRIER;
list_add(&rq->queuelist, &q->queue_head);
break;
case ELEVATOR_INSERT_BACK:
- rq->flags |= REQ_SOFTBARRIER;
+ rq->cmd_flags |= REQ_SOFTBARRIER;
elv_drain_elevator(q);
list_add_tail(&rq->queuelist, &q->queue_head);
/*
case ELEVATOR_INSERT_SORT:
BUG_ON(!blk_fs_request(rq));
- rq->flags |= REQ_SORTED;
+ rq->cmd_flags |= REQ_SORTED;
q->nr_sorted++;
- if (q->last_merge == NULL && rq_mergeable(rq))
- q->last_merge = rq;
+ if (rq_mergeable(rq)) {
+ elv_rqhash_add(q, rq);
+ if (!q->last_merge)
+ q->last_merge = rq;
+ }
+
/*
* Some ioscheds (cfq) run q->request_fn directly, so
* rq cannot be accessed after calling
* insertion; otherwise, requests should be requeued
* in ordseq order.
*/
- rq->flags |= REQ_SOFTBARRIER;
+ rq->cmd_flags |= REQ_SOFTBARRIER;
if (q->ordseq == 0) {
list_add(&rq->queuelist, &q->queue_head);
int plug)
{
if (q->ordcolor)
- rq->flags |= REQ_ORDERED_COLOR;
+ rq->cmd_flags |= REQ_ORDERED_COLOR;
- if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
+ if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
/*
* toggle ordered color
*/
q->end_sector = rq_end_sector(rq);
q->boundary_rq = rq;
}
- } else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
+ } else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
where = ELEVATOR_INSERT_BACK;
if (plug)
elv_insert(q, rq, where);
}
+EXPORT_SYMBOL(__elv_add_request);
+
void elv_add_request(request_queue_t *q, struct request *rq, int where,
int plug)
{
spin_unlock_irqrestore(q->queue_lock, flags);
}
+EXPORT_SYMBOL(elv_add_request);
+
static inline struct request *__elv_next_request(request_queue_t *q)
{
struct request *rq;
int ret;
while ((rq = __elv_next_request(q)) != NULL) {
- if (!(rq->flags & REQ_STARTED)) {
+ if (!(rq->cmd_flags & REQ_STARTED)) {
elevator_t *e = q->elevator;
/*
* it, a request that has been delayed should
* not be passed by new incoming requests
*/
- rq->flags |= REQ_STARTED;
+ rq->cmd_flags |= REQ_STARTED;
blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
}
q->boundary_rq = NULL;
}
- if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn)
+ if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)
break;
ret = q->prep_rq_fn(q, rq);
nr_bytes = rq->data_len;
blkdev_dequeue_request(rq);
- rq->flags |= REQ_QUIET;
+ rq->cmd_flags |= REQ_QUIET;
end_that_request_chunk(rq, 0, nr_bytes);
end_that_request_last(rq, 0);
} else {
return rq;
}
+EXPORT_SYMBOL(elv_next_request);
+
void elv_dequeue_request(request_queue_t *q, struct request *rq)
{
BUG_ON(list_empty(&rq->queuelist));
+ BUG_ON(ELV_ON_HASH(rq));
list_del_init(&rq->queuelist);
q->in_flight++;
}
+EXPORT_SYMBOL(elv_dequeue_request);
+
int elv_queue_empty(request_queue_t *q)
{
elevator_t *e = q->elevator;
return 1;
}
+EXPORT_SYMBOL(elv_queue_empty);
+
struct request *elv_latter_request(request_queue_t *q, struct request *rq)
{
elevator_t *e = q->elevator;
return NULL;
}
-int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
- gfp_t gfp_mask)
+int elv_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask)
{
elevator_t *e = q->elevator;
if (e->ops->elevator_set_req_fn)
- return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask);
+ return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
rq->elevator_private = NULL;
return 0;
e->ops->elevator_put_req_fn(q, rq);
}
-int elv_may_queue(request_queue_t *q, int rw, struct bio *bio)
+int elv_may_queue(request_queue_t *q, int rw)
{
elevator_t *e = q->elevator;
if (e->ops->elevator_may_queue_fn)
- return e->ops->elevator_may_queue_fn(q, rw, bio);
+ return e->ops->elevator_may_queue_fn(q, rw);
return ELV_MQUEUE_MAY;
}
/*
* Allocate new elevator
*/
- e = elevator_alloc(new_e);
+ e = elevator_alloc(q, new_e);
if (!e)
return 0;
return len;
}
-EXPORT_SYMBOL(elv_dispatch_sort);
-EXPORT_SYMBOL(elv_add_request);
-EXPORT_SYMBOL(__elv_add_request);
-EXPORT_SYMBOL(elv_next_request);
-EXPORT_SYMBOL(elv_dequeue_request);
-EXPORT_SYMBOL(elv_queue_empty);
-EXPORT_SYMBOL(elevator_exit);
-EXPORT_SYMBOL(elevator_init);
+struct request *elv_rb_former_request(request_queue_t *q, struct request *rq)
+{
+ struct rb_node *rbprev = rb_prev(&rq->rb_node);
+
+ if (rbprev)
+ return rb_entry_rq(rbprev);
+
+ return NULL;
+}
+
+EXPORT_SYMBOL(elv_rb_former_request);
+
+struct request *elv_rb_latter_request(request_queue_t *q, struct request *rq)
+{
+ struct rb_node *rbnext = rb_next(&rq->rb_node);
+
+ if (rbnext)
+ return rb_entry_rq(rbnext);
+
+ return NULL;
+}
+
+EXPORT_SYMBOL(elv_rb_latter_request);
static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);
static void init_request_from_bio(struct request *req, struct bio *bio);
static int __make_request(request_queue_t *q, struct bio *bio);
+static struct io_context *current_io_context(gfp_t gfp_flags, int node);
/*
* For the allocated request tables
EXPORT_SYMBOL(blk_queue_make_request);
-static inline void rq_init(request_queue_t *q, struct request *rq)
+static void rq_init(request_queue_t *q, struct request *rq)
{
INIT_LIST_HEAD(&rq->queuelist);
INIT_LIST_HEAD(&rq->donelist);
rq->errors = 0;
- rq->rq_status = RQ_ACTIVE;
rq->bio = rq->biotail = NULL;
+ INIT_HLIST_NODE(&rq->hash);
+ RB_CLEAR_NODE(&rq->rb_node);
rq->ioprio = 0;
rq->buffer = NULL;
rq->ref_count = 1;
rq->q = q;
- rq->waiting = NULL;
rq->special = NULL;
rq->data_len = 0;
rq->data = NULL;
if (rq == &q->post_flush_rq)
return QUEUE_ORDSEQ_POSTFLUSH;
- if ((rq->flags & REQ_ORDERED_COLOR) ==
- (q->orig_bar_rq->flags & REQ_ORDERED_COLOR))
+ if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
+ (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
return QUEUE_ORDSEQ_DRAIN;
else
return QUEUE_ORDSEQ_DONE;
end_io = post_flush_end_io;
}
+ rq->cmd_flags = REQ_HARDBARRIER;
rq_init(q, rq);
- rq->flags = REQ_HARDBARRIER;
rq->elevator_private = NULL;
+ rq->elevator_private2 = NULL;
rq->rq_disk = q->bar_rq.rq_disk;
- rq->rl = NULL;
rq->end_io = end_io;
q->prepare_flush_fn(q, rq);
blkdev_dequeue_request(rq);
q->orig_bar_rq = rq;
rq = &q->bar_rq;
+ rq->cmd_flags = 0;
rq_init(q, rq);
- rq->flags = bio_data_dir(q->orig_bar_rq->bio);
- rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
+ if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
+ rq->cmd_flags |= REQ_RW;
+ rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
rq->elevator_private = NULL;
- rq->rl = NULL;
+ rq->elevator_private2 = NULL;
init_request_from_bio(rq, q->orig_bar_rq->bio);
rq->end_io = bar_end_io;
return 0;
}
-static inline int ordered_bio_endio(struct request *rq, struct bio *bio,
- unsigned int nbytes, int error)
+static int ordered_bio_endio(struct request *rq, struct bio *bio,
+ unsigned int nbytes, int error)
{
request_queue_t *q = rq->q;
bio_end_io_t *endio;
}
list_del_init(&rq->queuelist);
- rq->flags &= ~REQ_QUEUED;
+ rq->cmd_flags &= ~REQ_QUEUED;
rq->tag = -1;
if (unlikely(bqt->tag_index[tag] == NULL))
struct blk_queue_tag *bqt = q->queue_tags;
int tag;
- if (unlikely((rq->flags & REQ_QUEUED))) {
+ if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
printk(KERN_ERR
"%s: request %p for device [%s] already tagged %d",
__FUNCTION__, rq,
BUG();
}
- tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);
- if (tag >= bqt->max_depth)
- return 1;
+ /*
+ * Protect against shared tag maps, as we may not have exclusive
+ * access to the tag map.
+ */
+ do {
+ tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);
+ if (tag >= bqt->max_depth)
+ return 1;
- __set_bit(tag, bqt->tag_map);
+ } while (test_and_set_bit(tag, bqt->tag_map));
- rq->flags |= REQ_QUEUED;
+ rq->cmd_flags |= REQ_QUEUED;
rq->tag = tag;
bqt->tag_index[tag] = rq;
blkdev_dequeue_request(rq);
printk(KERN_ERR
"%s: bad tag found on list\n", __FUNCTION__);
list_del_init(&rq->queuelist);
- rq->flags &= ~REQ_QUEUED;
+ rq->cmd_flags &= ~REQ_QUEUED;
} else
blk_queue_end_tag(q, rq);
- rq->flags &= ~REQ_STARTED;
+ rq->cmd_flags &= ~REQ_STARTED;
__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
}
}
EXPORT_SYMBOL(blk_queue_invalidate_tags);
-static const char * const rq_flags[] = {
- "REQ_RW",
- "REQ_FAILFAST",
- "REQ_SORTED",
- "REQ_SOFTBARRIER",
- "REQ_HARDBARRIER",
- "REQ_FUA",
- "REQ_CMD",
- "REQ_NOMERGE",
- "REQ_STARTED",
- "REQ_DONTPREP",
- "REQ_QUEUED",
- "REQ_ELVPRIV",
- "REQ_PC",
- "REQ_BLOCK_PC",
- "REQ_SENSE",
- "REQ_FAILED",
- "REQ_QUIET",
- "REQ_SPECIAL",
- "REQ_DRIVE_CMD",
- "REQ_DRIVE_TASK",
- "REQ_DRIVE_TASKFILE",
- "REQ_PREEMPT",
- "REQ_PM_SUSPEND",
- "REQ_PM_RESUME",
- "REQ_PM_SHUTDOWN",
- "REQ_ORDERED_COLOR",
-};
-
void blk_dump_rq_flags(struct request *rq, char *msg)
{
int bit;
- printk("%s: dev %s: flags = ", msg,
- rq->rq_disk ? rq->rq_disk->disk_name : "?");
- bit = 0;
- do {
- if (rq->flags & (1 << bit))
- printk("%s ", rq_flags[bit]);
- bit++;
- } while (bit < __REQ_NR_BITS);
+ printk("%s: dev %s: type=%x, flags=%x\n", msg,
+ rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
+ rq->cmd_flags);
printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector,
rq->nr_sectors,
rq->current_nr_sectors);
printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len);
- if (rq->flags & (REQ_BLOCK_PC | REQ_PC)) {
+ if (blk_pc_request(rq)) {
printk("cdb: ");
for (bit = 0; bit < sizeof(rq->cmd); bit++)
printk("%02x ", rq->cmd[bit]);
int nr_phys_segs = bio_phys_segments(q, bio);
if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
- req->flags |= REQ_NOMERGE;
+ req->cmd_flags |= REQ_NOMERGE;
if (req == q->last_merge)
q->last_merge = NULL;
return 0;
if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
|| req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
- req->flags |= REQ_NOMERGE;
+ req->cmd_flags |= REQ_NOMERGE;
if (req == q->last_merge)
q->last_merge = NULL;
return 0;
max_sectors = q->max_sectors;
if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
- req->flags |= REQ_NOMERGE;
+ req->cmd_flags |= REQ_NOMERGE;
if (req == q->last_merge)
q->last_merge = NULL;
return 0;
if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
- req->flags |= REQ_NOMERGE;
+ req->cmd_flags |= REQ_NOMERGE;
if (req == q->last_merge)
q->last_merge = NULL;
return 0;
static inline void blk_free_request(request_queue_t *q, struct request *rq)
{
- if (rq->flags & REQ_ELVPRIV)
+ if (rq->cmd_flags & REQ_ELVPRIV)
elv_put_request(q, rq);
mempool_free(rq, q->rq.rq_pool);
}
-static inline struct request *
-blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
- int priv, gfp_t gfp_mask)
+static struct request *
+blk_alloc_request(request_queue_t *q, int rw, int priv, gfp_t gfp_mask)
{
struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
return NULL;
/*
- * first three bits are identical in rq->flags and bio->bi_rw,
+ * first three bits are identical in rq->cmd_flags and bio->bi_rw,
* see bio.h and blkdev.h
*/
- rq->flags = rw;
+ rq->cmd_flags = rw | REQ_ALLOCED;
if (priv) {
- if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
+ if (unlikely(elv_set_request(q, rq, gfp_mask))) {
mempool_free(rq, q->rq.rq_pool);
return NULL;
}
- rq->flags |= REQ_ELVPRIV;
+ rq->cmd_flags |= REQ_ELVPRIV;
}
return rq;
struct io_context *ioc = NULL;
int may_queue, priv;
- may_queue = elv_may_queue(q, rw, bio);
+ may_queue = elv_may_queue(q, rw);
if (may_queue == ELV_MQUEUE_NO)
goto rq_starved;
if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
if (rl->count[rw]+1 >= q->nr_requests) {
- ioc = current_io_context(GFP_ATOMIC);
+ ioc = current_io_context(GFP_ATOMIC, q->node);
/*
* The queue will fill after this allocation, so set
* it as full, and mark this process as "batching".
spin_unlock_irq(q->queue_lock);
- rq = blk_alloc_request(q, rw, bio, priv, gfp_mask);
+ rq = blk_alloc_request(q, rw, priv, gfp_mask);
if (unlikely(!rq)) {
/*
* Allocation failed presumably due to memory. Undo anything
ioc->nr_batch_requests--;
rq_init(q, rq);
- rq->rl = rl;
blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
out:
* up to a big batch of them for a small period time.
* See ioc_batching, ioc_set_batching
*/
- ioc = current_io_context(GFP_NOIO);
+ ioc = current_io_context(GFP_NOIO, q->node);
ioc_set_batching(q, ioc);
spin_lock_irq(q->queue_lock);
}
EXPORT_SYMBOL(blk_get_request);
+/**
+ * blk_start_queueing - initiate dispatch of requests to device
+ * @q: request queue to kick into gear
+ *
+ * This is basically a helper to remove the need to know whether a queue
+ * is plugged or not if someone just wants to initiate dispatch of requests
+ * for this queue.
+ *
+ * The queue lock must be held with interrupts disabled.
+ */
+void blk_start_queueing(request_queue_t *q)
+{
+ if (!blk_queue_plugged(q))
+ q->request_fn(q);
+ else
+ __generic_unplug_device(q);
+}
+EXPORT_SYMBOL(blk_start_queueing);
+
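Callers that used to open-code the plugged check, such as cfq_kick_queue() above and the similar change just below, now reduce to this; a minimal sketch of the calling convention, assuming the queue lock is taken with interrupts disabled as the kerneldoc requires:

	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	blk_start_queueing(q);		/* runs ->request_fn or unplugs the queue */
	spin_unlock_irqrestore(q->queue_lock, flags);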
/**
* blk_requeue_request - put a request back on queue
* @q: request queue where request should be inserted
* must not attempt merges on this) and that it acts as a soft
* barrier
*/
- rq->flags |= REQ_SPECIAL | REQ_SOFTBARRIER;
+ rq->cmd_type = REQ_TYPE_SPECIAL;
+ rq->cmd_flags |= REQ_SOFTBARRIER;
rq->special = data;
drive_stat_acct(rq, rq->nr_sectors, 1);
__elv_add_request(q, rq, where, 0);
-
- if (blk_queue_plugged(q))
- __generic_unplug_device(q);
- else
- q->request_fn(q);
+ blk_start_queueing(q);
spin_unlock_irqrestore(q->queue_lock, flags);
}
int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
rq->rq_disk = bd_disk;
- rq->flags |= REQ_NOMERGE;
+ rq->cmd_flags |= REQ_NOMERGE;
rq->end_io = done;
WARN_ON(irqs_disabled());
spin_lock_irq(q->queue_lock);
rq->sense_len = 0;
}
- rq->waiting = &wait;
+ rq->end_io_data = &wait;
blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
wait_for_completion(&wait);
- rq->waiting = NULL;
if (rq->errors)
err = -EIO;
*/
void __blk_put_request(request_queue_t *q, struct request *req)
{
- struct request_list *rl = req->rl;
-
if (unlikely(!q))
return;
if (unlikely(--req->ref_count))
elv_completed_request(q, req);
- req->rq_status = RQ_INACTIVE;
- req->rl = NULL;
-
/*
* Request may not have originated from ll_rw_blk. if not,
* it didn't come out of our reserved rq pools
*/
- if (rl) {
+ if (req->cmd_flags & REQ_ALLOCED) {
int rw = rq_data_dir(req);
- int priv = req->flags & REQ_ELVPRIV;
+ int priv = req->cmd_flags & REQ_ELVPRIV;
BUG_ON(!list_empty(&req->queuelist));
+ BUG_ON(!hlist_unhashed(&req->hash));
blk_free_request(q, req);
freed_request(q, rw, priv);
*/
void blk_end_sync_rq(struct request *rq, int error)
{
- struct completion *waiting = rq->waiting;
+ struct completion *waiting = rq->end_io_data;
- rq->waiting = NULL;
+ rq->end_io_data = NULL;
__blk_put_request(rq->q, rq);
/*
if (rq_data_dir(req) != rq_data_dir(next)
|| req->rq_disk != next->rq_disk
- || next->waiting || next->special)
+ || next->special)
return 0;
/*
static void init_request_from_bio(struct request *req, struct bio *bio)
{
- req->flags |= REQ_CMD;
+ req->cmd_type = REQ_TYPE_FS;
/*
* inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
*/
if (bio_rw_ahead(bio) || bio_failfast(bio))
- req->flags |= REQ_FAILFAST;
+ req->cmd_flags |= REQ_FAILFAST;
/*
* REQ_BARRIER implies no merging, but lets make it explicit
*/
if (unlikely(bio_barrier(bio)))
- req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
+ req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
if (bio_sync(bio))
- req->flags |= REQ_RW_SYNC;
+ req->cmd_flags |= REQ_RW_SYNC;
+ if (bio_rw_meta(bio))
+ req->cmd_flags |= REQ_RW_META;
req->errors = 0;
req->hard_sector = req->sector = bio->bi_sector;
req->nr_phys_segments = bio_phys_segments(req->q, bio);
req->nr_hw_segments = bio_hw_segments(req->q, bio);
req->buffer = bio_data(bio); /* see ->buffer comment above */
- req->waiting = NULL;
req->bio = req->biotail = bio;
req->ioprio = bio_prio(bio);
req->rq_disk = bio->bi_bdev->bd_disk;
static int __make_request(request_queue_t *q, struct bio *bio)
{
struct request *req;
- int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync;
- unsigned short prio;
- sector_t sector;
+ int el_ret, nr_sectors, barrier, err;
+ const unsigned short prio = bio_prio(bio);
+ const int sync = bio_sync(bio);
- sector = bio->bi_sector;
nr_sectors = bio_sectors(bio);
- cur_nr_sectors = bio_cur_sectors(bio);
- prio = bio_prio(bio);
-
- rw = bio_data_dir(bio);
- sync = bio_sync(bio);
/*
* low level driver can indicate that it wants pages above a
*/
blk_queue_bounce(q, &bio);
- spin_lock_prefetch(q->queue_lock);
-
barrier = bio_barrier(bio);
if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
err = -EOPNOTSUPP;
req->ioprio = ioprio_best(req->ioprio, prio);
drive_stat_acct(req, nr_sectors, 0);
if (!attempt_back_merge(q, req))
- elv_merged_request(q, req);
+ elv_merged_request(q, req, el_ret);
goto out;
case ELEVATOR_FRONT_MERGE:
* not touch req->buffer either...
*/
req->buffer = bio_data(bio);
- req->current_nr_sectors = cur_nr_sectors;
- req->hard_cur_sectors = cur_nr_sectors;
- req->sector = req->hard_sector = sector;
+ req->current_nr_sectors = bio_cur_sectors(bio);
+ req->hard_cur_sectors = req->current_nr_sectors;
+ req->sector = req->hard_sector = bio->bi_sector;
req->nr_sectors = req->hard_nr_sectors += nr_sectors;
req->ioprio = ioprio_best(req->ioprio, prio);
drive_stat_acct(req, nr_sectors, 0);
if (!attempt_front_merge(q, req))
- elv_merged_request(q, req);
+ elv_merged_request(q, req, el_ret);
goto out;
/* ELV_NO_MERGE: elevator says don't/can't merge. */
* Grab a free request. This is might sleep but can not fail.
* Returns with the queue unlocked.
*/
- req = get_request_wait(q, rw, bio);
+ req = get_request_wait(q, bio_data_dir(bio), bio);
/*
* After dropping the lock and possibly sleeping here, our request
req->errors = 0;
if (!uptodate) {
- if (blk_fs_request(req) && !(req->flags & REQ_QUIET))
+ if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))
printk("end_request: I/O error, dev %s, sector %llu\n",
req->rq_disk ? req->rq_disk->disk_name : "?",
(unsigned long long)req->sector);
void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio)
{
- /* first two bits are identical in rq->flags and bio->bi_rw */
- rq->flags |= (bio->bi_rw & 3);
+ /* first two bits are identical in rq->cmd_flags and bio->bi_rw */
+ rq->cmd_flags |= (bio->bi_rw & 3);
rq->nr_phys_segments = bio_phys_segments(q, bio);
rq->nr_hw_segments = bio_hw_segments(q, bio);
/* Called by the exitting task */
void exit_io_context(void)
{
- unsigned long flags;
struct io_context *ioc;
struct cfq_io_context *cic;
- local_irq_save(flags);
task_lock(current);
ioc = current->io_context;
current->io_context = NULL;
- ioc->task = NULL;
task_unlock(current);
- local_irq_restore(flags);
+ ioc->task = NULL;
if (ioc->aic && ioc->aic->exit)
ioc->aic->exit(ioc->aic);
if (ioc->cic_root.rb_node != NULL) {
cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node);
cic->exit(ioc);
}
-
+
put_io_context(ioc);
}
* but since the current task itself holds a reference, the context can be
* used in general code, so long as it stays within `current` context.
*/
-struct io_context *current_io_context(gfp_t gfp_flags)
+static struct io_context *current_io_context(gfp_t gfp_flags, int node)
{
struct task_struct *tsk = current;
struct io_context *ret;
if (likely(ret))
return ret;
- ret = kmem_cache_alloc(iocontext_cachep, gfp_flags);
+ ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
if (ret) {
atomic_set(&ret->refcount, 1);
ret->task = current;
- ret->set_ioprio = NULL;
+ ret->ioprio_changed = 0;
ret->last_waited = jiffies; /* doesn't matter... */
ret->nr_batch_requests = 0; /* because this is 0 */
ret->aic = NULL;
*
* This is always called in the context of the task which submitted the I/O.
*/
-struct io_context *get_io_context(gfp_t gfp_flags)
+struct io_context *get_io_context(gfp_t gfp_flags, int node)
{
struct io_context *ret;
- ret = current_io_context(gfp_flags);
+ ret = current_io_context(gfp_flags, node);
if (likely(ret))
atomic_inc(&ret->refcount);
return ret;
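A minimal sketch (assumed caller, not part of the patch) of how users of the old single-argument get_io_context() adapt to the node-aware signature, typically passing the queue's home node as current_io_context() callers do above:

/* example_get_ioc is hypothetical; it just forwards the queue's node so
 * the io_context is allocated NUMA-locally. */
static struct io_context *example_get_ioc(request_queue_t *q)
{
	return get_io_context(GFP_NOIO, q->node);
}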
ssize_t ret = queue_var_store(&ra_kb, page, count);
spin_lock_irq(q->queue_lock);
- if (ra_kb > (q->max_sectors >> 1))
- ra_kb = (q->max_sectors >> 1);
-
q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
spin_unlock_irq(q->queue_lock);
{
struct noop_data *nd;
- nd = kmalloc(sizeof(*nd), GFP_KERNEL);
+ nd = kmalloc_node(sizeof(*nd), GFP_KERNEL, q->node);
if (!nd)
return NULL;
INIT_LIST_HEAD(&nd->queue);
rq->sense = sense;
rq->sense_len = 0;
- rq->flags |= REQ_BLOCK_PC;
+ rq->cmd_type = REQ_TYPE_BLOCK_PC;
bio = rq->bio;
/*
memset(sense, 0, sizeof(sense));
rq->sense = sense;
rq->sense_len = 0;
- rq->flags |= REQ_BLOCK_PC;
+ rq->cmd_type = REQ_TYPE_BLOCK_PC;
blk_execute_rq(q, disk, rq, 0);
int err;
rq = blk_get_request(q, WRITE, __GFP_WAIT);
- rq->flags |= REQ_BLOCK_PC;
+ rq->cmd_type = REQ_TYPE_BLOCK_PC;
rq->data = NULL;
rq->data_len = 0;
rq->timeout = BLK_DEFAULT_TIMEOUT;
Command->DmaDirection = PCI_DMA_TODEVICE;
Command->CommandType = DAC960_WriteCommand;
}
- Command->Completion = Request->waiting;
+ Command->Completion = Request->end_io_data;
Command->LogicalDriveNumber = (long)Request->rq_disk->private_data;
Command->BlockNumber = Request->sector;
Command->BlockCount = Request->nr_sectors;
# Block device driver configuration
#
+if BLOCK
+
menu "Block devices"
config BLK_DEV_FD
devices like the Coraid EtherDrive (R) Storage Blade.
endmenu
+
+endif
int nr_sectors = bio_sectors(bio);
bio->bi_next = NULL;
- blk_finished_io(len);
bio_endio(bio, nr_sectors << 9, status ? 0 : -EIO);
bio = xbh;
}
xbh = bio->bi_next;
bio->bi_next = NULL;
- blk_finished_io(nr_sectors);
bio_endio(bio, nr_sectors << 9, ok ? 0 : -EIO);
bio = xbh;
if (usage_count == 0) {
printk("warning: usage count=0, current_req=%p exiting\n",
current_req);
- printk("sect=%ld flags=%lx\n", (long)current_req->sector,
- current_req->flags);
+ printk("sect=%ld type=%x flags=%x\n", (long)current_req->sector,
+ current_req->cmd_type, current_req->cmd_flags);
return;
}
if (test_bit(0, &fdc_busy)) {
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/loop.h>
+#include <linux/compat.h>
#include <linux/suspend.h>
#include <linux/writeback.h>
#include <linux/buffer_head.h> /* for invalidate_bdev() */
return err;
}
+#ifdef CONFIG_COMPAT
+struct compat_loop_info {
+ compat_int_t lo_number; /* ioctl r/o */
+ compat_dev_t lo_device; /* ioctl r/o */
+ compat_ulong_t lo_inode; /* ioctl r/o */
+ compat_dev_t lo_rdevice; /* ioctl r/o */
+ compat_int_t lo_offset;
+ compat_int_t lo_encrypt_type;
+ compat_int_t lo_encrypt_key_size; /* ioctl w/o */
+ compat_int_t lo_flags; /* ioctl r/o */
+ char lo_name[LO_NAME_SIZE];
+ unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
+ compat_ulong_t lo_init[2];
+ char reserved[4];
+};
+
+/*
+ * Transfer 32-bit compatibility structure in userspace to 64-bit loop info
+ * - noinlined to reduce stack space usage in main part of driver
+ */
+static noinline int
+loop_info64_from_compat(const struct compat_loop_info *arg,
+ struct loop_info64 *info64)
+{
+ struct compat_loop_info info;
+
+ if (copy_from_user(&info, arg, sizeof(info)))
+ return -EFAULT;
+
+ memset(info64, 0, sizeof(*info64));
+ info64->lo_number = info.lo_number;
+ info64->lo_device = info.lo_device;
+ info64->lo_inode = info.lo_inode;
+ info64->lo_rdevice = info.lo_rdevice;
+ info64->lo_offset = info.lo_offset;
+ info64->lo_sizelimit = 0;
+ info64->lo_encrypt_type = info.lo_encrypt_type;
+ info64->lo_encrypt_key_size = info.lo_encrypt_key_size;
+ info64->lo_flags = info.lo_flags;
+ info64->lo_init[0] = info.lo_init[0];
+ info64->lo_init[1] = info.lo_init[1];
+ if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
+ memcpy(info64->lo_crypt_name, info.lo_name, LO_NAME_SIZE);
+ else
+ memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE);
+ memcpy(info64->lo_encrypt_key, info.lo_encrypt_key, LO_KEY_SIZE);
+ return 0;
+}
+
+/*
+ * Transfer 64-bit loop info to 32-bit compatibility structure in userspace
+ * - noinlined to reduce stack space usage in main part of driver
+ */
+static noinline int
+loop_info64_to_compat(const struct loop_info64 *info64,
+ struct compat_loop_info __user *arg)
+{
+ struct compat_loop_info info;
+
+ memset(&info, 0, sizeof(info));
+ info.lo_number = info64->lo_number;
+ info.lo_device = info64->lo_device;
+ info.lo_inode = info64->lo_inode;
+ info.lo_rdevice = info64->lo_rdevice;
+ info.lo_offset = info64->lo_offset;
+ info.lo_encrypt_type = info64->lo_encrypt_type;
+ info.lo_encrypt_key_size = info64->lo_encrypt_key_size;
+ info.lo_flags = info64->lo_flags;
+ info.lo_init[0] = info64->lo_init[0];
+ info.lo_init[1] = info64->lo_init[1];
+ if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
+ memcpy(info.lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
+ else
+ memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE);
+ memcpy(info.lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);
+
+ /* error in case values were truncated */
+ if (info.lo_device != info64->lo_device ||
+ info.lo_rdevice != info64->lo_rdevice ||
+ info.lo_inode != info64->lo_inode ||
+ info.lo_offset != info64->lo_offset ||
+ info.lo_init[0] != info64->lo_init[0] ||
+ info.lo_init[1] != info64->lo_init[1])
+ return -EOVERFLOW;
+
+ if (copy_to_user(arg, &info, sizeof(info)))
+ return -EFAULT;
+ return 0;
+}
+
+static int
+loop_set_status_compat(struct loop_device *lo,
+ const struct compat_loop_info __user *arg)
+{
+ struct loop_info64 info64;
+ int ret;
+
+ ret = loop_info64_from_compat(arg, &info64);
+ if (ret < 0)
+ return ret;
+ return loop_set_status(lo, &info64);
+}
+
+static int
+loop_get_status_compat(struct loop_device *lo,
+ struct compat_loop_info __user *arg)
+{
+ struct loop_info64 info64;
+ int err = 0;
+
+ if (!arg)
+ err = -EINVAL;
+ if (!err)
+ err = loop_get_status(lo, &info64);
+ if (!err)
+ err = loop_info64_to_compat(&info64, arg);
+ return err;
+}
+
+static long lo_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
+ int err;
+
+ lock_kernel();
+ switch(cmd) {
+ case LOOP_SET_STATUS:
+ mutex_lock(&lo->lo_ctl_mutex);
+ err = loop_set_status_compat(
+ lo, (const struct compat_loop_info __user *) arg);
+ mutex_unlock(&lo->lo_ctl_mutex);
+ break;
+ case LOOP_GET_STATUS:
+ mutex_lock(&lo->lo_ctl_mutex);
+ err = loop_get_status_compat(
+ lo, (struct compat_loop_info __user *) arg);
+ mutex_unlock(&lo->lo_ctl_mutex);
+ break;
+ case LOOP_CLR_FD:
+ case LOOP_GET_STATUS64:
+ case LOOP_SET_STATUS64:
+ arg = (unsigned long) compat_ptr(arg);
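+ /* fall through */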
+ case LOOP_SET_FD:
+ case LOOP_CHANGE_FD:
+ err = lo_ioctl(inode, file, cmd, arg);
+ break;
+ default:
+ err = -ENOIOCTLCMD;
+ break;
+ }
+ unlock_kernel();
+ return err;
+}
+#endif
+
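For illustration (not part of the patch), a 32-bit program on a 64-bit kernel keeps issuing the old struct loop_info ioctl once the compat handler is registered; example_set_offset and loop_fd below are assumptions:

/* Hypothetical 32-bit userspace usage: LOOP_SET_STATUS from a compat
 * task is translated in the driver by loop_info64_from_compat(). */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/loop.h>

static int example_set_offset(int loop_fd, int offset)
{
	struct loop_info info;

	memset(&info, 0, sizeof(info));
	info.lo_offset = offset;
	return ioctl(loop_fd, LOOP_SET_STATUS, &info);
}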
static int lo_open(struct inode *inode, struct file *file)
{
struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
.open = lo_open,
.release = lo_release,
.ioctl = lo_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = lo_compat_ioctl,
+#endif
};
/*
struct nbd_device *lo;
blkdev_dequeue_request(req);
- dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%lx)\n",
- req->rq_disk->disk_name, req, req->flags);
+ dprintk(DBG_BLKDEV, "%s: request %p: dequeued (type=%x)\n",
+ req->rq_disk->disk_name, req, req->cmd_type);
- if (!(req->flags & REQ_CMD))
+ if (!blk_fs_request(req))
goto error_out;
lo = req->rq_disk->private_data;
switch (cmd) {
case NBD_DISCONNECT:
printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name);
- sreq.flags = REQ_SPECIAL;
+ sreq.cmd_type = REQ_TYPE_SPECIAL;
nbd_cmd(&sreq) = NBD_CMD_DISC;
/*
* Set these to sane values in case server implementation
static enum action do_pd_io_start(void)
{
- if (pd_req->flags & REQ_SPECIAL) {
+ if (blk_special_request(pd_req)) {
phase = pd_special;
return pd_special();
}
memset(&rq, 0, sizeof(rq));
rq.errors = 0;
- rq.rq_status = RQ_ACTIVE;
rq.rq_disk = disk->gd;
rq.ref_count = 1;
- rq.waiting = &wait;
+ rq.end_io_data = &wait;
rq.end_io = blk_end_sync_rq;
blk_insert_request(disk->gd->queue, &rq, 0, func);
wait_for_completion(&wait);
- rq.waiting = NULL;
if (rq.errors)
err = -EIO;
blk_put_request(&rq);
rq->sense = sense;
memset(sense, 0, sizeof(sense));
rq->sense_len = 0;
- rq->flags |= REQ_BLOCK_PC | REQ_HARDBARRIER;
+ rq->cmd_type = REQ_TYPE_BLOCK_PC;
+ rq->cmd_flags |= REQ_HARDBARRIER;
if (cgc->quiet)
- rq->flags |= REQ_QUIET;
+ rq->cmd_flags |= REQ_QUIET;
memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE);
if (sizeof(rq->cmd) > CDROM_PACKET_SIZE)
memset(rq->cmd + CDROM_PACKET_SIZE, 0, sizeof(rq->cmd) - CDROM_PACKET_SIZE);
rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
rq->ref_count++;
- rq->flags |= REQ_NOMERGE;
- rq->waiting = &wait;
+ rq->end_io_data = &wait;
rq->end_io = blk_end_sync_rq;
elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
generic_unplug_device(q);
printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%ld buf=%p\n",
req->rq_disk->disk_name, req->cmd,
(long)req->sector, req->nr_sectors, req->buffer);
- printk(" rq_status=%d errors=%d current_nr_sectors=%ld\n",
- req->rq_status, req->errors, req->current_nr_sectors);
+ printk(" errors=%d current_nr_sectors=%ld\n",
+ req->errors, req->current_nr_sectors);
#endif
if (req->sector < 0 || req->sector >= fs->total_secs) {
printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%ld buf=%p\n",
CURRENT->rq_disk->disk_name, CURRENT->cmd,
CURRENT->sector, CURRENT->nr_sectors, CURRENT->buffer);
- printk(" rq_status=%d errors=%d current_nr_sectors=%ld\n",
- CURRENT->rq_status, CURRENT->errors, CURRENT->current_nr_sectors);
+ printk(" errors=%d current_nr_sectors=%ld\n",
+ CURRENT->errors, CURRENT->current_nr_sectors);
#endif
if (CURRENT->sector < 0 || CURRENT->sector >= fs->total_secs) {
int res = 0;
int retry;
- if (!(req->flags & REQ_CMD)) {
+ if (!blk_fs_request(req)) {
end_request(req, 0);
continue;
}
#
menu "Old CD-ROM drivers (not SCSI, not IDE)"
- depends on ISA
+ depends on ISA && BLOCK
config CD_NO_IDESCSI
bool "Support non-SCSI/IDE/ATAPI CDROM drives"
rq->cmd[9] = 0xf8;
rq->cmd_len = 12;
- rq->flags |= REQ_BLOCK_PC;
+ rq->cmd_type = REQ_TYPE_BLOCK_PC;
rq->timeout = 60 * HZ;
bio = rq->bio;
}
/* WTF??? */
- if (!(req->flags & REQ_CMD))
+ if (!blk_fs_request(req)) {
+ end_request(req, 0);
continue;
+ }
if (rq_data_dir(req) == WRITE) {
end_request(req, 0);
continue;
config RAW_DRIVER
tristate "RAW driver (/dev/raw/rawN) (OBSOLETE)"
+ depends on BLOCK
help
The raw driver permits block devices to be bound to /dev/raw/rawN.
Once bound, I/O against /dev/raw/rawN uses efficient zero-copy I/O.
add_timer_randomness(irq_timer_state[irq], 0x100 + irq);
}
+#ifdef CONFIG_BLOCK
void add_disk_randomness(struct gendisk *disk)
{
if (!disk || !disk->random)
}
EXPORT_SYMBOL(add_disk_randomness);
+#endif
#define EXTRACT_SIZE 10
}
}
+#ifdef CONFIG_BLOCK
void rand_initialize_disk(struct gendisk *disk)
{
struct timer_rand_state *state;
disk->random = state;
}
}
+#endif
static ssize_t
random_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos)
*/
fc->rst_pkt->device->host->eh_action = &sem;
- fc->rst_pkt->request->rq_status = RQ_SCSI_BUSY;
fc->rst_pkt->done = fcp_scsi_reset_done;
# Andre Hedrick <andre@linux-ide.org>
#
+if BLOCK
+
menu "ATA/ATAPI/MFM/RLL support"
config IDE
endif
endmenu
+
+endif
{
int log = 0;
- if (!sense || !rq || (rq->flags & REQ_QUIET))
+ if (!sense || !rq || (rq->cmd_flags & REQ_QUIET))
return 0;
switch (sense->sense_key) {
struct cdrom_info *cd = drive->driver_data;
ide_init_drive_cmd(rq);
- rq->flags = REQ_PC;
+ rq->cmd_type = REQ_TYPE_BLOCK_PC;
rq->rq_disk = cd->disk;
}
rq->cmd[0] = GPCMD_REQUEST_SENSE;
rq->cmd[4] = rq->data_len = 18;
- rq->flags = REQ_SENSE;
+ rq->cmd_type = REQ_TYPE_SENSE;
/* NOTE! Save the failed command in "rq->buffer" */
rq->buffer = (void *) failed_command;
struct request *rq = HWGROUP(drive)->rq;
int nsectors = rq->hard_cur_sectors;
- if ((rq->flags & REQ_SENSE) && uptodate) {
+ if (blk_sense_request(rq) && uptodate) {
/*
- * For REQ_SENSE, "rq->buffer" points to the original failed
- * request
+ * For REQ_TYPE_SENSE, "rq->buffer" points to the original
+ * failed request
*/
struct request *failed = (struct request *) rq->buffer;
struct cdrom_info *info = drive->driver_data;
return 1;
}
- if (rq->flags & REQ_SENSE) {
+ if (blk_sense_request(rq)) {
/* We got an error trying to get sense info
from the drive (probably while trying
to recover from a former error). Just give up. */
- rq->flags |= REQ_FAILED;
+ rq->cmd_flags |= REQ_FAILED;
cdrom_end_request(drive, 0);
ide_error(drive, "request sense failure", stat);
return 1;
- } else if (rq->flags & (REQ_PC | REQ_BLOCK_PC)) {
+ } else if (blk_pc_request(rq)) {
/* All other functions, except for READ. */
unsigned long flags;
* if we have an error, pass back CHECK_CONDITION as the
* scsi status byte
*/
- if ((rq->flags & REQ_BLOCK_PC) && !rq->errors)
+ if (!rq->errors)
rq->errors = SAM_STAT_CHECK_CONDITION;
/* Check for tray open. */
cdrom_saw_media_change (drive);
/*printk("%s: media changed\n",drive->name);*/
return 0;
- } else if (!(rq->flags & REQ_QUIET)) {
+ } else if (!(rq->cmd_flags & REQ_QUIET)) {
/* Otherwise, print an error. */
ide_dump_status(drive, "packet command error", stat);
}
- rq->flags |= REQ_FAILED;
+ rq->cmd_flags |= REQ_FAILED;
/*
* instead of playing games with moving completions around,
wait = ATAPI_WAIT_PC;
break;
default:
- if (!(rq->flags & REQ_QUIET))
+ if (!(rq->cmd_flags & REQ_QUIET))
printk(KERN_INFO "ide-cd: cmd 0x%x timed out\n", rq->cmd[0]);
wait = 0;
break;
if (rq->current_nr_sectors > 0) {
printk (KERN_ERR "%s: cdrom_read_intr: data underrun (%d blocks)\n",
drive->name, rq->current_nr_sectors);
- rq->flags |= REQ_FAILED;
+ rq->cmd_flags |= REQ_FAILED;
cdrom_end_request(drive, 0);
} else
cdrom_end_request(drive, 1);
printk ("%s: cdrom_pc_intr: data underrun %d\n",
drive->name, pc->buflen);
*/
- rq->flags |= REQ_FAILED;
+ rq->cmd_flags |= REQ_FAILED;
cdrom_end_request(drive, 0);
}
return ide_stopped;
rq->data += thislen;
rq->data_len -= thislen;
- if (rq->flags & REQ_SENSE)
+ if (blk_sense_request(rq))
rq->sense_len += thislen;
} else {
confused:
"appears confused (ireason = 0x%02x). "
"Trying to recover by ending request.\n",
drive->name, ireason);
- rq->flags |= REQ_FAILED;
+ rq->cmd_flags |= REQ_FAILED;
cdrom_end_request(drive, 0);
return ide_stopped;
}
struct cdrom_info *info = drive->driver_data;
info->dma = 0;
- rq->flags &= ~REQ_FAILED;
+ rq->cmd_flags &= ~REQ_FAILED;
len = rq->data_len;
/* Start sending the command to the drive. */
{
struct request_sense sense;
int retries = 10;
- unsigned int flags = rq->flags;
+ unsigned int flags = rq->cmd_flags;
if (rq->sense == NULL)
rq->sense = &sense;
do {
int error;
unsigned long time = jiffies;
- rq->flags = flags;
+ rq->cmd_flags = flags;
error = ide_do_drive_cmd(drive, rq, ide_wait);
time = jiffies - time;
/* FIXME: we should probably abort/retry or something
* in case of failure */
- if (rq->flags & REQ_FAILED) {
+ if (rq->cmd_flags & REQ_FAILED) {
/* The request failed. Retry if it was due to a unit
attention status
(usually means media was changed). */
}
/* End of retry loop. */
- } while ((rq->flags & REQ_FAILED) && retries >= 0);
+ } while ((rq->cmd_flags & REQ_FAILED) && retries >= 0);
/* Return an error if the command failed. */
- return (rq->flags & REQ_FAILED) ? -EIO : 0;
+ return (rq->cmd_flags & REQ_FAILED) ? -EIO : 0;
}
/*
{
struct cdrom_info *info = drive->driver_data;
- rq->flags |= REQ_QUIET;
+ rq->cmd_flags |= REQ_QUIET;
info->dma = 0;
}
info->last_block = block;
return action;
- } else if (rq->flags & (REQ_PC | REQ_SENSE)) {
+ } else if (rq->cmd_type == REQ_TYPE_SENSE) {
return cdrom_do_packet_command(drive);
- } else if (rq->flags & REQ_BLOCK_PC) {
+ } else if (blk_pc_request(rq)) {
return cdrom_do_block_pc(drive, rq);
- } else if (rq->flags & REQ_SPECIAL) {
+ } else if (blk_special_request(rq)) {
/*
* right now this can only be a reset...
*/
req.sense = sense;
req.cmd[0] = GPCMD_TEST_UNIT_READY;
- req.flags |= REQ_QUIET;
+ req.cmd_flags |= REQ_QUIET;
#if ! STANDARD_ATAPI
/* the Sanyo 3 CD changer uses byte 7 of TEST_UNIT_READY to
req.cmd[0] = GPCMD_READ_CDVD_CAPACITY;
req.data = (char *)&capbuf;
req.data_len = sizeof(capbuf);
- req.flags |= REQ_QUIET;
+ req.cmd_flags |= REQ_QUIET;
stat = cdrom_queue_packet_command(drive, &req);
if (stat == 0) {
req.sense = sense;
req.data = buf;
req.data_len = buflen;
- req.flags |= REQ_QUIET;
+ req.cmd_flags |= REQ_QUIET;
req.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
req.cmd[6] = trackno;
req.cmd[7] = (buflen >> 8);
req.timeout = cgc->timeout;
if (cgc->quiet)
- req.flags |= REQ_QUIET;
+ req.cmd_flags |= REQ_QUIET;
req.sense = cgc->sense;
cgc->stat = cdrom_queue_packet_command(drive, &req);
int ret;
cdrom_prepare_request(drive, &req);
- req.flags = REQ_SPECIAL | REQ_QUIET;
+ req.cmd_type = REQ_TYPE_SPECIAL;
+ req.cmd_flags = REQ_QUIET;
ret = ide_do_drive_cmd(drive, &req, ide_wait);
/*
static int ide_cdrom_prep_fn(request_queue_t *q, struct request *rq)
{
- if (rq->flags & REQ_CMD)
+ if (blk_fs_request(rq))
return ide_cdrom_prep_fs(q, rq);
- else if (rq->flags & REQ_BLOCK_PC)
+ else if (blk_pc_request(rq))
return ide_cdrom_prep_pc(rq);
return 0;
rq->cmd[0] = WIN_FLUSH_CACHE;
- rq->flags |= REQ_DRIVE_TASK;
+ rq->cmd_type = REQ_TYPE_ATA_TASK;
+ rq->cmd_flags |= REQ_SOFTBARRIER;
rq->buffer = rq->cmd;
}
if (drive->special.b.set_multmode)
return -EBUSY;
ide_init_drive_cmd (&rq);
- rq.flags = REQ_DRIVE_CMD;
+ rq.cmd_type = REQ_TYPE_ATA_CMD;
drive->mult_req = arg;
drive->special.b.set_multmode = 1;
(void) ide_do_drive_cmd (drive, &rq, ide_wait);
ide_hwif_t *hwif = HWIF(drive);
struct scatterlist *sg = hwif->sg_table;
- BUG_ON((rq->flags & REQ_DRIVE_TASKFILE) && rq->nr_sectors > 256);
+ BUG_ON((rq->cmd_type == REQ_TYPE_ATA_TASKFILE) && rq->nr_sectors > 256);
ide_map_sg(drive, rq);
/* Why does this happen? */
if (!rq)
return 0;
- if (!(rq->flags & REQ_SPECIAL)) { //if (!IDEFLOPPY_RQ_CMD (rq->cmd)) {
+ if (!blk_special_request(rq)) {
/* our real local end request function */
ide_end_request(drive, uptodate, nsecs);
return 0;
ide_init_drive_cmd(rq);
rq->buffer = (char *) pc;
- rq->flags = REQ_SPECIAL; //rq->cmd = IDEFLOPPY_PC_RQ;
+ rq->cmd_type = REQ_TYPE_SPECIAL;
rq->rq_disk = floppy->disk;
(void) ide_do_drive_cmd(drive, rq, ide_preempt);
}
pc->callback = &idefloppy_rw_callback;
pc->rq = rq;
pc->b_count = cmd == READ ? 0 : rq->bio->bi_size;
- if (rq->flags & REQ_RW)
+ if (rq->cmd_flags & REQ_RW)
set_bit(PC_WRITING, &pc->flags);
pc->buffer = NULL;
pc->request_transfer = pc->buffer_size = blocks * floppy->block_size;
idefloppy_pc_t *pc;
unsigned long block = (unsigned long)block_s;
- debug_log(KERN_INFO "rq_status: %d, dev: %s, flags: %lx, errors: %d\n",
- rq->rq_status,
+ debug_log(KERN_INFO "dev: %s, flags: %x, errors: %d\n",
rq->rq_disk ? rq->rq_disk->disk_name : "?",
- rq->flags, rq->errors);
+ rq->cmd_flags, rq->errors);
debug_log(KERN_INFO "sector: %ld, nr_sectors: %ld, "
idefloppy_do_end_request(drive, 0, 0);
return ide_stopped;
}
- if (rq->flags & REQ_CMD) {
+ if (blk_fs_request(rq)) {
if (((long)rq->sector % floppy->bs_factor) ||
(rq->nr_sectors % floppy->bs_factor)) {
printk("%s: unsupported r/w request size\n",
}
pc = idefloppy_next_pc_storage(drive);
idefloppy_create_rw_cmd(floppy, pc, rq, block);
- } else if (rq->flags & REQ_SPECIAL) {
+ } else if (blk_special_request(rq)) {
pc = (idefloppy_pc_t *) rq->buffer;
- } else if (rq->flags & REQ_BLOCK_PC) {
+ } else if (blk_pc_request(rq)) {
pc = idefloppy_next_pc_storage(drive);
if (idefloppy_blockpc_cmd(floppy, pc, rq)) {
idefloppy_do_end_request(drive, 0, 0);
ide_init_drive_cmd (&rq);
rq.buffer = (char *) pc;
- rq.flags = REQ_SPECIAL; // rq.cmd = IDEFLOPPY_PC_RQ;
+ rq.cmd_type = REQ_TYPE_SPECIAL;
rq.rq_disk = floppy->disk;
return ide_do_drive_cmd(drive, &rq, ide_wait);
{
int ret = 1;
- BUG_ON(!(rq->flags & REQ_STARTED));
+ BUG_ON(!blk_rq_started(rq));
/*
* if failfast is set on a request, override number of sectors and
static void ide_complete_power_step(ide_drive_t *drive, struct request *rq, u8 stat, u8 error)
{
- struct request_pm_state *pm = rq->end_io_data;
+ struct request_pm_state *pm = rq->data;
if (drive->media != ide_disk)
return;
static ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
{
- struct request_pm_state *pm = rq->end_io_data;
+ struct request_pm_state *pm = rq->data;
ide_task_t *args = rq->special;
memset(args, 0, sizeof(*args));
spin_lock_irqsave(&ide_lock, flags);
- BUG_ON(!(rq->flags & REQ_STARTED));
+ BUG_ON(!blk_rq_started(rq));
/*
* if failfast is set on a request, override number of sectors and
rq = HWGROUP(drive)->rq;
spin_unlock_irqrestore(&ide_lock, flags);
- if (rq->flags & REQ_DRIVE_CMD) {
+ if (rq->cmd_type == REQ_TYPE_ATA_CMD) {
u8 *args = (u8 *) rq->buffer;
if (rq->errors == 0)
rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
args[1] = err;
args[2] = hwif->INB(IDE_NSECTOR_REG);
}
- } else if (rq->flags & REQ_DRIVE_TASK) {
+ } else if (rq->cmd_type == REQ_TYPE_ATA_TASK) {
u8 *args = (u8 *) rq->buffer;
if (rq->errors == 0)
rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
args[5] = hwif->INB(IDE_HCYL_REG);
args[6] = hwif->INB(IDE_SELECT_REG);
}
- } else if (rq->flags & REQ_DRIVE_TASKFILE) {
+ } else if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
ide_task_t *args = (ide_task_t *) rq->special;
if (rq->errors == 0)
rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
}
}
} else if (blk_pm_request(rq)) {
- struct request_pm_state *pm = rq->end_io_data;
+ struct request_pm_state *pm = rq->data;
#ifdef DEBUG_PM
printk("%s: complete_power_step(step: %d, stat: %x, err: %x)\n",
drive->name, rq->pm->pm_step, stat, err);
return ide_stopped;
/* retry only "normal" I/O: */
- if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK | REQ_DRIVE_TASKFILE)) {
+ if (!blk_fs_request(rq)) {
rq->errors = 1;
ide_end_drive_cmd(drive, stat, err);
return ide_stopped;
return ide_stopped;
/* retry only "normal" I/O: */
- if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK | REQ_DRIVE_TASKFILE)) {
+ if (!blk_fs_request(rq)) {
rq->errors = 1;
ide_end_drive_cmd(drive, BUSY_STAT, 0);
return ide_stopped;
if (hwif->sg_mapped) /* needed by ide-scsi */
return;
- if ((rq->flags & REQ_DRIVE_TASKFILE) == 0) {
+ if (rq->cmd_type != REQ_TYPE_ATA_TASKFILE) {
hwif->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
} else {
sg_init_one(sg, rq->buffer, rq->nr_sectors * SECTOR_SIZE);
struct request *rq)
{
ide_hwif_t *hwif = HWIF(drive);
- if (rq->flags & REQ_DRIVE_TASKFILE) {
+ if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
ide_task_t *args = rq->special;
if (!args)
if (args->tf_out_flags.all != 0)
return flagged_taskfile(drive, args);
return do_rw_taskfile(drive, args);
- } else if (rq->flags & REQ_DRIVE_TASK) {
+ } else if (rq->cmd_type == REQ_TYPE_ATA_TASK) {
u8 *args = rq->buffer;
u8 sel;
hwif->OUTB(sel, IDE_SELECT_REG);
ide_cmd(drive, args[0], args[2], &drive_cmd_intr);
return ide_started;
- } else if (rq->flags & REQ_DRIVE_CMD) {
+ } else if (rq->cmd_type == REQ_TYPE_ATA_CMD) {
u8 *args = rq->buffer;
if (!args)
static void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
{
- struct request_pm_state *pm = rq->end_io_data;
+ struct request_pm_state *pm = rq->data;
if (blk_pm_suspend_request(rq) &&
pm->pm_step == ide_pm_state_start_suspend)
ide_startstop_t startstop;
sector_t block;
- BUG_ON(!(rq->flags & REQ_STARTED));
+ BUG_ON(!blk_rq_started(rq));
#ifdef DEBUG
printk("%s: start_request: current=0x%08lx\n",
if (!drive->special.all) {
ide_driver_t *drv;
- if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK))
- return execute_drive_cmd(drive, rq);
- else if (rq->flags & REQ_DRIVE_TASKFILE)
+ if (rq->cmd_type == REQ_TYPE_ATA_CMD ||
+ rq->cmd_type == REQ_TYPE_ATA_TASK ||
+ rq->cmd_type == REQ_TYPE_ATA_TASKFILE)
return execute_drive_cmd(drive, rq);
else if (blk_pm_request(rq)) {
- struct request_pm_state *pm = rq->end_io_data;
+ struct request_pm_state *pm = rq->data;
#ifdef DEBUG_PM
printk("%s: start_power_step(step: %d)\n",
drive->name, rq->pm->pm_step);
* We count how many times we loop here to make sure we service
* all drives in the hwgroup without looping for ever
*/
- if (drive->blocked && !blk_pm_request(rq) && !(rq->flags & REQ_PREEMPT)) {
+ if (drive->blocked && !blk_pm_request(rq) && !(rq->cmd_flags & REQ_PREEMPT)) {
drive = drive->next ? drive->next : hwgroup->drive;
if (loops++ < 4 && !blk_queue_plugged(drive->queue))
goto again;
void ide_init_drive_cmd (struct request *rq)
{
memset(rq, 0, sizeof(*rq));
- rq->flags = REQ_DRIVE_CMD;
+ rq->cmd_type = REQ_TYPE_ATA_CMD;
rq->ref_count = 1;
}
int must_wait = (action == ide_wait || action == ide_head_wait);
rq->errors = 0;
- rq->rq_status = RQ_ACTIVE;
/*
* we need to hold an extra reference to request for safe inspection
*/
if (must_wait) {
rq->ref_count++;
- rq->waiting = &wait;
+ rq->end_io_data = &wait;
rq->end_io = blk_end_sync_rq;
}
hwgroup->rq = NULL;
if (action == ide_preempt || action == ide_head_wait) {
where = ELEVATOR_INSERT_FRONT;
- rq->flags |= REQ_PREEMPT;
+ rq->cmd_flags |= REQ_PREEMPT;
}
__elv_add_request(drive->queue, rq, where, 0);
ide_do_request(hwgroup, IDE_NO_IRQ);
err = 0;
if (must_wait) {
wait_for_completion(&wait);
- rq->waiting = NULL;
if (rq->errors)
err = -EIO;
spin_unlock(&ide_lock);
if (!rq)
return;
- if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK)) {
+ if (rq->cmd_type == REQ_TYPE_ATA_CMD ||
+ rq->cmd_type == REQ_TYPE_ATA_TASK) {
char *args = rq->buffer;
if (args) {
opcode = args[0];
found = 1;
}
- } else if (rq->flags & REQ_DRIVE_TASKFILE) {
+ } else if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
ide_task_t *args = rq->special;
if (args) {
task_struct_t *tf = (task_struct_t *) args->tfRegister;
static void idetape_init_rq(struct request *rq, u8 cmd)
{
memset(rq, 0, sizeof(*rq));
- rq->flags = REQ_SPECIAL;
+ rq->cmd_type = REQ_TYPE_SPECIAL;
rq->cmd[0] = cmd;
}
#if IDETAPE_DEBUG_LOG
#if 0
if (tape->debug_level >= 5)
- printk(KERN_INFO "ide-tape: rq_status: %d, "
- "dev: %s, cmd: %ld, errors: %d\n", rq->rq_status,
+ printk(KERN_INFO "ide-tape: %d, "
+ "dev: %s, cmd: %ld, errors: %d\n",
rq->rq_disk->disk_name, rq->cmd[0], rq->errors);
#endif
if (tape->debug_level >= 2)
rq->sector, rq->nr_sectors, rq->current_nr_sectors);
#endif /* IDETAPE_DEBUG_LOG */
- if ((rq->flags & REQ_SPECIAL) == 0) {
+ if (!blk_special_request(rq)) {
/*
* We do not support buffer cache originated requests.
*/
printk(KERN_NOTICE "ide-tape: %s: Unsupported request in "
- "request queue (%ld)\n", drive->name, rq->flags);
+ "request queue (%d)\n", drive->name, rq->cmd_type);
ide_end_request(drive, 0, 0);
return ide_stopped;
}
idetape_tape_t *tape = drive->driver_data;
#if IDETAPE_DEBUG_BUGS
- if (rq == NULL || (rq->flags & REQ_SPECIAL) == 0) {
+ if (rq == NULL || !blk_special_request(rq)) {
printk (KERN_ERR "ide-tape: bug: Trying to sleep on non-valid request\n");
return;
}
#endif /* IDETAPE_DEBUG_BUGS */
- rq->waiting = &wait;
+ rq->end_io_data = &wait;
rq->end_io = blk_end_sync_rq;
spin_unlock_irq(&tape->spinlock);
wait_for_completion(&wait);
static void task_end_request(ide_drive_t *drive, struct request *rq, u8 stat)
{
- if (rq->flags & REQ_DRIVE_TASKFILE) {
+ if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
ide_task_t *task = rq->special;
if (task->tf_out_flags.all) {
struct request rq;
memset(&rq, 0, sizeof(rq));
- rq.flags = REQ_DRIVE_TASKFILE;
+ rq.cmd_type = REQ_TYPE_ATA_TASKFILE;
rq.buffer = buf;
/*
rq.hard_cur_sectors = rq.current_nr_sectors = rq.nr_sectors;
if (args->command_type == IDE_DRIVE_TASK_RAW_WRITE)
- rq.flags |= REQ_RW;
+ rq.cmd_flags |= REQ_RW;
}
rq.special = args;
struct request rq;
ide_init_drive_cmd(&rq);
- rq.flags = REQ_DRIVE_TASK;
+ rq.cmd_type = REQ_TYPE_ATA_TASK;
rq.buffer = buf;
return ide_do_drive_cmd(drive, &rq, ide_wait);
}
memset(&rq, 0, sizeof(rq));
memset(&rqpm, 0, sizeof(rqpm));
memset(&args, 0, sizeof(args));
- rq.flags = REQ_PM_SUSPEND;
+ rq.cmd_type = REQ_TYPE_PM_SUSPEND;
rq.special = &args;
- rq.end_io_data = &rqpm;
+ rq.data = &rqpm;
rqpm.pm_step = ide_pm_state_start_suspend;
if (mesg.event == PM_EVENT_PRETHAW)
mesg.event = PM_EVENT_FREEZE;
memset(&rq, 0, sizeof(rq));
memset(&rqpm, 0, sizeof(rqpm));
memset(&args, 0, sizeof(args));
- rq.flags = REQ_PM_RESUME;
+ rq.cmd_type = REQ_TYPE_PM_RESUME;
rq.special = &args;
- rq.end_io_data = &rqpm;
+ rq.data = &rqpm;
rqpm.pm_step = ide_pm_state_start_resume;
rqpm.pm_state = PM_EVENT_ON;
req->rq_disk->disk_name, (req->cmd == READ)?"read":"writ",
cyl, head, sec, nsect, req->buffer);
#endif
- if (req->flags & REQ_CMD) {
+ if (blk_fs_request(req)) {
switch (rq_data_dir(req)) {
case READ:
hd_out(disk,nsect,sec,head,cyl,WIN_READ,&read_intr);
# Block device driver configuration
#
+if BLOCK
+
menu "Multi-device support (RAID and LVM)"
config MD
endmenu
+endif
memset(&rq->cmd, 0, BLK_MAX_CDB);
rq->timeout = EMC_FAILOVER_TIMEOUT;
- rq->flags |= (REQ_BLOCK_PC | REQ_FAILFAST | REQ_NOMERGE);
+ rq->cmd_type = REQ_TYPE_BLOCK_PC;
+ rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE;
return rq;
}
config I2O_BLOCK
tristate "I2O Block OSM"
- depends on I2O
+ depends on I2O && BLOCK
---help---
Include support for the I2O Block OSM. The Block OSM presents disk
and other structured block devices to the operating system. If you
}
/* request is already processed by us, so return */
- if (req->flags & REQ_SPECIAL) {
+ if (blk_special_request(req)) {
osm_debug("REQ_SPECIAL already set!\n");
- req->flags |= REQ_DONTPREP;
+ req->cmd_flags |= REQ_DONTPREP;
return BLKPREP_OK;
}
ireq = req->special;
/* do not come back here */
- req->flags |= REQ_DONTPREP | REQ_SPECIAL;
+ req->cmd_type = REQ_TYPE_SPECIAL;
+ req->cmd_flags |= REQ_DONTPREP;
return BLKPREP_OK;
};
config MMC_BLOCK
tristate "MMC block device driver"
- depends on MMC
+ depends on MMC && BLOCK
default y
help
Say Y here to enable the MMC block device driver support.
obj-$(CONFIG_MMC_OMAP) += omap.o
obj-$(CONFIG_MMC_AT91RM9200) += at91_mci.o
-mmc_core-y := mmc.o mmc_queue.o mmc_sysfs.o
+mmc_core-y := mmc.o mmc_sysfs.o
+mmc_core-$(CONFIG_BLOCK) += mmc_queue.o
ifeq ($(CONFIG_MMC_DEBUG),y)
EXTRA_CFLAGS += -DDEBUG
struct mmc_queue *mq = q->queuedata;
int ret = BLKPREP_KILL;
- if (req->flags & REQ_SPECIAL) {
+ if (blk_special_request(req)) {
/*
* Special commands already have the command
* blocks already setup in req->special.
BUG_ON(!req->special);
ret = BLKPREP_OK;
- } else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
+ } else if (blk_fs_request(req) || blk_pc_request(req)) {
/*
* Block I/O requests need translating according
* to the protocol.
}
if (ret == BLKPREP_OK)
- req->flags |= REQ_DONTPREP;
+ req->cmd_flags |= REQ_DONTPREP;
return ret;
}
config MTD_BLOCK
tristate "Caching block device access to MTD devices"
- depends on MTD
+ depends on MTD && BLOCK
---help---
Although most flash chips have an erase size too large to be useful
as block devices, it is possible to use MTD devices which are based
config MTD_BLOCK_RO
tristate "Readonly block device access to MTD devices"
- depends on MTD_BLOCK!=y && MTD
+ depends on MTD_BLOCK!=y && MTD && BLOCK
help
This allows you to mount read-only file systems (such as cramfs)
from an MTD device, without the overhead (and danger) of the caching
config FTL
tristate "FTL (Flash Translation Layer) support"
- depends on MTD
+ depends on MTD && BLOCK
---help---
This provides support for the original Flash Translation Layer which
is part of the PCMCIA specification. It uses a kind of pseudo-
config NFTL
tristate "NFTL (NAND Flash Translation Layer) support"
- depends on MTD
+ depends on MTD && BLOCK
---help---
This provides support for the NAND Flash Translation Layer which is
used on M-Systems' DiskOnChip devices. It uses a kind of pseudo-
config INFTL
tristate "INFTL (Inverse NAND Flash Translation Layer) support"
- depends on MTD
+ depends on MTD && BLOCK
---help---
This provides support for the Inverse NAND Flash Translation
Layer which is used on M-Systems' newer DiskOnChip devices. It
config RFD_FTL
tristate "Resident Flash Disk (Flash Translation Layer) support"
- depends on MTD
+ depends on MTD && BLOCK
---help---
This provides support for the flash translation layer known
as the Resident Flash Disk (RFD), as used by the Embedded BIOS
config MTD_BLOCK2MTD
tristate "MTD using block device"
- depends on MTD
+ depends on MTD && BLOCK
help
This driver allows a block device to appear as an MTD. It would
generally be used in the following cases:
nsect = req->current_nr_sectors;
buf = req->buffer;
- if (!(req->flags & REQ_CMD))
+ if (!blk_fs_request(req))
return 0;
if (block + nsect > get_capacity(req->rq_disk))
-if S390
+if S390 && BLOCK
comment "S/390 block device drivers"
depends on S390
}
cqr->retries = DIAG_MAX_RETRIES;
cqr->buildclk = get_clock();
- if (req->flags & REQ_FAILFAST)
+ if (req->cmd_flags & REQ_FAILFAST)
set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
cqr->device = device;
cqr->expires = DIAG_TIMEOUT;
recid++;
}
}
- if (req->flags & REQ_FAILFAST)
+ if (req->cmd_flags & REQ_FAILFAST)
set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
cqr->device = device;
cqr->expires = 5 * 60 * HZ; /* 5 minutes */
recid++;
}
}
- if (req->flags & REQ_FAILFAST)
+ if (req->cmd_flags & REQ_FAILFAST)
set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
cqr->device = device;
cqr->expires = 5 * 60 * HZ; /* 5 minutes */
config RAID_ATTRS
tristate "RAID Transport Class"
default n
+ depends on BLOCK
---help---
Provides RAID
config SCSI
tristate "SCSI device support"
+ depends on BLOCK
---help---
If you want to use a SCSI hard disk, SCSI tape drive, SCSI CD-ROM or
any other SCSI device under Linux, say Y and make sure that you know
aic_dev->r_total++;
ptr = aic_dev->r_bins;
}
- if(cmd->device->simple_tags && cmd->request->flags & REQ_HARDBARRIER)
+ if(cmd->device->simple_tags && cmd->request->cmd_flags & REQ_HARDBARRIER)
{
aic_dev->barrier_total++;
if(scb->tag_action == MSG_ORDERED_Q_TAG)
/* We always force TEST_UNIT_READY to untagged */
if (cmd->cmnd[0] != TEST_UNIT_READY && sdptr->simple_tags)
{
- if (req->flags & REQ_HARDBARRIER)
+ if (req->cmd_flags & REQ_HARDBARRIER)
{
if(sdptr->ordered_tags)
{
pc->buffer = buf;
pc->c[0] = REQUEST_SENSE;
pc->c[4] = pc->request_transfer = pc->buffer_size = SCSI_SENSE_BUFFERSIZE;
- rq->flags = REQ_SENSE;
+ rq->cmd_type = REQ_TYPE_SENSE;
pc->timeout = jiffies + WAIT_READY;
/* NOTE! Save the failed packet command in "rq->buffer" */
rq->buffer = (void *) failed_command->special;
int errors = rq->errors;
unsigned long flags;
- if (!(rq->flags & (REQ_SPECIAL|REQ_SENSE))) {
+ if (!blk_special_request(rq) && !blk_sense_request(rq)) {
ide_end_request(drive, uptodate, nrsecs);
return 0;
}
ide_end_drive_cmd (drive, 0, 0);
- if (rq->flags & REQ_SENSE) {
+ if (blk_sense_request(rq)) {
idescsi_pc_t *opc = (idescsi_pc_t *) rq->buffer;
if (log) {
printk ("ide-scsi: %s: wrap up check %lu, rst = ", drive->name, opc->scsi_cmd->serial_number);
static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *rq, sector_t block)
{
#if IDESCSI_DEBUG_LOG
- printk (KERN_INFO "rq_status: %d, dev: %s, cmd: %x, errors: %d\n",rq->rq_status, rq->rq_disk->disk_name,rq->cmd[0],rq->errors);
+ printk (KERN_INFO "dev: %s, cmd: %x, errors: %d\n", rq->rq_disk->disk_name,rq->cmd[0],rq->errors);
printk (KERN_INFO "sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n",rq->sector,rq->nr_sectors,rq->current_nr_sectors);
#endif /* IDESCSI_DEBUG_LOG */
- if (rq->flags & (REQ_SPECIAL|REQ_SENSE)) {
+ if (blk_sense_request(rq) || blk_special_request(rq)) {
return idescsi_issue_pc (drive, (idescsi_pc_t *) rq->special);
}
blk_dump_rq_flags(rq, "ide-scsi: unsup command");
ide_init_drive_cmd (rq);
rq->special = (char *) pc;
- rq->flags = REQ_SPECIAL;
+ rq->cmd_type = REQ_TYPE_SPECIAL;
spin_unlock_irq(host->host_lock);
rq->rq_disk = scsi->disk;
(void) ide_do_drive_cmd (drive, rq, ide_end);
*/
printk (KERN_ERR "ide-scsi: cmd aborted!\n");
- if (scsi->pc->rq->flags & REQ_SENSE)
+ if (blk_sense_request(scsi->pc->rq))
kfree(scsi->pc->buffer);
kfree(scsi->pc->rq);
kfree(scsi->pc);
/* kill current request */
blkdev_dequeue_request(req);
end_that_request_last(req, 0);
- if (req->flags & REQ_SENSE)
+ if (blk_sense_request(req))
kfree(scsi->pc->buffer);
kfree(scsi->pc);
scsi->pc = NULL;
static void __init pluto_detect_scsi_done(Scsi_Cmnd *SCpnt)
{
- SCpnt->request->rq_status = RQ_SCSI_DONE;
PLND(("Detect done %08lx\n", (long)SCpnt))
if (atomic_dec_and_test (&fcss))
up(&fc_sem);
SCpnt->cmd_len = COMMAND_SIZE(INQUIRY);
- SCpnt->request->rq_status = RQ_SCSI_BUSY;
+ SCpnt->request->cmd_flags &= ~REQ_STARTED;
SCpnt->done = pluto_detect_done;
SCpnt->request_bufflen = 256;
for (retry = 0; retry < 5; retry++) {
for (i = 0; i < fcscount; i++) {
if (!fcs[i].fc) break;
- if (fcs[i].cmd.request->rq_status != RQ_SCSI_DONE) {
+ if (!(fcs[i].cmd.request->cmd_flags & REQ_STARTED)) {
+ fcs[i].cmd.request->cmd_flags |= REQ_STARTED;
disable_irq(fcs[i].fc->irq);
PLND(("queuecommand %d %d\n", retry, i))
fcp_scsi_queuecommand (&(fcs[i].cmd),
return rtn;
}
-
-/*
- * Per-CPU I/O completion queue.
- */
-static DEFINE_PER_CPU(struct list_head, scsi_done_q);
-
/**
* scsi_req_abort_cmd -- Request command recovery for the specified command
* cmd: pointer to the SCSI command of interest
spin_lock_irqsave(&sdev->list_lock, flags);
list_for_each_entry(scmd, &sdev->cmd_list, list) {
- if (scmd->request && scmd->request->rq_status != RQ_INACTIVE) {
+ if (scmd->request) {
/*
* If we are unable to remove the timer, it means
* that the command has already timed out or
static int __init init_scsi(void)
{
- int error, i;
+ int error;
error = scsi_init_queue();
if (error)
if (error)
goto cleanup_sysctl;
- for_each_possible_cpu(i)
- INIT_LIST_HEAD(&per_cpu(scsi_done_q, i));
-
scsi_netlink_init();
printk(KERN_NOTICE "SCSI subsystem initialized\n");
{
struct scsi_cmnd *cmd = req->special;
- req->flags &= ~REQ_DONTPREP;
+ req->cmd_flags &= ~REQ_DONTPREP;
req->special = NULL;
scsi_put_command(cmd);
req->sense_len = 0;
req->retries = retries;
req->timeout = timeout;
- req->flags |= flags | REQ_BLOCK_PC | REQ_SPECIAL | REQ_QUIET;
+ req->cmd_type = REQ_TYPE_BLOCK_PC;
+ req->cmd_flags |= flags | REQ_QUIET | REQ_PREEMPT;
/*
* head injection *required* here otherwise quiesce won't work
req = blk_get_request(sdev->request_queue, write, gfp);
if (!req)
goto free_sense;
- req->flags |= REQ_BLOCK_PC | REQ_QUIET;
+ req->cmd_type = REQ_TYPE_BLOCK_PC;
+ req->cmd_flags |= REQ_QUIET;
if (use_sg)
err = scsi_req_map_sg(req, buffer, use_sg, bufflen, gfp);
break;
}
}
- if (!(req->flags & REQ_QUIET)) {
+ if (!(req->cmd_flags & REQ_QUIET)) {
scmd_printk(KERN_INFO, cmd,
"Device not ready: ");
scsi_print_sense_hdr("", &sshdr);
scsi_end_request(cmd, 0, this_count, 1);
return;
case VOLUME_OVERFLOW:
- if (!(req->flags & REQ_QUIET)) {
+ if (!(req->cmd_flags & REQ_QUIET)) {
scmd_printk(KERN_INFO, cmd,
"Volume overflow, CDB: ");
__scsi_print_command(cmd->cmnd);
return;
}
if (result) {
- if (!(req->flags & REQ_QUIET)) {
+ if (!(req->cmd_flags & REQ_QUIET)) {
scmd_printk(KERN_INFO, cmd,
"SCSI error: return code = 0x%08x\n",
result);
/*
* if this is a rq->data based REQ_BLOCK_PC, setup for a non-sg xfer
*/
- if ((req->flags & REQ_BLOCK_PC) && !req->bio) {
+ if (blk_pc_request(req) && !req->bio) {
cmd->request_bufflen = req->data_len;
cmd->request_buffer = req->data;
req->buffer = req->data;
* these two cases differently. We differentiate by looking
* at request->cmd, as this tells us the real story.
*/
- if (req->flags & REQ_SPECIAL && req->special) {
+ if (blk_special_request(req) && req->special)
cmd = req->special;
- } else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
-
- if(unlikely(specials_only) && !(req->flags & REQ_SPECIAL)) {
- if(specials_only == SDEV_QUIESCE ||
- specials_only == SDEV_BLOCK)
+ else if (blk_pc_request(req) || blk_fs_request(req)) {
+ if (unlikely(specials_only) && !(req->cmd_flags & REQ_PREEMPT)){
+ if (specials_only == SDEV_QUIESCE ||
+ specials_only == SDEV_BLOCK)
goto defer;
sdev_printk(KERN_ERR, sdev,
goto kill;
}
-
/*
* Now try and find a command block that we can use.
*/
* lock. We hope REQ_STARTED prevents anything untoward from
* happening now.
*/
- if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
+ if (blk_fs_request(req) || blk_pc_request(req)) {
int ret;
/*
/*
* Initialize the actual SCSI command for this request.
*/
- if (req->flags & REQ_BLOCK_PC) {
+ if (blk_pc_request(req)) {
scsi_setup_blk_pc_cmnd(cmd);
} else if (req->rq_disk) {
struct scsi_driver *drv;
/*
* The request is now prepped, no need to come back here
*/
- req->flags |= REQ_DONTPREP;
+ req->cmd_flags |= REQ_DONTPREP;
return BLKPREP_OK;
defer:
if (unlikely(cmd == NULL)) {
printk(KERN_CRIT "impossible request in %s.\n"
"please mail a stack trace to "
- "linux-scsi@vger.kernel.org",
+ "linux-scsi@vger.kernel.org\n",
__FUNCTION__);
+ blk_dump_rq_flags(req, "foo");
BUG();
}
spin_lock(shost->host_lock);
SCpnt->cmnd[0] = READ_6;
SCpnt->sc_data_direction = DMA_FROM_DEVICE;
} else {
- printk(KERN_ERR "sd: Unknown command %lx\n", rq->flags);
-/* overkill panic("Unknown sd command %lx\n", rq->flags); */
+ printk(KERN_ERR "sd: Unknown command %x\n", rq->cmd_flags);
return 0;
}
static void sd_prepare_flush(request_queue_t *q, struct request *rq)
{
memset(rq->cmd, 0, sizeof(rq->cmd));
- rq->flags |= REQ_BLOCK_PC;
+ rq->cmd_type = REQ_TYPE_BLOCK_PC;
rq->timeout = SD_TIMEOUT;
rq->cmd[0] = SYNCHRONIZE_CACHE;
rq->cmd_len = 10;
if((count > SUN3_DMA_MINSIZE) && (sun3_dma_setup_done
!= cmd))
{
- if(cmd->request->flags & REQ_CMD) {
+ if(blk_fs_request(cmd->request)) {
sun3scsi_dma_setup(d, count,
rq_data_dir(cmd->request));
sun3_dma_setup_done = cmd;
static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted, Scsi_Cmnd *cmd,
int write_flag)
{
- if(cmd->request->flags & REQ_CMD)
+ if(blk_fs_request(cmd->request))
return wanted;
else
return 0;
static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted, Scsi_Cmnd *cmd,
int write_flag)
{
- if(cmd->request->flags & REQ_CMD)
+ if(blk_fs_request(cmd->request))
return wanted;
else
return 0;
config USB_STORAGE
tristate "USB Mass Storage support"
- depends on USB
- select SCSI
+ depends on USB && SCSI
---help---
Say Y here if you want to connect USB mass storage devices to your
computer's USB port. This is the driver you need for USB
similar devices. This driver may also be used for some cameras
and card readers.
- This option 'selects' (turns on, enables) 'SCSI', but you
+ This option depends on 'SCSI' support being enabled, but you
probably also need 'SCSI device support: SCSI disk support'
(BLK_DEV_SD) for most USB storage devices.
menu "File systems"
+if BLOCK
+
config EXT2_FS
tristate "Second extended fs support"
help
If you don't know whether you need it, then you don't need it:
answer N.
+endif
+
config INOTIFY
bool "Inotify file change notification support"
default y
If you want to develop a userspace FS, or if you want to use
a filesystem based on FUSE, answer Y or M.
+if BLOCK
menu "CD-ROM/DVD Filesystems"
config ISO9660_FS
depends on (UDF_FS=m && NLS) || (UDF_FS=y && NLS=y)
endmenu
+endif
+if BLOCK
menu "DOS/FAT/NT Filesystems"
config FAT_FS
It is perfectly safe to say N here.
endmenu
+endif
menu "Pseudo filesystems"
config ADFS_FS
tristate "ADFS file system support (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ depends on BLOCK && EXPERIMENTAL
help
The Acorn Disc Filing System is the standard file system of the
RiscOS operating system which runs on Acorn's ARM-based Risc PC
config AFFS_FS
tristate "Amiga FFS file system support (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ depends on BLOCK && EXPERIMENTAL
help
The Fast File System (FFS) is the common file system used on hard
disks by Amiga(tm) systems since AmigaOS Version 1.3 (34.20). Say Y
config HFS_FS
tristate "Apple Macintosh file system support (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ depends on BLOCK && EXPERIMENTAL
select NLS
help
If you say Y here, you will be able to mount Macintosh-formatted
config HFSPLUS_FS
tristate "Apple Extended HFS file system support"
+ depends on BLOCK
select NLS
select NLS_UTF8
help
config BEFS_FS
tristate "BeOS file system (BeFS) support (read only) (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ depends on BLOCK && EXPERIMENTAL
select NLS
help
The BeOS File System (BeFS) is the native file system of Be, Inc's
config BFS_FS
tristate "BFS file system support (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ depends on BLOCK && EXPERIMENTAL
help
Boot File System (BFS) is a file system used under SCO UnixWare to
allow the bootloader access to the kernel image and other important
config EFS_FS
tristate "EFS file system support (read only) (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ depends on BLOCK && EXPERIMENTAL
help
EFS is an older file system used for non-ISO9660 CD-ROMs and hard
disk partitions by SGI's IRIX operating system (IRIX 6.0 and newer
config JFFS_FS
tristate "Journalling Flash File System (JFFS) support"
- depends on MTD
+ depends on MTD && BLOCK
help
JFFS is the Journaling Flash File System developed by Axis
Communications in Sweden, aimed at providing a crash/powerdown-safe
config CRAMFS
tristate "Compressed ROM file system support (cramfs)"
+ depends on BLOCK
select ZLIB_INFLATE
help
Saying Y here includes support for CramFs (Compressed ROM File
config VXFS_FS
tristate "FreeVxFS file system support (VERITAS VxFS(TM) compatible)"
+ depends on BLOCK
help
FreeVxFS is a file system driver that support the VERITAS VxFS(TM)
file system format. VERITAS VxFS(TM) is the standard file system
config HPFS_FS
tristate "OS/2 HPFS file system support"
+ depends on BLOCK
help
OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS
is the file system used for organizing files on OS/2 hard disk
config QNX4FS_FS
tristate "QNX4 file system support (read only)"
+ depends on BLOCK
help
This is the file system used by the real-time operating systems
QNX 4 and QNX 6 (the latter is also called QNX RTP).
config SYSV_FS
tristate "System V/Xenix/V7/Coherent file system support"
+ depends on BLOCK
help
SCO, Xenix and Coherent are commercial Unix systems for Intel
machines, and Version 7 was used on the DEC PDP-11. Saying Y
config UFS_FS
tristate "UFS file system support (read only)"
+ depends on BLOCK
help
BSD and derivate versions of Unix (such as SunOS, FreeBSD, NetBSD,
OpenBSD and NeXTstep) use a file system called UFS. Some System V
endmenu
+if BLOCK
menu "Partition Types"
source "fs/partitions/Kconfig"
endmenu
+endif
source "fs/nls/Kconfig"
# Rewritten to use lists instead of if-statements.
#
-obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \
- block_dev.o char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
+obj-y := open.o read_write.o file_table.o super.o \
+ char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
- seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
- ioprio.o pnode.o drop_caches.o splice.o sync.o
+ seq_file.o xattr.o libfs.o fs-writeback.o \
+ pnode.o drop_caches.o splice.o sync.o
+
+ifeq ($(CONFIG_BLOCK),y)
+obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
+else
+obj-y += no-block.o
+endif
obj-$(CONFIG_INOTIFY) += inotify.o
obj-$(CONFIG_INOTIFY_USER) += inotify_user.o
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
-#include <linux/buffer_head.h>
#include "volume.h"
#include "vnode.h"
#include <rxrpc/call.h>
const struct address_space_operations afs_fs_aops = {
.readpage = afs_file_readpage,
- .sync_page = block_sync_page,
.set_page_dirty = __set_page_dirty_nobuffers,
.releasepage = afs_file_releasepage,
.invalidatepage = afs_file_invalidatepage,
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
-extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
#ifndef elf_addr_t
#define elf_addr_t unsigned long
/*
- * Copyright (C) 2001 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
struct biovec_slab *bp = bvec_slabs + i;
mempool_t **bvp = bs->bvec_pools + i;
- if (i >= scale)
+ if (pool_entries > 1 && i >= scale)
pool_entries >>= 1;
*bvp = mempool_create_slab_pool(pool_entries, bp->slab);
#include <linux/module.h>
#include <linux/blkpg.h>
#include <linux/buffer_head.h>
+#include <linux/writeback.h>
#include <linux/mpage.h>
#include <linux/mount.h>
#include <linux/uio.h>
#include <linux/namei.h>
#include <asm/uaccess.h>
+#include "internal.h"
struct bdev_inode {
struct block_device bdev;
}
EXPORT_SYMBOL(close_bdev_excl);
+
+int __invalidate_device(struct block_device *bdev)
+{
+ struct super_block *sb = get_super(bdev);
+ int res = 0;
+
+ if (sb) {
+ /*
+ * no need to lock the super, get_super holds the
+ * read mutex so the filesystem cannot go away
+ * under us (->put_super runs with the write lock
+ * held).
+ */
+ shrink_dcache_sb(sb);
+ res = invalidate_inodes(sb);
+ drop_super(sb);
+ }
+ invalidate_bdev(bdev, 0);
+ return res;
+}
+EXPORT_SYMBOL(__invalidate_device);
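A hedged sketch of an in-kernel caller (names are illustrative, not from the patch), unchanged by the move of __invalidate_device() into fs/block_dev.c:

/* Illustrative only: a driver reacting to a media change; bdev is
 * assumed to be a block_device the driver already references.  A
 * non-zero return means some inodes were busy and stayed around. */
static void example_media_changed(struct block_device *bdev)
{
	if (__invalidate_device(bdev))
		printk(KERN_WARNING "example: busy inodes on changed media\n");
}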
}
EXPORT_SYMBOL(sync_blockdev);
-static void __fsync_super(struct super_block *sb)
-{
- sync_inodes_sb(sb, 0);
- DQUOT_SYNC(sb);
- lock_super(sb);
- if (sb->s_dirt && sb->s_op->write_super)
- sb->s_op->write_super(sb);
- unlock_super(sb);
- if (sb->s_op->sync_fs)
- sb->s_op->sync_fs(sb, 1);
- sync_blockdev(sb->s_bdev);
- sync_inodes_sb(sb, 1);
-}
-
-/*
- * Write out and wait upon all dirty data associated with this
- * superblock. Filesystem data as well as the underlying block
- * device. Takes the superblock lock.
- */
-int fsync_super(struct super_block *sb)
-{
- __fsync_super(sb);
- return sync_blockdev(sb->s_bdev);
-}
-
/*
* Write out and wait upon all dirty data associated with this
* device. Filesystem data as well as the underlying block
}
EXPORT_SYMBOL(thaw_bdev);
-/*
- * sync everything. Start out by waking pdflush, because that writes back
- * all queues in parallel.
- */
-static void do_sync(unsigned long wait)
-{
- wakeup_pdflush(0);
- sync_inodes(0); /* All mappings, inodes and their blockdevs */
- DQUOT_SYNC(NULL);
- sync_supers(); /* Write the superblocks */
- sync_filesystems(0); /* Start syncing the filesystems */
- sync_filesystems(wait); /* Waitingly sync the filesystems */
- sync_inodes(wait); /* Mappings, inodes and blockdevs, again. */
- if (!wait)
- printk("Emergency Sync complete\n");
- if (unlikely(laptop_mode))
- laptop_sync_completion();
-}
-
-asmlinkage long sys_sync(void)
-{
- do_sync(1);
- return 0;
-}
-
-void emergency_sync(void)
-{
- pdflush_operation(do_sync, 0);
-}
-
-/*
- * Generic function to fsync a file.
- *
- * filp may be NULL if called via the msync of a vma.
- */
-
-int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
-{
- struct inode * inode = dentry->d_inode;
- struct super_block * sb;
- int ret, err;
-
- /* sync the inode to buffers */
- ret = write_inode_now(inode, 0);
-
- /* sync the superblock to buffers */
- sb = inode->i_sb;
- lock_super(sb);
- if (sb->s_op->write_super)
- sb->s_op->write_super(sb);
- unlock_super(sb);
-
- /* .. finally sync the buffers to disk */
- err = sync_blockdev(sb->s_bdev);
- if (!ret)
- ret = err;
- return ret;
-}
-
-long do_fsync(struct file *file, int datasync)
-{
- int ret;
- int err;
- struct address_space *mapping = file->f_mapping;
-
- if (!file->f_op || !file->f_op->fsync) {
- /* Why? We can still call filemap_fdatawrite */
- ret = -EINVAL;
- goto out;
- }
-
- ret = filemap_fdatawrite(mapping);
-
- /*
- * We need to protect against concurrent writers, which could cause
- * livelocks in fsync_buffers_list().
- */
- mutex_lock(&mapping->host->i_mutex);
- err = file->f_op->fsync(file, file->f_dentry, datasync);
- if (!ret)
- ret = err;
- mutex_unlock(&mapping->host->i_mutex);
- err = filemap_fdatawait(mapping);
- if (!ret)
- ret = err;
-out:
- return ret;
-}
-
-static long __do_fsync(unsigned int fd, int datasync)
-{
- struct file *file;
- int ret = -EBADF;
-
- file = fget(fd);
- if (file) {
- ret = do_fsync(file, datasync);
- fput(file);
- }
- return ret;
-}
-
-asmlinkage long sys_fsync(unsigned int fd)
-{
- return __do_fsync(fd, 0);
-}
-
-asmlinkage long sys_fdatasync(unsigned int fd)
-{
- return __do_fsync(fd, 1);
-}
-
/*
* Various filesystems appear to want __find_get_block to be non-blocking.
* But it's the page lock which protects the buffers. To get around this,
unlock_buffer(bh);
}
-/**
- * try_to_release_page() - release old fs-specific metadata on a page
- *
- * @page: the page which the kernel is trying to free
- * @gfp_mask: memory allocation flags (and I/O mode)
- *
- * The address_space is to try to release any data against the page
- * (presumably at page->private). If the release was successful, return `1'.
- * Otherwise return zero.
- *
- * The @gfp_mask argument specifies whether I/O may be performed to release
- * this page (__GFP_IO), and whether the call may block (__GFP_WAIT).
- *
- * NOTE: @gfp_mask may go away, and this function may become non-blocking.
- */
-int try_to_release_page(struct page *page, gfp_t gfp_mask)
-{
- struct address_space * const mapping = page->mapping;
-
- BUG_ON(!PageLocked(page));
- if (PageWriteback(page))
- return 0;
-
- if (mapping && mapping->a_ops->releasepage)
- return mapping->a_ops->releasepage(page, gfp_mask);
- return try_to_free_buffers(page);
-}
-EXPORT_SYMBOL(try_to_release_page);
-
/**
 * block_invalidatepage - invalidate part or all of a buffer-backed page
*
}
EXPORT_SYMBOL(block_invalidatepage);
-void do_invalidatepage(struct page *page, unsigned long offset)
-{
- void (*invalidatepage)(struct page *, unsigned long);
- invalidatepage = page->mapping->a_ops->invalidatepage ? :
- block_invalidatepage;
- (*invalidatepage)(page, offset);
-}
-
/*
* We attach and possibly dirty the buffers atomically wrt
* __set_page_dirty_buffers() via private_lock. try_to_free_buffers
#ifdef CONFIG_KMOD
#include <linux/kmod.h>
#endif
+#include "internal.h"
/*
* capabilities for /dev/mem, /dev/kmem and similar directly mappable character
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
-#include <linux/mpage.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/smp_lock.h>
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/fs.h>
-#include <linux/buffer_head.h>
#include <linux/stat.h>
#include <linux/pagemap.h>
#include <asm/div64.h>
*/
#include <linux/fs.h>
-#include <linux/ext2_fs.h>
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
}
break;
#ifdef CONFIG_CIFS_POSIX
- case EXT2_IOC_GETFLAGS:
+ case FS_IOC_GETFLAGS:
if(CIFS_UNIX_EXTATTR_CAP & caps) {
if (pSMBFile == NULL)
break;
&ExtAttrBits, &ExtAttrMask);
if(rc == 0)
rc = put_user(ExtAttrBits &
- EXT2_FL_USER_VISIBLE,
+ FS_FL_USER_VISIBLE,
(int __user *)arg);
}
break;
- case EXT2_IOC_SETFLAGS:
+ case FS_IOC_SETFLAGS:
if(CIFS_UNIX_EXTATTR_CAP & caps) {
if(get_user(ExtAttrBits,(int __user *)arg)) {
rc = -EFAULT;
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/ioctls.h>
-
-extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
+#include "internal.h"
int compat_log = 1;
+extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
+
int compat_printk(const char *fmt, ...)
{
va_list ap;
#define IOCTL_HASHSIZE 256
static struct ioctl_trans *ioctl32_hash_table[IOCTL_HASHSIZE];
-extern struct ioctl_trans ioctl_start[];
-extern int ioctl_table_size;
-
static inline unsigned long ioctl32_hash(unsigned long cmd)
{
return (((cmd >> 6) ^ (cmd >> 4) ^ cmd)) % IOCTL_HASHSIZE;
return 0;
}
-extern int copy_mount_options (const void __user *, unsigned long *);
-
#define SMBFS_NAME "smbfs"
#define NCPFS_NAME "ncpfs"
#define NFS4_NAME "nfs4"
#include <linux/if_pppox.h>
#include <linux/mtio.h>
#include <linux/cdrom.h>
-#include <linux/loop.h>
#include <linux/auto_fs.h>
#include <linux/auto_fs4.h>
#include <linux/tty.h>
#include <linux/vt_kern.h>
#include <linux/fb.h>
-#include <linux/ext2_fs.h>
-#include <linux/ext3_jbd.h>
-#include <linux/ext3_fs.h>
#include <linux/videodev.h>
#include <linux/netdevice.h>
#include <linux/raw.h>
#include <linux/pci.h>
#include <linux/module.h>
#include <linux/serial.h>
-#include <linux/reiserfs_fs.h>
#include <linux/if_tun.h>
#include <linux/ctype.h>
#include <linux/ioctl32.h>
#include <linux/nbd.h>
#include <linux/random.h>
#include <linux/filter.h>
-#include <linux/msdos_fs.h>
#include <linux/pktcdvd.h>
#include <linux/hiddev.h>
#include <linux/dvb/video.h>
#include <linux/lp.h>
-/* Aiee. Someone does not find a difference between int and long */
-#define EXT2_IOC32_GETFLAGS _IOR('f', 1, int)
-#define EXT2_IOC32_SETFLAGS _IOW('f', 2, int)
-#define EXT3_IOC32_GETVERSION _IOR('f', 3, int)
-#define EXT3_IOC32_SETVERSION _IOW('f', 4, int)
-#define EXT3_IOC32_GETRSVSZ _IOR('f', 5, int)
-#define EXT3_IOC32_SETRSVSZ _IOW('f', 6, int)
-#define EXT3_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
-#ifdef CONFIG_JBD_DEBUG
-#define EXT3_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
-#endif
-
-#define EXT2_IOC32_GETVERSION _IOR('v', 1, int)
-#define EXT2_IOC32_SETVERSION _IOW('v', 2, int)
-
static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd,
unsigned long arg, struct file *f)
{
return err;
}
-static int do_ext2_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
- /* These are just misnamed, they actually get/put from/to user an int */
- switch (cmd) {
- case EXT2_IOC32_GETFLAGS: cmd = EXT2_IOC_GETFLAGS; break;
- case EXT2_IOC32_SETFLAGS: cmd = EXT2_IOC_SETFLAGS; break;
- case EXT2_IOC32_GETVERSION: cmd = EXT2_IOC_GETVERSION; break;
- case EXT2_IOC32_SETVERSION: cmd = EXT2_IOC_SETVERSION; break;
- }
- return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg));
-}
-
-static int do_ext3_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
- /* These are just misnamed, they actually get/put from/to user an int */
- switch (cmd) {
- case EXT3_IOC32_GETVERSION: cmd = EXT3_IOC_GETVERSION; break;
- case EXT3_IOC32_SETVERSION: cmd = EXT3_IOC_SETVERSION; break;
- case EXT3_IOC32_GETRSVSZ: cmd = EXT3_IOC_GETRSVSZ; break;
- case EXT3_IOC32_SETRSVSZ: cmd = EXT3_IOC_SETRSVSZ; break;
- case EXT3_IOC32_GROUP_EXTEND: cmd = EXT3_IOC_GROUP_EXTEND; break;
-#ifdef CONFIG_JBD_DEBUG
- case EXT3_IOC32_WAIT_FOR_READONLY: cmd = EXT3_IOC_WAIT_FOR_READONLY; break;
-#endif
- }
- return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg));
-}
-
struct compat_video_event {
int32_t type;
compat_time_t timestamp;
}
#endif
+#ifdef CONFIG_BLOCK
struct hd_geometry32 {
unsigned char heads;
unsigned char sectors;
}
return err;
}
+#endif /* CONFIG_BLOCK */
struct sock_fprog32 {
unsigned short len;
}
+#ifdef CONFIG_BLOCK
struct mtget32 {
compat_long_t mt_type;
compat_long_t mt_resid;
return err;
}
-
-struct loop_info32 {
- compat_int_t lo_number; /* ioctl r/o */
- compat_dev_t lo_device; /* ioctl r/o */
- compat_ulong_t lo_inode; /* ioctl r/o */
- compat_dev_t lo_rdevice; /* ioctl r/o */
- compat_int_t lo_offset;
- compat_int_t lo_encrypt_type;
- compat_int_t lo_encrypt_key_size; /* ioctl w/o */
- compat_int_t lo_flags; /* ioctl r/o */
- char lo_name[LO_NAME_SIZE];
- unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
- compat_ulong_t lo_init[2];
- char reserved[4];
-};
-
-static int loop_status(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
- mm_segment_t old_fs = get_fs();
- struct loop_info l;
- struct loop_info32 __user *ul;
- int err = -EINVAL;
-
- ul = compat_ptr(arg);
- switch(cmd) {
- case LOOP_SET_STATUS:
- err = get_user(l.lo_number, &ul->lo_number);
- err |= __get_user(l.lo_device, &ul->lo_device);
- err |= __get_user(l.lo_inode, &ul->lo_inode);
- err |= __get_user(l.lo_rdevice, &ul->lo_rdevice);
- err |= __copy_from_user(&l.lo_offset, &ul->lo_offset,
- 8 + (unsigned long)l.lo_init - (unsigned long)&l.lo_offset);
- if (err) {
- err = -EFAULT;
- } else {
- set_fs (KERNEL_DS);
- err = sys_ioctl (fd, cmd, (unsigned long)&l);
- set_fs (old_fs);
- }
- break;
- case LOOP_GET_STATUS:
- set_fs (KERNEL_DS);
- err = sys_ioctl (fd, cmd, (unsigned long)&l);
- set_fs (old_fs);
- if (!err) {
- err = put_user(l.lo_number, &ul->lo_number);
- err |= __put_user(l.lo_device, &ul->lo_device);
- err |= __put_user(l.lo_inode, &ul->lo_inode);
- err |= __put_user(l.lo_rdevice, &ul->lo_rdevice);
- err |= __copy_to_user(&ul->lo_offset, &l.lo_offset,
- (unsigned long)l.lo_init - (unsigned long)&l.lo_offset);
- if (err)
- err = -EFAULT;
- }
- break;
- default: {
- static int count;
- if (++count <= 20)
- printk("%s: Unknown loop ioctl cmd, fd(%d) "
- "cmd(%08x) arg(%08lx)\n",
- __FUNCTION__, fd, cmd, arg);
- }
- }
- return err;
-}
-
-extern int tty_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg);
+#endif /* CONFIG_BLOCK */
#ifdef CONFIG_VT
return -EINVAL;
}
+#ifdef CONFIG_BLOCK
static int broken_blkgetsize(unsigned int fd, unsigned int cmd, unsigned long arg)
{
/* The mkswap binary hard codes it to Intel value :-((( */
return sys_ioctl(fd, cmd, (unsigned long)a);
}
+#endif
static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg)
{
return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, arg);
}
+#ifdef CONFIG_BLOCK
/* Fix sizeof(sizeof()) breakage */
#define BLKBSZGET_32 _IOR(0x12,112,int)
#define BLKBSZSET_32 _IOW(0x12,113,int)
{
return sys_ioctl(fd, BLKGETSIZE64, (unsigned long)compat_ptr(arg));
}
+#endif
/* Bluetooth ioctls */
#define HCIUARTSETPROTO _IOW('U', 200, int)
#define HIDPGETCONNLIST _IOR('H', 210, int)
#define HIDPGETCONNINFO _IOR('H', 211, int)
+#ifdef CONFIG_BLOCK
struct floppy_struct32 {
compat_uint_t size;
compat_uint_t sect;
kfree(karg);
return err;
}
+#endif
struct mtd_oob_buf32 {
u_int32_t start;
return err;
}
-#define VFAT_IOCTL_READDIR_BOTH32 _IOR('r', 1, struct compat_dirent[2])
-#define VFAT_IOCTL_READDIR_SHORT32 _IOR('r', 2, struct compat_dirent[2])
-
-static long
-put_dirent32 (struct dirent *d, struct compat_dirent __user *d32)
-{
- if (!access_ok(VERIFY_WRITE, d32, sizeof(struct compat_dirent)))
- return -EFAULT;
-
- __put_user(d->d_ino, &d32->d_ino);
- __put_user(d->d_off, &d32->d_off);
- __put_user(d->d_reclen, &d32->d_reclen);
- if (__copy_to_user(d32->d_name, d->d_name, d->d_reclen))
- return -EFAULT;
-
- return 0;
-}
-
-static int vfat_ioctl32(unsigned fd, unsigned cmd, unsigned long arg)
-{
- struct compat_dirent __user *p = compat_ptr(arg);
- int ret;
- mm_segment_t oldfs = get_fs();
- struct dirent d[2];
-
- switch(cmd)
- {
- case VFAT_IOCTL_READDIR_BOTH32:
- cmd = VFAT_IOCTL_READDIR_BOTH;
- break;
- case VFAT_IOCTL_READDIR_SHORT32:
- cmd = VFAT_IOCTL_READDIR_SHORT;
- break;
- }
-
- set_fs(KERNEL_DS);
- ret = sys_ioctl(fd,cmd,(unsigned long)&d);
- set_fs(oldfs);
- if (ret >= 0) {
- ret |= put_dirent32(&d[0], p);
- ret |= put_dirent32(&d[1], p + 1);
- }
- return ret;
-}
-
-#define REISERFS_IOC_UNPACK32 _IOW(0xCD,1,int)
-
-static int reiserfs_ioctl32(unsigned fd, unsigned cmd, unsigned long ptr)
-{
- if (cmd == REISERFS_IOC_UNPACK32)
- cmd = REISERFS_IOC_UNPACK;
-
- return sys_ioctl(fd,cmd,ptr);
-}
-
+#ifdef CONFIG_BLOCK
struct raw32_config_request
{
compat_int_t raw_minor;
}
return ret;
}
+#endif /* CONFIG_BLOCK */
struct serial_struct32 {
compat_int_t type;
HANDLE_IOCTL(SIOCRTMSG, ret_einval)
HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp)
#endif
+#ifdef CONFIG_BLOCK
HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo)
HANDLE_IOCTL(BLKRAGET, w_long)
HANDLE_IOCTL(BLKGETSIZE, w_long)
HANDLE_IOCTL(FDWERRORGET32, fd_ioctl_trans)
HANDLE_IOCTL(SG_IO,sg_ioctl_trans)
HANDLE_IOCTL(SG_GET_REQUEST_TABLE, sg_grt_trans)
+#endif
HANDLE_IOCTL(PPPIOCGIDLE32, ppp_ioctl_trans)
HANDLE_IOCTL(PPPIOCSCOMPRESS32, ppp_ioctl_trans)
HANDLE_IOCTL(PPPIOCSPASS32, ppp_sock_fprog_ioctl_trans)
HANDLE_IOCTL(PPPIOCSACTIVE32, ppp_sock_fprog_ioctl_trans)
+#ifdef CONFIG_BLOCK
HANDLE_IOCTL(MTIOCGET32, mt_ioctl_trans)
HANDLE_IOCTL(MTIOCPOS32, mt_ioctl_trans)
HANDLE_IOCTL(CDROMREADAUDIO, cdrom_ioctl_trans)
HANDLE_IOCTL(CDROM_SEND_PACKET, cdrom_ioctl_trans)
-HANDLE_IOCTL(LOOP_SET_STATUS, loop_status)
-HANDLE_IOCTL(LOOP_GET_STATUS, loop_status)
+#endif
#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int)
HANDLE_IOCTL(AUTOFS_IOC_SETTIMEOUT32, ioc_settimeout)
#ifdef CONFIG_VT
HANDLE_IOCTL(GIO_UNIMAP, do_unimap_ioctl)
HANDLE_IOCTL(KDFONTOP, do_kdfontop_ioctl)
#endif
-HANDLE_IOCTL(EXT2_IOC32_GETFLAGS, do_ext2_ioctl)
-HANDLE_IOCTL(EXT2_IOC32_SETFLAGS, do_ext2_ioctl)
-HANDLE_IOCTL(EXT2_IOC32_GETVERSION, do_ext2_ioctl)
-HANDLE_IOCTL(EXT2_IOC32_SETVERSION, do_ext2_ioctl)
-HANDLE_IOCTL(EXT3_IOC32_GETVERSION, do_ext3_ioctl)
-HANDLE_IOCTL(EXT3_IOC32_SETVERSION, do_ext3_ioctl)
-HANDLE_IOCTL(EXT3_IOC32_GETRSVSZ, do_ext3_ioctl)
-HANDLE_IOCTL(EXT3_IOC32_SETRSVSZ, do_ext3_ioctl)
-HANDLE_IOCTL(EXT3_IOC32_GROUP_EXTEND, do_ext3_ioctl)
-COMPATIBLE_IOCTL(EXT3_IOC_GROUP_ADD)
-#ifdef CONFIG_JBD_DEBUG
-HANDLE_IOCTL(EXT3_IOC32_WAIT_FOR_READONLY, do_ext3_ioctl)
-#endif
/* One SMB ioctl needs translations. */
#define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t)
HANDLE_IOCTL(SMB_IOC_GETMOUNTUID_32, do_smb_getmountuid)
HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl)
HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl)
/* block stuff */
+#ifdef CONFIG_BLOCK
HANDLE_IOCTL(BLKBSZGET_32, do_blkbszget)
HANDLE_IOCTL(BLKBSZSET_32, do_blkbszset)
HANDLE_IOCTL(BLKGETSIZE64_32, do_blkgetsize64)
-/* vfat */
-HANDLE_IOCTL(VFAT_IOCTL_READDIR_BOTH32, vfat_ioctl32)
-HANDLE_IOCTL(VFAT_IOCTL_READDIR_SHORT32, vfat_ioctl32)
-HANDLE_IOCTL(REISERFS_IOC_UNPACK32, reiserfs_ioctl32)
/* Raw devices */
HANDLE_IOCTL(RAW_SETBIND, raw_ioctl)
HANDLE_IOCTL(RAW_GETBIND, raw_ioctl)
+#endif
/* Serial */
HANDLE_IOCTL(TIOCGSERIAL, serial_struct_ioctl)
HANDLE_IOCTL(TIOCSSERIAL, serial_struct_ioctl)
#include <linux/seqlock.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
+#include "internal.h"
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL(d_genocide);
-extern void bdev_cache_init(void);
-extern void chrdev_init(void);
-
void __init vfs_caches_init_early(void)
{
dcache_init_early();
.read = generic_read_dir,
.readdir = ext2_readdir,
.ioctl = ext2_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = ext2_compat_ioctl,
+#endif
.fsync = ext2_sync_file,
};
/* ioctl.c */
extern int ext2_ioctl (struct inode *, struct file *, unsigned int,
unsigned long);
+extern long ext2_compat_ioctl(struct file *, unsigned int, unsigned long);
/* namei.c */
struct dentry *ext2_get_parent(struct dentry *child);
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.ioctl = ext2_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = ext2_compat_ioctl,
+#endif
.mmap = generic_file_mmap,
.open = generic_file_open,
.release = ext2_release_file,
.read = xip_file_read,
.write = xip_file_write,
.ioctl = ext2_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = ext2_compat_ioctl,
+#endif
.mmap = xip_file_mmap,
.open = generic_file_open,
.release = ext2_release_file,
#include <linux/capability.h>
#include <linux/time.h>
#include <linux/sched.h>
+#include <linux/compat.h>
+#include <linux/smp_lock.h>
#include <asm/current.h>
#include <asm/uaccess.h>
return -ENOTTY;
}
}
+
+#ifdef CONFIG_COMPAT
+long ext2_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ int ret;
+
+ /* These are just misnamed, they actually get/put from/to user an int */
+ switch (cmd) {
+ case EXT2_IOC32_GETFLAGS:
+ cmd = EXT2_IOC_GETFLAGS;
+ break;
+ case EXT2_IOC32_SETFLAGS:
+ cmd = EXT2_IOC_SETFLAGS;
+ break;
+ case EXT2_IOC32_GETVERSION:
+ cmd = EXT2_IOC_GETVERSION;
+ break;
+ case EXT2_IOC32_SETVERSION:
+ cmd = EXT2_IOC_SETVERSION;
+ break;
+ default:
+ return -ENOIOCTLCMD;
+ }
+ lock_kernel();
+ ret = ext2_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
+ unlock_kernel();
+ return ret;
+}
+#endif
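The compat entry point works because the 32-bit and native command numbers differ only in the size field that _IOR()/_IOW() encode: the native EXT2 flag ioctls take a long, the 32-bit ones an int, so on a 64-bit kernel the constants disagree and have to be remapped before calling ext2_ioctl(). A minimal user-space sketch, not part of the patch, assuming the usual asm-generic _IOC bit layout:

/* illustrative only: compare the native and compat encodings of GETFLAGS;
 * they only match when sizeof(long) == sizeof(int) */
#include <stdio.h>
#include <sys/ioctl.h>

#define EXT2_IOC_GETFLAGS	_IOR('f', 1, long)	/* native definition */
#define EXT2_IOC32_GETFLAGS	_IOR('f', 1, int)	/* what a 32-bit binary issues */

int main(void)
{
	printf("native: %#x  compat: %#x\n",
	       (unsigned int)EXT2_IOC_GETFLAGS,
	       (unsigned int)EXT2_IOC32_GETFLAGS);
	return 0;
}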
.read = generic_read_dir,
.readdir = ext3_readdir, /* we take BKL. needed?*/
.ioctl = ext3_ioctl, /* BKL held */
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = ext3_compat_ioctl,
+#endif
.fsync = ext3_sync_file, /* BKL held */
#ifdef CONFIG_EXT3_INDEX
.release = ext3_release_dir,
.readv = generic_file_readv,
.writev = generic_file_writev,
.ioctl = ext3_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = ext3_compat_ioctl,
+#endif
.mmap = generic_file_mmap,
.open = generic_file_open,
.release = ext3_release_file,
#include <linux/writeback.h>
#include <linux/mpage.h>
#include <linux/uio.h>
+#include <linux/bio.h>
#include "xattr.h"
#include "acl.h"
return bh;
if (buffer_uptodate(bh))
return bh;
- ll_rw_block(READ, 1, &bh);
+ ll_rw_block(READ_META, 1, &bh);
wait_on_buffer(bh);
if (buffer_uptodate(bh))
return bh;
*/
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
- submit_bh(READ, bh);
+ submit_bh(READ_META, bh);
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
ext3_error(inode->i_sb, "ext3_get_inode_loc",
#include <linux/ext3_fs.h>
#include <linux/ext3_jbd.h>
#include <linux/time.h>
+#include <linux/compat.h>
+#include <linux/smp_lock.h>
#include <asm/uaccess.h>
-
int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
unsigned long arg)
{
return -ENOTTY;
}
}
+
+#ifdef CONFIG_COMPAT
+long ext3_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ int ret;
+
+ /* These are just misnamed, they actually get/put from/to user an int */
+ switch (cmd) {
+ case EXT3_IOC32_GETFLAGS:
+ cmd = EXT3_IOC_GETFLAGS;
+ break;
+ case EXT3_IOC32_SETFLAGS:
+ cmd = EXT3_IOC_SETFLAGS;
+ break;
+ case EXT3_IOC32_GETVERSION:
+ cmd = EXT3_IOC_GETVERSION;
+ break;
+ case EXT3_IOC32_SETVERSION:
+ cmd = EXT3_IOC_SETVERSION;
+ break;
+ case EXT3_IOC32_GROUP_EXTEND:
+ cmd = EXT3_IOC_GROUP_EXTEND;
+ break;
+ case EXT3_IOC32_GETVERSION_OLD:
+ cmd = EXT3_IOC_GETVERSION_OLD;
+ break;
+ case EXT3_IOC32_SETVERSION_OLD:
+ cmd = EXT3_IOC_SETVERSION_OLD;
+ break;
+#ifdef CONFIG_JBD_DEBUG
+ case EXT3_IOC32_WAIT_FOR_READONLY:
+ cmd = EXT3_IOC_WAIT_FOR_READONLY;
+ break;
+#endif
+ case EXT3_IOC32_GETRSVSZ:
+ cmd = EXT3_IOC_GETRSVSZ;
+ break;
+ case EXT3_IOC32_SETRSVSZ:
+ cmd = EXT3_IOC_SETRSVSZ;
+ break;
+ case EXT3_IOC_GROUP_ADD:
+ break;
+ default:
+ return -ENOIOCTLCMD;
+ }
+ lock_kernel();
+ ret = ext3_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
+ unlock_kernel();
+ return ret;
+}
+#endif
#include <linux/string.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
+#include <linux/bio.h>
#include <linux/smp_lock.h>
#include "namei.h"
bh = ext3_getblk(NULL, dir, b++, 0, &err);
bh_use[ra_max] = bh;
if (bh)
- ll_rw_block(READ, 1, &bh);
+ ll_rw_block(READ_META, 1, &bh);
}
}
if ((bh = bh_use[ra_ptr++]) == NULL)
#include <linux/dirent.h>
#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
+#include <linux/compat.h>
#include <asm/uaccess.h>
static inline loff_t fat_make_i_pos(struct super_block *sb,
return ret;
}
+#ifdef CONFIG_COMPAT
+#define VFAT_IOCTL_READDIR_BOTH32 _IOR('r', 1, struct compat_dirent[2])
+#define VFAT_IOCTL_READDIR_SHORT32 _IOR('r', 2, struct compat_dirent[2])
+
+static long fat_compat_put_dirent32(struct dirent *d,
+ struct compat_dirent __user *d32)
+{
+ if (!access_ok(VERIFY_WRITE, d32, sizeof(struct compat_dirent)))
+ return -EFAULT;
+
+ __put_user(d->d_ino, &d32->d_ino);
+ __put_user(d->d_off, &d32->d_off);
+ __put_user(d->d_reclen, &d32->d_reclen);
+ if (__copy_to_user(d32->d_name, d->d_name, d->d_reclen))
+ return -EFAULT;
+
+ return 0;
+}
+
+static long fat_compat_dir_ioctl(struct file *file, unsigned cmd,
+ unsigned long arg)
+{
+ struct compat_dirent __user *p = compat_ptr(arg);
+ int ret;
+ mm_segment_t oldfs = get_fs();
+ struct dirent d[2];
+
+ switch (cmd) {
+ case VFAT_IOCTL_READDIR_BOTH32:
+ cmd = VFAT_IOCTL_READDIR_BOTH;
+ break;
+ case VFAT_IOCTL_READDIR_SHORT32:
+ cmd = VFAT_IOCTL_READDIR_SHORT;
+ break;
+ default:
+ return -ENOIOCTLCMD;
+ }
+
+ set_fs(KERNEL_DS);
+ lock_kernel();
+ ret = fat_dir_ioctl(file->f_dentry->d_inode, file,
+ cmd, (unsigned long) &d);
+ unlock_kernel();
+ set_fs(oldfs);
+ if (ret >= 0) {
+ ret |= fat_compat_put_dirent32(&d[0], p);
+ ret |= fat_compat_put_dirent32(&d[1], p + 1);
+ }
+ return ret;
+}
+#endif /* CONFIG_COMPAT */
+
const struct file_operations fat_dir_operations = {
.read = generic_read_dir,
.readdir = fat_readdir,
.ioctl = fat_dir_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = fat_compat_dir_ioctl,
+#endif
.fsync = file_fsync,
};
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/buffer_head.h>
-
-extern struct super_block *blockdev_superblock;
+#include "internal.h"
/**
* __mark_inode_dirty - internal function
if (!bdi_cap_writeback_dirty(bdi)) {
list_move(&inode->i_list, &sb->s_dirty);
- if (sb == blockdev_superblock) {
+ if (sb_is_blkdev_sb(sb)) {
/*
* Dirty memory-backed blockdev: the ramdisk
* driver does this. Skip just this inode
if (wbc->nonblocking && bdi_write_congested(bdi)) {
wbc->encountered_congestion = 1;
- if (sb != blockdev_superblock)
+ if (!sb_is_blkdev_sb(sb))
break; /* Skip a congested fs */
list_move(&inode->i_list, &sb->s_dirty);
continue; /* Skip a congested blockdev */
}
if (wbc->bdi && bdi != wbc->bdi) {
- if (sb != blockdev_superblock)
+ if (!sb_is_blkdev_sb(sb))
break; /* fs has the wrong queue */
list_move(&inode->i_list, &sb->s_dirty);
continue; /* blockdev has wrong queue */
/* ext2 ioctls (EXT2_IOC_GETFLAGS and EXT2_IOC_SETFLAGS) to support
* chattr/lsattr */
-#define HFSPLUS_IOC_EXT2_GETFLAGS _IOR('f', 1, long)
-#define HFSPLUS_IOC_EXT2_SETFLAGS _IOW('f', 2, long)
-
-#define EXT2_FLAG_IMMUTABLE 0x00000010 /* Immutable file */
-#define EXT2_FLAG_APPEND 0x00000020 /* writes to file may only append */
-#define EXT2_FLAG_NODUMP 0x00000040 /* do not dump file */
+#define HFSPLUS_IOC_EXT2_GETFLAGS FS_IOC_GETFLAGS
+#define HFSPLUS_IOC_EXT2_SETFLAGS FS_IOC_SETFLAGS
/*
case HFSPLUS_IOC_EXT2_GETFLAGS:
flags = 0;
if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_IMMUTABLE)
- flags |= EXT2_FLAG_IMMUTABLE; /* EXT2_IMMUTABLE_FL */
+ flags |= FS_IMMUTABLE_FL; /* EXT2_IMMUTABLE_FL */
if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_APPEND)
- flags |= EXT2_FLAG_APPEND; /* EXT2_APPEND_FL */
+ flags |= FS_APPEND_FL; /* EXT2_APPEND_FL */
if (HFSPLUS_I(inode).userflags & HFSPLUS_FLG_NODUMP)
- flags |= EXT2_FLAG_NODUMP; /* EXT2_NODUMP_FL */
+ flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */
return put_user(flags, (int __user *)arg);
case HFSPLUS_IOC_EXT2_SETFLAGS: {
if (IS_RDONLY(inode))
if (get_user(flags, (int __user *)arg))
return -EFAULT;
- if (flags & (EXT2_FLAG_IMMUTABLE|EXT2_FLAG_APPEND) ||
+ if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) ||
HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) {
if (!capable(CAP_LINUX_IMMUTABLE))
return -EPERM;
}
/* don't silently ignore unsupported ext2 flags */
- if (flags & ~(EXT2_FLAG_IMMUTABLE|EXT2_FLAG_APPEND|
- EXT2_FLAG_NODUMP))
+ if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL))
return -EOPNOTSUPP;
- if (flags & EXT2_FLAG_IMMUTABLE) { /* EXT2_IMMUTABLE_FL */
+ if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */
inode->i_flags |= S_IMMUTABLE;
HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE;
} else {
inode->i_flags &= ~S_IMMUTABLE;
HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_IMMUTABLE;
}
- if (flags & EXT2_FLAG_APPEND) { /* EXT2_APPEND_FL */
+ if (flags & FS_APPEND_FL) { /* EXT2_APPEND_FL */
inode->i_flags |= S_APPEND;
HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_APPEND;
} else {
inode->i_flags &= ~S_APPEND;
HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_APPEND;
}
- if (flags & EXT2_FLAG_NODUMP) /* EXT2_NODUMP_FL */
+ if (flags & FS_NODUMP_FL) /* EXT2_NODUMP_FL */
HFSPLUS_I(inode).userflags |= HFSPLUS_FLG_NODUMP;
else
HFSPLUS_I(inode).userflags &= ~HFSPLUS_FLG_NODUMP;
}
EXPORT_SYMBOL(invalidate_inodes);
-
-int __invalidate_device(struct block_device *bdev)
-{
- struct super_block *sb = get_super(bdev);
- int res = 0;
-
- if (sb) {
- /*
- * no need to lock the super, get_super holds the
- * read mutex so the filesystem cannot go away
- * under us (->put_super runs with the write lock
- * hold).
- */
- shrink_dcache_sb(sb);
- res = invalidate_inodes(sb);
- drop_super(sb);
- }
- invalidate_bdev(bdev, 0);
- return res;
-}
-EXPORT_SYMBOL(__invalidate_device);
static int can_unuse(struct inode *inode)
{
--- /dev/null
+/* fs/ internal definitions
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/ioctl32.h>
+
+struct super_block;
+
+/*
+ * block_dev.c
+ */
+#ifdef CONFIG_BLOCK
+extern struct super_block *blockdev_superblock;
+extern void __init bdev_cache_init(void);
+
+static inline int sb_is_blkdev_sb(struct super_block *sb)
+{
+ return sb == blockdev_superblock;
+}
+
+#else
+static inline void bdev_cache_init(void)
+{
+}
+
+static inline int sb_is_blkdev_sb(struct super_block *sb)
+{
+ return 0;
+}
+#endif
+
+/*
+ * char_dev.c
+ */
+extern void __init chrdev_init(void);
+
+/*
+ * compat_ioctl.c
+ */
+#ifdef CONFIG_COMPAT
+extern struct ioctl_trans ioctl_start[];
+extern int ioctl_table_size;
+#endif
+
+/*
+ * namespace.c
+ */
+extern int copy_mount_options(const void __user *, unsigned long *);
/*
* fs/ioprio.c
*
- * Copyright (C) 2004 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2004 Jens Axboe <axboe@kernel.dk>
*
* Helper functions for setting/querying io priorities of processes. The
 * system calls closely mimic getpriority/setpriority, see the man page for
/* see wmb() in current_io_context() */
smp_read_barrier_depends();
- if (ioc && ioc->set_ioprio)
- ioc->set_ioprio(ioc, ioprio);
+ if (ioc)
+ ioc->ioprio_changed = 1;
task_unlock(task);
return 0;
}
ret = -ESRCH;
- read_lock_irq(&tasklist_lock);
+ /*
+ * We want IOPRIO_WHO_PGRP/IOPRIO_WHO_USER to be "atomic",
+ * so we can't use rcu_read_lock(). See re-copy of ->ioprio
+ * in copy_process().
+ */
+ read_lock(&tasklist_lock);
switch (which) {
case IOPRIO_WHO_PROCESS:
if (!who)
ret = -EINVAL;
}
- read_unlock_irq(&tasklist_lock);
+ read_unlock(&tasklist_lock);
return ret;
}
int ret = -ESRCH;
int tmpio;
- read_lock_irq(&tasklist_lock);
+ read_lock(&tasklist_lock);
switch (which) {
case IOPRIO_WHO_PROCESS:
if (!who)
ret = -EINVAL;
}
- read_unlock_irq(&tasklist_lock);
+ read_unlock(&tasklist_lock);
return ret;
}
*/
#include <linux/fs.h>
-#include <linux/ext2_fs.h>
#include <linux/ctype.h>
#include <linux/capability.h>
#include <linux/time.h>
long jfs_flag;
long ext2_flag;
} jfs_map[] = {
- {JFS_NOATIME_FL, EXT2_NOATIME_FL},
- {JFS_DIRSYNC_FL, EXT2_DIRSYNC_FL},
- {JFS_SYNC_FL, EXT2_SYNC_FL},
- {JFS_SECRM_FL, EXT2_SECRM_FL},
- {JFS_UNRM_FL, EXT2_UNRM_FL},
- {JFS_APPEND_FL, EXT2_APPEND_FL},
- {JFS_IMMUTABLE_FL, EXT2_IMMUTABLE_FL},
+ {JFS_NOATIME_FL, FS_NOATIME_FL},
+ {JFS_DIRSYNC_FL, FS_DIRSYNC_FL},
+ {JFS_SYNC_FL, FS_SYNC_FL},
+ {JFS_SECRM_FL, FS_SECRM_FL},
+ {JFS_UNRM_FL, FS_UNRM_FL},
+ {JFS_APPEND_FL, FS_APPEND_FL},
+ {JFS_IMMUTABLE_FL, FS_IMMUTABLE_FL},
{0, 0},
};
* the call was made get new I/O started against them. If wbc->sync_mode is
* WB_SYNC_ALL then we were called for data integrity and we must wait for
* existing IO to complete.
+ *
+ * If you fix this you should check generic_writepages() also!
*/
int
mpage_writepages(struct address_space *mapping,
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/mount.h>
+#include <linux/ramfs.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include "pnode.h"
-extern int __init init_rootfs(void);
-
/* spinlock for vfsmount related operations, inplace of dcache_lock */
__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/file.h>
-#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/sunrpc/clnt.h>
--- /dev/null
+/* no-block.c: implementation of routines required for non-BLOCK configuration
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+
+static int no_blkdev_open(struct inode * inode, struct file * filp)
+{
+ return -ENODEV;
+}
+
+const struct file_operations def_blk_fops = {
+ .open = no_blkdev_open,
+};
# Makefile for the linux kernel.
#
-obj-y := check.o
+obj-$(CONFIG_BLOCK) := check.o
obj-$(CONFIG_ACORN_PARTITION) += acorn.o
obj-$(CONFIG_AMIGA_PARTITION) += amiga.o
if (i == 0)
seq_printf(f, "Character devices:\n");
chrdev_show(f, i);
- } else {
+ }
+#ifdef CONFIG_BLOCK
+ else {
i -= CHRDEV_MAJOR_HASH_SIZE;
if (i == 0)
seq_printf(f, "\nBlock devices:\n");
blkdev_show(f, i);
}
+#endif
return 0;
}
}
#endif
+#ifdef CONFIG_BLOCK
extern struct seq_operations partitions_op;
static int partitions_open(struct inode *inode, struct file *file)
{
.llseek = seq_lseek,
.release = seq_release,
};
+#endif
#ifdef CONFIG_MODULES
extern struct seq_operations modules_op;
entry->proc_fops = &proc_kmsg_operations;
create_seq_entry("devices", 0, &proc_devinfo_operations);
create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations);
+#ifdef CONFIG_BLOCK
create_seq_entry("partitions", 0, &proc_partitions_operations);
+#endif
create_seq_entry("stat", 0, &proc_stat_operations);
create_seq_entry("interrupts", 0, &proc_interrupts_operations);
#ifdef CONFIG_SLAB
create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations);
create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations);
create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations);
+#ifdef CONFIG_BLOCK
create_seq_entry("diskstats", 0, &proc_diskstats_operations);
+#endif
#ifdef CONFIG_MODULES
create_seq_entry("modules", 0, &proc_modules_operations);
#endif
return 0;
}
+/*
+ * look up a superblock on which quota ops will be performed
+ * - use the name of a block device to find the superblock thereon
+ */
+static inline struct super_block *quotactl_block(const char __user *special)
+{
+#ifdef CONFIG_BLOCK
+ struct block_device *bdev;
+ struct super_block *sb;
+ char *tmp = getname(special);
+
+ if (IS_ERR(tmp))
+ return ERR_PTR(PTR_ERR(tmp));
+ bdev = lookup_bdev(tmp);
+ putname(tmp);
+ if (IS_ERR(bdev))
+ return ERR_PTR(PTR_ERR(bdev));
+ sb = get_super(bdev);
+ bdput(bdev);
+ if (!sb)
+ return ERR_PTR(-ENODEV);
+
+ return sb;
+#else
+ return ERR_PTR(-ENODEV);
+#endif
+}
+
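From user space the path that ends up in quotactl_block() is the quotactl(2) "special" argument: a block device node that the helper above resolves to the superblock mounted on it. A hedged sketch (the device path is a placeholder, and it assumes <sys/quota.h> exposes the current QCMD/Q_SYNC interface):

/* illustrative only: sync quotas for the filesystem mounted on the named
 * block device; quotactl_block() does the device-node -> superblock step */
#include <stdio.h>
#include <sys/quota.h>

int main(void)
{
	if (quotactl(QCMD(Q_SYNC, USRQUOTA), "/dev/sda1", 0, NULL) != 0)
		perror("quotactl");
	return 0;
}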
/*
* This is the system call interface. This communicates with
* the user-level programs. Currently this only supports diskquota
{
uint cmds, type;
struct super_block *sb = NULL;
- struct block_device *bdev;
- char *tmp;
int ret;
cmds = cmd >> SUBCMDSHIFT;
type = cmd & SUBCMDMASK;
if (cmds != Q_SYNC || special) {
- tmp = getname(special);
- if (IS_ERR(tmp))
- return PTR_ERR(tmp);
- bdev = lookup_bdev(tmp);
- putname(tmp);
- if (IS_ERR(bdev))
- return PTR_ERR(bdev);
- sb = get_super(bdev);
- bdput(bdev);
- if (!sb)
- return -ENODEV;
+ sb = quotactl_block(special);
+ if (IS_ERR(sb))
+ return PTR_ERR(sb);
}
ret = check_quotactl_valid(sb, type, cmds, id);
.readdir = reiserfs_readdir,
.fsync = reiserfs_dir_fsync,
.ioctl = reiserfs_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = reiserfs_compat_ioctl,
+#endif
};
static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry,
* Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
*/
+#include <linux/config.h>
#include <linux/time.h>
#include <linux/reiserfs_fs.h>
#include <linux/reiserfs_acl.h>
.read = generic_file_read,
.write = reiserfs_file_write,
.ioctl = reiserfs_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = reiserfs_compat_ioctl,
+#endif
.mmap = generic_file_mmap,
.release = reiserfs_file_release,
.fsync = reiserfs_sync_file,
#include <asm/uaccess.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
+#include <linux/compat.h>
static int reiserfs_unpack(struct inode *inode, struct file *filp);
}
}
+#ifdef CONFIG_COMPAT
+long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ int ret;
+
+ /* These are just misnamed, they actually get/put from/to user an int */
+ switch (cmd) {
+ case REISERFS_IOC32_UNPACK:
+ cmd = REISERFS_IOC_UNPACK;
+ break;
+ case REISERFS_IOC32_GETFLAGS:
+ cmd = REISERFS_IOC_GETFLAGS;
+ break;
+ case REISERFS_IOC32_SETFLAGS:
+ cmd = REISERFS_IOC_SETFLAGS;
+ break;
+ case REISERFS_IOC32_GETVERSION:
+ cmd = REISERFS_IOC_GETVERSION;
+ break;
+ case REISERFS_IOC32_SETVERSION:
+ cmd = REISERFS_IOC_SETVERSION;
+ break;
+ default:
+ return -ENOIOCTLCMD;
+ }
+ lock_kernel();
+ ret = reiserfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
+ unlock_kernel();
+ return ret;
+}
+#endif
+
/*
** reiserfs_unpack
** Function try to convert tail from direct item into indirect.
* Jens to support splicing to files, network, direct splicing, etc and
* fixing lots of bugs.
*
- * Copyright (C) 2005-2006 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2005-2006 Jens Axboe <axboe@kernel.dk>
* Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org>
* Copyright (C) 2006 Ingo Molnar <mingo@elte.hu>
*
return 0;
}
+/*
+ * Write out and wait upon all dirty data associated with this
+ * superblock. Filesystem data as well as the underlying block
+ * device. Takes the superblock lock. Requires a second blkdev
+ * flush by the caller to complete the operation.
+ */
+void __fsync_super(struct super_block *sb)
+{
+ sync_inodes_sb(sb, 0);
+ DQUOT_SYNC(sb);
+ lock_super(sb);
+ if (sb->s_dirt && sb->s_op->write_super)
+ sb->s_op->write_super(sb);
+ unlock_super(sb);
+ if (sb->s_op->sync_fs)
+ sb->s_op->sync_fs(sb, 1);
+ sync_blockdev(sb->s_bdev);
+ sync_inodes_sb(sb, 1);
+}
+
+/*
+ * Write out and wait upon all dirty data associated with this
+ * superblock. Filesystem data as well as the underlying block
+ * device. Takes the superblock lock.
+ */
+int fsync_super(struct super_block *sb)
+{
+ __fsync_super(sb);
+ return sync_blockdev(sb->s_bdev);
+}
+
/**
* generic_shutdown_super - common helper for ->kill_sb()
* @sb: superblock to kill
{
int retval;
+#ifdef CONFIG_BLOCK
if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev))
return -EACCES;
+#endif
if (flags & MS_RDONLY)
acct_auto_close(sb);
shrink_dcache_sb(sb);
EXPORT_SYMBOL(kill_litter_super);
+#ifdef CONFIG_BLOCK
static int set_bdev_super(struct super_block *s, void *data)
{
s->s_bdev = data;
}
EXPORT_SYMBOL(kill_block_super);
+#endif
int get_sb_nodev(struct file_system_type *fs_type,
int flags, void *data,
#include <linux/syscalls.h>
#include <linux/linkage.h>
#include <linux/pagemap.h>
+#include <linux/quotaops.h>
+#include <linux/buffer_head.h>
#define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
SYNC_FILE_RANGE_WAIT_AFTER)
+/*
+ * sync everything. Start out by waking pdflush, because that writes back
+ * all queues in parallel.
+ */
+static void do_sync(unsigned long wait)
+{
+ wakeup_pdflush(0);
+ sync_inodes(0); /* All mappings, inodes and their blockdevs */
+ DQUOT_SYNC(NULL);
+ sync_supers(); /* Write the superblocks */
+ sync_filesystems(0); /* Start syncing the filesystems */
+ sync_filesystems(wait); /* Waitingly sync the filesystems */
+ sync_inodes(wait); /* Mappings, inodes and blockdevs, again. */
+ if (!wait)
+ printk("Emergency Sync complete\n");
+ if (unlikely(laptop_mode))
+ laptop_sync_completion();
+}
+
+asmlinkage long sys_sync(void)
+{
+ do_sync(1);
+ return 0;
+}
+
+void emergency_sync(void)
+{
+ pdflush_operation(do_sync, 0);
+}
+
+/*
+ * Generic function to fsync a file.
+ *
+ * filp may be NULL if called via the msync of a vma.
+ */
+int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
+{
+ struct inode * inode = dentry->d_inode;
+ struct super_block * sb;
+ int ret, err;
+
+ /* sync the inode to buffers */
+ ret = write_inode_now(inode, 0);
+
+ /* sync the superblock to buffers */
+ sb = inode->i_sb;
+ lock_super(sb);
+ if (sb->s_op->write_super)
+ sb->s_op->write_super(sb);
+ unlock_super(sb);
+
+ /* .. finally sync the buffers to disk */
+ err = sync_blockdev(sb->s_bdev);
+ if (!ret)
+ ret = err;
+ return ret;
+}
+
+long do_fsync(struct file *file, int datasync)
+{
+ int ret;
+ int err;
+ struct address_space *mapping = file->f_mapping;
+
+ if (!file->f_op || !file->f_op->fsync) {
+ /* Why? We can still call filemap_fdatawrite */
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = filemap_fdatawrite(mapping);
+
+ /*
+ * We need to protect against concurrent writers, which could cause
+ * livelocks in fsync_buffers_list().
+ */
+ mutex_lock(&mapping->host->i_mutex);
+ err = file->f_op->fsync(file, file->f_dentry, datasync);
+ if (!ret)
+ ret = err;
+ mutex_unlock(&mapping->host->i_mutex);
+ err = filemap_fdatawait(mapping);
+ if (!ret)
+ ret = err;
+out:
+ return ret;
+}
+
+static long __do_fsync(unsigned int fd, int datasync)
+{
+ struct file *file;
+ int ret = -EBADF;
+
+ file = fget(fd);
+ if (file) {
+ ret = do_fsync(file, datasync);
+ fput(file);
+ }
+ return ret;
+}
+
+asmlinkage long sys_fsync(unsigned int fd)
+{
+ return __do_fsync(fd, 0);
+}
+
+asmlinkage long sys_fdatasync(unsigned int fd)
+{
+ return __do_fsync(fd, 1);
+}
+
/*
* sys_sync_file_range() permits finely controlled syncing over a segment of
* a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is
config XFS_FS
tristate "XFS filesystem support"
+ depends on BLOCK
help
XFS is a high performance journaling filesystem which originated
on the SGI IRIX platform. It is completely multi-threaded, can
#define BIO_RW_BARRIER 2
#define BIO_RW_FAILFAST 3
#define BIO_RW_SYNC 4
+#define BIO_RW_META 5
/*
* upper 16 bits of bi_rw define the io priority of this bio
#define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC))
#define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST))
#define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD))
+#define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META))
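READ_META as used by the ext3 hunks above is assumed here to expand to READ | (1 << BIO_RW_META), so the new bit travels in the rw/bi_rw word and lets the I/O scheduler and blktrace tell metadata reads apart from plain data reads. A stand-alone sketch of that bit test (the READ_META definition is an assumption, it is not shown in this excerpt):

/* illustrative only: the meta bit rides in the same word as READ/WRITE */
#include <stdio.h>

#define BIO_RW_META	5				/* matches the define above */
#define READ		0
#define READ_META	(READ | (1 << BIO_RW_META))	/* assumed fs.h definition */

int main(void)
{
	printf("READ has meta bit:      %d\n", !!(READ & (1 << BIO_RW_META)));
	printf("READ_META has meta bit: %d\n", !!(READ_META & (1 << BIO_RW_META)));
	return 0;
}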
/*
* will die
#ifndef _LINUX_BLKDEV_H
#define _LINUX_BLKDEV_H
+#include <linux/sched.h>
#include <linux/major.h>
#include <linux/genhd.h>
#include <linux/list.h>
#include <asm/scatterlist.h>
+#ifdef CONFIG_LBD
+# include <asm/div64.h>
+# define sector_div(a, b) do_div(a, b)
+#else
+# define sector_div(n, b)( \
+{ \
+ int _res; \
+ _res = (n) % (b); \
+ (n) /= (b); \
+ _res; \
+} \
+)
+#endif
+
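sector_div() exists so that sector_t arithmetic stays cheap on 32-bit machines: with CONFIG_LBD the sector is 64-bit and the division goes through do_div(), otherwise it is plain integer division; either way the macro divides its first argument in place and evaluates to the remainder. A user-space sketch of the non-LBD branch (gcc statement expressions assumed):

/* illustrative only: divide in place, yield the remainder */
#include <stdio.h>

#define sector_div(n, b)( \
{ \
	int _res; \
	_res = (n) % (b); \
	(n) /= (b); \
	_res; \
} \
)

int main(void)
{
	unsigned long sector = 1000005;
	int rem = sector_div(sector, 8);	/* e.g. sectors per 4K block */

	printf("quotient=%lu remainder=%d\n", sector, rem);
	return 0;
}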
+#ifdef CONFIG_BLOCK
+
struct scsi_ioctl_command;
struct request_queue;
atomic_t refcount;
struct task_struct *task;
- int (*set_ioprio)(struct io_context *, unsigned int);
+ unsigned int ioprio_changed;
/*
* For request batching
void put_io_context(struct io_context *ioc);
void exit_io_context(void);
-struct io_context *current_io_context(gfp_t gfp_flags);
-struct io_context *get_io_context(gfp_t gfp_flags);
+struct io_context *get_io_context(gfp_t gfp_flags, int node);
void copy_io_context(struct io_context **pdst, struct io_context **psrc);
void swap_io_context(struct io_context **ioc1, struct io_context **ioc2);
wait_queue_head_t wait[2];
};
+/*
+ * request command types
+ */
+enum rq_cmd_type_bits {
+ REQ_TYPE_FS = 1, /* fs request */
+ REQ_TYPE_BLOCK_PC, /* scsi command */
+ REQ_TYPE_SENSE, /* sense request */
+ REQ_TYPE_PM_SUSPEND, /* suspend request */
+ REQ_TYPE_PM_RESUME, /* resume request */
+ REQ_TYPE_PM_SHUTDOWN, /* shutdown request */
+ REQ_TYPE_FLUSH, /* flush request */
+ REQ_TYPE_SPECIAL, /* driver defined type */
+ REQ_TYPE_LINUX_BLOCK, /* generic block layer message */
+ /*
+ * for ATA/ATAPI devices. this really doesn't belong here, ide should
+ * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver
+ * private REQ_LB opcodes to differentiate what type of request this is
+ */
+ REQ_TYPE_ATA_CMD,
+ REQ_TYPE_ATA_TASK,
+ REQ_TYPE_ATA_TASKFILE,
+};
+
+/*
+ * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being
+ * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a
+ * SCSI cdb).
+ *
+ * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need,
+ * typically to differentiate REQ_TYPE_SPECIAL requests.
+ *
+ */
+enum {
+ /*
+ * just examples for now
+ */
+ REQ_LB_OP_EJECT = 0x40, /* eject request */
+ REQ_LB_OP_FLUSH = 0x41, /* flush device */
+};
+
+/*
+ * request type modified bits. first three bits match BIO_RW* bits, important
+ */
+enum rq_flag_bits {
+ __REQ_RW, /* not set, read. set, write */
+ __REQ_FAILFAST, /* no low level driver retries */
+ __REQ_SORTED, /* elevator knows about this request */
+ __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */
+ __REQ_HARDBARRIER, /* may not be passed by drive either */
+ __REQ_FUA, /* forced unit access */
+ __REQ_NOMERGE, /* don't touch this for merging */
+ __REQ_STARTED, /* drive already may have started this one */
+ __REQ_DONTPREP, /* don't call prep for this one */
+ __REQ_QUEUED, /* uses queueing */
+ __REQ_ELVPRIV, /* elevator private data attached */
+ __REQ_FAILED, /* set if the request failed */
+ __REQ_QUIET, /* don't worry about errors */
+ __REQ_PREEMPT, /* set for "ide_preempt" requests */
+ __REQ_ORDERED_COLOR, /* is before or after barrier */
+ __REQ_RW_SYNC, /* request is sync (O_DIRECT) */
+ __REQ_ALLOCED, /* request came from our alloc pool */
+ __REQ_RW_META, /* metadata io request */
+ __REQ_NR_BITS, /* stops here */
+};
+
+#define REQ_RW (1 << __REQ_RW)
+#define REQ_FAILFAST (1 << __REQ_FAILFAST)
+#define REQ_SORTED (1 << __REQ_SORTED)
+#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER)
+#define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER)
+#define REQ_FUA (1 << __REQ_FUA)
+#define REQ_NOMERGE (1 << __REQ_NOMERGE)
+#define REQ_STARTED (1 << __REQ_STARTED)
+#define REQ_DONTPREP (1 << __REQ_DONTPREP)
+#define REQ_QUEUED (1 << __REQ_QUEUED)
+#define REQ_ELVPRIV (1 << __REQ_ELVPRIV)
+#define REQ_FAILED (1 << __REQ_FAILED)
+#define REQ_QUIET (1 << __REQ_QUIET)
+#define REQ_PREEMPT (1 << __REQ_PREEMPT)
+#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR)
+#define REQ_RW_SYNC (1 << __REQ_RW_SYNC)
+#define REQ_ALLOCED (1 << __REQ_ALLOCED)
+#define REQ_RW_META (1 << __REQ_RW_META)
+
#define BLK_MAX_CDB 16
/*
struct list_head queuelist;
struct list_head donelist;
- unsigned long flags; /* see REQ_ bits below */
+ request_queue_t *q;
+
+ unsigned int cmd_flags;
+ enum rq_cmd_type_bits cmd_type;
/* Maintain bio traversal state for part by part I/O submission.
* hard_* are block layer internals, no driver should touch them!
*/
sector_t sector; /* next sector to submit */
+ sector_t hard_sector; /* next sector to complete */
unsigned long nr_sectors; /* no. of sectors left to submit */
+ unsigned long hard_nr_sectors; /* no. of sectors left to complete */
/* no. of sectors left to submit in the current segment */
unsigned int current_nr_sectors;
- sector_t hard_sector; /* next sector to complete */
- unsigned long hard_nr_sectors; /* no. of sectors left to complete */
/* no. of sectors left to complete in the current segment */
unsigned int hard_cur_sectors;
struct bio *bio;
struct bio *biotail;
+ struct hlist_node hash; /* merge hash */
+ /*
+ * The rb_node is only used inside the io scheduler, requests
+ * are pruned when moved to the dispatch queue. So let the
+ * completion_data share space with the rb_node.
+ */
+ union {
+ struct rb_node rb_node; /* sort/lookup */
+ void *completion_data;
+ };
+
+ /*
+	 * two pointers are available for the IO schedulers; if they need
+	 * more, they have to allocate them dynamically.
+ */
void *elevator_private;
- void *completion_data;
+ void *elevator_private2;
- int rq_status; /* should split this into a few status bits */
- int errors;
struct gendisk *rq_disk;
unsigned long start_time;
unsigned short ioprio;
+ void *special;
+ char *buffer;
+
int tag;
+ int errors;
int ref_count;
- request_queue_t *q;
- struct request_list *rl;
-
- struct completion *waiting;
- void *special;
- char *buffer;
/*
* when request is used as a packet command carrier
int retries;
/*
- * completion callback. end_io_data should be folded in with waiting
+ * completion callback.
*/
rq_end_io_fn *end_io;
void *end_io_data;
};
/*
- * first three bits match BIO_RW* bits, important
- */
-enum rq_flag_bits {
- __REQ_RW, /* not set, read. set, write */
- __REQ_FAILFAST, /* no low level driver retries */
- __REQ_SORTED, /* elevator knows about this request */
- __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */
- __REQ_HARDBARRIER, /* may not be passed by drive either */
- __REQ_FUA, /* forced unit access */
- __REQ_CMD, /* is a regular fs rw request */
- __REQ_NOMERGE, /* don't touch this for merging */
- __REQ_STARTED, /* drive already may have started this one */
- __REQ_DONTPREP, /* don't call prep for this one */
- __REQ_QUEUED, /* uses queueing */
- __REQ_ELVPRIV, /* elevator private data attached */
- /*
- * for ATA/ATAPI devices
- */
- __REQ_PC, /* packet command (special) */
- __REQ_BLOCK_PC, /* queued down pc from block layer */
- __REQ_SENSE, /* sense retrival */
-
- __REQ_FAILED, /* set if the request failed */
- __REQ_QUIET, /* don't worry about errors */
- __REQ_SPECIAL, /* driver suplied command */
- __REQ_DRIVE_CMD,
- __REQ_DRIVE_TASK,
- __REQ_DRIVE_TASKFILE,
- __REQ_PREEMPT, /* set for "ide_preempt" requests */
- __REQ_PM_SUSPEND, /* suspend request */
- __REQ_PM_RESUME, /* resume request */
- __REQ_PM_SHUTDOWN, /* shutdown request */
- __REQ_ORDERED_COLOR, /* is before or after barrier */
- __REQ_RW_SYNC, /* request is sync (O_DIRECT) */
- __REQ_NR_BITS, /* stops here */
-};
-
-#define REQ_RW (1 << __REQ_RW)
-#define REQ_FAILFAST (1 << __REQ_FAILFAST)
-#define REQ_SORTED (1 << __REQ_SORTED)
-#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER)
-#define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER)
-#define REQ_FUA (1 << __REQ_FUA)
-#define REQ_CMD (1 << __REQ_CMD)
-#define REQ_NOMERGE (1 << __REQ_NOMERGE)
-#define REQ_STARTED (1 << __REQ_STARTED)
-#define REQ_DONTPREP (1 << __REQ_DONTPREP)
-#define REQ_QUEUED (1 << __REQ_QUEUED)
-#define REQ_ELVPRIV (1 << __REQ_ELVPRIV)
-#define REQ_PC (1 << __REQ_PC)
-#define REQ_BLOCK_PC (1 << __REQ_BLOCK_PC)
-#define REQ_SENSE (1 << __REQ_SENSE)
-#define REQ_FAILED (1 << __REQ_FAILED)
-#define REQ_QUIET (1 << __REQ_QUIET)
-#define REQ_SPECIAL (1 << __REQ_SPECIAL)
-#define REQ_DRIVE_CMD (1 << __REQ_DRIVE_CMD)
-#define REQ_DRIVE_TASK (1 << __REQ_DRIVE_TASK)
-#define REQ_DRIVE_TASKFILE (1 << __REQ_DRIVE_TASKFILE)
-#define REQ_PREEMPT (1 << __REQ_PREEMPT)
-#define REQ_PM_SUSPEND (1 << __REQ_PM_SUSPEND)
-#define REQ_PM_RESUME (1 << __REQ_PM_RESUME)
-#define REQ_PM_SHUTDOWN (1 << __REQ_PM_SHUTDOWN)
-#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR)
-#define REQ_RW_SYNC (1 << __REQ_RW_SYNC)
-
-/*
- * State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME
+ * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME
* requests. Some step values could eventually be made generic.
*/
struct request_pm_state
struct mutex sysfs_lock;
};
-#define RQ_INACTIVE (-1)
-#define RQ_ACTIVE 1
-
#define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */
#define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */
#define QUEUE_FLAG_STOPPED 2 /* queue is stopped */
#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
#define blk_queue_flushing(q) ((q)->ordseq)
-#define blk_fs_request(rq) ((rq)->flags & REQ_CMD)
-#define blk_pc_request(rq) ((rq)->flags & REQ_BLOCK_PC)
-#define blk_noretry_request(rq) ((rq)->flags & REQ_FAILFAST)
-#define blk_rq_started(rq) ((rq)->flags & REQ_STARTED)
+#define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS)
+#define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
+#define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL)
+#define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE)
+
+#define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST)
+#define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED)
#define blk_account_rq(rq) (blk_rq_started(rq) && blk_fs_request(rq))
-#define blk_pm_suspend_request(rq) ((rq)->flags & REQ_PM_SUSPEND)
-#define blk_pm_resume_request(rq) ((rq)->flags & REQ_PM_RESUME)
+#define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND)
+#define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME)
#define blk_pm_request(rq) \
- ((rq)->flags & (REQ_PM_SUSPEND | REQ_PM_RESUME))
+ (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq))
-#define blk_sorted_rq(rq) ((rq)->flags & REQ_SORTED)
-#define blk_barrier_rq(rq) ((rq)->flags & REQ_HARDBARRIER)
-#define blk_fua_rq(rq) ((rq)->flags & REQ_FUA)
+#define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED)
+#define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER)
+#define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA)
#define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
-#define rq_data_dir(rq) ((rq)->flags & 1)
+#define rq_data_dir(rq) ((rq)->cmd_flags & 1)
+
+/*
+ * We regard a request as sync if it's a READ or a SYNC write.
+ */
+#define rq_is_sync(rq) (rq_data_dir((rq)) == READ || (rq)->cmd_flags & REQ_RW_SYNC)
+#define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META)
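These helpers work because __REQ_RW is bit 0 of cmd_flags, the same position as BIO_RW in a bio, so the data direction is literally the low bit; reads count as sync unconditionally, writes only when REQ_RW_SYNC is set. A stand-alone check (flag values copied from the enum above, nothing kernel-specific):

/* illustrative only: low bit is the direction, REQ_RW_SYNC upgrades a write */
#include <stdio.h>

#define READ		0
#define REQ_RW		(1 << 0)	/* __REQ_RW */
#define REQ_RW_SYNC	(1 << 15)	/* __REQ_RW_SYNC */
#define rq_data_dir(flags)	((flags) & 1)
#define rq_is_sync(flags)	(rq_data_dir(flags) == READ || ((flags) & REQ_RW_SYNC))

int main(void)
{
	unsigned int buffered_write = REQ_RW;
	unsigned int sync_write     = REQ_RW | REQ_RW_SYNC;
	unsigned int read           = 0;

	printf("buffered write sync? %d\n", !!rq_is_sync(buffered_write));	/* 0 */
	printf("sync write sync?     %d\n", !!rq_is_sync(sync_write));		/* 1 */
	printf("read sync?           %d\n", !!rq_is_sync(read));		/* 1 */
	return 0;
}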
static inline int blk_queue_full(struct request_queue *q, int rw)
{
#define RQ_NOMERGE_FLAGS \
(REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
#define rq_mergeable(rq) \
- (!((rq)->flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq)))
-
-/*
- * noop, requests are automagically marked as active/inactive by I/O
- * scheduler -- see elv_next_request
- */
-#define blk_queue_headactive(q, head_active)
+ (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq)))
/*
* q->prep_rq_fn return values
if ((rq->bio)) \
for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)
-struct sec_size {
- unsigned block_size;
- unsigned block_size_bits;
-};
-
extern int blk_register_queue(struct gendisk *disk);
extern void blk_unregister_queue(struct gendisk *disk);
extern void register_disk(struct gendisk *dev);
extern void blk_sync_queue(struct request_queue *q);
extern void __blk_stop_queue(request_queue_t *q);
extern void blk_run_queue(request_queue_t *);
+extern void blk_start_queueing(request_queue_t *);
extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *);
extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int);
extern int blk_rq_unmap_user(struct bio *, unsigned int);
extern void end_request(struct request *req, int uptodate);
extern void blk_complete_request(struct request *);
-static inline int rq_all_done(struct request *rq, unsigned int nr_bytes)
-{
- if (blk_fs_request(rq))
- return (nr_bytes >= (rq->hard_nr_sectors << 9));
- else if (blk_pc_request(rq))
- return nr_bytes >= rq->data_len;
-
- return 0;
-}
-
/*
* end_that_request_first/chunk() takes an uptodate argument. we account
 * any value <= 0 as an io error. 0 means -EIO for compatibility reasons,
elv_dequeue_request(req->q, req);
}
-/*
- * This should be in elevator.h, but that requires pulling in rq and q
- */
-static inline void elv_dispatch_add_tail(struct request_queue *q,
- struct request *rq)
-{
- if (q->last_merge == rq)
- q->last_merge = NULL;
- q->nr_sorted--;
-
- q->end_sector = rq_end_sector(rq);
- q->boundary_rq = rq;
- list_add_tail(&rq->queuelist, &q->queue_head);
-}
-
/*
* Access functions for manipulating queue properties
*/
*/
#define blk_queue_tag_depth(q) ((q)->queue_tags->busy)
#define blk_queue_tag_queue(q) ((q)->queue_tags->busy < (q)->queue_tags->max_depth)
-#define blk_rq_tagged(rq) ((rq)->flags & REQ_QUEUED)
+#define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED)
extern int blk_queue_start_tag(request_queue_t *, struct request *);
extern struct request *blk_queue_find_tag(request_queue_t *, int);
extern void blk_queue_end_tag(request_queue_t *, struct request *);
return retval;
}
-static inline int bdev_dma_aligment(struct block_device *bdev)
-{
- return queue_dma_alignment(bdev_get_queue(bdev));
-}
-
-#define blk_finished_io(nsects) do { } while (0)
-#define blk_started_io(nsects) do { } while (0)
-
/* assumes size > 256 */
static inline unsigned int blksize_bits(unsigned int size)
{
int kblockd_schedule_work(struct work_struct *work);
void kblockd_flush(void);
-#ifdef CONFIG_LBD
-# include <asm/div64.h>
-# define sector_div(a, b) do_div(a, b)
-#else
-# define sector_div(n, b)( \
-{ \
- int _res; \
- _res = (n) % (b); \
- (n) /= (b); \
- _res; \
-} \
-)
-#endif
-
#define MODULE_ALIAS_BLOCKDEV(major,minor) \
MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
#define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
MODULE_ALIAS("block-major-" __stringify(major) "-*")
+#else /* CONFIG_BLOCK */
+/*
+ * stubs for when the block layer is configured out
+ */
+#define buffer_heads_over_limit 0
+
+static inline long blk_congestion_wait(int rw, long timeout)
+{
+ return io_schedule_timeout(timeout);
+}
+
+static inline long nr_blockdev_pages(void)
+{
+ return 0;
+}
+
+static inline void exit_io_context(void)
+{
+}
+
+#endif /* CONFIG_BLOCK */
+
#endif
BLK_TC_PC = 1 << 9, /* pc requests */
BLK_TC_NOTIFY = 1 << 10, /* special message */
BLK_TC_AHEAD = 1 << 11, /* readahead */
+ BLK_TC_META = 1 << 12, /* metadata */
BLK_TC_END = 1 << 15, /* only 16-bits, reminder */
};
u32 what)
{
struct blk_trace *bt = q->blk_trace;
- int rw = rq->flags & 0x03;
+ int rw = rq->cmd_flags & 0x03;
if (likely(!bt))
return;
#include <linux/wait.h>
#include <asm/atomic.h>
+#ifdef CONFIG_BLOCK
+
enum bh_state_bits {
BH_Uptodate, /* Contains valid data */
BH_Dirty, /* Is dirty */
* Generic address_space_operations implementations for buffer_head-backed
* address_spaces.
*/
-int try_to_release_page(struct page * page, gfp_t gfp_mask);
void block_invalidatepage(struct page *page, unsigned long offset);
-void do_invalidatepage(struct page *page, unsigned long offset);
int block_write_full_page(struct page *page, get_block_t *get_block,
struct writeback_control *wbc);
int block_read_full_page(struct page*, get_block_t*);
__lock_buffer(bh);
}
+extern int __set_page_dirty_buffers(struct page *page);
+
+#else /* CONFIG_BLOCK */
+
+static inline void buffer_init(void) {}
+static inline int try_to_free_buffers(struct page *page) { return 1; }
+static inline int sync_blockdev(struct block_device *bdev) { return 0; }
+static inline int inode_has_buffers(struct inode *inode) { return 0; }
+static inline void invalidate_inode_buffers(struct inode *inode) {}
+static inline int remove_inode_buffers(struct inode *inode) { return 1; }
+static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
+static inline void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers) {}
+
+
+#endif /* CONFIG_BLOCK */
#endif /* _LINUX_BUFFER_HEAD_H */
COMPATIBLE_IOCTL(FDFMTTRK)
COMPATIBLE_IOCTL(FDRAWCMD)
/* 0x12 */
+#ifdef CONFIG_BLOCK
COMPATIBLE_IOCTL(BLKRASET)
COMPATIBLE_IOCTL(BLKROSET)
COMPATIBLE_IOCTL(BLKROGET)
COMPATIBLE_IOCTL(BLKTRACETEARDOWN)
ULONG_IOCTL(BLKRASET)
ULONG_IOCTL(BLKFRASET)
+#endif
/* RAID */
COMPATIBLE_IOCTL(RAID_VERSION)
COMPATIBLE_IOCTL(GET_ARRAY_INFO)
COMPATIBLE_IOCTL(DVD_AUTH)
/* pktcdvd */
COMPATIBLE_IOCTL(PACKET_CTRL_CMD)
-/* Big L */
-ULONG_IOCTL(LOOP_SET_FD)
-ULONG_IOCTL(LOOP_CHANGE_FD)
-COMPATIBLE_IOCTL(LOOP_CLR_FD)
-COMPATIBLE_IOCTL(LOOP_GET_STATUS64)
-COMPATIBLE_IOCTL(LOOP_SET_STATUS64)
/* Big A */
/* sparc only */
/* Big Q for sound/OSS */
#ifndef _LINUX_ELEVATOR_H
#define _LINUX_ELEVATOR_H
+#include <linux/percpu.h>
+
+#ifdef CONFIG_BLOCK
+
typedef int (elevator_merge_fn) (request_queue_t *, struct request **,
struct bio *);
typedef void (elevator_merge_req_fn) (request_queue_t *, struct request *, struct request *);
-typedef void (elevator_merged_fn) (request_queue_t *, struct request *);
+typedef void (elevator_merged_fn) (request_queue_t *, struct request *, int);
typedef int (elevator_dispatch_fn) (request_queue_t *, int);
typedef int (elevator_queue_empty_fn) (request_queue_t *);
typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *);
typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *);
-typedef int (elevator_may_queue_fn) (request_queue_t *, int, struct bio *);
+typedef int (elevator_may_queue_fn) (request_queue_t *, int);
-typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, struct bio *, gfp_t);
+typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, gfp_t);
typedef void (elevator_put_req_fn) (request_queue_t *, struct request *);
typedef void (elevator_activate_req_fn) (request_queue_t *, struct request *);
typedef void (elevator_deactivate_req_fn) (request_queue_t *, struct request *);
struct kobject kobj;
struct elevator_type *elevator_type;
struct mutex sysfs_lock;
+ struct hlist_head *hash;
};
/*
* block elevator interface
*/
extern void elv_dispatch_sort(request_queue_t *, struct request *);
+extern void elv_dispatch_add_tail(request_queue_t *, struct request *);
extern void elv_add_request(request_queue_t *, struct request *, int, int);
extern void __elv_add_request(request_queue_t *, struct request *, int, int);
extern void elv_insert(request_queue_t *, struct request *, int);
extern int elv_merge(request_queue_t *, struct request **, struct bio *);
extern void elv_merge_requests(request_queue_t *, struct request *,
struct request *);
-extern void elv_merged_request(request_queue_t *, struct request *);
+extern void elv_merged_request(request_queue_t *, struct request *, int);
extern void elv_dequeue_request(request_queue_t *, struct request *);
extern void elv_requeue_request(request_queue_t *, struct request *);
extern int elv_queue_empty(request_queue_t *);
extern struct request *elv_latter_request(request_queue_t *, struct request *);
extern int elv_register_queue(request_queue_t *q);
extern void elv_unregister_queue(request_queue_t *q);
-extern int elv_may_queue(request_queue_t *, int, struct bio *);
+extern int elv_may_queue(request_queue_t *, int);
extern void elv_completed_request(request_queue_t *, struct request *);
-extern int elv_set_request(request_queue_t *, struct request *, struct bio *, gfp_t);
+extern int elv_set_request(request_queue_t *, struct request *, gfp_t);
extern void elv_put_request(request_queue_t *, struct request *);
/*
extern void elevator_exit(elevator_t *);
extern int elv_rq_merge_ok(struct request *, struct bio *);
+/*
+ * Helper functions.
+ */
+extern struct request *elv_rb_former_request(request_queue_t *, struct request *);
+extern struct request *elv_rb_latter_request(request_queue_t *, struct request *);
+
+/*
+ * rb support functions.
+ */
+extern struct request *elv_rb_add(struct rb_root *, struct request *);
+extern void elv_rb_del(struct rb_root *, struct request *);
+extern struct request *elv_rb_find(struct rb_root *, sector_t);
+
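
The rb helpers let a scheduler keep its pending requests sorted by start sector without open-coding rbtree plumbing. A minimal sketch of their use, with a hypothetical scheduler-private structure (the field and function names below are illustrative, loosely modelled on the deadline/AS schedulers):

	struct example_data {
		struct rb_root sort_list[2];	/* one tree per data direction */
	};

	static void example_add_request(request_queue_t *q, struct request *rq)
	{
		struct example_data *ed = q->elevator->elevator_data;

		/* index by starting sector; elv_rb_find() can look it up later */
		elv_rb_add(&ed->sort_list[rq_data_dir(rq)], rq);
	}

	static void example_remove_request(request_queue_t *q, struct request *rq)
	{
		struct example_data *ed = q->elevator->elevator_data;

		elv_rb_del(&ed->sort_list[rq_data_dir(rq)], rq);
	}
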
/*
* Return values from elevator merger
*/
};
#define rq_end_sector(rq) ((rq)->sector + (rq)->nr_sectors)
+#define rb_entry_rq(node) rb_entry((node), struct request, rb_node)
+
+/*
+ * Hack to reuse the donelist list_head as the fifo time holder while
+ * the request is in the io scheduler. Saves an unsigned long in rq.
+ */
+#define rq_fifo_time(rq) ((unsigned long) (rq)->donelist.next)
+#define rq_set_fifo_time(rq,exp) ((rq)->donelist.next = (void *) (exp))
+#define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist)
+#define rq_fifo_clear(rq) do { \
+ list_del_init(&(rq)->queuelist); \
+ INIT_LIST_HEAD(&(rq)->donelist); \
+ } while (0)
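
A short sketch of how a scheduler would use the fifo helpers: stamp an expiry time when the request is queued, then check whether the head of the fifo has waited too long (ed->fifo_list and ed->fifo_expire are hypothetical scheduler-private fields):

	/* on insert: record the deadline and append to the fifo */
	rq_set_fifo_time(rq, jiffies + ed->fifo_expire);
	list_add_tail(&rq->queuelist, &ed->fifo_list);

	/* on dispatch: service the oldest request first if it has expired */
	rq = rq_entry_fifo(ed->fifo_list.next);
	if (time_after(jiffies, rq_fifo_time(rq)))
		expired = 1;
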
+/*
+ * io context count accounting
+ */
+#define elv_ioc_count_mod(name, __val) \
+ do { \
+ preempt_disable(); \
+ __get_cpu_var(name) += (__val); \
+ preempt_enable(); \
+ } while (0)
+
+#define elv_ioc_count_inc(name) elv_ioc_count_mod(name, 1)
+#define elv_ioc_count_dec(name) elv_ioc_count_mod(name, -1)
+
+#define elv_ioc_count_read(name) \
+({ \
+ unsigned long __val = 0; \
+ int __cpu; \
+ smp_wmb(); \
+ for_each_possible_cpu(__cpu) \
+ __val += per_cpu(name, __cpu); \
+ __val; \
+})
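
These counters are intended for per-CPU variables declared with DEFINE_PER_CPU, typically tracking how many scheduler-private io contexts are still live. A rough sketch (the counter name is arbitrary):

	static DEFINE_PER_CPU(unsigned long, example_ioc_count);

	/* bump on io context allocation, drop on free */
	elv_ioc_count_inc(example_ioc_count);
	elv_ioc_count_dec(example_ioc_count);

	/* approximate sum over all CPUs, e.g. when the scheduler exits */
	if (elv_ioc_count_read(example_ioc_count))
		printk("example: io contexts still allocated\n");
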
+
+#endif /* CONFIG_BLOCK */
#endif
#define EXT2_N_BLOCKS (EXT2_TIND_BLOCK + 1)
/*
- * Inode flags
+ * Inode flags (GETFLAGS/SETFLAGS)
*/
-#define EXT2_SECRM_FL 0x00000001 /* Secure deletion */
-#define EXT2_UNRM_FL 0x00000002 /* Undelete */
-#define EXT2_COMPR_FL 0x00000004 /* Compress file */
-#define EXT2_SYNC_FL 0x00000008 /* Synchronous updates */
-#define EXT2_IMMUTABLE_FL 0x00000010 /* Immutable file */
-#define EXT2_APPEND_FL 0x00000020 /* writes to file may only append */
-#define EXT2_NODUMP_FL 0x00000040 /* do not dump file */
-#define EXT2_NOATIME_FL 0x00000080 /* do not update atime */
+#define EXT2_SECRM_FL FS_SECRM_FL /* Secure deletion */
+#define EXT2_UNRM_FL FS_UNRM_FL /* Undelete */
+#define EXT2_COMPR_FL FS_COMPR_FL /* Compress file */
+#define EXT2_SYNC_FL FS_SYNC_FL /* Synchronous updates */
+#define EXT2_IMMUTABLE_FL FS_IMMUTABLE_FL /* Immutable file */
+#define EXT2_APPEND_FL FS_APPEND_FL /* writes to file may only append */
+#define EXT2_NODUMP_FL FS_NODUMP_FL /* do not dump file */
+#define EXT2_NOATIME_FL FS_NOATIME_FL /* do not update atime */
/* Reserved for compression usage... */
-#define EXT2_DIRTY_FL 0x00000100
-#define EXT2_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
-#define EXT2_NOCOMP_FL 0x00000400 /* Don't compress */
-#define EXT2_ECOMPR_FL 0x00000800 /* Compression error */
+#define EXT2_DIRTY_FL FS_DIRTY_FL
+#define EXT2_COMPRBLK_FL FS_COMPRBLK_FL /* One or more compressed clusters */
+#define EXT2_NOCOMP_FL FS_NOCOMP_FL /* Don't compress */
+#define EXT2_ECOMPR_FL FS_ECOMPR_FL /* Compression error */
/* End compression flags --- maybe not all used */
-#define EXT2_BTREE_FL 0x00001000 /* btree format dir */
-#define EXT2_INDEX_FL 0x00001000 /* hash-indexed directory */
-#define EXT2_IMAGIC_FL 0x00002000 /* AFS directory */
-#define EXT2_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */
-#define EXT2_NOTAIL_FL 0x00008000 /* file tail should not be merged */
-#define EXT2_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
-#define EXT2_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
-#define EXT2_RESERVED_FL 0x80000000 /* reserved for ext2 lib */
-
-#define EXT2_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
-#define EXT2_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
+#define EXT2_BTREE_FL FS_BTREE_FL /* btree format dir */
+#define EXT2_INDEX_FL FS_INDEX_FL /* hash-indexed directory */
+#define EXT2_IMAGIC_FL FS_IMAGIC_FL /* AFS directory */
+#define EXT2_JOURNAL_DATA_FL FS_JOURNAL_DATA_FL /* Reserved for ext3 */
+#define EXT2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */
+#define EXT2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */
+#define EXT2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/
+#define EXT2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */
+
+#define EXT2_FL_USER_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */
+#define EXT2_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */
/*
* ioctl commands
*/
-#define EXT2_IOC_GETFLAGS _IOR('f', 1, long)
-#define EXT2_IOC_SETFLAGS _IOW('f', 2, long)
-#define EXT2_IOC_GETVERSION _IOR('v', 1, long)
-#define EXT2_IOC_SETVERSION _IOW('v', 2, long)
+#define EXT2_IOC_GETFLAGS FS_IOC_GETFLAGS
+#define EXT2_IOC_SETFLAGS FS_IOC_SETFLAGS
+#define EXT2_IOC_GETVERSION FS_IOC_GETVERSION
+#define EXT2_IOC_SETVERSION FS_IOC_SETVERSION
+
+/*
+ * ioctl commands in 32 bit emulation
+ */
+#define EXT2_IOC32_GETFLAGS FS_IOC32_GETFLAGS
+#define EXT2_IOC32_SETFLAGS FS_IOC32_SETFLAGS
+#define EXT2_IOC32_GETVERSION FS_IOC32_GETVERSION
+#define EXT2_IOC32_SETVERSION FS_IOC32_SETVERSION
/*
* Structure of an inode on the disk
/*
* ioctl commands
*/
-#define EXT3_IOC_GETFLAGS _IOR('f', 1, long)
-#define EXT3_IOC_SETFLAGS _IOW('f', 2, long)
+#define EXT3_IOC_GETFLAGS FS_IOC_GETFLAGS
+#define EXT3_IOC_SETFLAGS FS_IOC_SETFLAGS
#define EXT3_IOC_GETVERSION _IOR('f', 3, long)
#define EXT3_IOC_SETVERSION _IOW('f', 4, long)
#define EXT3_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
#define EXT3_IOC_GROUP_ADD _IOW('f', 8,struct ext3_new_group_input)
-#define EXT3_IOC_GETVERSION_OLD _IOR('v', 1, long)
-#define EXT3_IOC_SETVERSION_OLD _IOW('v', 2, long)
+#define EXT3_IOC_GETVERSION_OLD FS_IOC_GETVERSION
+#define EXT3_IOC_SETVERSION_OLD FS_IOC_SETVERSION
#ifdef CONFIG_JBD_DEBUG
#define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
#endif
#define EXT3_IOC_GETRSVSZ _IOR('f', 5, long)
#define EXT3_IOC_SETRSVSZ _IOW('f', 6, long)
+/*
+ * ioctl commands in 32 bit emulation
+ */
+#define EXT3_IOC32_GETFLAGS FS_IOC32_GETFLAGS
+#define EXT3_IOC32_SETFLAGS FS_IOC32_SETFLAGS
+#define EXT3_IOC32_GETVERSION _IOR('f', 3, int)
+#define EXT3_IOC32_SETVERSION _IOW('f', 4, int)
+#define EXT3_IOC32_GETRSVSZ _IOR('f', 5, int)
+#define EXT3_IOC32_SETRSVSZ _IOW('f', 6, int)
+#define EXT3_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
+#ifdef CONFIG_JBD_DEBUG
+#define EXT3_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
+#endif
+#define EXT3_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
+#define EXT3_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
+
/*
* Mount options
*/
/* ioctl.c */
extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
unsigned long);
+extern long ext3_compat_ioctl (struct file *, unsigned int, unsigned long);
/* namei.c */
extern int ext3_orphan_add(handle_t *, struct inode *);
#define WRITE 1
#define READA 2 /* read-ahead - don't block if no resources */
#define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */
-#define SPECIAL 4 /* For non-blockdevice requests in request queue */
#define READ_SYNC (READ | (1 << BIO_RW_SYNC))
+#define READ_META (READ | (1 << BIO_RW_META))
#define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC))
#define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER))
#define FIBMAP _IO(0x00,1) /* bmap access */
#define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */
+#define FS_IOC_GETFLAGS _IOR('f', 1, long)
+#define FS_IOC_SETFLAGS _IOW('f', 2, long)
+#define FS_IOC_GETVERSION _IOR('v', 1, long)
+#define FS_IOC_SETVERSION _IOW('v', 2, long)
+#define FS_IOC32_GETFLAGS _IOR('f', 1, int)
+#define FS_IOC32_SETFLAGS _IOW('f', 2, int)
+#define FS_IOC32_GETVERSION _IOR('v', 1, int)
+#define FS_IOC32_SETVERSION _IOW('v', 2, int)
+
+/*
+ * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
+ */
+#define FS_SECRM_FL 0x00000001 /* Secure deletion */
+#define FS_UNRM_FL 0x00000002 /* Undelete */
+#define FS_COMPR_FL 0x00000004 /* Compress file */
+#define FS_SYNC_FL 0x00000008 /* Synchronous updates */
+#define FS_IMMUTABLE_FL 0x00000010 /* Immutable file */
+#define FS_APPEND_FL 0x00000020 /* writes to file may only append */
+#define FS_NODUMP_FL 0x00000040 /* do not dump file */
+#define FS_NOATIME_FL 0x00000080 /* do not update atime */
+/* Reserved for compression usage... */
+#define FS_DIRTY_FL 0x00000100
+#define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
+#define FS_NOCOMP_FL 0x00000400 /* Don't compress */
+#define FS_ECOMPR_FL 0x00000800 /* Compression error */
+/* End compression flags --- maybe not all used */
+#define FS_BTREE_FL 0x00001000 /* btree format dir */
+#define FS_INDEX_FL 0x00001000 /* hash-indexed directory */
+#define FS_IMAGIC_FL 0x00002000 /* AFS directory */
+#define FS_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */
+#define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */
+#define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
+#define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
+#define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */
+
+#define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
+#define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
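
Because the flags and ioctl numbers are now filesystem-neutral, a userspace tool can query them without caring whether the file lives on ext2, ext3 or reiserfs. A minimal sketch (error handling kept to the bare minimum):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>

	int main(int argc, char **argv)
	{
		long flags = 0;
		int fd = open(argv[1], O_RDONLY);

		if (fd < 0 || ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0)
			return 1;
		if (flags & FS_IMMUTABLE_FL)
			printf("%s: immutable\n", argv[1]);
		if (flags & FS_APPEND_FL)
			printf("%s: append-only\n", argv[1]);
		return 0;
	}
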
+
#define SYNC_FILE_RANGE_WAIT_BEFORE 1
#define SYNC_FILE_RANGE_WRITE 2
#define SYNC_FILE_RANGE_WAIT_AFTER 4
extern void putname(const char *name);
#endif
+#ifdef CONFIG_BLOCK
extern int register_blkdev(unsigned int, const char *);
extern int unregister_blkdev(unsigned int, const char *);
extern struct block_device *bdget(dev_t);
extern void bdput(struct block_device *);
extern struct block_device *open_by_devnum(dev_t, unsigned);
extern struct block_device *open_partition_by_devnum(dev_t, unsigned);
-extern const struct file_operations def_blk_fops;
extern const struct address_space_operations def_blk_aops;
+#else
+static inline void bd_forget(struct inode *inode) {}
+#endif
+extern const struct file_operations def_blk_fops;
extern const struct file_operations def_chr_fops;
extern const struct file_operations bad_sock_fops;
extern const struct file_operations def_fifo_fops;
+#ifdef CONFIG_BLOCK
extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long);
extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
#define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder)
#define bd_release_from_disk(bdev, disk) bd_release(bdev)
#endif
+#endif
/* fs/char_dev.c */
#define CHRDEV_MAJOR_HASH_SIZE 255
extern void chrdev_show(struct seq_file *,off_t);
/* fs/block_dev.c */
-#define BLKDEV_MAJOR_HASH_SIZE 255
#define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */
+
+#ifdef CONFIG_BLOCK
+#define BLKDEV_MAJOR_HASH_SIZE 255
extern const char *__bdevname(dev_t, char *buffer);
extern const char *bdevname(struct block_device *bdev, char *buffer);
extern struct block_device *lookup_bdev(const char *);
extern struct block_device *open_bdev_excl(const char *, int, void *);
extern void close_bdev_excl(struct block_device *);
extern void blkdev_show(struct seq_file *,off_t);
+#else
+#define BLKDEV_MAJOR_HASH_SIZE 0
+#endif
extern void init_special_inode(struct inode *, umode_t, dev_t);
extern int fs_may_remount_ro(struct super_block *);
+#ifdef CONFIG_BLOCK
/*
* return READ, READA, or WRITE
*/
#define bio_data_dir(bio) ((bio)->bi_rw & 1)
extern int check_disk_change(struct block_device *);
-extern int invalidate_inodes(struct super_block *);
extern int __invalidate_device(struct block_device *);
extern int invalidate_partition(struct gendisk *, int);
+#endif
+extern int invalidate_inodes(struct super_block *);
unsigned long invalidate_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t end);
unsigned long invalidate_inode_pages(struct address_space *mapping);
extern long do_fsync(struct file *file, int datasync);
extern void sync_supers(void);
extern void sync_filesystems(int wait);
+extern void __fsync_super(struct super_block *sb);
extern void emergency_sync(void);
extern void emergency_remount(void);
extern int do_remount_sb(struct super_block *sb, int flags,
void *data, int force);
+#ifdef CONFIG_BLOCK
extern sector_t bmap(struct inode *, sector_t);
+#endif
extern int notify_change(struct dentry *, struct iattr *);
extern int permission(struct inode *, int, struct nameidata *);
extern int generic_permission(struct inode *, int,
extern struct file * get_empty_filp(void);
extern void file_move(struct file *f, struct list_head *list);
extern void file_kill(struct file *f);
+#ifdef CONFIG_BLOCK
struct bio;
extern void submit_bio(int, struct bio *);
extern int bdev_read_only(struct block_device *);
+#endif
extern int set_blocksize(struct block_device *, int);
extern int sb_set_blocksize(struct super_block *, int);
extern int sb_min_blocksize(struct super_block *, int);
actor);
}
+#ifdef CONFIG_BLOCK
ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
struct block_device *bdev, const struct iovec *iov, loff_t offset,
unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
nr_segs, get_block, end_io, DIO_OWN_LOCKING);
}
+#endif
extern const struct file_operations generic_ro_fops;
#include <linux/types.h>
+#ifdef CONFIG_BLOCK
+
enum {
/* These three have identical behaviour; use the second one if DOS FDISK gets
confused about extended/logical partitions starting past cylinder 1023. */
#endif
#endif
+
+#endif
int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
void print_bad_pte(struct vm_area_struct *, pte_t, unsigned long);
-int __set_page_dirty_buffers(struct page *page);
+extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
+extern void do_invalidatepage(struct page *page, unsigned long offset);
+
int __set_page_dirty_nobuffers(struct page *page);
int redirty_page_for_writepage(struct writeback_control *wbc,
struct page *page);
* (And no, it doesn't do the #ifdef __MPAGE_H thing, and it doesn't do
* nested includes. Get it right in the .c file).
*/
+#ifdef CONFIG_BLOCK
struct writeback_control;
typedef int (writepage_t)(struct page *page, struct writeback_control *wbc);
int mpage_writepage(struct page *page, get_block_t *get_block,
struct writeback_control *wbc);
-static inline int
-generic_writepages(struct address_space *mapping, struct writeback_control *wbc)
-{
- return mpage_writepages(mapping, wbc, NULL);
-}
+#endif
#include <linux/raid/md_u.h>
#include <linux/raid/md_k.h>
+#ifdef CONFIG_MD
+
/*
* Different major versions are not compatible.
* Different minor versions are only downward compatible.
extern void md_update_sb(mddev_t * mddev);
+#endif /* CONFIG_MD */
#endif
/* and dm-bio-list.h is not under include/linux because.... ??? */
#include "../../../drivers/md/dm-bio-list.h"
+#ifdef CONFIG_BLOCK
+
#define LEVEL_MULTIPATH (-4)
#define LEVEL_LINEAR (-1)
#define LEVEL_FAULTY (-5)
if (p) put_page(p);
}
+#endif /* CONFIG_BLOCK */
#endif
extern const struct file_operations ramfs_file_operations;
extern struct vm_operations_struct generic_file_vm_ops;
+extern int __init init_rootfs(void);
#endif
#define rb_entry(ptr, type, member) container_of(ptr, type, member)
#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL)
-#define RB_EMPTY_NODE(node) (rb_parent(node) != node)
+#define RB_EMPTY_NODE(node) (rb_parent(node) == node)
#define RB_CLEAR_NODE(node) (rb_set_parent(node, node))
extern void rb_insert_color(struct rb_node *, struct rb_root *);
#define set_sd_v1_first_direct_byte(sdp,v) \
((sdp)->sd_first_direct_byte = cpu_to_le32(v))
-#include <linux/ext2_fs.h>
-
/* inode flags stored in sd_attrs (nee sd_reserved) */
/* we want common flags to have the same values as in ext2,
so chattr(1) will work without problems */
-#define REISERFS_IMMUTABLE_FL EXT2_IMMUTABLE_FL
-#define REISERFS_APPEND_FL EXT2_APPEND_FL
-#define REISERFS_SYNC_FL EXT2_SYNC_FL
-#define REISERFS_NOATIME_FL EXT2_NOATIME_FL
-#define REISERFS_NODUMP_FL EXT2_NODUMP_FL
-#define REISERFS_SECRM_FL EXT2_SECRM_FL
-#define REISERFS_UNRM_FL EXT2_UNRM_FL
-#define REISERFS_COMPR_FL EXT2_COMPR_FL
-#define REISERFS_NOTAIL_FL EXT2_NOTAIL_FL
+#define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL
+#define REISERFS_APPEND_FL FS_APPEND_FL
+#define REISERFS_SYNC_FL FS_SYNC_FL
+#define REISERFS_NOATIME_FL FS_NOATIME_FL
+#define REISERFS_NODUMP_FL FS_NODUMP_FL
+#define REISERFS_SECRM_FL FS_SECRM_FL
+#define REISERFS_UNRM_FL FS_UNRM_FL
+#define REISERFS_COMPR_FL FS_COMPR_FL
+#define REISERFS_NOTAIL_FL FS_NOTAIL_FL
/* persistent flags that file inherits from the parent directory */
#define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL | \
/* prototypes from ioctl.c */
int reiserfs_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg);
+long reiserfs_compat_ioctl(struct file *filp,
+ unsigned int cmd, unsigned long arg);
/* ioctl's command */
#define REISERFS_IOC_UNPACK _IOW(0xCD,1,long)
/* define following flags to be the same as in ext2, so that chattr(1),
lsattr(1) will work with us. */
-#define REISERFS_IOC_GETFLAGS EXT2_IOC_GETFLAGS
-#define REISERFS_IOC_SETFLAGS EXT2_IOC_SETFLAGS
-#define REISERFS_IOC_GETVERSION EXT2_IOC_GETVERSION
-#define REISERFS_IOC_SETVERSION EXT2_IOC_SETVERSION
+#define REISERFS_IOC_GETFLAGS FS_IOC_GETFLAGS
+#define REISERFS_IOC_SETFLAGS FS_IOC_SETFLAGS
+#define REISERFS_IOC_GETVERSION FS_IOC_GETVERSION
+#define REISERFS_IOC_SETVERSION FS_IOC_SETVERSION
+
+/* the 32 bit compat definitions with int argument */
+#define REISERFS_IOC32_UNPACK _IOW(0xCD, 1, int)
+#define REISERFS_IOC32_GETFLAGS FS_IOC32_GETFLAGS
+#define REISERFS_IOC32_SETFLAGS FS_IOC32_SETFLAGS
+#define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION
+#define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION
/* Locking primitives */
/* Right now we are still falling back to (un)lock_kernel, but eventually that
struct io_context; /* See blkdev.h */
-void exit_io_context(void);
struct cpuset;
#define NGROUPS_SMALL 32
extern void tty_wakeup(struct tty_struct *tty);
extern void tty_ldisc_flush(struct tty_struct *tty);
+extern int tty_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
+ unsigned long arg);
+
extern struct mutex tty_mutex;
/* n_tty.c */
}
int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0);
+extern int generic_writepages(struct address_space *mapping,
+ struct writeback_control *wbc);
int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
int sync_page_range(struct inode *inode, struct address_space *mapping,
loff_t pos, loff_t count);
#include <scsi/scsi_device.h>
#include <scsi/scsi_host.h>
-
#define MSG_SIMPLE_TAG 0x20
#define MSG_HEAD_TAG 0x21
#define MSG_ORDERED_TAG 0x22
#define SCSI_NO_TAG (-1) /* identify no tag in use */
+#ifdef CONFIG_BLOCK
/**
* scsi_get_tag_type - get the type of tag the device supports
struct scsi_device *sdev = cmd->device;
if (blk_rq_tagged(req)) {
- if (sdev->ordered_tags && req->flags & REQ_HARDBARRIER)
+ if (sdev->ordered_tags && req->cmd_flags & REQ_HARDBARRIER)
*msg++ = MSG_ORDERED_TAG;
else
*msg++ = MSG_SIMPLE_TAG;
return shost->bqt ? 0 : -ENOMEM;
}
+#endif /* CONFIG_BLOCK */
#endif /* _SCSI_SCSI_TCQ_H */
config SWAP
bool "Support for paging of anonymous memory (swap)"
- depends on MMU
+ depends on MMU && BLOCK
default y
help
This option allows you to choose whether you want to have support
{
char *fs_names = __getname();
char *p;
+#ifdef CONFIG_BLOCK
char b[BDEVNAME_SIZE];
+#else
+ const char *b = name;
+#endif
get_fs_names(fs_names);
retry:
* Allow the user to distinguish between failed sys_open
* and bad superblock on root device.
*/
+#ifdef CONFIG_BLOCK
__bdevname(ROOT_DEV, b);
+#endif
printk("VFS: Cannot open root device \"%s\" or %s\n",
root_device_name, b);
printk("Please append a correct \"root=\" boot option\n");
for (p = fs_names; *p; p += strlen(p)+1)
printk(" %s", p);
printk("\n");
- panic("VFS: Unable to mount root fs on %s", __bdevname(ROOT_DEV, b));
+#ifdef CONFIG_BLOCK
+ __bdevname(ROOT_DEV, b);
+#endif
+ panic("VFS: Unable to mount root fs on %s", b);
out:
putname(fs_names);
}
change_floppy("root floppy");
}
#endif
+#ifdef CONFIG_BLOCK
create_dev("/dev/root", ROOT_DEV);
mount_block_root("/dev/root", root_mountflags);
+#endif
}
/*
#include <asm/uaccess.h>
+extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
+
int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts)
{
return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) ||
#include <linux/pipe_fs_i.h>
#include <linux/audit.h> /* for audit_free() */
#include <linux/resource.h>
+#include <linux/blkdev.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
cond_syscall(sys_mremap);
cond_syscall(sys_remap_file_pages);
cond_syscall(compat_sys_move_pages);
+
+/* block-layer dependent */
+cond_syscall(sys_bdflush);
+cond_syscall(sys_ioprio_set);
+cond_syscall(sys_ioprio_get);
{
struct rb_node *parent;
+ if (rb_parent(node) == node)
+ return NULL;
+
/* If we have a right-hand child, go down and then left as far
as we can. */
if (node->rb_right) {
{
struct rb_node *parent;
+ if (rb_parent(node) == node)
+ return NULL;
+
/* If we have a left-hand child, go down and then right as far
as we can. */
if (node->rb_left) {
readahead.o swap.o truncate.o vmscan.o \
prio_tree.o util.o mmzone.o vmstat.o $(mmu-y)
+ifeq ($(CONFIG_MMU)$(CONFIG_BLOCK),yy)
+obj-y += bounce.o
+endif
obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o
obj-$(CONFIG_HUGETLBFS) += hugetlb.o
obj-$(CONFIG_NUMA) += mempolicy.o
--- /dev/null
+/* bounce buffer handling for block devices
+ *
+ * - Split from highmem.c
+ */
+
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/swap.h>
+#include <linux/bio.h>
+#include <linux/pagemap.h>
+#include <linux/mempool.h>
+#include <linux/blkdev.h>
+#include <linux/init.h>
+#include <linux/hash.h>
+#include <linux/highmem.h>
+#include <linux/blktrace_api.h>
+#include <asm/tlbflush.h>
+
+#define POOL_SIZE 64
+#define ISA_POOL_SIZE 16
+
+static mempool_t *page_pool, *isa_page_pool;
+
+#ifdef CONFIG_HIGHMEM
+static __init int init_emergency_pool(void)
+{
+ struct sysinfo i;
+ si_meminfo(&i);
+ si_swapinfo(&i);
+
+ if (!i.totalhigh)
+ return 0;
+
+ page_pool = mempool_create_page_pool(POOL_SIZE, 0);
+ BUG_ON(!page_pool);
+ printk("highmem bounce pool size: %d pages\n", POOL_SIZE);
+
+ return 0;
+}
+
+__initcall(init_emergency_pool);
+
+/*
+ * highmem version, map in to vec
+ */
+static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
+{
+ unsigned long flags;
+ unsigned char *vto;
+
+ local_irq_save(flags);
+ vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ);
+ memcpy(vto + to->bv_offset, vfrom, to->bv_len);
+ kunmap_atomic(vto, KM_BOUNCE_READ);
+ local_irq_restore(flags);
+}
+
+#else /* CONFIG_HIGHMEM */
+
+#define bounce_copy_vec(to, vfrom) \
+ memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len)
+
+#endif /* CONFIG_HIGHMEM */
+
+/*
+ * allocate pages in the DMA region for the ISA pool
+ */
+static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
+{
+ return mempool_alloc_pages(gfp_mask | GFP_DMA, data);
+}
+
+/*
+ * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA
+ * as the max address, so check if the pool has already been created.
+ */
+int init_emergency_isa_pool(void)
+{
+ if (isa_page_pool)
+ return 0;
+
+ isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa,
+ mempool_free_pages, (void *) 0);
+ BUG_ON(!isa_page_pool);
+
+ printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE);
+ return 0;
+}
+
+/*
+ * Simple bounce buffer support for highmem pages. Depending on the
+ * queue gfp mask set, *to may or may not be a highmem page. kmap it
+ * always, it will do the Right Thing
+ */
+static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
+{
+ unsigned char *vfrom;
+ struct bio_vec *tovec, *fromvec;
+ int i;
+
+ __bio_for_each_segment(tovec, to, i, 0) {
+ fromvec = from->bi_io_vec + i;
+
+ /*
+ * not bounced
+ */
+ if (tovec->bv_page == fromvec->bv_page)
+ continue;
+
+ /*
+ * fromvec->bv_offset and fromvec->bv_len might have been
+ * modified by the block layer, so use the original copy,
+ * bounce_copy_vec already uses tovec->bv_len
+ */
+ vfrom = page_address(fromvec->bv_page) + tovec->bv_offset;
+
+ flush_dcache_page(tovec->bv_page);
+ bounce_copy_vec(tovec, vfrom);
+ }
+}
+
+static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
+{
+ struct bio *bio_orig = bio->bi_private;
+ struct bio_vec *bvec, *org_vec;
+ int i;
+
+ if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
+ set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags);
+
+ /*
+ * free up bounce indirect pages used
+ */
+ __bio_for_each_segment(bvec, bio, i, 0) {
+ org_vec = bio_orig->bi_io_vec + i;
+ if (bvec->bv_page == org_vec->bv_page)
+ continue;
+
+ dec_zone_page_state(bvec->bv_page, NR_BOUNCE);
+ mempool_free(bvec->bv_page, pool);
+ }
+
+ bio_endio(bio_orig, bio_orig->bi_size, err);
+ bio_put(bio);
+}
+
+static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
+{
+ if (bio->bi_size)
+ return 1;
+
+ bounce_end_io(bio, page_pool, err);
+ return 0;
+}
+
+static int bounce_end_io_write_isa(struct bio *bio, unsigned int bytes_done, int err)
+{
+ if (bio->bi_size)
+ return 1;
+
+ bounce_end_io(bio, isa_page_pool, err);
+ return 0;
+}
+
+static void __bounce_end_io_read(struct bio *bio, mempool_t *pool, int err)
+{
+ struct bio *bio_orig = bio->bi_private;
+
+ if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+ copy_to_high_bio_irq(bio_orig, bio);
+
+ bounce_end_io(bio, pool, err);
+}
+
+static int bounce_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
+{
+ if (bio->bi_size)
+ return 1;
+
+ __bounce_end_io_read(bio, page_pool, err);
+ return 0;
+}
+
+static int bounce_end_io_read_isa(struct bio *bio, unsigned int bytes_done, int err)
+{
+ if (bio->bi_size)
+ return 1;
+
+ __bounce_end_io_read(bio, isa_page_pool, err);
+ return 0;
+}
+
+static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
+ mempool_t *pool)
+{
+ struct page *page;
+ struct bio *bio = NULL;
+ int i, rw = bio_data_dir(*bio_orig);
+ struct bio_vec *to, *from;
+
+ bio_for_each_segment(from, *bio_orig, i) {
+ page = from->bv_page;
+
+ /*
+ * is destination page below bounce pfn?
+ */
+ if (page_to_pfn(page) < q->bounce_pfn)
+ continue;
+
+ /*
+ * irk, bounce it
+ */
+ if (!bio)
+ bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt);
+
+ to = bio->bi_io_vec + i;
+
+ to->bv_page = mempool_alloc(pool, q->bounce_gfp);
+ to->bv_len = from->bv_len;
+ to->bv_offset = from->bv_offset;
+ inc_zone_page_state(to->bv_page, NR_BOUNCE);
+
+ if (rw == WRITE) {
+ char *vto, *vfrom;
+
+ flush_dcache_page(from->bv_page);
+ vto = page_address(to->bv_page) + to->bv_offset;
+ vfrom = kmap(from->bv_page) + from->bv_offset;
+ memcpy(vto, vfrom, to->bv_len);
+ kunmap(from->bv_page);
+ }
+ }
+
+ /*
+ * no pages bounced
+ */
+ if (!bio)
+ return;
+
+ /*
+ * at least one page was bounced, fill in possible non-highmem
+ * pages
+ */
+ __bio_for_each_segment(from, *bio_orig, i, 0) {
+ to = bio_iovec_idx(bio, i);
+ if (!to->bv_page) {
+ to->bv_page = from->bv_page;
+ to->bv_len = from->bv_len;
+ to->bv_offset = from->bv_offset;
+ }
+ }
+
+ bio->bi_bdev = (*bio_orig)->bi_bdev;
+ bio->bi_flags |= (1 << BIO_BOUNCED);
+ bio->bi_sector = (*bio_orig)->bi_sector;
+ bio->bi_rw = (*bio_orig)->bi_rw;
+
+ bio->bi_vcnt = (*bio_orig)->bi_vcnt;
+ bio->bi_idx = (*bio_orig)->bi_idx;
+ bio->bi_size = (*bio_orig)->bi_size;
+
+ if (pool == page_pool) {
+ bio->bi_end_io = bounce_end_io_write;
+ if (rw == READ)
+ bio->bi_end_io = bounce_end_io_read;
+ } else {
+ bio->bi_end_io = bounce_end_io_write_isa;
+ if (rw == READ)
+ bio->bi_end_io = bounce_end_io_read_isa;
+ }
+
+ bio->bi_private = *bio_orig;
+ *bio_orig = bio;
+}
+
+void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
+{
+ mempool_t *pool;
+
+ /*
+ * for non-isa bounce case, just check if the bounce pfn is equal
+ * to or bigger than the highest pfn in the system -- in that case,
+ * don't waste time iterating over bio segments
+ */
+ if (!(q->bounce_gfp & GFP_DMA)) {
+ if (q->bounce_pfn >= blk_max_pfn)
+ return;
+ pool = page_pool;
+ } else {
+ BUG_ON(!isa_page_pool);
+ pool = isa_page_pool;
+ }
+
+ blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
+
+ /*
+ * slow path
+ */
+ __blk_queue_bounce(q, bio_orig, pool);
+}
+
+EXPORT_SYMBOL(blk_queue_bounce);
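
For context, bouncing only happens when a driver has lowered the queue's bounce limit; a rough sketch of the two call sites involved (the driver-side call is illustrative):

	/* driver init: device can only DMA into ISA/low memory */
	blk_queue_bounce_limit(q, BLK_BOUNCE_ISA);

	/* block layer, at submission time: may replace *bio with a clone
	 * whose data pages all sit below q->bounce_pfn */
	blk_queue_bounce(q, &bio);
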
if (unlikely(*pos + *count > inode->i_sb->s_maxbytes))
*count = inode->i_sb->s_maxbytes - *pos;
} else {
+#ifdef CONFIG_BLOCK
loff_t isize;
if (bdev_read_only(I_BDEV(inode)))
return -EPERM;
if (*pos + *count > isize)
*count = isize - *pos;
+#else
+ return -EPERM;
+#endif
}
return 0;
}
}
return retval;
}
+
+/**
+ * try_to_release_page() - release old fs-specific metadata on a page
+ * @page: the page which the kernel is trying to free
+ * @gfp_mask: memory allocation flags (and I/O mode)
+ *
+ * The address_space is asked to try to release any data held against the page
+ * (presumably at page->private). If the release was successful, return `1';
+ * otherwise return zero.
+ *
+ * The @gfp_mask argument specifies whether I/O may be performed to release
+ * this page (__GFP_IO), and whether the call may block (__GFP_WAIT).
+ *
+ * NOTE: @gfp_mask may go away, and this function may become non-blocking.
+ */
+int try_to_release_page(struct page *page, gfp_t gfp_mask)
+{
+ struct address_space * const mapping = page->mapping;
+
+ BUG_ON(!PageLocked(page));
+ if (PageWriteback(page))
+ return 0;
+
+ if (mapping && mapping->a_ops->releasepage)
+ return mapping->a_ops->releasepage(page, gfp_mask);
+ return try_to_free_buffers(page);
+}
+
+EXPORT_SYMBOL(try_to_release_page);
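
A filesystem opts into this path by supplying a releasepage method in its address_space_operations; buffer-head based filesystems that set none simply fall through to try_to_free_buffers(). A minimal sketch for a hypothetical filesystem (examplefs_page_busy() is an assumed helper):

	static int examplefs_releasepage(struct page *page, gfp_t gfp_mask)
	{
		/* refuse while our private metadata is still in use */
		if (PagePrivate(page) && examplefs_page_busy(page))
			return 0;
		return try_to_free_buffers(page);
	}

	static const struct address_space_operations examplefs_aops = {
		.releasepage	= examplefs_releasepage,
		/* ... */
	};
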
#include <linux/blktrace_api.h>
#include <asm/tlbflush.h>
-static mempool_t *page_pool, *isa_page_pool;
-
-static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
-{
- return mempool_alloc_pages(gfp_mask | GFP_DMA, data);
-}
-
/*
* Virtual_count is not a pure "count".
* 0 means that it is not mapped, and has not been mapped
}
EXPORT_SYMBOL(kunmap_high);
-
-#define POOL_SIZE 64
-
-static __init int init_emergency_pool(void)
-{
- struct sysinfo i;
- si_meminfo(&i);
- si_swapinfo(&i);
-
- if (!i.totalhigh)
- return 0;
-
- page_pool = mempool_create_page_pool(POOL_SIZE, 0);
- BUG_ON(!page_pool);
- printk("highmem bounce pool size: %d pages\n", POOL_SIZE);
-
- return 0;
-}
-
-__initcall(init_emergency_pool);
-
-/*
- * highmem version, map in to vec
- */
-static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
-{
- unsigned long flags;
- unsigned char *vto;
-
- local_irq_save(flags);
- vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ);
- memcpy(vto + to->bv_offset, vfrom, to->bv_len);
- kunmap_atomic(vto, KM_BOUNCE_READ);
- local_irq_restore(flags);
-}
-
-#else /* CONFIG_HIGHMEM */
-
-#define bounce_copy_vec(to, vfrom) \
- memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len)
-
#endif
-#define ISA_POOL_SIZE 16
-
-/*
- * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA
- * as the max address, so check if the pool has already been created.
- */
-int init_emergency_isa_pool(void)
-{
- if (isa_page_pool)
- return 0;
-
- isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa,
- mempool_free_pages, (void *) 0);
- BUG_ON(!isa_page_pool);
-
- printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE);
- return 0;
-}
-
-/*
- * Simple bounce buffer support for highmem pages. Depending on the
- * queue gfp mask set, *to may or may not be a highmem page. kmap it
- * always, it will do the Right Thing
- */
-static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
-{
- unsigned char *vfrom;
- struct bio_vec *tovec, *fromvec;
- int i;
-
- __bio_for_each_segment(tovec, to, i, 0) {
- fromvec = from->bi_io_vec + i;
-
- /*
- * not bounced
- */
- if (tovec->bv_page == fromvec->bv_page)
- continue;
-
- /*
- * fromvec->bv_offset and fromvec->bv_len might have been
- * modified by the block layer, so use the original copy,
- * bounce_copy_vec already uses tovec->bv_len
- */
- vfrom = page_address(fromvec->bv_page) + tovec->bv_offset;
-
- flush_dcache_page(tovec->bv_page);
- bounce_copy_vec(tovec, vfrom);
- }
-}
-
-static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
-{
- struct bio *bio_orig = bio->bi_private;
- struct bio_vec *bvec, *org_vec;
- int i;
-
- if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
- set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags);
-
- /*
- * free up bounce indirect pages used
- */
- __bio_for_each_segment(bvec, bio, i, 0) {
- org_vec = bio_orig->bi_io_vec + i;
- if (bvec->bv_page == org_vec->bv_page)
- continue;
-
- dec_zone_page_state(bvec->bv_page, NR_BOUNCE);
- mempool_free(bvec->bv_page, pool);
- }
-
- bio_endio(bio_orig, bio_orig->bi_size, err);
- bio_put(bio);
-}
-
-static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
-{
- if (bio->bi_size)
- return 1;
-
- bounce_end_io(bio, page_pool, err);
- return 0;
-}
-
-static int bounce_end_io_write_isa(struct bio *bio, unsigned int bytes_done, int err)
-{
- if (bio->bi_size)
- return 1;
-
- bounce_end_io(bio, isa_page_pool, err);
- return 0;
-}
-
-static void __bounce_end_io_read(struct bio *bio, mempool_t *pool, int err)
-{
- struct bio *bio_orig = bio->bi_private;
-
- if (test_bit(BIO_UPTODATE, &bio->bi_flags))
- copy_to_high_bio_irq(bio_orig, bio);
-
- bounce_end_io(bio, pool, err);
-}
-
-static int bounce_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
-{
- if (bio->bi_size)
- return 1;
-
- __bounce_end_io_read(bio, page_pool, err);
- return 0;
-}
-
-static int bounce_end_io_read_isa(struct bio *bio, unsigned int bytes_done, int err)
-{
- if (bio->bi_size)
- return 1;
-
- __bounce_end_io_read(bio, isa_page_pool, err);
- return 0;
-}
-
-static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
- mempool_t *pool)
-{
- struct page *page;
- struct bio *bio = NULL;
- int i, rw = bio_data_dir(*bio_orig);
- struct bio_vec *to, *from;
-
- bio_for_each_segment(from, *bio_orig, i) {
- page = from->bv_page;
-
- /*
- * is destination page below bounce pfn?
- */
- if (page_to_pfn(page) < q->bounce_pfn)
- continue;
-
- /*
- * irk, bounce it
- */
- if (!bio)
- bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt);
-
- to = bio->bi_io_vec + i;
-
- to->bv_page = mempool_alloc(pool, q->bounce_gfp);
- to->bv_len = from->bv_len;
- to->bv_offset = from->bv_offset;
- inc_zone_page_state(to->bv_page, NR_BOUNCE);
-
- if (rw == WRITE) {
- char *vto, *vfrom;
-
- flush_dcache_page(from->bv_page);
- vto = page_address(to->bv_page) + to->bv_offset;
- vfrom = kmap(from->bv_page) + from->bv_offset;
- memcpy(vto, vfrom, to->bv_len);
- kunmap(from->bv_page);
- }
- }
-
- /*
- * no pages bounced
- */
- if (!bio)
- return;
-
- /*
- * at least one page was bounced, fill in possible non-highmem
- * pages
- */
- __bio_for_each_segment(from, *bio_orig, i, 0) {
- to = bio_iovec_idx(bio, i);
- if (!to->bv_page) {
- to->bv_page = from->bv_page;
- to->bv_len = from->bv_len;
- to->bv_offset = from->bv_offset;
- }
- }
-
- bio->bi_bdev = (*bio_orig)->bi_bdev;
- bio->bi_flags |= (1 << BIO_BOUNCED);
- bio->bi_sector = (*bio_orig)->bi_sector;
- bio->bi_rw = (*bio_orig)->bi_rw;
-
- bio->bi_vcnt = (*bio_orig)->bi_vcnt;
- bio->bi_idx = (*bio_orig)->bi_idx;
- bio->bi_size = (*bio_orig)->bi_size;
-
- if (pool == page_pool) {
- bio->bi_end_io = bounce_end_io_write;
- if (rw == READ)
- bio->bi_end_io = bounce_end_io_read;
- } else {
- bio->bi_end_io = bounce_end_io_write_isa;
- if (rw == READ)
- bio->bi_end_io = bounce_end_io_read_isa;
- }
-
- bio->bi_private = *bio_orig;
- *bio_orig = bio;
-}
-
-void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
-{
- mempool_t *pool;
-
- /*
- * for non-isa bounce case, just check if the bounce pfn is equal
- * to or bigger than the highest pfn in the system -- in that case,
- * don't waste time iterating over bio segments
- */
- if (!(q->bounce_gfp & GFP_DMA)) {
- if (q->bounce_pfn >= blk_max_pfn)
- return;
- pool = page_pool;
- } else {
- BUG_ON(!isa_page_pool);
- pool = isa_page_pool;
- }
-
- blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
-
- /*
- * slow path
- */
- __blk_queue_bounce(q, bio_orig, pool);
-}
-
-EXPORT_SYMBOL(blk_queue_bounce);
-
#if defined(HASHED_PAGE_VIRTUAL)
#define PA_HASH_ORDER 7
}
EXPORT_SYMBOL(migrate_page);
+#ifdef CONFIG_BLOCK
/*
* Migration function for pages with buffers. This function can only be used
* if the underlying filesystem guarantees that no other references to "page"
return 0;
}
EXPORT_SYMBOL(buffer_migrate_page);
+#endif
/*
* Writeback a page to clean the dirty state
* Buffers may be managed in a filesystem specific way.
* We must have no buffers or drop them.
*/
- if (page_has_buffers(page) &&
+ if (PagePrivate(page) &&
!try_to_release_page(page, GFP_KERNEL))
return -EAGAIN;
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
+#include <linux/buffer_head.h>
+#include <linux/pagevec.h>
/*
* The maximum number of pages to writeout in a single bdflush/kupdate
register_cpu_notifier(&ratelimit_nb);
}
+/**
+ * generic_writepages - walk the list of dirty pages of the given
+ * address space and writepage() all of them.
+ *
+ * @mapping: address space structure to write
+ * @wbc: subtract the number of written pages from *@wbc->nr_to_write
+ *
+ * This is a library function, which implements the writepages()
+ * address_space_operation.
+ *
+ * If a page is already under I/O, generic_writepages() skips it, even
+ * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
+ * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
+ * and msync() need to guarantee that all the data which was dirty at the time
+ * the call was made get new I/O started against them. If wbc->sync_mode is
+ * WB_SYNC_ALL then we were called for data integrity and we must wait for
+ * existing IO to complete.
+ *
+ * Derived from mpage_writepages() - if you fix this you should check that
+ * also!
+ */
+int generic_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct backing_dev_info *bdi = mapping->backing_dev_info;
+ int ret = 0;
+ int done = 0;
+ int (*writepage)(struct page *page, struct writeback_control *wbc);
+ struct pagevec pvec;
+ int nr_pages;
+ pgoff_t index;
+ pgoff_t end; /* Inclusive */
+ int scanned = 0;
+ int range_whole = 0;
+
+ if (wbc->nonblocking && bdi_write_congested(bdi)) {
+ wbc->encountered_congestion = 1;
+ return 0;
+ }
+
+ writepage = mapping->a_ops->writepage;
+
+ /* deal with chardevs and other special file */
+ if (!writepage)
+ return 0;
+
+ pagevec_init(&pvec, 0);
+ if (wbc->range_cyclic) {
+ index = mapping->writeback_index; /* Start from prev offset */
+ end = -1;
+ } else {
+ index = wbc->range_start >> PAGE_CACHE_SHIFT;
+ end = wbc->range_end >> PAGE_CACHE_SHIFT;
+ if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+ range_whole = 1;
+ scanned = 1;
+ }
+retry:
+ while (!done && (index <= end) &&
+ (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+ PAGECACHE_TAG_DIRTY,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+ unsigned i;
+
+ scanned = 1;
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page = pvec.pages[i];
+
+ /*
+ * At this point we hold neither mapping->tree_lock nor
+ * lock on the page itself: the page may be truncated or
+ * invalidated (changing page->mapping to NULL), or even
+ * swizzled back from swapper_space to tmpfs file
+ * mapping
+ */
+ lock_page(page);
+
+ if (unlikely(page->mapping != mapping)) {
+ unlock_page(page);
+ continue;
+ }
+
+ if (!wbc->range_cyclic && page->index > end) {
+ done = 1;
+ unlock_page(page);
+ continue;
+ }
+
+ if (wbc->sync_mode != WB_SYNC_NONE)
+ wait_on_page_writeback(page);
+
+ if (PageWriteback(page) ||
+ !clear_page_dirty_for_io(page)) {
+ unlock_page(page);
+ continue;
+ }
+
+ ret = (*writepage)(page, wbc);
+ if (ret) {
+ if (ret == -ENOSPC)
+ set_bit(AS_ENOSPC, &mapping->flags);
+ else
+ set_bit(AS_EIO, &mapping->flags);
+ }
+
+ if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE))
+ unlock_page(page);
+ if (ret || (--(wbc->nr_to_write) <= 0))
+ done = 1;
+ if (wbc->nonblocking && bdi_write_congested(bdi)) {
+ wbc->encountered_congestion = 1;
+ done = 1;
+ }
+ }
+ pagevec_release(&pvec);
+ cond_resched();
+ }
+ if (!scanned && !done) {
+ /*
+ * We hit the last page and there is more work to be done: wrap
+ * back to the start of the file
+ */
+ scanned = 1;
+ index = 0;
+ goto retry;
+ }
+ if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+ mapping->writeback_index = index;
+ return ret;
+}
+
+EXPORT_SYMBOL(generic_writepages);
+
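
Filesystems that only provide a writepage method get this behaviour by default (do_writepages() falls back to generic_writepages() when no writepages method is set), but it can also be wired up explicitly. A minimal sketch with hypothetical filesystem names:

	static const struct address_space_operations examplefs_aops = {
		.writepage	= examplefs_writepage,
		.writepages	= generic_writepages,	/* same as the implicit default */
		/* ... */
	};
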
int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
int ret;
if (likely(mapping)) {
int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
- if (spd)
- return (*spd)(page);
- return __set_page_dirty_buffers(page);
+#ifdef CONFIG_BLOCK
+ if (!spd)
+ spd = __set_page_dirty_buffers;
+#endif
+ return (*spd)(page);
}
if (!PageDirty(page)) {
if (!TestSetPageDirty(page))
do_invalidatepage */
+/**
+ * do_invalidatepage - invalidate part or all of a page
+ * @page: the page which is affected
+ * @offset: the index of the truncation point
+ *
+ * do_invalidatepage() is called when all or part of the page has become
+ * invalidated by a truncate operation.
+ *
+ * do_invalidatepage() does not have to release all buffers, but it must
+ * ensure that no dirty buffer is left outside @offset and that no I/O
+ * is underway against any of the blocks which are outside the truncation
+ * point, because the caller is about to free (and possibly reuse) those
+ * blocks on-disk.
+ */
+void do_invalidatepage(struct page *page, unsigned long offset)
+{
+ void (*invalidatepage)(struct page *, unsigned long);
+ invalidatepage = page->mapping->a_ops->invalidatepage;
+#ifdef CONFIG_BLOCK
+ if (!invalidatepage)
+ invalidatepage = block_invalidatepage;
+#endif
+ if (invalidatepage)
+ (*invalidatepage)(page, offset);
+}
+
static inline void truncate_partial_page(struct page *page, unsigned partial)
{
memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial);