]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/commitdiff
Merge branch 'block' of git://brick.kernel.dk/data/git/linux-2.6-block
authorLinus Torvalds <torvalds@g5.osdl.org>
Sat, 30 Sep 2006 19:07:01 +0000 (12:07 -0700)
committerLinus Torvalds <torvalds@g5.osdl.org>
Sat, 30 Sep 2006 19:07:01 +0000 (12:07 -0700)
* 'block' of git://brick.kernel.dk/data/git/linux-2.6-block: (67 commits)
  [PATCH] blk_queue_start_tag() shared map race fix
  [PATCH] Update axboe@suse.de email address
  [PATCH] fix creating zero sized bio mempools in low memory system
  [PATCH] CONFIG_BLOCK: blk_congestion_wait() fix
  [PATCH] CONFIG_BLOCK internal.h cleanups
  [PATCH] BLOCK: Make USB storage depend on SCSI rather than selecting it [try #6]
  [PATCH] BLOCK: Make it possible to disable the block layer [try #6]
  [PATCH] BLOCK: Remove no-longer necessary linux/buffer_head.h inclusions [try #6]
  [PATCH] BLOCK: Remove no-longer necessary linux/mpage.h inclusions [try #6]
  [PATCH] BLOCK: Move the msdos device ioctl compat stuff to the msdos driver [try #6]
  [PATCH] BLOCK: Move the Ext3 device ioctl compat stuff to the Ext3 driver [try #6]
  [PATCH] BLOCK: Move the Ext2 device ioctl compat stuff to the Ext2 driver [try #6]
  [PATCH] BLOCK: Move the ReiserFS device ioctl compat stuff to the ReiserFS driver [try #6]
  [PATCH] BLOCK: Move common FS-specific ioctls to linux/fs.h [try #6]
  [PATCH] BLOCK: Move the loop device ioctl compat stuff to the loop driver [try #6]
  [PATCH] BLOCK: Move __invalidate_device() to block_dev.c [try #6]
  [PATCH] BLOCK: Dissociate generic_writepages() from mpage stuff [try #6]
  [PATCH] BLOCK: Remove dependence on existence of blockdev_superblock [try #6]
  [PATCH] BLOCK: Move extern declarations out of fs/*.c into header files [try #6]
  [PATCH] BLOCK: Don't call block_sync_page() from AFS [try #6]
  ...

147 files changed:
MAINTAINERS
arch/mips/kernel/signal_n32.c
arch/um/drivers/ubd_kern.c
block/Kconfig
block/Kconfig.iosched
block/Makefile
block/as-iosched.c
block/blktrace.c
block/cfq-iosched.c
block/deadline-iosched.c
block/elevator.c
block/ll_rw_blk.c
block/noop-iosched.c
block/scsi_ioctl.c
drivers/block/DAC960.c
drivers/block/Kconfig
drivers/block/cciss.c
drivers/block/cpqarray.c
drivers/block/floppy.c
drivers/block/loop.c
drivers/block/nbd.c
drivers/block/paride/pd.c
drivers/block/pktcdvd.c
drivers/block/swim3.c
drivers/block/swim_iop.c
drivers/block/xd.c
drivers/cdrom/Kconfig
drivers/cdrom/cdrom.c
drivers/cdrom/cdu31a.c
drivers/char/Kconfig
drivers/char/random.c
drivers/fc4/fc.c
drivers/ide/Kconfig
drivers/ide/ide-cd.c
drivers/ide/ide-disk.c
drivers/ide/ide-dma.c
drivers/ide/ide-floppy.c
drivers/ide/ide-io.c
drivers/ide/ide-lib.c
drivers/ide/ide-tape.c
drivers/ide/ide-taskfile.c
drivers/ide/ide.c
drivers/ide/legacy/hd.c
drivers/md/Kconfig
drivers/md/dm-emc.c
drivers/message/i2o/Kconfig
drivers/message/i2o/i2o_block.c
drivers/mmc/Kconfig
drivers/mmc/Makefile
drivers/mmc/mmc_queue.c
drivers/mtd/Kconfig
drivers/mtd/devices/Kconfig
drivers/mtd/mtd_blkdevs.c
drivers/s390/block/Kconfig
drivers/s390/block/dasd_diag.c
drivers/s390/block/dasd_eckd.c
drivers/s390/block/dasd_fba.c
drivers/scsi/Kconfig
drivers/scsi/aic7xxx_old.c
drivers/scsi/ide-scsi.c
drivers/scsi/pluto.c
drivers/scsi/scsi.c
drivers/scsi/scsi_lib.c
drivers/scsi/sd.c
drivers/scsi/sun3_NCR5380.c
drivers/scsi/sun3_scsi.c
drivers/scsi/sun3_scsi_vme.c
drivers/usb/storage/Kconfig
fs/Kconfig
fs/Makefile
fs/afs/file.c
fs/binfmt_elf.c
fs/bio.c
fs/block_dev.c
fs/buffer.c
fs/char_dev.c
fs/cifs/file.c
fs/cifs/inode.c
fs/cifs/ioctl.c
fs/compat.c
fs/compat_ioctl.c
fs/dcache.c
fs/ext2/dir.c
fs/ext2/ext2.h
fs/ext2/file.c
fs/ext2/ioctl.c
fs/ext3/dir.c
fs/ext3/file.c
fs/ext3/inode.c
fs/ext3/ioctl.c
fs/ext3/namei.c
fs/fat/dir.c
fs/fs-writeback.c
fs/hfsplus/hfsplus_fs.h
fs/hfsplus/ioctl.c
fs/inode.c
fs/internal.h [new file with mode: 0644]
fs/ioprio.c
fs/jfs/ioctl.c
fs/mpage.c
fs/namespace.c
fs/nfs/write.c
fs/no-block.c [new file with mode: 0644]
fs/partitions/Makefile
fs/proc/proc_misc.c
fs/quota.c
fs/reiserfs/dir.c
fs/reiserfs/file.c
fs/reiserfs/ioctl.c
fs/splice.c
fs/super.c
fs/sync.c
fs/xfs/Kconfig
include/linux/bio.h
include/linux/blkdev.h
include/linux/blktrace_api.h
include/linux/buffer_head.h
include/linux/compat_ioctl.h
include/linux/elevator.h
include/linux/ext2_fs.h
include/linux/ext3_fs.h
include/linux/fs.h
include/linux/genhd.h
include/linux/mm.h
include/linux/mpage.h
include/linux/raid/md.h
include/linux/raid/md_k.h
include/linux/ramfs.h
include/linux/rbtree.h
include/linux/reiserfs_fs.h
include/linux/sched.h
include/linux/tty.h
include/linux/writeback.h
include/scsi/scsi_tcq.h
init/Kconfig
init/do_mounts.c
kernel/compat.c
kernel/exit.c
kernel/sys_ni.c
lib/rbtree.c
mm/Makefile
mm/bounce.c [new file with mode: 0644]
mm/filemap.c
mm/highmem.c
mm/migrate.c
mm/page-writeback.c
mm/truncate.c

index 28e1c7b9244ad30c119ffd05ee069cf6e7d73b56..f0cd5a3f6de69188f57e1aafb10406b58a2fe288 100644 (file)
@@ -501,7 +501,7 @@ S:  Maintained
 
 BLOCK LAYER
 P:     Jens Axboe
-M:     axboe@suse.de
+M:     axboe@kernel.dk
 L:     linux-kernel@vger.kernel.org
 T:     git kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git
 S:     Maintained
@@ -1380,7 +1380,7 @@ S:        Maintained
 
 IDE/ATAPI CDROM DRIVER
 P:     Jens Axboe
-M:     axboe@suse.de
+M:     axboe@kernel.dk
 L:     linux-kernel@vger.kernel.org
 W:     http://www.kernel.dk
 S:     Maintained
@@ -2531,7 +2531,7 @@ S:        Maintained
 
 SCSI CDROM DRIVER
 P:     Jens Axboe
-M:     axboe@suse.de
+M:     axboe@kernel.dk
 L:     linux-scsi@vger.kernel.org
 W:     http://www.kernel.dk
 S:     Maintained
@@ -2976,7 +2976,7 @@ S:        Maintained
 
 UNIFORM CDROM DRIVER
 P:     Jens Axboe
-M:     axboe@suse.de
+M:     axboe@kernel.dk
 L:     linux-kernel@vger.kernel.org
 W:     http://www.kernel.dk
 S:     Maintained
index 477c5334ec1b459eefb010c9ef5918ca1358b47b..50c17eaa7f252c0012efb15054c25797a793cea4 100644 (file)
@@ -42,6 +42,8 @@
 
 #include "signal-common.h"
 
+extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
+
 /*
  * Including <asm/unistd.h> would give use the 64-bit syscall numbers ...
  */
@@ -81,8 +83,6 @@ struct rt_sigframe_n32 {
 #endif
 };
 
-extern void sigset_from_compat (sigset_t *set, compat_sigset_t *compat);
-
 save_static_function(sysn32_rt_sigsuspend);
 __attribute_used__ noinline static int
 _sysn32_rt_sigsuspend(nabi_no_regargs struct pt_regs regs)
index 5fa4c8e258a4e4d76d9406b5e002589e6c042374..fda4a3940698c26a8be776ce5141e5fa3e03189f 100644 (file)
@@ -981,8 +981,6 @@ static int prepare_request(struct request *req, struct io_thread_req *io_req)
        __u64 offset;
        int len;
 
-       if(req->rq_status == RQ_INACTIVE) return(1);
-
        /* This should be impossible now */
        if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
                printk("Write attempted on readonly ubd device %s\n",
index b6f5f0a79655a3de0125af945682f91bf92bc726..9af6c614dfde8cc8118d1b8c21f2f24f53e8ef8a 100644 (file)
@@ -1,6 +1,24 @@
 #
 # Block layer core configuration
 #
+config BLOCK
+       bool "Enable the block layer"
+       default y
+       help
+        This permits the block layer to be removed from the kernel if it's not
+        needed (on some embedded devices for example).  If this option is
+        disabled, then blockdev files will become unusable and some
+        filesystems (such as ext3) will become unavailable.
+
+        This option will also disable SCSI character devices and USB storage
+        since they make use of various block layer definitions and
+        facilities.
+
+        Say Y here unless you know you really don't want to mount disks and
+        suchlike.
+
+if BLOCK
+
 #XXX - it makes sense to enable this only for 32-bit subarch's, not for x86_64
 #for instance.
 config LBD
@@ -33,4 +51,6 @@ config LSF
 
          If unsure, say Y.
 
+endif
+
 source block/Kconfig.iosched
index 48d090e266fc5745be94af97ff8deb32c2726995..903f0d3b68520a94a5a76222b31c6860ba57de20 100644 (file)
@@ -1,3 +1,4 @@
+if BLOCK
 
 menu "IO Schedulers"
 
@@ -67,3 +68,5 @@ config DEFAULT_IOSCHED
        default "noop" if DEFAULT_NOOP
 
 endmenu
+
+endif
index c05de0e0037fe6f14f9e6f508bece1271568bb36..4b84d0d5947bc22cf8a94f91fb7c556b29ff571c 100644 (file)
@@ -2,7 +2,7 @@
 # Makefile for the kernel block layer
 #
 
-obj- := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o
+obj-$(CONFIG_BLOCK) := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o
 
 obj-$(CONFIG_IOSCHED_NOOP)     += noop-iosched.o
 obj-$(CONFIG_IOSCHED_AS)       += as-iosched.o
index 5da56d48fbd36473ba92fb346a91d06dbea148a1..165509e8659ef7642ec0e60ec5d6c511a46ccafe 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *  Anticipatory & deadline i/o scheduler.
  *
- *  Copyright (C) 2002 Jens Axboe <axboe@suse.de>
+ *  Copyright (C) 2002 Jens Axboe <axboe@kernel.dk>
  *                     Nick Piggin <nickpiggin@yahoo.com.au>
  *
  */
@@ -14,7 +14,6 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/compiler.h>
-#include <linux/hash.h>
 #include <linux/rbtree.h>
 #include <linux/interrupt.h>
 
@@ -93,9 +92,8 @@ struct as_data {
        struct rb_root sort_list[2];
        struct list_head fifo_list[2];
 
-       struct as_rq *next_arq[2];      /* next in sort order */
+       struct request *next_rq[2];     /* next in sort order */
        sector_t last_sector[2];        /* last REQ_SYNC & REQ_ASYNC sectors */
-       struct hlist_head *hash;        /* request hash */
 
        unsigned long exit_prob;        /* probability a task will exit while
                                           being waited on */
@@ -115,7 +113,6 @@ struct as_data {
        int write_batch_count;          /* max # of reqs in a write batch */
        int current_write_count;        /* how many requests left this batch */
        int write_batch_idled;          /* has the write batch gone idle? */
-       mempool_t *arq_pool;
 
        enum anticipation_status antic_status;
        unsigned long antic_start;      /* jiffies: when it started */
@@ -133,8 +130,6 @@ struct as_data {
        unsigned long antic_expire;
 };
 
-#define list_entry_fifo(ptr)   list_entry((ptr), struct as_rq, fifo)
-
 /*
  * per-request data.
  */
@@ -150,40 +145,14 @@ enum arq_state {
        AS_RQ_POSTSCHED,        /* when they shouldn't be */
 };
 
-struct as_rq {
-       /*
-        * rbtree index, key is the starting offset
-        */
-       struct rb_node rb_node;
-       sector_t rb_key;
-
-       struct request *request;
-
-       struct io_context *io_context;  /* The submitting task */
-
-       /*
-        * request hash, key is the ending offset (for back merge lookup)
-        */
-       struct hlist_node hash;
-
-       /*
-        * expire fifo
-        */
-       struct list_head fifo;
-       unsigned long expires;
+#define RQ_IOC(rq)     ((struct io_context *) (rq)->elevator_private)
+#define RQ_STATE(rq)   ((enum arq_state)(rq)->elevator_private2)
+#define RQ_SET_STATE(rq, state)        ((rq)->elevator_private2 = (void *) state)
 
-       unsigned int is_sync;
-       enum arq_state state;
-};
-
-#define RQ_DATA(rq)    ((struct as_rq *) (rq)->elevator_private)
-
-static kmem_cache_t *arq_pool;
-
-static atomic_t ioc_count = ATOMIC_INIT(0);
+static DEFINE_PER_CPU(unsigned long, ioc_count);
 static struct completion *ioc_gone;
 
-static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq);
+static void as_move_to_dispatch(struct as_data *ad, struct request *rq);
 static void as_antic_stop(struct as_data *ad);
 
 /*
@@ -194,7 +163,8 @@ static void as_antic_stop(struct as_data *ad);
 static void free_as_io_context(struct as_io_context *aic)
 {
        kfree(aic);
-       if (atomic_dec_and_test(&ioc_count) && ioc_gone)
+       elv_ioc_count_dec(ioc_count);
+       if (ioc_gone && !elv_ioc_count_read(ioc_count))
                complete(ioc_gone);
 }
 
@@ -230,7 +200,7 @@ static struct as_io_context *alloc_as_io_context(void)
                ret->seek_total = 0;
                ret->seek_samples = 0;
                ret->seek_mean = 0;
-               atomic_inc(&ioc_count);
+               elv_ioc_count_inc(ioc_count);
        }
 
        return ret;
@@ -240,9 +210,9 @@ static struct as_io_context *alloc_as_io_context(void)
  * If the current task has no AS IO context then create one and initialise it.
  * Then take a ref on the task's io context and return it.
  */
-static struct io_context *as_get_io_context(void)
+static struct io_context *as_get_io_context(int node)
 {
-       struct io_context *ioc = get_io_context(GFP_ATOMIC);
+       struct io_context *ioc = get_io_context(GFP_ATOMIC, node);
        if (ioc && !ioc->aic) {
                ioc->aic = alloc_as_io_context();
                if (!ioc->aic) {
@@ -253,194 +223,43 @@ static struct io_context *as_get_io_context(void)
        return ioc;
 }
 
-static void as_put_io_context(struct as_rq *arq)
+static void as_put_io_context(struct request *rq)
 {
        struct as_io_context *aic;
 
-       if (unlikely(!arq->io_context))
+       if (unlikely(!RQ_IOC(rq)))
                return;
 
-       aic = arq->io_context->aic;
+       aic = RQ_IOC(rq)->aic;
 
-       if (arq->is_sync == REQ_SYNC && aic) {
+       if (rq_is_sync(rq) && aic) {
                spin_lock(&aic->lock);
                set_bit(AS_TASK_IORUNNING, &aic->state);
                aic->last_end_request = jiffies;
                spin_unlock(&aic->lock);
        }
 
-       put_io_context(arq->io_context);
-}
-
-/*
- * the back merge hash support functions
- */
-static const int as_hash_shift = 6;
-#define AS_HASH_BLOCK(sec)     ((sec) >> 3)
-#define AS_HASH_FN(sec)                (hash_long(AS_HASH_BLOCK((sec)), as_hash_shift))
-#define AS_HASH_ENTRIES                (1 << as_hash_shift)
-#define rq_hash_key(rq)                ((rq)->sector + (rq)->nr_sectors)
-
-static inline void __as_del_arq_hash(struct as_rq *arq)
-{
-       hlist_del_init(&arq->hash);
-}
-
-static inline void as_del_arq_hash(struct as_rq *arq)
-{
-       if (!hlist_unhashed(&arq->hash))
-               __as_del_arq_hash(arq);
-}
-
-static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq)
-{
-       struct request *rq = arq->request;
-
-       BUG_ON(!hlist_unhashed(&arq->hash));
-
-       hlist_add_head(&arq->hash, &ad->hash[AS_HASH_FN(rq_hash_key(rq))]);
-}
-
-/*
- * move hot entry to front of chain
- */
-static inline void as_hot_arq_hash(struct as_data *ad, struct as_rq *arq)
-{
-       struct request *rq = arq->request;
-       struct hlist_head *head = &ad->hash[AS_HASH_FN(rq_hash_key(rq))];
-
-       if (hlist_unhashed(&arq->hash)) {
-               WARN_ON(1);
-               return;
-       }
-
-       if (&arq->hash != head->first) {
-               hlist_del(&arq->hash);
-               hlist_add_head(&arq->hash, head);
-       }
-}
-
-static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset)
-{
-       struct hlist_head *hash_list = &ad->hash[AS_HASH_FN(offset)];
-       struct hlist_node *entry, *next;
-       struct as_rq *arq;
-
-       hlist_for_each_entry_safe(arq, entry, next, hash_list, hash) {
-               struct request *__rq = arq->request;
-
-               BUG_ON(hlist_unhashed(&arq->hash));
-
-               if (!rq_mergeable(__rq)) {
-                       as_del_arq_hash(arq);
-                       continue;
-               }
-
-               if (rq_hash_key(__rq) == offset)
-                       return __rq;
-       }
-
-       return NULL;
+       put_io_context(RQ_IOC(rq));
 }
 
 /*
  * rb tree support functions
  */
-#define rb_entry_arq(node)     rb_entry((node), struct as_rq, rb_node)
-#define ARQ_RB_ROOT(ad, arq)   (&(ad)->sort_list[(arq)->is_sync])
-#define rq_rb_key(rq)          (rq)->sector
-
-/*
- * as_find_first_arq finds the first (lowest sector numbered) request
- * for the specified data_dir. Used to sweep back to the start of the disk
- * (1-way elevator) after we process the last (highest sector) request.
- */
-static struct as_rq *as_find_first_arq(struct as_data *ad, int data_dir)
-{
-       struct rb_node *n = ad->sort_list[data_dir].rb_node;
-
-       if (n == NULL)
-               return NULL;
-
-       for (;;) {
-               if (n->rb_left == NULL)
-                       return rb_entry_arq(n);
-
-               n = n->rb_left;
-       }
-}
-
-/*
- * Add the request to the rb tree if it is unique.  If there is an alias (an
- * existing request against the same sector), which can happen when using
- * direct IO, then return the alias.
- */
-static struct as_rq *__as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
-{
-       struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node;
-       struct rb_node *parent = NULL;
-       struct as_rq *__arq;
-       struct request *rq = arq->request;
-
-       arq->rb_key = rq_rb_key(rq);
-
-       while (*p) {
-               parent = *p;
-               __arq = rb_entry_arq(parent);
-
-               if (arq->rb_key < __arq->rb_key)
-                       p = &(*p)->rb_left;
-               else if (arq->rb_key > __arq->rb_key)
-                       p = &(*p)->rb_right;
-               else
-                       return __arq;
-       }
-
-       rb_link_node(&arq->rb_node, parent, p);
-       rb_insert_color(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
-
-       return NULL;
-}
+#define RQ_RB_ROOT(ad, rq)     (&(ad)->sort_list[rq_is_sync((rq))])
 
-static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
+static void as_add_rq_rb(struct as_data *ad, struct request *rq)
 {
-       struct as_rq *alias;
+       struct request *alias;
 
-       while ((unlikely(alias = __as_add_arq_rb(ad, arq)))) {
+       while ((unlikely(alias = elv_rb_add(RQ_RB_ROOT(ad, rq), rq)))) {
                as_move_to_dispatch(ad, alias);
                as_antic_stop(ad);
        }
 }
 
-static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq)
-{
-       if (!RB_EMPTY_NODE(&arq->rb_node)) {
-               WARN_ON(1);
-               return;
-       }
-
-       rb_erase(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
-       RB_CLEAR_NODE(&arq->rb_node);
-}
-
-static struct request *
-as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir)
+static inline void as_del_rq_rb(struct as_data *ad, struct request *rq)
 {
-       struct rb_node *n = ad->sort_list[data_dir].rb_node;
-       struct as_rq *arq;
-
-       while (n) {
-               arq = rb_entry_arq(n);
-
-               if (sector < arq->rb_key)
-                       n = n->rb_left;
-               else if (sector > arq->rb_key)
-                       n = n->rb_right;
-               else
-                       return arq->request;
-       }
-
-       return NULL;
+       elv_rb_del(RQ_RB_ROOT(ad, rq), rq);
 }
 
 /*
@@ -458,26 +277,26 @@ as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir)
  * as_choose_req selects the preferred one of two requests of the same data_dir
  * ignoring time - eg. timeouts, which is the job of as_dispatch_request
  */
-static struct as_rq *
-as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2)
+static struct request *
+as_choose_req(struct as_data *ad, struct request *rq1, struct request *rq2)
 {
        int data_dir;
        sector_t last, s1, s2, d1, d2;
        int r1_wrap=0, r2_wrap=0;       /* requests are behind the disk head */
        const sector_t maxback = MAXBACK;
 
-       if (arq1 == NULL || arq1 == arq2)
-               return arq2;
-       if (arq2 == NULL)
-               return arq1;
+       if (rq1 == NULL || rq1 == rq2)
+               return rq2;
+       if (rq2 == NULL)
+               return rq1;
 
-       data_dir = arq1->is_sync;
+       data_dir = rq_is_sync(rq1);
 
        last = ad->last_sector[data_dir];
-       s1 = arq1->request->sector;
-       s2 = arq2->request->sector;
+       s1 = rq1->sector;
+       s2 = rq2->sector;
 
-       BUG_ON(data_dir != arq2->is_sync);
+       BUG_ON(data_dir != rq_is_sync(rq2));
 
        /*
         * Strict one way elevator _except_ in the case where we allow
@@ -504,61 +323,58 @@ as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2)
 
        /* Found required data */
        if (!r1_wrap && r2_wrap)
-               return arq1;
+               return rq1;
        else if (!r2_wrap && r1_wrap)
-               return arq2;
+               return rq2;
        else if (r1_wrap && r2_wrap) {
                /* both behind the head */
                if (s1 <= s2)
-                       return arq1;
+                       return rq1;
                else
-                       return arq2;
+                       return rq2;
        }
 
        /* Both requests in front of the head */
        if (d1 < d2)
-               return arq1;
+               return rq1;
        else if (d2 < d1)
-               return arq2;
+               return rq2;
        else {
                if (s1 >= s2)
-                       return arq1;
+                       return rq1;
                else
-                       return arq2;
+                       return rq2;
        }
 }
 
 /*
- * as_find_next_arq finds the next request after @prev in elevator order.
+ * as_find_next_rq finds the next request after @prev in elevator order.
  * this with as_choose_req form the basis for how the scheduler chooses
  * what request to process next. Anticipation works on top of this.
  */
-static struct as_rq *as_find_next_arq(struct as_data *ad, struct as_rq *last)
+static struct request *
+as_find_next_rq(struct as_data *ad, struct request *last)
 {
-       const int data_dir = last->is_sync;
-       struct as_rq *ret;
        struct rb_node *rbnext = rb_next(&last->rb_node);
        struct rb_node *rbprev = rb_prev(&last->rb_node);
-       struct as_rq *arq_next, *arq_prev;
+       struct request *next = NULL, *prev = NULL;
 
-       BUG_ON(!RB_EMPTY_NODE(&last->rb_node));
+       BUG_ON(RB_EMPTY_NODE(&last->rb_node));
 
        if (rbprev)
-               arq_prev = rb_entry_arq(rbprev);
-       else
-               arq_prev = NULL;
+               prev = rb_entry_rq(rbprev);
 
        if (rbnext)
-               arq_next = rb_entry_arq(rbnext);
+               next = rb_entry_rq(rbnext);
        else {
-               arq_next = as_find_first_arq(ad, data_dir);
-               if (arq_next == last)
-                       arq_next = NULL;
-       }
+               const int data_dir = rq_is_sync(last);
 
-       ret = as_choose_req(ad, arq_next, arq_prev);
+               rbnext = rb_first(&ad->sort_list[data_dir]);
+               if (rbnext && rbnext != &last->rb_node)
+                       next = rb_entry_rq(rbnext);
+       }
 
-       return ret;
+       return as_choose_req(ad, next, prev);
 }
 
 /*
@@ -712,8 +528,7 @@ static void as_update_seekdist(struct as_data *ad, struct as_io_context *aic,
 static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
                                struct request *rq)
 {
-       struct as_rq *arq = RQ_DATA(rq);
-       int data_dir = arq->is_sync;
+       int data_dir = rq_is_sync(rq);
        unsigned long thinktime = 0;
        sector_t seek_dist;
 
@@ -752,11 +567,11 @@ static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
  * previous one issued.
  */
 static int as_close_req(struct as_data *ad, struct as_io_context *aic,
-                               struct as_rq *arq)
+                       struct request *rq)
 {
        unsigned long delay;    /* milliseconds */
        sector_t last = ad->last_sector[ad->batch_data_dir];
-       sector_t next = arq->request->sector;
+       sector_t next = rq->sector;
        sector_t delta; /* acceptable close offset (in sectors) */
        sector_t s;
 
@@ -813,7 +628,7 @@ static int as_close_req(struct as_data *ad, struct as_io_context *aic,
  *
  * If this task has queued some other IO, do not enter enticipation.
  */
-static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
+static int as_can_break_anticipation(struct as_data *ad, struct request *rq)
 {
        struct io_context *ioc;
        struct as_io_context *aic;
@@ -821,7 +636,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
        ioc = ad->io_context;
        BUG_ON(!ioc);
 
-       if (arq && ioc == arq->io_context) {
+       if (rq && ioc == RQ_IOC(rq)) {
                /* request from same process */
                return 1;
        }
@@ -848,7 +663,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
                return 1;
        }
 
-       if (arq && arq->is_sync == REQ_SYNC && as_close_req(ad, aic, arq)) {
+       if (rq && rq_is_sync(rq) && as_close_req(ad, aic, rq)) {
                /*
                 * Found a close request that is not one of ours.
                 *
@@ -864,7 +679,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
                        ad->exit_no_coop = (7*ad->exit_no_coop)/8;
                }
 
-               as_update_iohist(ad, aic, arq->request);
+               as_update_iohist(ad, aic, rq);
                return 1;
        }
 
@@ -891,10 +706,10 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
 }
 
 /*
- * as_can_anticipate indicates whether we should either run arq
+ * as_can_anticipate indicates whether we should either run rq
  * or keep anticipating a better request.
  */
-static int as_can_anticipate(struct as_data *ad, struct as_rq *arq)
+static int as_can_anticipate(struct as_data *ad, struct request *rq)
 {
        if (!ad->io_context)
                /*
@@ -908,7 +723,7 @@ static int as_can_anticipate(struct as_data *ad, struct as_rq *arq)
                 */
                return 0;
 
-       if (as_can_break_anticipation(ad, arq))
+       if (as_can_break_anticipation(ad, rq))
                /*
                 * This request is a good candidate. Don't keep anticipating,
                 * run it.
@@ -926,16 +741,16 @@ static int as_can_anticipate(struct as_data *ad, struct as_rq *arq)
 }
 
 /*
- * as_update_arq must be called whenever a request (arq) is added to
+ * as_update_rq must be called whenever a request (rq) is added to
  * the sort_list. This function keeps caches up to date, and checks if the
  * request might be one we are "anticipating"
  */
-static void as_update_arq(struct as_data *ad, struct as_rq *arq)
+static void as_update_rq(struct as_data *ad, struct request *rq)
 {
-       const int data_dir = arq->is_sync;
+       const int data_dir = rq_is_sync(rq);
 
-       /* keep the next_arq cache up to date */
-       ad->next_arq[data_dir] = as_choose_req(ad, arq, ad->next_arq[data_dir]);
+       /* keep the next_rq cache up to date */
+       ad->next_rq[data_dir] = as_choose_req(ad, rq, ad->next_rq[data_dir]);
 
        /*
         * have we been anticipating this request?
@@ -944,7 +759,7 @@ static void as_update_arq(struct as_data *ad, struct as_rq *arq)
         */
        if (ad->antic_status == ANTIC_WAIT_REQ
                        || ad->antic_status == ANTIC_WAIT_NEXT) {
-               if (as_can_break_anticipation(ad, arq))
+               if (as_can_break_anticipation(ad, rq))
                        as_antic_stop(ad);
        }
 }
@@ -984,12 +799,11 @@ static void update_write_batch(struct as_data *ad)
 static void as_completed_request(request_queue_t *q, struct request *rq)
 {
        struct as_data *ad = q->elevator->elevator_data;
-       struct as_rq *arq = RQ_DATA(rq);
 
        WARN_ON(!list_empty(&rq->queuelist));
 
-       if (arq->state != AS_RQ_REMOVED) {
-               printk("arq->state %d\n", arq->state);
+       if (RQ_STATE(rq) != AS_RQ_REMOVED) {
+               printk("rq->state %d\n", RQ_STATE(rq));
                WARN_ON(1);
                goto out;
        }
@@ -1009,14 +823,14 @@ static void as_completed_request(request_queue_t *q, struct request *rq)
         * actually serviced. This should help devices with big TCQ windows
         * and writeback caches
         */
-       if (ad->new_batch && ad->batch_data_dir == arq->is_sync) {
+       if (ad->new_batch && ad->batch_data_dir == rq_is_sync(rq)) {
                update_write_batch(ad);
                ad->current_batch_expires = jiffies +
                                ad->batch_expire[REQ_SYNC];
                ad->new_batch = 0;
        }
 
-       if (ad->io_context == arq->io_context && ad->io_context) {
+       if (ad->io_context == RQ_IOC(rq) && ad->io_context) {
                ad->antic_start = jiffies;
                ad->ioc_finished = 1;
                if (ad->antic_status == ANTIC_WAIT_REQ) {
@@ -1028,9 +842,9 @@ static void as_completed_request(request_queue_t *q, struct request *rq)
                }
        }
 
-       as_put_io_context(arq);
+       as_put_io_context(rq);
 out:
-       arq->state = AS_RQ_POSTSCHED;
+       RQ_SET_STATE(rq, AS_RQ_POSTSCHED);
 }
 
 /*
@@ -1041,27 +855,27 @@ out:
  */
 static void as_remove_queued_request(request_queue_t *q, struct request *rq)
 {
-       struct as_rq *arq = RQ_DATA(rq);
-       const int data_dir = arq->is_sync;
+       const int data_dir = rq_is_sync(rq);
        struct as_data *ad = q->elevator->elevator_data;
+       struct io_context *ioc;
 
-       WARN_ON(arq->state != AS_RQ_QUEUED);
+       WARN_ON(RQ_STATE(rq) != AS_RQ_QUEUED);
 
-       if (arq->io_context && arq->io_context->aic) {
-               BUG_ON(!atomic_read(&arq->io_context->aic->nr_queued));
-               atomic_dec(&arq->io_context->aic->nr_queued);
+       ioc = RQ_IOC(rq);
+       if (ioc && ioc->aic) {
+               BUG_ON(!atomic_read(&ioc->aic->nr_queued));
+               atomic_dec(&ioc->aic->nr_queued);
        }
 
        /*
-        * Update the "next_arq" cache if we are about to remove its
+        * Update the "next_rq" cache if we are about to remove its
         * entry
         */
-       if (ad->next_arq[data_dir] == arq)
-               ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
+       if (ad->next_rq[data_dir] == rq)
+               ad->next_rq[data_dir] = as_find_next_rq(ad, rq);
 
-       list_del_init(&arq->fifo);
-       as_del_arq_hash(arq);
-       as_del_arq_rb(ad, arq);
+       rq_fifo_clear(rq);
+       as_del_rq_rb(ad, rq);
 }
 
 /*
@@ -1074,7 +888,7 @@ static void as_remove_queued_request(request_queue_t *q, struct request *rq)
  */
 static int as_fifo_expired(struct as_data *ad, int adir)
 {
-       struct as_rq *arq;
+       struct request *rq;
        long delta_jif;
 
        delta_jif = jiffies - ad->last_check_fifo[adir];
@@ -1088,9 +902,9 @@ static int as_fifo_expired(struct as_data *ad, int adir)
        if (list_empty(&ad->fifo_list[adir]))
                return 0;
 
-       arq = list_entry_fifo(ad->fifo_list[adir].next);
+       rq = rq_entry_fifo(ad->fifo_list[adir].next);
 
-       return time_after(jiffies, arq->expires);
+       return time_after(jiffies, rq_fifo_time(rq));
 }
 
 /*
@@ -1113,25 +927,25 @@ static inline int as_batch_expired(struct as_data *ad)
 /*
  * move an entry to dispatch queue
  */
-static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
+static void as_move_to_dispatch(struct as_data *ad, struct request *rq)
 {
-       struct request *rq = arq->request;
-       const int data_dir = arq->is_sync;
+       const int data_dir = rq_is_sync(rq);
 
-       BUG_ON(!RB_EMPTY_NODE(&arq->rb_node));
+       BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
 
        as_antic_stop(ad);
        ad->antic_status = ANTIC_OFF;
 
        /*
         * This has to be set in order to be correctly updated by
-        * as_find_next_arq
+        * as_find_next_rq
         */
        ad->last_sector[data_dir] = rq->sector + rq->nr_sectors;
 
        if (data_dir == REQ_SYNC) {
+               struct io_context *ioc = RQ_IOC(rq);
                /* In case we have to anticipate after this */
-               copy_io_context(&ad->io_context, &arq->io_context);
+               copy_io_context(&ad->io_context, &ioc);
        } else {
                if (ad->io_context) {
                        put_io_context(ad->io_context);
@@ -1143,19 +957,19 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
        }
        ad->ioc_finished = 0;
 
-       ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
+       ad->next_rq[data_dir] = as_find_next_rq(ad, rq);
 
        /*
         * take it off the sort and fifo list, add to dispatch queue
         */
        as_remove_queued_request(ad->q, rq);
-       WARN_ON(arq->state != AS_RQ_QUEUED);
+       WARN_ON(RQ_STATE(rq) != AS_RQ_QUEUED);
 
        elv_dispatch_sort(ad->q, rq);
 
-       arq->state = AS_RQ_DISPATCHED;
-       if (arq->io_context && arq->io_context->aic)
-               atomic_inc(&arq->io_context->aic->nr_dispatched);
+       RQ_SET_STATE(rq, AS_RQ_DISPATCHED);
+       if (RQ_IOC(rq) && RQ_IOC(rq)->aic)
+               atomic_inc(&RQ_IOC(rq)->aic->nr_dispatched);
        ad->nr_dispatched++;
 }
 
@@ -1167,9 +981,9 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
 static int as_dispatch_request(request_queue_t *q, int force)
 {
        struct as_data *ad = q->elevator->elevator_data;
-       struct as_rq *arq;
        const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]);
        const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]);
+       struct request *rq;
 
        if (unlikely(force)) {
                /*
@@ -1185,14 +999,14 @@ static int as_dispatch_request(request_queue_t *q, int force)
                ad->changed_batch = 0;
                ad->new_batch = 0;
 
-               while (ad->next_arq[REQ_SYNC]) {
-                       as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]);
+               while (ad->next_rq[REQ_SYNC]) {
+                       as_move_to_dispatch(ad, ad->next_rq[REQ_SYNC]);
                        dispatched++;
                }
                ad->last_check_fifo[REQ_SYNC] = jiffies;
 
-               while (ad->next_arq[REQ_ASYNC]) {
-                       as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]);
+               while (ad->next_rq[REQ_ASYNC]) {
+                       as_move_to_dispatch(ad, ad->next_rq[REQ_ASYNC]);
                        dispatched++;
                }
                ad->last_check_fifo[REQ_ASYNC] = jiffies;
@@ -1216,19 +1030,19 @@ static int as_dispatch_request(request_queue_t *q, int force)
                /*
                 * batch is still running or no reads or no writes
                 */
-               arq = ad->next_arq[ad->batch_data_dir];
+               rq = ad->next_rq[ad->batch_data_dir];
 
                if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) {
                        if (as_fifo_expired(ad, REQ_SYNC))
                                goto fifo_expired;
 
-                       if (as_can_anticipate(ad, arq)) {
+                       if (as_can_anticipate(ad, rq)) {
                                as_antic_waitreq(ad);
                                return 0;
                        }
                }
 
-               if (arq) {
+               if (rq) {
                        /* we have a "next request" */
                        if (reads && !writes)
                                ad->current_batch_expires =
@@ -1256,7 +1070,7 @@ static int as_dispatch_request(request_queue_t *q, int force)
                        ad->changed_batch = 1;
                }
                ad->batch_data_dir = REQ_SYNC;
-               arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next);
+               rq = rq_entry_fifo(ad->fifo_list[REQ_SYNC].next);
                ad->last_check_fifo[ad->batch_data_dir] = jiffies;
                goto dispatch_request;
        }
@@ -1282,7 +1096,7 @@ dispatch_writes:
                ad->batch_data_dir = REQ_ASYNC;
                ad->current_write_count = ad->write_batch_count;
                ad->write_batch_idled = 0;
-               arq = ad->next_arq[ad->batch_data_dir];
+               rq = ad->next_rq[ad->batch_data_dir];
                goto dispatch_request;
        }
 
@@ -1296,8 +1110,7 @@ dispatch_request:
 
        if (as_fifo_expired(ad, ad->batch_data_dir)) {
 fifo_expired:
-               arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next);
-               BUG_ON(arq == NULL);
+               rq = rq_entry_fifo(ad->fifo_list[ad->batch_data_dir].next);
        }
 
        if (ad->changed_batch) {
@@ -1316,70 +1129,58 @@ fifo_expired:
        }
 
        /*
-        * arq is the selected appropriate request.
+        * rq is the selected appropriate request.
         */
-       as_move_to_dispatch(ad, arq);
+       as_move_to_dispatch(ad, rq);
 
        return 1;
 }
 
 /*
- * add arq to rbtree and fifo
+ * add rq to rbtree and fifo
  */
 static void as_add_request(request_queue_t *q, struct request *rq)
 {
        struct as_data *ad = q->elevator->elevator_data;
-       struct as_rq *arq = RQ_DATA(rq);
        int data_dir;
 
-       arq->state = AS_RQ_NEW;
+       RQ_SET_STATE(rq, AS_RQ_NEW);
 
-       if (rq_data_dir(arq->request) == READ
-                       || (arq->request->flags & REQ_RW_SYNC))
-               arq->is_sync = 1;
-       else
-               arq->is_sync = 0;
-       data_dir = arq->is_sync;
+       data_dir = rq_is_sync(rq);
 
-       arq->io_context = as_get_io_context();
+       rq->elevator_private = as_get_io_context(q->node);
 
-       if (arq->io_context) {
-               as_update_iohist(ad, arq->io_context->aic, arq->request);
-               atomic_inc(&arq->io_context->aic->nr_queued);
+       if (RQ_IOC(rq)) {
+               as_update_iohist(ad, RQ_IOC(rq)->aic, rq);
+               atomic_inc(&RQ_IOC(rq)->aic->nr_queued);
        }
 
-       as_add_arq_rb(ad, arq);
-       if (rq_mergeable(arq->request))
-               as_add_arq_hash(ad, arq);
+       as_add_rq_rb(ad, rq);
 
        /*
         * set expire time (only used for reads) and add to fifo list
         */
-       arq->expires = jiffies + ad->fifo_expire[data_dir];
-       list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
+       rq_set_fifo_time(rq, jiffies + ad->fifo_expire[data_dir]);
+       list_add_tail(&rq->queuelist, &ad->fifo_list[data_dir]);
 
-       as_update_arq(ad, arq); /* keep state machine up to date */
-       arq->state = AS_RQ_QUEUED;
+       as_update_rq(ad, rq); /* keep state machine up to date */
+       RQ_SET_STATE(rq, AS_RQ_QUEUED);
 }
 
 static void as_activate_request(request_queue_t *q, struct request *rq)
 {
-       struct as_rq *arq = RQ_DATA(rq);
-
-       WARN_ON(arq->state != AS_RQ_DISPATCHED);
-       arq->state = AS_RQ_REMOVED;
-       if (arq->io_context && arq->io_context->aic)
-               atomic_dec(&arq->io_context->aic->nr_dispatched);
+       WARN_ON(RQ_STATE(rq) != AS_RQ_DISPATCHED);
+       RQ_SET_STATE(rq, AS_RQ_REMOVED);
+       if (RQ_IOC(rq) && RQ_IOC(rq)->aic)
+               atomic_dec(&RQ_IOC(rq)->aic->nr_dispatched);
 }
 
 static void as_deactivate_request(request_queue_t *q, struct request *rq)
 {
-       struct as_rq *arq = RQ_DATA(rq);
-
-       WARN_ON(arq->state != AS_RQ_REMOVED);
-       arq->state = AS_RQ_DISPATCHED;
-       if (arq->io_context && arq->io_context->aic)
-               atomic_inc(&arq->io_context->aic->nr_dispatched);
+       WARN_ON(RQ_STATE(rq) != AS_RQ_REMOVED);
+       RQ_SET_STATE(rq, AS_RQ_DISPATCHED);
+       if (RQ_IOC(rq) && RQ_IOC(rq)->aic)
+               atomic_inc(&RQ_IOC(rq)->aic->nr_dispatched);
 }
 
 /*
@@ -1396,93 +1197,35 @@ static int as_queue_empty(request_queue_t *q)
                && list_empty(&ad->fifo_list[REQ_SYNC]);
 }
 
-static struct request *as_former_request(request_queue_t *q,
-                                       struct request *rq)
-{
-       struct as_rq *arq = RQ_DATA(rq);
-       struct rb_node *rbprev = rb_prev(&arq->rb_node);
-       struct request *ret = NULL;
-
-       if (rbprev)
-               ret = rb_entry_arq(rbprev)->request;
-
-       return ret;
-}
-
-static struct request *as_latter_request(request_queue_t *q,
-                                       struct request *rq)
-{
-       struct as_rq *arq = RQ_DATA(rq);
-       struct rb_node *rbnext = rb_next(&arq->rb_node);
-       struct request *ret = NULL;
-
-       if (rbnext)
-               ret = rb_entry_arq(rbnext)->request;
-
-       return ret;
-}
-
 static int
 as_merge(request_queue_t *q, struct request **req, struct bio *bio)
 {
        struct as_data *ad = q->elevator->elevator_data;
        sector_t rb_key = bio->bi_sector + bio_sectors(bio);
        struct request *__rq;
-       int ret;
-
-       /*
-        * see if the merge hash can satisfy a back merge
-        */
-       __rq = as_find_arq_hash(ad, bio->bi_sector);
-       if (__rq) {
-               BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
-
-               if (elv_rq_merge_ok(__rq, bio)) {
-                       ret = ELEVATOR_BACK_MERGE;
-                       goto out;
-               }
-       }
 
        /*
         * check for front merge
         */
-       __rq = as_find_arq_rb(ad, rb_key, bio_data_dir(bio));
-       if (__rq) {
-               BUG_ON(rb_key != rq_rb_key(__rq));
-
-               if (elv_rq_merge_ok(__rq, bio)) {
-                       ret = ELEVATOR_FRONT_MERGE;
-                       goto out;
-               }
+       __rq = elv_rb_find(&ad->sort_list[bio_data_dir(bio)], rb_key);
+       if (__rq && elv_rq_merge_ok(__rq, bio)) {
+               *req = __rq;
+               return ELEVATOR_FRONT_MERGE;
        }
 
        return ELEVATOR_NO_MERGE;
-out:
-       if (ret) {
-               if (rq_mergeable(__rq))
-                       as_hot_arq_hash(ad, RQ_DATA(__rq));
-       }
-       *req = __rq;
-       return ret;
 }
 
-static void as_merged_request(request_queue_t *q, struct request *req)
+static void as_merged_request(request_queue_t *q, struct request *req, int type)
 {
        struct as_data *ad = q->elevator->elevator_data;
-       struct as_rq *arq = RQ_DATA(req);
-
-       /*
-        * hash always needs to be repositioned, key is end sector
-        */
-       as_del_arq_hash(arq);
-       as_add_arq_hash(ad, arq);
 
        /*
         * if the merge was a front merge, we need to reposition request
         */
-       if (rq_rb_key(req) != arq->rb_key) {
-               as_del_arq_rb(ad, arq);
-               as_add_arq_rb(ad, arq);
+       if (type == ELEVATOR_FRONT_MERGE) {
+               as_del_rq_rb(ad, req);
+               as_add_rq_rb(ad, req);
                /*
                 * Note! At this stage of this and the next function, our next
                 * request may not be optimal - eg the request may have "grown"
@@ -1494,38 +1237,22 @@ static void as_merged_request(request_queue_t *q, struct request *req)
 static void as_merged_requests(request_queue_t *q, struct request *req,
                                struct request *next)
 {
-       struct as_data *ad = q->elevator->elevator_data;
-       struct as_rq *arq = RQ_DATA(req);
-       struct as_rq *anext = RQ_DATA(next);
-
-       BUG_ON(!arq);
-       BUG_ON(!anext);
-
        /*
-        * reposition arq (this is the merged request) in hash, and in rbtree
-        * in case of a front merge
+        * if next expires before rq, assign its expire time to arq
+        * and move into next position (next will be deleted) in fifo
         */
-       as_del_arq_hash(arq);
-       as_add_arq_hash(ad, arq);
-
-       if (rq_rb_key(req) != arq->rb_key) {
-               as_del_arq_rb(ad, arq);
-               as_add_arq_rb(ad, arq);
-       }
+       if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
+               if (time_before(rq_fifo_time(next), rq_fifo_time(req))) {
+                       struct io_context *rioc = RQ_IOC(req);
+                       struct io_context *nioc = RQ_IOC(next);
 
-       /*
-        * if anext expires before arq, assign its expire time to arq
-        * and move into anext position (anext will be deleted) in fifo
-        */
-       if (!list_empty(&arq->fifo) && !list_empty(&anext->fifo)) {
-               if (time_before(anext->expires, arq->expires)) {
-                       list_move(&arq->fifo, &anext->fifo);
-                       arq->expires = anext->expires;
+                       list_move(&req->queuelist, &next->queuelist);
+                       rq_set_fifo_time(req, rq_fifo_time(next));
                        /*
                         * Don't copy here but swap, because when anext is
                         * removed below, it must contain the unused context
                         */
-                       swap_io_context(&arq->io_context, &anext->io_context);
+                       swap_io_context(&rioc, &nioc);
                }
        }
 
@@ -1533,9 +1260,9 @@ static void as_merged_requests(request_queue_t *q, struct request *req,
         * kill knowledge of next, this one is a goner
         */
        as_remove_queued_request(q, next);
-       as_put_io_context(anext);
+       as_put_io_context(next);
 
-       anext->state = AS_RQ_MERGED;
+       RQ_SET_STATE(next, AS_RQ_MERGED);
 }
 
 /*
@@ -1553,61 +1280,18 @@ static void as_work_handler(void *data)
        unsigned long flags;
 
        spin_lock_irqsave(q->queue_lock, flags);
-       if (!as_queue_empty(q))
-               q->request_fn(q);
+       blk_start_queueing(q);
        spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
-static void as_put_request(request_queue_t *q, struct request *rq)
-{
-       struct as_data *ad = q->elevator->elevator_data;
-       struct as_rq *arq = RQ_DATA(rq);
-
-       if (!arq) {
-               WARN_ON(1);
-               return;
-       }
-
-       if (unlikely(arq->state != AS_RQ_POSTSCHED &&
-                    arq->state != AS_RQ_PRESCHED &&
-                    arq->state != AS_RQ_MERGED)) {
-               printk("arq->state %d\n", arq->state);
-               WARN_ON(1);
-       }
-
-       mempool_free(arq, ad->arq_pool);
-       rq->elevator_private = NULL;
-}
-
-static int as_set_request(request_queue_t *q, struct request *rq,
-                         struct bio *bio, gfp_t gfp_mask)
-{
-       struct as_data *ad = q->elevator->elevator_data;
-       struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask);
-
-       if (arq) {
-               memset(arq, 0, sizeof(*arq));
-               RB_CLEAR_NODE(&arq->rb_node);
-               arq->request = rq;
-               arq->state = AS_RQ_PRESCHED;
-               arq->io_context = NULL;
-               INIT_HLIST_NODE(&arq->hash);
-               INIT_LIST_HEAD(&arq->fifo);
-               rq->elevator_private = arq;
-               return 0;
-       }
-
-       return 1;
-}
-
-static int as_may_queue(request_queue_t *q, int rw, struct bio *bio)
+static int as_may_queue(request_queue_t *q, int rw)
 {
        int ret = ELV_MQUEUE_MAY;
        struct as_data *ad = q->elevator->elevator_data;
        struct io_context *ioc;
        if (ad->antic_status == ANTIC_WAIT_REQ ||
                        ad->antic_status == ANTIC_WAIT_NEXT) {
-               ioc = as_get_io_context();
+               ioc = as_get_io_context(q->node);
                if (ad->io_context == ioc)
                        ret = ELV_MQUEUE_MUST;
                put_io_context(ioc);
@@ -1626,23 +1310,16 @@ static void as_exit_queue(elevator_t *e)
        BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC]));
        BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC]));
 
-       mempool_destroy(ad->arq_pool);
        put_io_context(ad->io_context);
-       kfree(ad->hash);
        kfree(ad);
 }
 
 /*
- * initialize elevator private data (as_data), and alloc a arq for
- * each request on the free lists
+ * initialize elevator private data (as_data).
  */
 static void *as_init_queue(request_queue_t *q, elevator_t *e)
 {
        struct as_data *ad;
-       int i;
-
-       if (!arq_pool)
-               return NULL;
 
        ad = kmalloc_node(sizeof(*ad), GFP_KERNEL, q->node);
        if (!ad)
@@ -1651,30 +1328,12 @@ static void *as_init_queue(request_queue_t *q, elevator_t *e)
 
        ad->q = q; /* Identify what queue the data belongs to */
 
-       ad->hash = kmalloc_node(sizeof(struct hlist_head)*AS_HASH_ENTRIES,
-                               GFP_KERNEL, q->node);
-       if (!ad->hash) {
-               kfree(ad);
-               return NULL;
-       }
-
-       ad->arq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
-                               mempool_free_slab, arq_pool, q->node);
-       if (!ad->arq_pool) {
-               kfree(ad->hash);
-               kfree(ad);
-               return NULL;
-       }
-
        /* anticipatory scheduling helpers */
        ad->antic_timer.function = as_antic_timeout;
        ad->antic_timer.data = (unsigned long)q;
        init_timer(&ad->antic_timer);
        INIT_WORK(&ad->antic_work, as_work_handler, q);
 
-       for (i = 0; i < AS_HASH_ENTRIES; i++)
-               INIT_HLIST_HEAD(&ad->hash[i]);
-
        INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]);
        INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
        ad->sort_list[REQ_SYNC] = RB_ROOT;
@@ -1787,10 +1446,8 @@ static struct elevator_type iosched_as = {
                .elevator_deactivate_req_fn =   as_deactivate_request,
                .elevator_queue_empty_fn =      as_queue_empty,
                .elevator_completed_req_fn =    as_completed_request,
-               .elevator_former_req_fn =       as_former_request,
-               .elevator_latter_req_fn =       as_latter_request,
-               .elevator_set_req_fn =          as_set_request,
-               .elevator_put_req_fn =          as_put_request,
+               .elevator_former_req_fn =       elv_rb_former_request,
+               .elevator_latter_req_fn =       elv_rb_latter_request,
                .elevator_may_queue_fn =        as_may_queue,
                .elevator_init_fn =             as_init_queue,
                .elevator_exit_fn =             as_exit_queue,
@@ -1806,11 +1463,6 @@ static int __init as_init(void)
 {
        int ret;
 
-       arq_pool = kmem_cache_create("as_arq", sizeof(struct as_rq),
-                                    0, 0, NULL, NULL);
-       if (!arq_pool)
-               return -ENOMEM;
-
        ret = elv_register(&iosched_as);
        if (!ret) {
                /*
@@ -1822,7 +1474,6 @@ static int __init as_init(void)
                return 0;
        }
 
-       kmem_cache_destroy(arq_pool);
        return ret;
 }
 
@@ -1833,10 +1484,9 @@ static void __exit as_exit(void)
        ioc_gone = &all_gone;
        /* ioc_gone's update must be visible before reading ioc_count */
        smp_wmb();
-       if (atomic_read(&ioc_count))
+       if (elv_ioc_count_read(ioc_count))
                wait_for_completion(ioc_gone);
        synchronize_rcu();
-       kmem_cache_destroy(arq_pool);
 }
 
 module_init(as_init);
index 8ff33441d8a214259bd2c718ddb4735874dd2fc5..135593c8e45bdee40f97c3e690d47c34e769370a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2006 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -69,7 +69,7 @@ static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK
 /*
  * Bio action bits of interest
  */
-static u32 bio_act[5] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD) };
+static u32 bio_act[9] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD), 0, 0, 0, BLK_TC_ACT(BLK_TC_META) };
 
 /*
  * More could be added as needed, taking care to increment the decrementer
@@ -81,6 +81,8 @@ static u32 bio_act[5] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_AC
        (((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1))
 #define trace_ahead_bit(rw)    \
        (((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD))
+#define trace_meta_bit(rw)     \
+       (((rw) & (1 << BIO_RW_META)) >> (BIO_RW_META - 3))
 
 /*
  * The worker for the various blk_add_trace*() types. Fills out a
@@ -103,6 +105,7 @@ void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
        what |= bio_act[trace_barrier_bit(rw)];
        what |= bio_act[trace_sync_bit(rw)];
        what |= bio_act[trace_ahead_bit(rw)];
+       what |= bio_act[trace_meta_bit(rw)];
 
        pid = tsk->pid;
        if (unlikely(act_log_check(bt, what, sector, pid)))
@@ -473,6 +476,9 @@ static void blk_check_time(unsigned long long *t)
        *t -= (a + b) / 2;
 }
 
+/*
+ * calibrate our inter-CPU timings
+ */
 static void blk_trace_check_cpu_time(void *data)
 {
        unsigned long long *t;
@@ -490,20 +496,6 @@ static void blk_trace_check_cpu_time(void *data)
        put_cpu();
 }
 
-/*
- * Call blk_trace_check_cpu_time() on each CPU to calibrate our inter-CPU
- * timings
- */
-static void blk_trace_calibrate_offsets(void)
-{
-       unsigned long flags;
-
-       smp_call_function(blk_trace_check_cpu_time, NULL, 1, 1);
-       local_irq_save(flags);
-       blk_trace_check_cpu_time(NULL);
-       local_irq_restore(flags);
-}
-
 static void blk_trace_set_ht_offsets(void)
 {
 #if defined(CONFIG_SCHED_SMT)
@@ -532,7 +524,7 @@ static void blk_trace_set_ht_offsets(void)
 static __init int blk_trace_init(void)
 {
        mutex_init(&blk_tree_mutex);
-       blk_trace_calibrate_offsets();
+       on_each_cpu(blk_trace_check_cpu_time, NULL, 1, 1);
        blk_trace_set_ht_offsets();
 
        return 0;
index 3a3aee08ec5f4850f0a1877b3bdf86d9b03d9625..99116e2a310aaa3506f724a93fcb1b1f06b4d2fc 100644 (file)
@@ -4,7 +4,7 @@
  *  Based on ideas from a previously unfinished io
  *  scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
  *
- *  Copyright (C) 2003 Jens Axboe <axboe@suse.de>
+ *  Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
  */
 #include <linux/module.h>
 #include <linux/blkdev.h>
@@ -17,7 +17,6 @@
  * tunables
  */
 static const int cfq_quantum = 4;              /* max queue in one round of service */
-static const int cfq_queued = 8;               /* minimum rq allocate limit per-queue*/
 static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
 static const int cfq_back_max = 16 * 1024;     /* maximum backwards seek, in KiB */
 static const int cfq_back_penalty = 2;         /* penalty of a backwards seek */
@@ -32,8 +31,6 @@ static int cfq_slice_idle = HZ / 125;
 
 #define CFQ_KEY_ASYNC          (0)
 
-static DEFINE_SPINLOCK(cfq_exit_lock);
-
 /*
  * for the hash of cfqq inside the cfqd
  */
@@ -41,37 +38,19 @@ static DEFINE_SPINLOCK(cfq_exit_lock);
 #define CFQ_QHASH_ENTRIES      (1 << CFQ_QHASH_SHIFT)
 #define list_entry_qhash(entry)        hlist_entry((entry), struct cfq_queue, cfq_hash)
 
-/*
- * for the hash of crq inside the cfqq
- */
-#define CFQ_MHASH_SHIFT                6
-#define CFQ_MHASH_BLOCK(sec)   ((sec) >> 3)
-#define CFQ_MHASH_ENTRIES      (1 << CFQ_MHASH_SHIFT)
-#define CFQ_MHASH_FN(sec)      hash_long(CFQ_MHASH_BLOCK(sec), CFQ_MHASH_SHIFT)
-#define rq_hash_key(rq)                ((rq)->sector + (rq)->nr_sectors)
-#define list_entry_hash(ptr)   hlist_entry((ptr), struct cfq_rq, hash)
-
 #define list_entry_cfqq(ptr)   list_entry((ptr), struct cfq_queue, cfq_list)
-#define list_entry_fifo(ptr)   list_entry((ptr), struct request, queuelist)
 
-#define RQ_DATA(rq)            (rq)->elevator_private
+#define RQ_CIC(rq)             ((struct cfq_io_context*)(rq)->elevator_private)
+#define RQ_CFQQ(rq)            ((rq)->elevator_private2)
 
-/*
- * rb-tree defines
- */
-#define rb_entry_crq(node)     rb_entry((node), struct cfq_rq, rb_node)
-#define rq_rb_key(rq)          (rq)->sector
-
-static kmem_cache_t *crq_pool;
 static kmem_cache_t *cfq_pool;
 static kmem_cache_t *cfq_ioc_pool;
 
-static atomic_t ioc_count = ATOMIC_INIT(0);
+static DEFINE_PER_CPU(unsigned long, ioc_count);
 static struct completion *ioc_gone;
 
 #define CFQ_PRIO_LISTS         IOPRIO_BE_NR
 #define cfq_class_idle(cfqq)   ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
-#define cfq_class_be(cfqq)     ((cfqq)->ioprio_class == IOPRIO_CLASS_BE)
 #define cfq_class_rt(cfqq)     ((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
 
 #define ASYNC                  (0)
@@ -102,29 +81,14 @@ struct cfq_data {
        struct list_head idle_rr;
        unsigned int busy_queues;
 
-       /*
-        * non-ordered list of empty cfqq's
-        */
-       struct list_head empty_list;
-
        /*
         * cfqq lookup hash
         */
        struct hlist_head *cfq_hash;
 
-       /*
-        * global crq hash for all queues
-        */
-       struct hlist_head *crq_hash;
-
-       mempool_t *crq_pool;
-
        int rq_in_driver;
        int hw_tag;
 
-       /*
-        * schedule slice state info
-        */
        /*
         * idle window management
         */
@@ -141,13 +105,10 @@ struct cfq_data {
        sector_t last_sector;
        unsigned long last_end_request;
 
-       unsigned int rq_starved;
-
        /*
         * tunables, see top of file
         */
        unsigned int cfq_quantum;
-       unsigned int cfq_queued;
        unsigned int cfq_fifo_expire[2];
        unsigned int cfq_back_penalty;
        unsigned int cfq_back_max;
@@ -170,23 +131,24 @@ struct cfq_queue {
        struct hlist_node cfq_hash;
        /* hash key */
        unsigned int key;
-       /* on either rr or empty list of cfqd */
+       /* member of the rr/busy/cur/idle cfqd list */
        struct list_head cfq_list;
        /* sorted list of pending requests */
        struct rb_root sort_list;
        /* if fifo isn't expired, next request to serve */
-       struct cfq_rq *next_crq;
+       struct request *next_rq;
        /* requests queued in sort_list */
        int queued[2];
        /* currently allocated requests */
        int allocated[2];
+       /* pending metadata requests */
+       int meta_pending;
        /* fifo list of requests in sort_list */
        struct list_head fifo;
 
        unsigned long slice_start;
        unsigned long slice_end;
        unsigned long slice_left;
-       unsigned long service_last;
 
        /* number of requests that are on the dispatch list */
        int on_dispatch[2];
@@ -199,18 +161,6 @@ struct cfq_queue {
        unsigned int flags;
 };
 
-struct cfq_rq {
-       struct rb_node rb_node;
-       sector_t rb_key;
-       struct request *request;
-       struct hlist_node hash;
-
-       struct cfq_queue *cfq_queue;
-       struct cfq_io_context *io_context;
-
-       unsigned int crq_flags;
-};
-
 enum cfqq_state_flags {
        CFQ_CFQQ_FLAG_on_rr = 0,
        CFQ_CFQQ_FLAG_wait_request,
@@ -220,6 +170,7 @@ enum cfqq_state_flags {
        CFQ_CFQQ_FLAG_fifo_expire,
        CFQ_CFQQ_FLAG_idle_window,
        CFQ_CFQQ_FLAG_prio_changed,
+       CFQ_CFQQ_FLAG_queue_new,
 };
 
 #define CFQ_CFQQ_FNS(name)                                             \
@@ -244,69 +195,13 @@ CFQ_CFQQ_FNS(must_dispatch);
 CFQ_CFQQ_FNS(fifo_expire);
 CFQ_CFQQ_FNS(idle_window);
 CFQ_CFQQ_FNS(prio_changed);
+CFQ_CFQQ_FNS(queue_new);
 #undef CFQ_CFQQ_FNS
 
-enum cfq_rq_state_flags {
-       CFQ_CRQ_FLAG_is_sync = 0,
-};
-
-#define CFQ_CRQ_FNS(name)                                              \
-static inline void cfq_mark_crq_##name(struct cfq_rq *crq)             \
-{                                                                      \
-       crq->crq_flags |= (1 << CFQ_CRQ_FLAG_##name);                   \
-}                                                                      \
-static inline void cfq_clear_crq_##name(struct cfq_rq *crq)            \
-{                                                                      \
-       crq->crq_flags &= ~(1 << CFQ_CRQ_FLAG_##name);                  \
-}                                                                      \
-static inline int cfq_crq_##name(const struct cfq_rq *crq)             \
-{                                                                      \
-       return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0;      \
-}
-
-CFQ_CRQ_FNS(is_sync);
-#undef CFQ_CRQ_FNS
-
 static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short);
-static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *);
+static void cfq_dispatch_insert(request_queue_t *, struct request *);
 static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask);
 
-/*
- * lots of deadline iosched dupes, can be abstracted later...
- */
-static inline void cfq_del_crq_hash(struct cfq_rq *crq)
-{
-       hlist_del_init(&crq->hash);
-}
-
-static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
-{
-       const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request));
-
-       hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]);
-}
-
-static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
-{
-       struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)];
-       struct hlist_node *entry, *next;
-
-       hlist_for_each_safe(entry, next, hash_list) {
-               struct cfq_rq *crq = list_entry_hash(entry);
-               struct request *__rq = crq->request;
-
-               if (!rq_mergeable(__rq)) {
-                       cfq_del_crq_hash(crq);
-                       continue;
-               }
-
-               if (rq_hash_key(__rq) == offset)
-                       return __rq;
-       }
-
-       return NULL;
-}
-
 /*
  * scheduler run of queue, if there are requests pending and no one in the
  * driver that will restart queueing
@@ -333,12 +228,12 @@ static inline pid_t cfq_queue_pid(struct task_struct *task, int rw)
 }
 
 /*
- * Lifted from AS - choose which of crq1 and crq2 that is best served now.
+ * Lifted from AS - choose which of rq1 and rq2 that is best served now.
  * We choose the request that is closest to the head right now. Distance
  * behind the head is penalized and only allowed to a certain extent.
  */
-static struct cfq_rq *
-cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
+static struct request *
+cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2)
 {
        sector_t last, s1, s2, d1 = 0, d2 = 0;
        unsigned long back_max;
@@ -346,18 +241,22 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
 #define CFQ_RQ2_WRAP   0x02 /* request 2 wraps */
        unsigned wrap = 0; /* bit mask: requests behind the disk head? */
 
-       if (crq1 == NULL || crq1 == crq2)
-               return crq2;
-       if (crq2 == NULL)
-               return crq1;
+       if (rq1 == NULL || rq1 == rq2)
+               return rq2;
+       if (rq2 == NULL)
+               return rq1;
 
-       if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2))
-               return crq1;
-       else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1))
-               return crq2;
+       if (rq_is_sync(rq1) && !rq_is_sync(rq2))
+               return rq1;
+       else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
+               return rq2;
+       if (rq_is_meta(rq1) && !rq_is_meta(rq2))
+               return rq1;
+       else if (rq_is_meta(rq2) && !rq_is_meta(rq1))
+               return rq2;
 
-       s1 = crq1->request->sector;
-       s2 = crq2->request->sector;
+       s1 = rq1->sector;
+       s2 = rq2->sector;
 
        last = cfqd->last_sector;
 
@@ -392,23 +291,23 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
         * check two variables for all permutations: --> faster!
         */
        switch (wrap) {
-       case 0: /* common case for CFQ: crq1 and crq2 not wrapped */
+       case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
                if (d1 < d2)
-                       return crq1;
+                       return rq1;
                else if (d2 < d1)
-                       return crq2;
+                       return rq2;
                else {
                        if (s1 >= s2)
-                               return crq1;
+                               return rq1;
                        else
-                               return crq2;
+                               return rq2;
                }
 
        case CFQ_RQ2_WRAP:
-               return crq1;
+               return rq1;
        case CFQ_RQ1_WRAP:
-               return crq2;
-       case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both crqs wrapped */
+               return rq2;
+       case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both rqs wrapped */
        default:
                /*
                 * Since both rqs are wrapped,
@@ -417,50 +316,43 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
                 * since back seek takes more time than forward.
                 */
                if (s1 <= s2)
-                       return crq1;
+                       return rq1;
                else
-                       return crq2;
+                       return rq2;
        }
 }
 
 /*
  * would be nice to take fifo expire time into account as well
  */
-static struct cfq_rq *
-cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
-                 struct cfq_rq *last)
+static struct request *
+cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+                 struct request *last)
 {
-       struct cfq_rq *crq_next = NULL, *crq_prev = NULL;
-       struct rb_node *rbnext, *rbprev;
-
-       if (!(rbnext = rb_next(&last->rb_node))) {
-               rbnext = rb_first(&cfqq->sort_list);
-               if (rbnext == &last->rb_node)
-                       rbnext = NULL;
-       }
+       struct rb_node *rbnext = rb_next(&last->rb_node);
+       struct rb_node *rbprev = rb_prev(&last->rb_node);
+       struct request *next = NULL, *prev = NULL;
 
-       rbprev = rb_prev(&last->rb_node);
+       BUG_ON(RB_EMPTY_NODE(&last->rb_node));
 
        if (rbprev)
-               crq_prev = rb_entry_crq(rbprev);
-       if (rbnext)
-               crq_next = rb_entry_crq(rbnext);
-
-       return cfq_choose_req(cfqd, crq_next, crq_prev);
-}
+               prev = rb_entry_rq(rbprev);
 
-static void cfq_update_next_crq(struct cfq_rq *crq)
-{
-       struct cfq_queue *cfqq = crq->cfq_queue;
+       if (rbnext)
+               next = rb_entry_rq(rbnext);
+       else {
+               rbnext = rb_first(&cfqq->sort_list);
+               if (rbnext && rbnext != &last->rb_node)
+                       next = rb_entry_rq(rbnext);
+       }
 
-       if (cfqq->next_crq == crq)
-               cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq);
+       return cfq_choose_req(cfqd, next, prev);
 }
 
 static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
 {
        struct cfq_data *cfqd = cfqq->cfqd;
-       struct list_head *list, *entry;
+       struct list_head *list;
 
        BUG_ON(!cfq_cfqq_on_rr(cfqq));
 
@@ -485,31 +377,26 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
        }
 
        /*
-        * if queue was preempted, just add to front to be fair. busy_rr
-        * isn't sorted, but insert at the back for fairness.
+        * If this queue was preempted or is new (never been serviced), let
+        * it be added first for fairness but beind other new queues.
+        * Otherwise, just add to the back  of the list.
         */
-       if (preempted || list == &cfqd->busy_rr) {
-               if (preempted)
-                       list = list->prev;
+       if (preempted || cfq_cfqq_queue_new(cfqq)) {
+               struct list_head *n = list;
+               struct cfq_queue *__cfqq;
 
-               list_add_tail(&cfqq->cfq_list, list);
-               return;
-       }
+               while (n->next != list) {
+                       __cfqq = list_entry_cfqq(n->next);
+                       if (!cfq_cfqq_queue_new(__cfqq))
+                               break;
 
-       /*
-        * sort by when queue was last serviced
-        */
-       entry = list;
-       while ((entry = entry->prev) != list) {
-               struct cfq_queue *__cfqq = list_entry_cfqq(entry);
+                       n = n->next;
+               }
 
-               if (!__cfqq->service_last)
-                       break;
-               if (time_before(__cfqq->service_last, cfqq->service_last))
-                       break;
+               list = n;
        }
 
-       list_add(&cfqq->cfq_list, entry);
+       list_add_tail(&cfqq->cfq_list, list);
 }
 
 /*
@@ -531,7 +418,7 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
        BUG_ON(!cfq_cfqq_on_rr(cfqq));
        cfq_clear_cfqq_on_rr(cfqq);
-       list_move(&cfqq->cfq_list, &cfqd->empty_list);
+       list_del_init(&cfqq->cfq_list);
 
        BUG_ON(!cfqd->busy_queues);
        cfqd->busy_queues--;
@@ -540,81 +427,43 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 /*
  * rb tree support functions
  */
-static inline void cfq_del_crq_rb(struct cfq_rq *crq)
+static inline void cfq_del_rq_rb(struct request *rq)
 {
-       struct cfq_queue *cfqq = crq->cfq_queue;
+       struct cfq_queue *cfqq = RQ_CFQQ(rq);
        struct cfq_data *cfqd = cfqq->cfqd;
-       const int sync = cfq_crq_is_sync(crq);
+       const int sync = rq_is_sync(rq);
 
        BUG_ON(!cfqq->queued[sync]);
        cfqq->queued[sync]--;
 
-       cfq_update_next_crq(crq);
-
-       rb_erase(&crq->rb_node, &cfqq->sort_list);
+       elv_rb_del(&cfqq->sort_list, rq);
 
        if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
                cfq_del_cfqq_rr(cfqd, cfqq);
 }
 
-static struct cfq_rq *
-__cfq_add_crq_rb(struct cfq_rq *crq)
+static void cfq_add_rq_rb(struct request *rq)
 {
-       struct rb_node **p = &crq->cfq_queue->sort_list.rb_node;
-       struct rb_node *parent = NULL;
-       struct cfq_rq *__crq;
-
-       while (*p) {
-               parent = *p;
-               __crq = rb_entry_crq(parent);
-
-               if (crq->rb_key < __crq->rb_key)
-                       p = &(*p)->rb_left;
-               else if (crq->rb_key > __crq->rb_key)
-                       p = &(*p)->rb_right;
-               else
-                       return __crq;
-       }
-
-       rb_link_node(&crq->rb_node, parent, p);
-       return NULL;
-}
-
-static void cfq_add_crq_rb(struct cfq_rq *crq)
-{
-       struct cfq_queue *cfqq = crq->cfq_queue;
+       struct cfq_queue *cfqq = RQ_CFQQ(rq);
        struct cfq_data *cfqd = cfqq->cfqd;
-       struct request *rq = crq->request;
-       struct cfq_rq *__alias;
+       struct request *__alias;
 
-       crq->rb_key = rq_rb_key(rq);
-       cfqq->queued[cfq_crq_is_sync(crq)]++;
+       cfqq->queued[rq_is_sync(rq)]++;
 
        /*
         * looks a little odd, but the first insert might return an alias.
         * if that happens, put the alias on the dispatch list
         */
-       while ((__alias = __cfq_add_crq_rb(crq)) != NULL)
+       while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL)
                cfq_dispatch_insert(cfqd->queue, __alias);
-
-       rb_insert_color(&crq->rb_node, &cfqq->sort_list);
-
-       if (!cfq_cfqq_on_rr(cfqq))
-               cfq_add_cfqq_rr(cfqd, cfqq);
-
-       /*
-        * check if this request is a better next-serve candidate
-        */
-       cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq);
 }
 
 static inline void
-cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
+cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
 {
-       rb_erase(&crq->rb_node, &cfqq->sort_list);
-       cfqq->queued[cfq_crq_is_sync(crq)]--;
-
-       cfq_add_crq_rb(crq);
+       elv_rb_del(&cfqq->sort_list, rq);
+       cfqq->queued[rq_is_sync(rq)]--;
+       cfq_add_rq_rb(rq);
 }
 
 static struct request *
@@ -623,27 +472,14 @@ cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
        struct task_struct *tsk = current;
        pid_t key = cfq_queue_pid(tsk, bio_data_dir(bio));
        struct cfq_queue *cfqq;
-       struct rb_node *n;
-       sector_t sector;
 
        cfqq = cfq_find_cfq_hash(cfqd, key, tsk->ioprio);
-       if (!cfqq)
-               goto out;
-
-       sector = bio->bi_sector + bio_sectors(bio);
-       n = cfqq->sort_list.rb_node;
-       while (n) {
-               struct cfq_rq *crq = rb_entry_crq(n);
+       if (cfqq) {
+               sector_t sector = bio->bi_sector + bio_sectors(bio);
 
-               if (sector < crq->rb_key)
-                       n = n->rb_left;
-               else if (sector > crq->rb_key)
-                       n = n->rb_right;
-               else
-                       return crq->request;
+               return elv_rb_find(&cfqq->sort_list, sector);
        }
 
-out:
        return NULL;
 }
 
@@ -673,11 +509,18 @@ static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
 
 static void cfq_remove_request(struct request *rq)
 {
-       struct cfq_rq *crq = RQ_DATA(rq);
+       struct cfq_queue *cfqq = RQ_CFQQ(rq);
+
+       if (cfqq->next_rq == rq)
+               cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq);
 
        list_del_init(&rq->queuelist);
-       cfq_del_crq_rb(crq);
-       cfq_del_crq_hash(crq);
+       cfq_del_rq_rb(rq);
+
+       if (rq_is_meta(rq)) {
+               WARN_ON(!cfqq->meta_pending);
+               cfqq->meta_pending--;
+       }
 }
 
 static int
@@ -685,39 +528,23 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
 {
        struct cfq_data *cfqd = q->elevator->elevator_data;
        struct request *__rq;
-       int ret;
-
-       __rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
-       if (__rq && elv_rq_merge_ok(__rq, bio)) {
-               ret = ELEVATOR_BACK_MERGE;
-               goto out;
-       }
 
        __rq = cfq_find_rq_fmerge(cfqd, bio);
        if (__rq && elv_rq_merge_ok(__rq, bio)) {
-               ret = ELEVATOR_FRONT_MERGE;
-               goto out;
+               *req = __rq;
+               return ELEVATOR_FRONT_MERGE;
        }
 
        return ELEVATOR_NO_MERGE;
-out:
-       *req = __rq;
-       return ret;
 }
 
-static void cfq_merged_request(request_queue_t *q, struct request *req)
+static void cfq_merged_request(request_queue_t *q, struct request *req,
+                              int type)
 {
-       struct cfq_data *cfqd = q->elevator->elevator_data;
-       struct cfq_rq *crq = RQ_DATA(req);
-
-       cfq_del_crq_hash(crq);
-       cfq_add_crq_hash(cfqd, crq);
-
-       if (rq_rb_key(req) != crq->rb_key) {
-               struct cfq_queue *cfqq = crq->cfq_queue;
+       if (type == ELEVATOR_FRONT_MERGE) {
+               struct cfq_queue *cfqq = RQ_CFQQ(req);
 
-               cfq_update_next_crq(crq);
-               cfq_reposition_crq_rb(cfqq, crq);
+               cfq_reposition_rq_rb(cfqq, req);
        }
 }
 
@@ -725,8 +552,6 @@ static void
 cfq_merged_requests(request_queue_t *q, struct request *rq,
                    struct request *next)
 {
-       cfq_merged_request(q, rq);
-
        /*
         * reposition in fifo if next is older than rq
         */
@@ -768,13 +593,12 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        if (cfq_cfqq_wait_request(cfqq))
                del_timer(&cfqd->idle_slice_timer);
 
-       if (!preempted && !cfq_cfqq_dispatched(cfqq)) {
-               cfqq->service_last = now;
+       if (!preempted && !cfq_cfqq_dispatched(cfqq))
                cfq_schedule_dispatch(cfqd);
-       }
 
        cfq_clear_cfqq_must_dispatch(cfqq);
        cfq_clear_cfqq_wait_request(cfqq);
+       cfq_clear_cfqq_queue_new(cfqq);
 
        /*
         * store what was left of this slice, if the queue idled out
@@ -868,26 +692,25 @@ static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
 {
        struct cfq_queue *cfqq = NULL;
 
-       /*
-        * if current list is non-empty, grab first entry. if it is empty,
-        * get next prio level and grab first entry then if any are spliced
-        */
-       if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1)
+       if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1) {
+               /*
+                * if current list is non-empty, grab first entry. if it is
+                * empty, get next prio level and grab first entry then if any
+                * are spliced
+                */
                cfqq = list_entry_cfqq(cfqd->cur_rr.next);
-
-       /*
-        * If no new queues are available, check if the busy list has some
-        * before falling back to idle io.
-        */
-       if (!cfqq && !list_empty(&cfqd->busy_rr))
+       } else if (!list_empty(&cfqd->busy_rr)) {
+               /*
+                * If no new queues are available, check if the busy list has
+                * some before falling back to idle io.
+                */
                cfqq = list_entry_cfqq(cfqd->busy_rr.next);
-
-       /*
-        * if we have idle queues and no rt or be queues had pending
-        * requests, either allow immediate service if the grace period
-        * has passed or arm the idle grace timer
-        */
-       if (!cfqq && !list_empty(&cfqd->idle_rr)) {
+       } else if (!list_empty(&cfqd->idle_rr)) {
+               /*
+                * if we have idle queues and no rt or be queues had pending
+                * requests, either allow immediate service if the grace period
+                * has passed or arm the idle grace timer
+                */
                unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE;
 
                if (time_after_eq(jiffies, end))
@@ -942,16 +765,14 @@ static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
        return 1;
 }
 
-static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq)
+static void cfq_dispatch_insert(request_queue_t *q, struct request *rq)
 {
        struct cfq_data *cfqd = q->elevator->elevator_data;
-       struct cfq_queue *cfqq = crq->cfq_queue;
-       struct request *rq;
+       struct cfq_queue *cfqq = RQ_CFQQ(rq);
 
-       cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq);
-       cfq_remove_request(crq->request);
-       cfqq->on_dispatch[cfq_crq_is_sync(crq)]++;
-       elv_dispatch_sort(q, crq->request);
+       cfq_remove_request(rq);
+       cfqq->on_dispatch[rq_is_sync(rq)]++;
+       elv_dispatch_sort(q, rq);
 
        rq = list_entry(q->queue_head.prev, struct request, queuelist);
        cfqd->last_sector = rq->sector + rq->nr_sectors;
@@ -960,24 +781,23 @@ static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq)
 /*
  * return expired entry, or NULL to just start from scratch in rbtree
  */
-static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq)
+static inline struct request *cfq_check_fifo(struct cfq_queue *cfqq)
 {
        struct cfq_data *cfqd = cfqq->cfqd;
        struct request *rq;
-       struct cfq_rq *crq;
+       int fifo;
 
        if (cfq_cfqq_fifo_expire(cfqq))
                return NULL;
+       if (list_empty(&cfqq->fifo))
+               return NULL;
 
-       if (!list_empty(&cfqq->fifo)) {
-               int fifo = cfq_cfqq_class_sync(cfqq);
+       fifo = cfq_cfqq_class_sync(cfqq);
+       rq = rq_entry_fifo(cfqq->fifo.next);
 
-               crq = RQ_DATA(list_entry_fifo(cfqq->fifo.next));
-               rq = crq->request;
-               if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) {
-                       cfq_mark_cfqq_fifo_expire(cfqq);
-                       return crq;
-               }
+       if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) {
+               cfq_mark_cfqq_fifo_expire(cfqq);
+               return rq;
        }
 
        return NULL;
@@ -1063,25 +883,25 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));
 
        do {
-               struct cfq_rq *crq;
+               struct request *rq;
 
                /*
                 * follow expired path, else get first next available
                 */
-               if ((crq = cfq_check_fifo(cfqq)) == NULL)
-                       crq = cfqq->next_crq;
+               if ((rq = cfq_check_fifo(cfqq)) == NULL)
+                       rq = cfqq->next_rq;
 
                /*
                 * finally, insert request into driver dispatch list
                 */
-               cfq_dispatch_insert(cfqd->queue, crq);
+               cfq_dispatch_insert(cfqd->queue, rq);
 
                cfqd->dispatch_slice++;
                dispatched++;
 
                if (!cfqd->active_cic) {
-                       atomic_inc(&crq->io_context->ioc->refcount);
-                       cfqd->active_cic = crq->io_context;
+                       atomic_inc(&RQ_CIC(rq)->ioc->refcount);
+                       cfqd->active_cic = RQ_CIC(rq);
                }
 
                if (RB_EMPTY_ROOT(&cfqq->sort_list))
@@ -1112,13 +932,12 @@ static int
 cfq_forced_dispatch_cfqqs(struct list_head *list)
 {
        struct cfq_queue *cfqq, *next;
-       struct cfq_rq *crq;
        int dispatched;
 
        dispatched = 0;
        list_for_each_entry_safe(cfqq, next, list, cfq_list) {
-               while ((crq = cfqq->next_crq)) {
-                       cfq_dispatch_insert(cfqq->cfqd->queue, crq);
+               while (cfqq->next_rq) {
+                       cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq);
                        dispatched++;
                }
                BUG_ON(!list_empty(&cfqq->fifo));
@@ -1194,8 +1013,8 @@ cfq_dispatch_requests(request_queue_t *q, int force)
 }
 
 /*
- * task holds one reference to the queue, dropped when task exits. each crq
- * in-flight on this queue also holds a reference, dropped when crq is freed.
+ * task holds one reference to the queue, dropped when task exits. each rq
+ * in-flight on this queue also holds a reference, dropped when rq is freed.
  *
  * queue lock must be held here.
  */
@@ -1223,7 +1042,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
        kmem_cache_free(cfq_pool, cfqq);
 }
 
-static inline struct cfq_queue *
+static struct cfq_queue *
 __cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio,
                    const int hashval)
 {
@@ -1260,62 +1079,63 @@ static void cfq_free_io_context(struct io_context *ioc)
                freed++;
        }
 
-       if (atomic_sub_and_test(freed, &ioc_count) && ioc_gone)
+       elv_ioc_count_mod(ioc_count, -freed);
+
+       if (ioc_gone && !elv_ioc_count_read(ioc_count))
                complete(ioc_gone);
 }
 
-static void cfq_trim(struct io_context *ioc)
+static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
-       ioc->set_ioprio = NULL;
-       cfq_free_io_context(ioc);
+       if (unlikely(cfqq == cfqd->active_queue))
+               __cfq_slice_expired(cfqd, cfqq, 0);
+
+       cfq_put_queue(cfqq);
 }
 
-/*
- * Called with interrupts disabled
- */
-static void cfq_exit_single_io_context(struct cfq_io_context *cic)
+static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
+                                        struct cfq_io_context *cic)
 {
-       struct cfq_data *cfqd = cic->key;
-       request_queue_t *q;
-
-       if (!cfqd)
-               return;
-
-       q = cfqd->queue;
-
-       WARN_ON(!irqs_disabled());
-
-       spin_lock(q->queue_lock);
+       list_del_init(&cic->queue_list);
+       smp_wmb();
+       cic->key = NULL;
 
        if (cic->cfqq[ASYNC]) {
-               if (unlikely(cic->cfqq[ASYNC] == cfqd->active_queue))
-                       __cfq_slice_expired(cfqd, cic->cfqq[ASYNC], 0);
-               cfq_put_queue(cic->cfqq[ASYNC]);
+               cfq_exit_cfqq(cfqd, cic->cfqq[ASYNC]);
                cic->cfqq[ASYNC] = NULL;
        }
 
        if (cic->cfqq[SYNC]) {
-               if (unlikely(cic->cfqq[SYNC] == cfqd->active_queue))
-                       __cfq_slice_expired(cfqd, cic->cfqq[SYNC], 0);
-               cfq_put_queue(cic->cfqq[SYNC]);
+               cfq_exit_cfqq(cfqd, cic->cfqq[SYNC]);
                cic->cfqq[SYNC] = NULL;
        }
+}
 
-       cic->key = NULL;
-       list_del_init(&cic->queue_list);
-       spin_unlock(q->queue_lock);
+
+/*
+ * Called with interrupts disabled
+ */
+static void cfq_exit_single_io_context(struct cfq_io_context *cic)
+{
+       struct cfq_data *cfqd = cic->key;
+
+       if (cfqd) {
+               request_queue_t *q = cfqd->queue;
+
+               spin_lock_irq(q->queue_lock);
+               __cfq_exit_single_io_context(cfqd, cic);
+               spin_unlock_irq(q->queue_lock);
+       }
 }
 
 static void cfq_exit_io_context(struct io_context *ioc)
 {
        struct cfq_io_context *__cic;
-       unsigned long flags;
        struct rb_node *n;
 
        /*
         * put the reference this task is holding to the various queues
         */
-       spin_lock_irqsave(&cfq_exit_lock, flags);
 
        n = rb_first(&ioc->cic_root);
        while (n != NULL) {
@@ -1324,22 +1144,21 @@ static void cfq_exit_io_context(struct io_context *ioc)
                cfq_exit_single_io_context(__cic);
                n = rb_next(n);
        }
-
-       spin_unlock_irqrestore(&cfq_exit_lock, flags);
 }
 
 static struct cfq_io_context *
 cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 {
-       struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask);
+       struct cfq_io_context *cic;
 
+       cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask, cfqd->queue->node);
        if (cic) {
                memset(cic, 0, sizeof(*cic));
                cic->last_end_request = jiffies;
                INIT_LIST_HEAD(&cic->queue_list);
                cic->dtor = cfq_free_io_context;
                cic->exit = cfq_exit_io_context;
-               atomic_inc(&ioc_count);
+               elv_ioc_count_inc(ioc_count);
        }
 
        return cic;
@@ -1420,15 +1239,12 @@ static inline void changed_ioprio(struct cfq_io_context *cic)
        spin_unlock(cfqd->queue->queue_lock);
 }
 
-/*
- * callback from sys_ioprio_set, irqs are disabled
- */
-static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
+static void cfq_ioc_set_ioprio(struct io_context *ioc)
 {
        struct cfq_io_context *cic;
        struct rb_node *n;
 
-       spin_lock(&cfq_exit_lock);
+       ioc->ioprio_changed = 0;
 
        n = rb_first(&ioc->cic_root);
        while (n != NULL) {
@@ -1437,10 +1253,6 @@ static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
                changed_ioprio(cic);
                n = rb_next(n);
        }
-
-       spin_unlock(&cfq_exit_lock);
-
-       return 0;
 }
 
 static struct cfq_queue *
@@ -1460,12 +1272,18 @@ retry:
                        cfqq = new_cfqq;
                        new_cfqq = NULL;
                } else if (gfp_mask & __GFP_WAIT) {
+                       /*
+                        * Inform the allocator of the fact that we will
+                        * just repeat this allocation if it fails, to allow
+                        * the allocator to do whatever it needs to attempt to
+                        * free memory.
+                        */
                        spin_unlock_irq(cfqd->queue->queue_lock);
-                       new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
+                       new_cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask|__GFP_NOFAIL, cfqd->queue->node);
                        spin_lock_irq(cfqd->queue->queue_lock);
                        goto retry;
                } else {
-                       cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
+                       cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask, cfqd->queue->node);
                        if (!cfqq)
                                goto out;
                }
@@ -1480,13 +1298,13 @@ retry:
                hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
                atomic_set(&cfqq->ref, 0);
                cfqq->cfqd = cfqd;
-               cfqq->service_last = 0;
                /*
                 * set ->slice_left to allow preemption for a new process
                 */
                cfqq->slice_left = 2 * cfqd->cfq_slice_idle;
                cfq_mark_cfqq_idle_window(cfqq);
                cfq_mark_cfqq_prio_changed(cfqq);
+               cfq_mark_cfqq_queue_new(cfqq);
                cfq_init_prio_data(cfqq);
        }
 
@@ -1502,12 +1320,10 @@ out:
 static void
 cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic)
 {
-       spin_lock(&cfq_exit_lock);
+       WARN_ON(!list_empty(&cic->queue_list));
        rb_erase(&cic->rb_node, &ioc->cic_root);
-       list_del_init(&cic->queue_list);
-       spin_unlock(&cfq_exit_lock);
        kmem_cache_free(cfq_ioc_pool, cic);
-       atomic_dec(&ioc_count);
+       elv_ioc_count_dec(ioc_count);
 }
 
 static struct cfq_io_context *
@@ -1551,7 +1367,6 @@ cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
        cic->ioc = ioc;
        cic->key = cfqd;
 
-       ioc->set_ioprio = cfq_ioc_set_ioprio;
 restart:
        parent = NULL;
        p = &ioc->cic_root.rb_node;
@@ -1573,11 +1388,12 @@ restart:
                        BUG();
        }
 
-       spin_lock(&cfq_exit_lock);
        rb_link_node(&cic->rb_node, parent, p);
        rb_insert_color(&cic->rb_node, &ioc->cic_root);
+
+       spin_lock_irq(cfqd->queue->queue_lock);
        list_add(&cic->queue_list, &cfqd->cic_list);
-       spin_unlock(&cfq_exit_lock);
+       spin_unlock_irq(cfqd->queue->queue_lock);
 }
 
 /*
@@ -1593,7 +1409,7 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 
        might_sleep_if(gfp_mask & __GFP_WAIT);
 
-       ioc = get_io_context(gfp_mask);
+       ioc = get_io_context(gfp_mask, cfqd->queue->node);
        if (!ioc)
                return NULL;
 
@@ -1607,6 +1423,10 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 
        cfq_cic_link(cfqd, ioc, cic);
 out:
+       smp_read_barrier_depends();
+       if (unlikely(ioc->ioprio_changed))
+               cfq_ioc_set_ioprio(ioc);
+
        return cic;
 err:
        put_io_context(ioc);
@@ -1640,15 +1460,15 @@ cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
 
 static void
 cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic,
-                      struct cfq_rq *crq)
+                      struct request *rq)
 {
        sector_t sdist;
        u64 total;
 
-       if (cic->last_request_pos < crq->request->sector)
-               sdist = crq->request->sector - cic->last_request_pos;
+       if (cic->last_request_pos < rq->sector)
+               sdist = rq->sector - cic->last_request_pos;
        else
-               sdist = cic->last_request_pos - crq->request->sector;
+               sdist = cic->last_request_pos - rq->sector;
 
        /*
         * Don't allow the seek distance to get too large from the
@@ -1699,7 +1519,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
  */
 static int
 cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
-                  struct cfq_rq *crq)
+                  struct request *rq)
 {
        struct cfq_queue *cfqq = cfqd->active_queue;
 
@@ -1718,7 +1538,17 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
         */
        if (new_cfqq->slice_left < cfqd->cfq_slice_idle)
                return 0;
-       if (cfq_crq_is_sync(crq) && !cfq_cfqq_sync(cfqq))
+       /*
+        * if the new request is sync, but the currently running queue is
+        * not, let the sync request have priority.
+        */
+       if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq))
+               return 1;
+       /*
+        * So both queues are sync. Let the new request get disk time if
+        * it's a metadata request and the current queue is doing regular IO.
+        */
+       if (rq_is_meta(rq) && !cfqq->meta_pending)
                return 1;
 
        return 0;
@@ -1730,47 +1560,45 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
  */
 static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
-       struct cfq_queue *__cfqq, *next;
-
-       list_for_each_entry_safe(__cfqq, next, &cfqd->cur_rr, cfq_list)
-               cfq_resort_rr_list(__cfqq, 1);
+       cfq_slice_expired(cfqd, 1);
 
        if (!cfqq->slice_left)
                cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2;
 
-       cfqq->slice_end = cfqq->slice_left + jiffies;
-       cfq_slice_expired(cfqd, 1);
-       __cfq_set_active_queue(cfqd, cfqq);
-}
-
-/*
- * should really be a ll_rw_blk.c helper
- */
-static void cfq_start_queueing(struct cfq_data *cfqd, struct cfq_queue *cfqq)
-{
-       request_queue_t *q = cfqd->queue;
+       /*
+        * Put the new queue at the front of the of the current list,
+        * so we know that it will be selected next.
+        */
+       BUG_ON(!cfq_cfqq_on_rr(cfqq));
+       list_move(&cfqq->cfq_list, &cfqd->cur_rr);
 
-       if (!blk_queue_plugged(q))
-               q->request_fn(q);
-       else
-               __generic_unplug_device(q);
+       cfqq->slice_end = cfqq->slice_left + jiffies;
 }
 
 /*
- * Called when a new fs request (crq) is added (to cfqq). Check if there's
+ * Called when a new fs request (rq) is added (to cfqq). Check if there's
  * something we should do about it
  */
 static void
-cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
-                struct cfq_rq *crq)
+cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+               struct request *rq)
 {
-       struct cfq_io_context *cic = crq->io_context;
+       struct cfq_io_context *cic = RQ_CIC(rq);
+
+       if (rq_is_meta(rq))
+               cfqq->meta_pending++;
+
+       /*
+        * check if this request is a better next-serve candidate)) {
+        */
+       cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq);
+       BUG_ON(!cfqq->next_rq);
 
        /*
         * we never wait for an async request and we don't allow preemption
         * of an async request. so just return early
         */
-       if (!cfq_crq_is_sync(crq)) {
+       if (!rq_is_sync(rq)) {
                /*
                 * sync process issued an async request, if it's waiting
                 * then expire it and kick rq handling.
@@ -1778,17 +1606,17 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                if (cic == cfqd->active_cic &&
                    del_timer(&cfqd->idle_slice_timer)) {
                        cfq_slice_expired(cfqd, 0);
-                       cfq_start_queueing(cfqd, cfqq);
+                       blk_start_queueing(cfqd->queue);
                }
                return;
        }
 
        cfq_update_io_thinktime(cfqd, cic);
-       cfq_update_io_seektime(cfqd, cic, crq);
+       cfq_update_io_seektime(cfqd, cic, rq);
        cfq_update_idle_window(cfqd, cfqq, cic);
 
        cic->last_queue = jiffies;
-       cic->last_request_pos = crq->request->sector + crq->request->nr_sectors;
+       cic->last_request_pos = rq->sector + rq->nr_sectors;
 
        if (cfqq == cfqd->active_queue) {
                /*
@@ -1799,9 +1627,9 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                if (cfq_cfqq_wait_request(cfqq)) {
                        cfq_mark_cfqq_must_dispatch(cfqq);
                        del_timer(&cfqd->idle_slice_timer);
-                       cfq_start_queueing(cfqd, cfqq);
+                       blk_start_queueing(cfqd->queue);
                }
-       } else if (cfq_should_preempt(cfqd, cfqq, crq)) {
+       } else if (cfq_should_preempt(cfqd, cfqq, rq)) {
                /*
                 * not the active queue - expire current slice if it is
                 * idle and has expired it's mean thinktime or this new queue
@@ -1809,34 +1637,32 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                 */
                cfq_preempt_queue(cfqd, cfqq);
                cfq_mark_cfqq_must_dispatch(cfqq);
-               cfq_start_queueing(cfqd, cfqq);
+               blk_start_queueing(cfqd->queue);
        }
 }
 
 static void cfq_insert_request(request_queue_t *q, struct request *rq)
 {
        struct cfq_data *cfqd = q->elevator->elevator_data;
-       struct cfq_rq *crq = RQ_DATA(rq);
-       struct cfq_queue *cfqq = crq->cfq_queue;
+       struct cfq_queue *cfqq = RQ_CFQQ(rq);
 
        cfq_init_prio_data(cfqq);
 
-       cfq_add_crq_rb(crq);
+       cfq_add_rq_rb(rq);
 
-       list_add_tail(&rq->queuelist, &cfqq->fifo);
+       if (!cfq_cfqq_on_rr(cfqq))
+               cfq_add_cfqq_rr(cfqd, cfqq);
 
-       if (rq_mergeable(rq))
-               cfq_add_crq_hash(cfqd, crq);
+       list_add_tail(&rq->queuelist, &cfqq->fifo);
 
-       cfq_crq_enqueued(cfqd, cfqq, crq);
+       cfq_rq_enqueued(cfqd, cfqq, rq);
 }
 
 static void cfq_completed_request(request_queue_t *q, struct request *rq)
 {
-       struct cfq_rq *crq = RQ_DATA(rq);
-       struct cfq_queue *cfqq = crq->cfq_queue;
+       struct cfq_queue *cfqq = RQ_CFQQ(rq);
        struct cfq_data *cfqd = cfqq->cfqd;
-       const int sync = cfq_crq_is_sync(crq);
+       const int sync = rq_is_sync(rq);
        unsigned long now;
 
        now = jiffies;
@@ -1849,15 +1675,11 @@ static void cfq_completed_request(request_queue_t *q, struct request *rq)
        if (!cfq_class_idle(cfqq))
                cfqd->last_end_request = now;
 
-       if (!cfq_cfqq_dispatched(cfqq)) {
-               if (cfq_cfqq_on_rr(cfqq)) {
-                       cfqq->service_last = now;
-                       cfq_resort_rr_list(cfqq, 0);
-               }
-       }
+       if (!cfq_cfqq_dispatched(cfqq) && cfq_cfqq_on_rr(cfqq))
+               cfq_resort_rr_list(cfqq, 0);
 
        if (sync)
-               crq->io_context->last_end_request = now;
+               RQ_CIC(rq)->last_end_request = now;
 
        /*
         * If this is the active queue, check if it needs to be expired,
@@ -1873,30 +1695,6 @@ static void cfq_completed_request(request_queue_t *q, struct request *rq)
        }
 }
 
-static struct request *
-cfq_former_request(request_queue_t *q, struct request *rq)
-{
-       struct cfq_rq *crq = RQ_DATA(rq);
-       struct rb_node *rbprev = rb_prev(&crq->rb_node);
-
-       if (rbprev)
-               return rb_entry_crq(rbprev)->request;
-
-       return NULL;
-}
-
-static struct request *
-cfq_latter_request(request_queue_t *q, struct request *rq)
-{
-       struct cfq_rq *crq = RQ_DATA(rq);
-       struct rb_node *rbnext = rb_next(&crq->rb_node);
-
-       if (rbnext)
-               return rb_entry_crq(rbnext)->request;
-
-       return NULL;
-}
-
 /*
  * we temporarily boost lower priority queues if they are holding fs exclusive
  * resources. they are boosted to normal prio (CLASS_BE/4)
@@ -1933,9 +1731,7 @@ static void cfq_prio_boost(struct cfq_queue *cfqq)
                cfq_resort_rr_list(cfqq, 0);
 }
 
-static inline int
-__cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq,
-               struct task_struct *task, int rw)
+static inline int __cfq_may_queue(struct cfq_queue *cfqq)
 {
        if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) &&
            !cfq_cfqq_must_alloc_slice(cfqq)) {
@@ -1946,7 +1742,7 @@ __cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        return ELV_MQUEUE_MAY;
 }
 
-static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio)
+static int cfq_may_queue(request_queue_t *q, int rw)
 {
        struct cfq_data *cfqd = q->elevator->elevator_data;
        struct task_struct *tsk = current;
@@ -1963,48 +1759,30 @@ static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio)
                cfq_init_prio_data(cfqq);
                cfq_prio_boost(cfqq);
 
-               return __cfq_may_queue(cfqd, cfqq, tsk, rw);
+               return __cfq_may_queue(cfqq);
        }
 
        return ELV_MQUEUE_MAY;
 }
 
-static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq)
-{
-       struct cfq_data *cfqd = q->elevator->elevator_data;
-
-       if (unlikely(cfqd->rq_starved)) {
-               struct request_list *rl = &q->rq;
-
-               smp_mb();
-               if (waitqueue_active(&rl->wait[READ]))
-                       wake_up(&rl->wait[READ]);
-               if (waitqueue_active(&rl->wait[WRITE]))
-                       wake_up(&rl->wait[WRITE]);
-       }
-}
-
 /*
  * queue lock held here
  */
 static void cfq_put_request(request_queue_t *q, struct request *rq)
 {
-       struct cfq_data *cfqd = q->elevator->elevator_data;
-       struct cfq_rq *crq = RQ_DATA(rq);
+       struct cfq_queue *cfqq = RQ_CFQQ(rq);
 
-       if (crq) {
-               struct cfq_queue *cfqq = crq->cfq_queue;
+       if (cfqq) {
                const int rw = rq_data_dir(rq);
 
                BUG_ON(!cfqq->allocated[rw]);
                cfqq->allocated[rw]--;
 
-               put_io_context(crq->io_context->ioc);
+               put_io_context(RQ_CIC(rq)->ioc);
 
-               mempool_free(crq, cfqd->crq_pool);
                rq->elevator_private = NULL;
+               rq->elevator_private2 = NULL;
 
-               cfq_check_waiters(q, cfqq);
                cfq_put_queue(cfqq);
        }
 }
@@ -2013,8 +1791,7 @@ static void cfq_put_request(request_queue_t *q, struct request *rq)
  * Allocate cfq data structures associated with this request.
  */
 static int
-cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
-               gfp_t gfp_mask)
+cfq_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask)
 {
        struct cfq_data *cfqd = q->elevator->elevator_data;
        struct task_struct *tsk = current;
@@ -2022,7 +1799,6 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
        const int rw = rq_data_dir(rq);
        pid_t key = cfq_queue_pid(tsk, rw);
        struct cfq_queue *cfqq;
-       struct cfq_rq *crq;
        unsigned long flags;
        int is_sync = key != CFQ_KEY_ASYNC;
 
@@ -2046,42 +1822,18 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
 
        cfqq->allocated[rw]++;
        cfq_clear_cfqq_must_alloc(cfqq);
-       cfqd->rq_starved = 0;
        atomic_inc(&cfqq->ref);
-       spin_unlock_irqrestore(q->queue_lock, flags);
 
-       crq = mempool_alloc(cfqd->crq_pool, gfp_mask);
-       if (crq) {
-               RB_CLEAR_NODE(&crq->rb_node);
-               crq->rb_key = 0;
-               crq->request = rq;
-               INIT_HLIST_NODE(&crq->hash);
-               crq->cfq_queue = cfqq;
-               crq->io_context = cic;
-
-               if (is_sync)
-                       cfq_mark_crq_is_sync(crq);
-               else
-                       cfq_clear_crq_is_sync(crq);
+       spin_unlock_irqrestore(q->queue_lock, flags);
 
-               rq->elevator_private = crq;
-               return 0;
-       }
+       rq->elevator_private = cic;
+       rq->elevator_private2 = cfqq;
+       return 0;
 
-       spin_lock_irqsave(q->queue_lock, flags);
-       cfqq->allocated[rw]--;
-       if (!(cfqq->allocated[0] + cfqq->allocated[1]))
-               cfq_mark_cfqq_must_alloc(cfqq);
-       cfq_put_queue(cfqq);
 queue_fail:
        if (cic)
                put_io_context(cic->ioc);
-       /*
-        * mark us rq allocation starved. we need to kickstart the process
-        * ourselves if there are no pending requests that can do it for us.
-        * that would be an extremely rare OOM situation
-        */
-       cfqd->rq_starved = 1;
+
        cfq_schedule_dispatch(cfqd);
        spin_unlock_irqrestore(q->queue_lock, flags);
        return 1;
@@ -2090,27 +1842,10 @@ queue_fail:
 static void cfq_kick_queue(void *data)
 {
        request_queue_t *q = data;
-       struct cfq_data *cfqd = q->elevator->elevator_data;
        unsigned long flags;
 
        spin_lock_irqsave(q->queue_lock, flags);
-
-       if (cfqd->rq_starved) {
-               struct request_list *rl = &q->rq;
-
-               /*
-                * we aren't guaranteed to get a request after this, but we
-                * have to be opportunistic
-                */
-               smp_mb();
-               if (waitqueue_active(&rl->wait[READ]))
-                       wake_up(&rl->wait[READ]);
-               if (waitqueue_active(&rl->wait[WRITE]))
-                       wake_up(&rl->wait[WRITE]);
-       }
-
-       blk_remove_plug(q);
-       q->request_fn(q);
+       blk_start_queueing(q);
        spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
@@ -2193,7 +1928,6 @@ static void cfq_exit_queue(elevator_t *e)
 
        cfq_shutdown_timer_wq(cfqd);
 
-       spin_lock(&cfq_exit_lock);
        spin_lock_irq(q->queue_lock);
 
        if (cfqd->active_queue)
@@ -2203,25 +1937,14 @@ static void cfq_exit_queue(elevator_t *e)
                struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
                                                        struct cfq_io_context,
                                                        queue_list);
-               if (cic->cfqq[ASYNC]) {
-                       cfq_put_queue(cic->cfqq[ASYNC]);
-                       cic->cfqq[ASYNC] = NULL;
-               }
-               if (cic->cfqq[SYNC]) {
-                       cfq_put_queue(cic->cfqq[SYNC]);
-                       cic->cfqq[SYNC] = NULL;
-               }
-               cic->key = NULL;
-               list_del_init(&cic->queue_list);
+
+               __cfq_exit_single_io_context(cfqd, cic);
        }
 
        spin_unlock_irq(q->queue_lock);
-       spin_unlock(&cfq_exit_lock);
 
        cfq_shutdown_timer_wq(cfqd);
 
-       mempool_destroy(cfqd->crq_pool);
-       kfree(cfqd->crq_hash);
        kfree(cfqd->cfq_hash);
        kfree(cfqd);
 }
@@ -2231,7 +1954,7 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
        struct cfq_data *cfqd;
        int i;
 
-       cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL);
+       cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL, q->node);
        if (!cfqd)
                return NULL;
 
@@ -2243,23 +1966,12 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
        INIT_LIST_HEAD(&cfqd->busy_rr);
        INIT_LIST_HEAD(&cfqd->cur_rr);
        INIT_LIST_HEAD(&cfqd->idle_rr);
-       INIT_LIST_HEAD(&cfqd->empty_list);
        INIT_LIST_HEAD(&cfqd->cic_list);
 
-       cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL);
-       if (!cfqd->crq_hash)
-               goto out_crqhash;
-
-       cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL);
+       cfqd->cfq_hash = kmalloc_node(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL, q->node);
        if (!cfqd->cfq_hash)
-               goto out_cfqhash;
-
-       cfqd->crq_pool = mempool_create_slab_pool(BLKDEV_MIN_RQ, crq_pool);
-       if (!cfqd->crq_pool)
-               goto out_crqpool;
+               goto out_free;
 
-       for (i = 0; i < CFQ_MHASH_ENTRIES; i++)
-               INIT_HLIST_HEAD(&cfqd->crq_hash[i]);
        for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
                INIT_HLIST_HEAD(&cfqd->cfq_hash[i]);
 
@@ -2275,7 +1987,6 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
 
        INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q);
 
-       cfqd->cfq_queued = cfq_queued;
        cfqd->cfq_quantum = cfq_quantum;
        cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0];
        cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1];
@@ -2287,19 +1998,13 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
        cfqd->cfq_slice_idle = cfq_slice_idle;
 
        return cfqd;
-out_crqpool:
-       kfree(cfqd->cfq_hash);
-out_cfqhash:
-       kfree(cfqd->crq_hash);
-out_crqhash:
+out_free:
        kfree(cfqd);
        return NULL;
 }
 
 static void cfq_slab_kill(void)
 {
-       if (crq_pool)
-               kmem_cache_destroy(crq_pool);
        if (cfq_pool)
                kmem_cache_destroy(cfq_pool);
        if (cfq_ioc_pool)
@@ -2308,11 +2013,6 @@ static void cfq_slab_kill(void)
 
 static int __init cfq_slab_setup(void)
 {
-       crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0,
-                                       NULL, NULL);
-       if (!crq_pool)
-               goto fail;
-
        cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0,
                                        NULL, NULL);
        if (!cfq_pool)
@@ -2358,7 +2058,6 @@ static ssize_t __FUNC(elevator_t *e, char *page)                  \
        return cfq_var_show(__data, (page));                            \
 }
 SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
-SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued, 0);
 SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1);
 SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1);
 SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0);
@@ -2386,7 +2085,6 @@ static ssize_t __FUNC(elevator_t *e, const char *page, size_t count)      \
        return ret;                                                     \
 }
 STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0);
-STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX, 0);
 STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0);
@@ -2402,7 +2100,6 @@ STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX,
 
 static struct elv_fs_entry cfq_attrs[] = {
        CFQ_ATTR(quantum),
-       CFQ_ATTR(queued),
        CFQ_ATTR(fifo_expire_sync),
        CFQ_ATTR(fifo_expire_async),
        CFQ_ATTR(back_seek_max),
@@ -2425,14 +2122,14 @@ static struct elevator_type iosched_cfq = {
                .elevator_deactivate_req_fn =   cfq_deactivate_request,
                .elevator_queue_empty_fn =      cfq_queue_empty,
                .elevator_completed_req_fn =    cfq_completed_request,
-               .elevator_former_req_fn =       cfq_former_request,
-               .elevator_latter_req_fn =       cfq_latter_request,
+               .elevator_former_req_fn =       elv_rb_former_request,
+               .elevator_latter_req_fn =       elv_rb_latter_request,
                .elevator_set_req_fn =          cfq_set_request,
                .elevator_put_req_fn =          cfq_put_request,
                .elevator_may_queue_fn =        cfq_may_queue,
                .elevator_init_fn =             cfq_init_queue,
                .elevator_exit_fn =             cfq_exit_queue,
-               .trim =                         cfq_trim,
+               .trim =                         cfq_free_io_context,
        },
        .elevator_attrs =       cfq_attrs,
        .elevator_name =        "cfq",
@@ -2468,7 +2165,7 @@ static void __exit cfq_exit(void)
        ioc_gone = &all_gone;
        /* ioc_gone's update must be visible before reading ioc_count */
        smp_wmb();
-       if (atomic_read(&ioc_count))
+       if (elv_ioc_count_read(ioc_count))
                wait_for_completion(ioc_gone);
        synchronize_rcu();
        cfq_slab_kill();
index c7ca9f0b64989cdda8a16a56593fd52f2a251470..b7c5b34cb7b43b4688b3f4bbda5951726bbb008a 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *  Deadline i/o scheduler.
  *
- *  Copyright (C) 2002 Jens Axboe <axboe@suse.de>
+ *  Copyright (C) 2002 Jens Axboe <axboe@kernel.dk>
  */
 #include <linux/kernel.h>
 #include <linux/fs.h>
@@ -12,7 +12,6 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/compiler.h>
-#include <linux/hash.h>
 #include <linux/rbtree.h>
 
 /*
@@ -24,13 +23,6 @@ static const int writes_starved = 2;    /* max times reads can starve a write */
 static const int fifo_batch = 16;       /* # of sequential requests treated as one
                                     by the above parameters. For throughput. */
 
-static const int deadline_hash_shift = 5;
-#define DL_HASH_BLOCK(sec)     ((sec) >> 3)
-#define DL_HASH_FN(sec)                (hash_long(DL_HASH_BLOCK((sec)), deadline_hash_shift))
-#define DL_HASH_ENTRIES                (1 << deadline_hash_shift)
-#define rq_hash_key(rq)                ((rq)->sector + (rq)->nr_sectors)
-#define ON_HASH(drq)           (!hlist_unhashed(&(drq)->hash))
-
 struct deadline_data {
        /*
         * run time data
@@ -45,8 +37,7 @@ struct deadline_data {
        /*
         * next in sort order. read, write or both are NULL
         */
-       struct deadline_rq *next_drq[2];
-       struct hlist_head *hash;        /* request hash */
+       struct request *next_rq[2];
        unsigned int batching;          /* number of sequential requests made */
        sector_t last_sector;           /* head position */
        unsigned int starved;           /* times reads have starved writes */
@@ -58,240 +49,69 @@ struct deadline_data {
        int fifo_batch;
        int writes_starved;
        int front_merges;
-
-       mempool_t *drq_pool;
 };
 
-/*
- * pre-request data.
- */
-struct deadline_rq {
-       /*
-        * rbtree index, key is the starting offset
-        */
-       struct rb_node rb_node;
-       sector_t rb_key;
-
-       struct request *request;
-
-       /*
-        * request hash, key is the ending offset (for back merge lookup)
-        */
-       struct hlist_node hash;
-
-       /*
-        * expire fifo
-        */
-       struct list_head fifo;
-       unsigned long expires;
-};
-
-static void deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq);
-
-static kmem_cache_t *drq_pool;
-
-#define RQ_DATA(rq)    ((struct deadline_rq *) (rq)->elevator_private)
+static void deadline_move_request(struct deadline_data *, struct request *);
 
-/*
- * the back merge hash support functions
- */
-static inline void __deadline_del_drq_hash(struct deadline_rq *drq)
-{
-       hlist_del_init(&drq->hash);
-}
-
-static inline void deadline_del_drq_hash(struct deadline_rq *drq)
-{
-       if (ON_HASH(drq))
-               __deadline_del_drq_hash(drq);
-}
-
-static inline void
-deadline_add_drq_hash(struct deadline_data *dd, struct deadline_rq *drq)
-{
-       struct request *rq = drq->request;
-
-       BUG_ON(ON_HASH(drq));
-
-       hlist_add_head(&drq->hash, &dd->hash[DL_HASH_FN(rq_hash_key(rq))]);
-}
-
-/*
- * move hot entry to front of chain
- */
-static inline void
-deadline_hot_drq_hash(struct deadline_data *dd, struct deadline_rq *drq)
-{
-       struct request *rq = drq->request;
-       struct hlist_head *head = &dd->hash[DL_HASH_FN(rq_hash_key(rq))];
-
-       if (ON_HASH(drq) && &drq->hash != head->first) {
-               hlist_del(&drq->hash);
-               hlist_add_head(&drq->hash, head);
-       }
-}
-
-static struct request *
-deadline_find_drq_hash(struct deadline_data *dd, sector_t offset)
-{
-       struct hlist_head *hash_list = &dd->hash[DL_HASH_FN(offset)];
-       struct hlist_node *entry, *next;
-       struct deadline_rq *drq;
-
-       hlist_for_each_entry_safe(drq, entry, next, hash_list, hash) {
-               struct request *__rq = drq->request;
-
-               BUG_ON(!ON_HASH(drq));
-
-               if (!rq_mergeable(__rq)) {
-                       __deadline_del_drq_hash(drq);
-                       continue;
-               }
-
-               if (rq_hash_key(__rq) == offset)
-                       return __rq;
-       }
-
-       return NULL;
-}
-
-/*
- * rb tree support functions
- */
-#define rb_entry_drq(node)     rb_entry((node), struct deadline_rq, rb_node)
-#define DRQ_RB_ROOT(dd, drq)   (&(dd)->sort_list[rq_data_dir((drq)->request)])
-#define rq_rb_key(rq)          (rq)->sector
-
-static struct deadline_rq *
-__deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
-{
-       struct rb_node **p = &DRQ_RB_ROOT(dd, drq)->rb_node;
-       struct rb_node *parent = NULL;
-       struct deadline_rq *__drq;
-
-       while (*p) {
-               parent = *p;
-               __drq = rb_entry_drq(parent);
-
-               if (drq->rb_key < __drq->rb_key)
-                       p = &(*p)->rb_left;
-               else if (drq->rb_key > __drq->rb_key)
-                       p = &(*p)->rb_right;
-               else
-                       return __drq;
-       }
-
-       rb_link_node(&drq->rb_node, parent, p);
-       return NULL;
-}
+#define RQ_RB_ROOT(dd, rq)     (&(dd)->sort_list[rq_data_dir((rq))])
 
 static void
-deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
+deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
 {
-       struct deadline_rq *__alias;
-
-       drq->rb_key = rq_rb_key(drq->request);
+       struct rb_root *root = RQ_RB_ROOT(dd, rq);
+       struct request *__alias;
 
 retry:
-       __alias = __deadline_add_drq_rb(dd, drq);
-       if (!__alias) {
-               rb_insert_color(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
-               return;
+       __alias = elv_rb_add(root, rq);
+       if (unlikely(__alias)) {
+               deadline_move_request(dd, __alias);
+               goto retry;
        }
-
-       deadline_move_request(dd, __alias);
-       goto retry;
 }
 
 static inline void
-deadline_del_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
+deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
 {
-       const int data_dir = rq_data_dir(drq->request);
+       const int data_dir = rq_data_dir(rq);
 
-       if (dd->next_drq[data_dir] == drq) {
-               struct rb_node *rbnext = rb_next(&drq->rb_node);
+       if (dd->next_rq[data_dir] == rq) {
+               struct rb_node *rbnext = rb_next(&rq->rb_node);
 
-               dd->next_drq[data_dir] = NULL;
+               dd->next_rq[data_dir] = NULL;
                if (rbnext)
-                       dd->next_drq[data_dir] = rb_entry_drq(rbnext);
-       }
-
-       BUG_ON(!RB_EMPTY_NODE(&drq->rb_node));
-       rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
-       RB_CLEAR_NODE(&drq->rb_node);
-}
-
-static struct request *
-deadline_find_drq_rb(struct deadline_data *dd, sector_t sector, int data_dir)
-{
-       struct rb_node *n = dd->sort_list[data_dir].rb_node;
-       struct deadline_rq *drq;
-
-       while (n) {
-               drq = rb_entry_drq(n);
-
-               if (sector < drq->rb_key)
-                       n = n->rb_left;
-               else if (sector > drq->rb_key)
-                       n = n->rb_right;
-               else
-                       return drq->request;
+                       dd->next_rq[data_dir] = rb_entry_rq(rbnext);
        }
 
-       return NULL;
+       elv_rb_del(RQ_RB_ROOT(dd, rq), rq);
 }
 
 /*
- * deadline_find_first_drq finds the first (lowest sector numbered) request
- * for the specified data_dir. Used to sweep back to the start of the disk
- * (1-way elevator) after we process the last (highest sector) request.
- */
-static struct deadline_rq *
-deadline_find_first_drq(struct deadline_data *dd, int data_dir)
-{
-       struct rb_node *n = dd->sort_list[data_dir].rb_node;
-
-       for (;;) {
-               if (n->rb_left == NULL)
-                       return rb_entry_drq(n);
-               
-               n = n->rb_left;
-       }
-}
-
-/*
- * add drq to rbtree and fifo
+ * add rq to rbtree and fifo
  */
 static void
 deadline_add_request(struct request_queue *q, struct request *rq)
 {
        struct deadline_data *dd = q->elevator->elevator_data;
-       struct deadline_rq *drq = RQ_DATA(rq);
+       const int data_dir = rq_data_dir(rq);
 
-       const int data_dir = rq_data_dir(drq->request);
+       deadline_add_rq_rb(dd, rq);
 
-       deadline_add_drq_rb(dd, drq);
        /*
         * set expire time (only used for reads) and add to fifo list
         */
-       drq->expires = jiffies + dd->fifo_expire[data_dir];
-       list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]);
-
-       if (rq_mergeable(rq))
-               deadline_add_drq_hash(dd, drq);
+       rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]);
+       list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
 }
 
 /*
- * remove rq from rbtree, fifo, and hash
+ * remove rq from rbtree and fifo.
  */
 static void deadline_remove_request(request_queue_t *q, struct request *rq)
 {
-       struct deadline_rq *drq = RQ_DATA(rq);
        struct deadline_data *dd = q->elevator->elevator_data;
 
-       list_del_init(&drq->fifo);
-       deadline_del_drq_rb(dd, drq);
-       deadline_del_drq_hash(drq);
+       rq_fifo_clear(rq);
+       deadline_del_rq_rb(dd, rq);
 }
 
 static int
@@ -301,28 +121,15 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
        struct request *__rq;
        int ret;
 
-       /*
-        * see if the merge hash can satisfy a back merge
-        */
-       __rq = deadline_find_drq_hash(dd, bio->bi_sector);
-       if (__rq) {
-               BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
-
-               if (elv_rq_merge_ok(__rq, bio)) {
-                       ret = ELEVATOR_BACK_MERGE;
-                       goto out;
-               }
-       }
-
        /*
         * check for front merge
         */
        if (dd->front_merges) {
-               sector_t rb_key = bio->bi_sector + bio_sectors(bio);
+               sector_t sector = bio->bi_sector + bio_sectors(bio);
 
-               __rq = deadline_find_drq_rb(dd, rb_key, bio_data_dir(bio));
+               __rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
                if (__rq) {
-                       BUG_ON(rb_key != rq_rb_key(__rq));
+                       BUG_ON(sector != __rq->sector);
 
                        if (elv_rq_merge_ok(__rq, bio)) {
                                ret = ELEVATOR_FRONT_MERGE;
@@ -333,29 +140,21 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
 
        return ELEVATOR_NO_MERGE;
 out:
-       if (ret)
-               deadline_hot_drq_hash(dd, RQ_DATA(__rq));
        *req = __rq;
        return ret;
 }
 
-static void deadline_merged_request(request_queue_t *q, struct request *req)
+static void deadline_merged_request(request_queue_t *q, struct request *req,
+                                   int type)
 {
        struct deadline_data *dd = q->elevator->elevator_data;
-       struct deadline_rq *drq = RQ_DATA(req);
-
-       /*
-        * hash always needs to be repositioned, key is end sector
-        */
-       deadline_del_drq_hash(drq);
-       deadline_add_drq_hash(dd, drq);
 
        /*
         * if the merge was a front merge, we need to reposition request
         */
-       if (rq_rb_key(req) != drq->rb_key) {
-               deadline_del_drq_rb(dd, drq);
-               deadline_add_drq_rb(dd, drq);
+       if (type == ELEVATOR_FRONT_MERGE) {
+               elv_rb_del(RQ_RB_ROOT(dd, req), req);
+               deadline_add_rq_rb(dd, req);
        }
 }
 
@@ -363,33 +162,14 @@ static void
 deadline_merged_requests(request_queue_t *q, struct request *req,
                         struct request *next)
 {
-       struct deadline_data *dd = q->elevator->elevator_data;
-       struct deadline_rq *drq = RQ_DATA(req);
-       struct deadline_rq *dnext = RQ_DATA(next);
-
-       BUG_ON(!drq);
-       BUG_ON(!dnext);
-
        /*
-        * reposition drq (this is the merged request) in hash, and in rbtree
-        * in case of a front merge
+        * if next expires before rq, assign its expire time to rq
+        * and move into next position (next will be deleted) in fifo
         */
-       deadline_del_drq_hash(drq);
-       deadline_add_drq_hash(dd, drq);
-
-       if (rq_rb_key(req) != drq->rb_key) {
-               deadline_del_drq_rb(dd, drq);
-               deadline_add_drq_rb(dd, drq);
-       }
-
-       /*
-        * if dnext expires before drq, assign its expire time to drq
-        * and move into dnext position (dnext will be deleted) in fifo
-        */
-       if (!list_empty(&drq->fifo) && !list_empty(&dnext->fifo)) {
-               if (time_before(dnext->expires, drq->expires)) {
-                       list_move(&drq->fifo, &dnext->fifo);
-                       drq->expires = dnext->expires;
+       if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
+               if (time_before(rq_fifo_time(next), rq_fifo_time(req))) {
+                       list_move(&req->queuelist, &next->queuelist);
+                       rq_set_fifo_time(req, rq_fifo_time(next));
                }
        }
 
@@ -403,52 +183,50 @@ deadline_merged_requests(request_queue_t *q, struct request *req,
  * move request from sort list to dispatch queue.
  */
 static inline void
-deadline_move_to_dispatch(struct deadline_data *dd, struct deadline_rq *drq)
+deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq)
 {
-       request_queue_t *q = drq->request->q;
+       request_queue_t *q = rq->q;
 
-       deadline_remove_request(q, drq->request);
-       elv_dispatch_add_tail(q, drq->request);
+       deadline_remove_request(q, rq);
+       elv_dispatch_add_tail(q, rq);
 }
 
 /*
  * move an entry to dispatch queue
  */
 static void
-deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq)
+deadline_move_request(struct deadline_data *dd, struct request *rq)
 {
-       const int data_dir = rq_data_dir(drq->request);
-       struct rb_node *rbnext = rb_next(&drq->rb_node);
+       const int data_dir = rq_data_dir(rq);
+       struct rb_node *rbnext = rb_next(&rq->rb_node);
 
-       dd->next_drq[READ] = NULL;
-       dd->next_drq[WRITE] = NULL;
+       dd->next_rq[READ] = NULL;
+       dd->next_rq[WRITE] = NULL;
 
        if (rbnext)
-               dd->next_drq[data_dir] = rb_entry_drq(rbnext);
+               dd->next_rq[data_dir] = rb_entry_rq(rbnext);
        
-       dd->last_sector = drq->request->sector + drq->request->nr_sectors;
+       dd->last_sector = rq->sector + rq->nr_sectors;
 
        /*
         * take it off the sort and fifo list, move
         * to dispatch queue
         */
-       deadline_move_to_dispatch(dd, drq);
+       deadline_move_to_dispatch(dd, rq);
 }
 
-#define list_entry_fifo(ptr)   list_entry((ptr), struct deadline_rq, fifo)
-
 /*
  * deadline_check_fifo returns 0 if there are no expired reads on the fifo,
  * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
  */
 static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
 {
-       struct deadline_rq *drq = list_entry_fifo(dd->fifo_list[ddir].next);
+       struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next);
 
        /*
-        * drq is expired!
+        * rq is expired!
         */
-       if (time_after(jiffies, drq->expires))
+       if (time_after(jiffies, rq_fifo_time(rq)))
                return 1;
 
        return 0;
@@ -463,21 +241,21 @@ static int deadline_dispatch_requests(request_queue_t *q, int force)
        struct deadline_data *dd = q->elevator->elevator_data;
        const int reads = !list_empty(&dd->fifo_list[READ]);
        const int writes = !list_empty(&dd->fifo_list[WRITE]);
-       struct deadline_rq *drq;
+       struct request *rq;
        int data_dir;
 
        /*
         * batches are currently reads XOR writes
         */
-       if (dd->next_drq[WRITE])
-               drq = dd->next_drq[WRITE];
+       if (dd->next_rq[WRITE])
+               rq = dd->next_rq[WRITE];
        else
-               drq = dd->next_drq[READ];
+               rq = dd->next_rq[READ];
 
-       if (drq) {
+       if (rq) {
                /* we have a "next request" */
                
-               if (dd->last_sector != drq->request->sector)
+               if (dd->last_sector != rq->sector)
                        /* end the batch on a non sequential request */
                        dd->batching += dd->fifo_batch;
                
@@ -526,30 +304,33 @@ dispatch_find_request:
        if (deadline_check_fifo(dd, data_dir)) {
                /* An expired request exists - satisfy it */
                dd->batching = 0;
-               drq = list_entry_fifo(dd->fifo_list[data_dir].next);
+               rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
                
-       } else if (dd->next_drq[data_dir]) {
+       } else if (dd->next_rq[data_dir]) {
                /*
                 * The last req was the same dir and we have a next request in
                 * sort order. No expired requests so continue on from here.
                 */
-               drq = dd->next_drq[data_dir];
+               rq = dd->next_rq[data_dir];
        } else {
+               struct rb_node *node;
                /*
                 * The last req was the other direction or we have run out of
                 * higher-sectored requests. Go back to the lowest sectored
                 * request (1 way elevator) and start a new batch.
                 */
                dd->batching = 0;
-               drq = deadline_find_first_drq(dd, data_dir);
+               node = rb_first(&dd->sort_list[data_dir]);
+               if (node)
+                       rq = rb_entry_rq(node);
        }
 
 dispatch_request:
        /*
-        * drq is the selected appropriate request.
+        * rq is the selected appropriate request.
         */
        dd->batching++;
-       deadline_move_request(dd, drq);
+       deadline_move_request(dd, rq);
 
        return 1;
 }
@@ -562,30 +343,6 @@ static int deadline_queue_empty(request_queue_t *q)
                && list_empty(&dd->fifo_list[READ]);
 }
 
-static struct request *
-deadline_former_request(request_queue_t *q, struct request *rq)
-{
-       struct deadline_rq *drq = RQ_DATA(rq);
-       struct rb_node *rbprev = rb_prev(&drq->rb_node);
-
-       if (rbprev)
-               return rb_entry_drq(rbprev)->request;
-
-       return NULL;
-}
-
-static struct request *
-deadline_latter_request(request_queue_t *q, struct request *rq)
-{
-       struct deadline_rq *drq = RQ_DATA(rq);
-       struct rb_node *rbnext = rb_next(&drq->rb_node);
-
-       if (rbnext)
-               return rb_entry_drq(rbnext)->request;
-
-       return NULL;
-}
-
 static void deadline_exit_queue(elevator_t *e)
 {
        struct deadline_data *dd = e->elevator_data;
@@ -593,46 +350,21 @@ static void deadline_exit_queue(elevator_t *e)
        BUG_ON(!list_empty(&dd->fifo_list[READ]));
        BUG_ON(!list_empty(&dd->fifo_list[WRITE]));
 
-       mempool_destroy(dd->drq_pool);
-       kfree(dd->hash);
        kfree(dd);
 }
 
 /*
- * initialize elevator private data (deadline_data), and alloc a drq for
- * each request on the free lists
+ * initialize elevator private data (deadline_data).
  */
 static void *deadline_init_queue(request_queue_t *q, elevator_t *e)
 {
        struct deadline_data *dd;
-       int i;
-
-       if (!drq_pool)
-               return NULL;
 
        dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
        if (!dd)
                return NULL;
        memset(dd, 0, sizeof(*dd));
 
-       dd->hash = kmalloc_node(sizeof(struct hlist_head)*DL_HASH_ENTRIES,
-                               GFP_KERNEL, q->node);
-       if (!dd->hash) {
-               kfree(dd);
-               return NULL;
-       }
-
-       dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
-                                       mempool_free_slab, drq_pool, q->node);
-       if (!dd->drq_pool) {
-               kfree(dd->hash);
-               kfree(dd);
-               return NULL;
-       }
-
-       for (i = 0; i < DL_HASH_ENTRIES; i++)
-               INIT_HLIST_HEAD(&dd->hash[i]);
-
        INIT_LIST_HEAD(&dd->fifo_list[READ]);
        INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
        dd->sort_list[READ] = RB_ROOT;
@@ -645,39 +377,6 @@ static void *deadline_init_queue(request_queue_t *q, elevator_t *e)
        return dd;
 }
 
-static void deadline_put_request(request_queue_t *q, struct request *rq)
-{
-       struct deadline_data *dd = q->elevator->elevator_data;
-       struct deadline_rq *drq = RQ_DATA(rq);
-
-       mempool_free(drq, dd->drq_pool);
-       rq->elevator_private = NULL;
-}
-
-static int
-deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
-                    gfp_t gfp_mask)
-{
-       struct deadline_data *dd = q->elevator->elevator_data;
-       struct deadline_rq *drq;
-
-       drq = mempool_alloc(dd->drq_pool, gfp_mask);
-       if (drq) {
-               memset(drq, 0, sizeof(*drq));
-               RB_CLEAR_NODE(&drq->rb_node);
-               drq->request = rq;
-
-               INIT_HLIST_NODE(&drq->hash);
-
-               INIT_LIST_HEAD(&drq->fifo);
-
-               rq->elevator_private = drq;
-               return 0;
-       }
-
-       return 1;
-}
-
 /*
  * sysfs parts below
  */
@@ -757,10 +456,8 @@ static struct elevator_type iosched_deadline = {
                .elevator_dispatch_fn =         deadline_dispatch_requests,
                .elevator_add_req_fn =          deadline_add_request,
                .elevator_queue_empty_fn =      deadline_queue_empty,
-               .elevator_former_req_fn =       deadline_former_request,
-               .elevator_latter_req_fn =       deadline_latter_request,
-               .elevator_set_req_fn =          deadline_set_request,
-               .elevator_put_req_fn =          deadline_put_request,
+               .elevator_former_req_fn =       elv_rb_former_request,
+               .elevator_latter_req_fn =       elv_rb_latter_request,
                .elevator_init_fn =             deadline_init_queue,
                .elevator_exit_fn =             deadline_exit_queue,
        },
@@ -772,24 +469,11 @@ static struct elevator_type iosched_deadline = {
 
 static int __init deadline_init(void)
 {
-       int ret;
-
-       drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq),
-                                    0, 0, NULL, NULL);
-
-       if (!drq_pool)
-               return -ENOMEM;
-
-       ret = elv_register(&iosched_deadline);
-       if (ret)
-               kmem_cache_destroy(drq_pool);
-
-       return ret;
+       return elv_register(&iosched_deadline);
 }
 
 static void __exit deadline_exit(void)
 {
-       kmem_cache_destroy(drq_pool);
        elv_unregister(&iosched_deadline);
 }
 
index 9b72dc7c8a5c98dcde87dc0d7e42d9a56f447776..487dd3da8853971d9bcb77cf6ecf51eb39faedf3 100644 (file)
@@ -3,7 +3,7 @@
  *
  *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
  *
- * 30042000 Jens Axboe <axboe@suse.de> :
+ * 30042000 Jens Axboe <axboe@kernel.dk> :
  *
  * Split the elevator a bit so that it is possible to choose a different
  * one or even write a new "plug in". There are three pieces:
 #include <linux/compiler.h>
 #include <linux/delay.h>
 #include <linux/blktrace_api.h>
+#include <linux/hash.h>
 
 #include <asm/uaccess.h>
 
 static DEFINE_SPINLOCK(elv_list_lock);
 static LIST_HEAD(elv_list);
 
+/*
+ * Merge hash stuff.
+ */
+static const int elv_hash_shift = 6;
+#define ELV_HASH_BLOCK(sec)    ((sec) >> 3)
+#define ELV_HASH_FN(sec)       (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
+#define ELV_HASH_ENTRIES       (1 << elv_hash_shift)
+#define rq_hash_key(rq)                ((rq)->sector + (rq)->nr_sectors)
+#define ELV_ON_HASH(rq)                (!hlist_unhashed(&(rq)->hash))
+
 /*
  * can we safely merge with this request?
  */
@@ -56,8 +67,7 @@ inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
        /*
         * same device and no special stuff set, merge is ok
         */
-       if (rq->rq_disk == bio->bi_bdev->bd_disk &&
-           !rq->waiting && !rq->special)
+       if (rq->rq_disk == bio->bi_bdev->bd_disk && !rq->special)
                return 1;
 
        return 0;
@@ -151,27 +161,44 @@ __setup("elevator=", elevator_setup);
 
 static struct kobj_type elv_ktype;
 
-static elevator_t *elevator_alloc(struct elevator_type *e)
-{
-       elevator_t *eq = kmalloc(sizeof(elevator_t), GFP_KERNEL);
-       if (eq) {
-               memset(eq, 0, sizeof(*eq));
-               eq->ops = &e->ops;
-               eq->elevator_type = e;
-               kobject_init(&eq->kobj);
-               snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
-               eq->kobj.ktype = &elv_ktype;
-               mutex_init(&eq->sysfs_lock);
-       } else {
-               elevator_put(e);
-       }
+static elevator_t *elevator_alloc(request_queue_t *q, struct elevator_type *e)
+{
+       elevator_t *eq;
+       int i;
+
+       eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL, q->node);
+       if (unlikely(!eq))
+               goto err;
+
+       memset(eq, 0, sizeof(*eq));
+       eq->ops = &e->ops;
+       eq->elevator_type = e;
+       kobject_init(&eq->kobj);
+       snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
+       eq->kobj.ktype = &elv_ktype;
+       mutex_init(&eq->sysfs_lock);
+
+       eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
+                                       GFP_KERNEL, q->node);
+       if (!eq->hash)
+               goto err;
+
+       for (i = 0; i < ELV_HASH_ENTRIES; i++)
+               INIT_HLIST_HEAD(&eq->hash[i]);
+
        return eq;
+err:
+       kfree(eq);
+       elevator_put(e);
+       return NULL;
 }
 
 static void elevator_release(struct kobject *kobj)
 {
        elevator_t *e = container_of(kobj, elevator_t, kobj);
+
        elevator_put(e->elevator_type);
+       kfree(e->hash);
        kfree(e);
 }
 
@@ -198,7 +225,7 @@ int elevator_init(request_queue_t *q, char *name)
                e = elevator_get("noop");
        }
 
-       eq = elevator_alloc(e);
+       eq = elevator_alloc(q, e);
        if (!eq)
                return -ENOMEM;
 
@@ -212,6 +239,8 @@ int elevator_init(request_queue_t *q, char *name)
        return ret;
 }
 
+EXPORT_SYMBOL(elevator_init);
+
 void elevator_exit(elevator_t *e)
 {
        mutex_lock(&e->sysfs_lock);
@@ -223,10 +252,118 @@ void elevator_exit(elevator_t *e)
        kobject_put(&e->kobj);
 }
 
+EXPORT_SYMBOL(elevator_exit);
+
+static inline void __elv_rqhash_del(struct request *rq)
+{
+       hlist_del_init(&rq->hash);
+}
+
+static void elv_rqhash_del(request_queue_t *q, struct request *rq)
+{
+       if (ELV_ON_HASH(rq))
+               __elv_rqhash_del(rq);
+}
+
+static void elv_rqhash_add(request_queue_t *q, struct request *rq)
+{
+       elevator_t *e = q->elevator;
+
+       BUG_ON(ELV_ON_HASH(rq));
+       hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
+}
+
+static void elv_rqhash_reposition(request_queue_t *q, struct request *rq)
+{
+       __elv_rqhash_del(rq);
+       elv_rqhash_add(q, rq);
+}
+
+static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset)
+{
+       elevator_t *e = q->elevator;
+       struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
+       struct hlist_node *entry, *next;
+       struct request *rq;
+
+       hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
+               BUG_ON(!ELV_ON_HASH(rq));
+
+               if (unlikely(!rq_mergeable(rq))) {
+                       __elv_rqhash_del(rq);
+                       continue;
+               }
+
+               if (rq_hash_key(rq) == offset)
+                       return rq;
+       }
+
+       return NULL;
+}
+
+/*
+ * RB-tree support functions for inserting/lookup/removal of requests
+ * in a sorted RB tree.
+ */
+struct request *elv_rb_add(struct rb_root *root, struct request *rq)
+{
+       struct rb_node **p = &root->rb_node;
+       struct rb_node *parent = NULL;
+       struct request *__rq;
+
+       while (*p) {
+               parent = *p;
+               __rq = rb_entry(parent, struct request, rb_node);
+
+               if (rq->sector < __rq->sector)
+                       p = &(*p)->rb_left;
+               else if (rq->sector > __rq->sector)
+                       p = &(*p)->rb_right;
+               else
+                       return __rq;
+       }
+
+       rb_link_node(&rq->rb_node, parent, p);
+       rb_insert_color(&rq->rb_node, root);
+       return NULL;
+}
+
+EXPORT_SYMBOL(elv_rb_add);
+
+void elv_rb_del(struct rb_root *root, struct request *rq)
+{
+       BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
+       rb_erase(&rq->rb_node, root);
+       RB_CLEAR_NODE(&rq->rb_node);
+}
+
+EXPORT_SYMBOL(elv_rb_del);
+
+struct request *elv_rb_find(struct rb_root *root, sector_t sector)
+{
+       struct rb_node *n = root->rb_node;
+       struct request *rq;
+
+       while (n) {
+               rq = rb_entry(n, struct request, rb_node);
+
+               if (sector < rq->sector)
+                       n = n->rb_left;
+               else if (sector > rq->sector)
+                       n = n->rb_right;
+               else
+                       return rq;
+       }
+
+       return NULL;
+}
+
+EXPORT_SYMBOL(elv_rb_find);
+
 /*
  * Insert rq into dispatch queue of q.  Queue lock must be held on
- * entry.  If sort != 0, rq is sort-inserted; otherwise, rq will be
- * appended to the dispatch queue.  To be used by specific elevators.
+ * entry.  rq is sort insted into the dispatch queue. To be used by
+ * specific elevators.
  */
 void elv_dispatch_sort(request_queue_t *q, struct request *rq)
 {
@@ -235,6 +372,9 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq)
 
        if (q->last_merge == rq)
                q->last_merge = NULL;
+
+       elv_rqhash_del(q, rq);
+
        q->nr_sorted--;
 
        boundary = q->end_sector;
@@ -242,7 +382,7 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq)
        list_for_each_prev(entry, &q->queue_head) {
                struct request *pos = list_entry_rq(entry);
 
-               if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
+               if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
                        break;
                if (rq->sector >= boundary) {
                        if (pos->sector < boundary)
@@ -258,11 +398,38 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq)
        list_add(&rq->queuelist, entry);
 }
 
+EXPORT_SYMBOL(elv_dispatch_sort);
+
+/*
+ * Insert rq into dispatch queue of q.  Queue lock must be held on
+ * entry.  rq is added to the back of the dispatch queue. To be used by
+ * specific elevators.
+ */
+void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
+{
+       if (q->last_merge == rq)
+               q->last_merge = NULL;
+
+       elv_rqhash_del(q, rq);
+
+       q->nr_sorted--;
+
+       q->end_sector = rq_end_sector(rq);
+       q->boundary_rq = rq;
+       list_add_tail(&rq->queuelist, &q->queue_head);
+}
+
+EXPORT_SYMBOL(elv_dispatch_add_tail);
+
 int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
 {
        elevator_t *e = q->elevator;
+       struct request *__rq;
        int ret;
 
+       /*
+        * First try one-hit cache.
+        */
        if (q->last_merge) {
                ret = elv_try_merge(q->last_merge, bio);
                if (ret != ELEVATOR_NO_MERGE) {
@@ -271,18 +438,30 @@ int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
                }
        }
 
+       /*
+        * See if our hash lookup can find a potential backmerge.
+        */
+       __rq = elv_rqhash_find(q, bio->bi_sector);
+       if (__rq && elv_rq_merge_ok(__rq, bio)) {
+               *req = __rq;
+               return ELEVATOR_BACK_MERGE;
+       }
+
        if (e->ops->elevator_merge_fn)
                return e->ops->elevator_merge_fn(q, req, bio);
 
        return ELEVATOR_NO_MERGE;
 }
 
-void elv_merged_request(request_queue_t *q, struct request *rq)
+void elv_merged_request(request_queue_t *q, struct request *rq, int type)
 {
        elevator_t *e = q->elevator;
 
        if (e->ops->elevator_merged_fn)
-               e->ops->elevator_merged_fn(q, rq);
+               e->ops->elevator_merged_fn(q, rq, type);
+
+       if (type == ELEVATOR_BACK_MERGE)
+               elv_rqhash_reposition(q, rq);
 
        q->last_merge = rq;
 }
@@ -294,8 +473,11 @@ void elv_merge_requests(request_queue_t *q, struct request *rq,
 
        if (e->ops->elevator_merge_req_fn)
                e->ops->elevator_merge_req_fn(q, rq, next);
-       q->nr_sorted--;
 
+       elv_rqhash_reposition(q, rq);
+       elv_rqhash_del(q, next);
+
+       q->nr_sorted--;
        q->last_merge = rq;
 }
 
@@ -313,7 +495,7 @@ void elv_requeue_request(request_queue_t *q, struct request *rq)
                        e->ops->elevator_deactivate_req_fn(q, rq);
        }
 
-       rq->flags &= ~REQ_STARTED;
+       rq->cmd_flags &= ~REQ_STARTED;
 
        elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
 }
@@ -344,13 +526,13 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
 
        switch (where) {
        case ELEVATOR_INSERT_FRONT:
-               rq->flags |= REQ_SOFTBARRIER;
+               rq->cmd_flags |= REQ_SOFTBARRIER;
 
                list_add(&rq->queuelist, &q->queue_head);
                break;
 
        case ELEVATOR_INSERT_BACK:
-               rq->flags |= REQ_SOFTBARRIER;
+               rq->cmd_flags |= REQ_SOFTBARRIER;
                elv_drain_elevator(q);
                list_add_tail(&rq->queuelist, &q->queue_head);
                /*
@@ -369,10 +551,14 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
 
        case ELEVATOR_INSERT_SORT:
                BUG_ON(!blk_fs_request(rq));
-               rq->flags |= REQ_SORTED;
+               rq->cmd_flags |= REQ_SORTED;
                q->nr_sorted++;
-               if (q->last_merge == NULL && rq_mergeable(rq))
-                       q->last_merge = rq;
+               if (rq_mergeable(rq)) {
+                       elv_rqhash_add(q, rq);
+                       if (!q->last_merge)
+                               q->last_merge = rq;
+               }
+
                /*
                 * Some ioscheds (cfq) run q->request_fn directly, so
                 * rq cannot be accessed after calling
@@ -387,7 +573,7 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
                 * insertion; otherwise, requests should be requeued
                 * in ordseq order.
                 */
-               rq->flags |= REQ_SOFTBARRIER;
+               rq->cmd_flags |= REQ_SOFTBARRIER;
 
                if (q->ordseq == 0) {
                        list_add(&rq->queuelist, &q->queue_head);
@@ -429,9 +615,9 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
                       int plug)
 {
        if (q->ordcolor)
-               rq->flags |= REQ_ORDERED_COLOR;
+               rq->cmd_flags |= REQ_ORDERED_COLOR;
 
-       if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
+       if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
                /*
                 * toggle ordered color
                 */
@@ -452,7 +638,7 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
                        q->end_sector = rq_end_sector(rq);
                        q->boundary_rq = rq;
                }
-       } else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
+       } else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
                where = ELEVATOR_INSERT_BACK;
 
        if (plug)
@@ -461,6 +647,8 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
        elv_insert(q, rq, where);
 }
 
+EXPORT_SYMBOL(__elv_add_request);
+
 void elv_add_request(request_queue_t *q, struct request *rq, int where,
                     int plug)
 {
@@ -471,6 +659,8 @@ void elv_add_request(request_queue_t *q, struct request *rq, int where,
        spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
+EXPORT_SYMBOL(elv_add_request);
+
 static inline struct request *__elv_next_request(request_queue_t *q)
 {
        struct request *rq;
@@ -493,7 +683,7 @@ struct request *elv_next_request(request_queue_t *q)
        int ret;
 
        while ((rq = __elv_next_request(q)) != NULL) {
-               if (!(rq->flags & REQ_STARTED)) {
+               if (!(rq->cmd_flags & REQ_STARTED)) {
                        elevator_t *e = q->elevator;
 
                        /*
@@ -510,7 +700,7 @@ struct request *elv_next_request(request_queue_t *q)
                         * it, a request that has been delayed should
                         * not be passed by new incoming requests
                         */
-                       rq->flags |= REQ_STARTED;
+                       rq->cmd_flags |= REQ_STARTED;
                        blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
                }
 
@@ -519,7 +709,7 @@ struct request *elv_next_request(request_queue_t *q)
                        q->boundary_rq = NULL;
                }
 
-               if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn)
+               if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)
                        break;
 
                ret = q->prep_rq_fn(q, rq);
@@ -541,7 +731,7 @@ struct request *elv_next_request(request_queue_t *q)
                                nr_bytes = rq->data_len;
 
                        blkdev_dequeue_request(rq);
-                       rq->flags |= REQ_QUIET;
+                       rq->cmd_flags |= REQ_QUIET;
                        end_that_request_chunk(rq, 0, nr_bytes);
                        end_that_request_last(rq, 0);
                } else {
@@ -554,9 +744,12 @@ struct request *elv_next_request(request_queue_t *q)
        return rq;
 }
 
+EXPORT_SYMBOL(elv_next_request);
+
 void elv_dequeue_request(request_queue_t *q, struct request *rq)
 {
        BUG_ON(list_empty(&rq->queuelist));
+       BUG_ON(ELV_ON_HASH(rq));
 
        list_del_init(&rq->queuelist);
 
@@ -569,6 +762,8 @@ void elv_dequeue_request(request_queue_t *q, struct request *rq)
                q->in_flight++;
 }
 
+EXPORT_SYMBOL(elv_dequeue_request);
+
 int elv_queue_empty(request_queue_t *q)
 {
        elevator_t *e = q->elevator;
@@ -582,6 +777,8 @@ int elv_queue_empty(request_queue_t *q)
        return 1;
 }
 
+EXPORT_SYMBOL(elv_queue_empty);
+
 struct request *elv_latter_request(request_queue_t *q, struct request *rq)
 {
        elevator_t *e = q->elevator;
@@ -600,13 +797,12 @@ struct request *elv_former_request(request_queue_t *q, struct request *rq)
        return NULL;
 }
 
-int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
-                   gfp_t gfp_mask)
+int elv_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask)
 {
        elevator_t *e = q->elevator;
 
        if (e->ops->elevator_set_req_fn)
-               return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask);
+               return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
 
        rq->elevator_private = NULL;
        return 0;
@@ -620,12 +816,12 @@ void elv_put_request(request_queue_t *q, struct request *rq)
                e->ops->elevator_put_req_fn(q, rq);
 }
 
-int elv_may_queue(request_queue_t *q, int rw, struct bio *bio)
+int elv_may_queue(request_queue_t *q, int rw)
 {
        elevator_t *e = q->elevator;
 
        if (e->ops->elevator_may_queue_fn)
-               return e->ops->elevator_may_queue_fn(q, rw, bio);
+               return e->ops->elevator_may_queue_fn(q, rw);
 
        return ELV_MQUEUE_MAY;
 }
@@ -792,7 +988,7 @@ static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
        /*
         * Allocate new elevator
         */
-       e = elevator_alloc(new_e);
+       e = elevator_alloc(q, new_e);
        if (!e)
                return 0;
 
@@ -908,11 +1104,26 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name)
        return len;
 }
 
-EXPORT_SYMBOL(elv_dispatch_sort);
-EXPORT_SYMBOL(elv_add_request);
-EXPORT_SYMBOL(__elv_add_request);
-EXPORT_SYMBOL(elv_next_request);
-EXPORT_SYMBOL(elv_dequeue_request);
-EXPORT_SYMBOL(elv_queue_empty);
-EXPORT_SYMBOL(elevator_exit);
-EXPORT_SYMBOL(elevator_init);
+struct request *elv_rb_former_request(request_queue_t *q, struct request *rq)
+{
+       struct rb_node *rbprev = rb_prev(&rq->rb_node);
+
+       if (rbprev)
+               return rb_entry_rq(rbprev);
+
+       return NULL;
+}
+
+EXPORT_SYMBOL(elv_rb_former_request);
+
+struct request *elv_rb_latter_request(request_queue_t *q, struct request *rq)
+{
+       struct rb_node *rbnext = rb_next(&rq->rb_node);
+
+       if (rbnext)
+               return rb_entry_rq(rbnext);
+
+       return NULL;
+}
+
+EXPORT_SYMBOL(elv_rb_latter_request);
index 51dc0edf76e0281f52e53da15576e4ffa62b0eab..83425fb3c8dba6e2b62122aaa6a02a25d2436ab4 100644 (file)
@@ -39,6 +39,7 @@ static void blk_unplug_timeout(unsigned long data);
 static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);
 static void init_request_from_bio(struct request *req, struct bio *bio);
 static int __make_request(request_queue_t *q, struct bio *bio);
+static struct io_context *current_io_context(gfp_t gfp_flags, int node);
 
 /*
  * For the allocated request tables
@@ -277,19 +278,19 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
 
 EXPORT_SYMBOL(blk_queue_make_request);
 
-static inline void rq_init(request_queue_t *q, struct request *rq)
+static void rq_init(request_queue_t *q, struct request *rq)
 {
        INIT_LIST_HEAD(&rq->queuelist);
        INIT_LIST_HEAD(&rq->donelist);
 
        rq->errors = 0;
-       rq->rq_status = RQ_ACTIVE;
        rq->bio = rq->biotail = NULL;
+       INIT_HLIST_NODE(&rq->hash);
+       RB_CLEAR_NODE(&rq->rb_node);
        rq->ioprio = 0;
        rq->buffer = NULL;
        rq->ref_count = 1;
        rq->q = q;
-       rq->waiting = NULL;
        rq->special = NULL;
        rq->data_len = 0;
        rq->data = NULL;
@@ -382,8 +383,8 @@ unsigned blk_ordered_req_seq(struct request *rq)
        if (rq == &q->post_flush_rq)
                return QUEUE_ORDSEQ_POSTFLUSH;
 
-       if ((rq->flags & REQ_ORDERED_COLOR) ==
-           (q->orig_bar_rq->flags & REQ_ORDERED_COLOR))
+       if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
+           (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
                return QUEUE_ORDSEQ_DRAIN;
        else
                return QUEUE_ORDSEQ_DONE;
@@ -446,11 +447,11 @@ static void queue_flush(request_queue_t *q, unsigned which)
                end_io = post_flush_end_io;
        }
 
+       rq->cmd_flags = REQ_HARDBARRIER;
        rq_init(q, rq);
-       rq->flags = REQ_HARDBARRIER;
        rq->elevator_private = NULL;
+       rq->elevator_private2 = NULL;
        rq->rq_disk = q->bar_rq.rq_disk;
-       rq->rl = NULL;
        rq->end_io = end_io;
        q->prepare_flush_fn(q, rq);
 
@@ -471,11 +472,13 @@ static inline struct request *start_ordered(request_queue_t *q,
        blkdev_dequeue_request(rq);
        q->orig_bar_rq = rq;
        rq = &q->bar_rq;
+       rq->cmd_flags = 0;
        rq_init(q, rq);
-       rq->flags = bio_data_dir(q->orig_bar_rq->bio);
-       rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
+       if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
+               rq->cmd_flags |= REQ_RW;
+       rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
        rq->elevator_private = NULL;
-       rq->rl = NULL;
+       rq->elevator_private2 = NULL;
        init_request_from_bio(rq, q->orig_bar_rq->bio);
        rq->end_io = bar_end_io;
 
@@ -587,8 +590,8 @@ static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error)
        return 0;
 }
 
-static inline int ordered_bio_endio(struct request *rq, struct bio *bio,
-                                   unsigned int nbytes, int error)
+static int ordered_bio_endio(struct request *rq, struct bio *bio,
+                            unsigned int nbytes, int error)
 {
        request_queue_t *q = rq->q;
        bio_end_io_t *endio;
@@ -1124,7 +1127,7 @@ void blk_queue_end_tag(request_queue_t *q, struct request *rq)
        }
 
        list_del_init(&rq->queuelist);
-       rq->flags &= ~REQ_QUEUED;
+       rq->cmd_flags &= ~REQ_QUEUED;
        rq->tag = -1;
 
        if (unlikely(bqt->tag_index[tag] == NULL))
@@ -1160,7 +1163,7 @@ int blk_queue_start_tag(request_queue_t *q, struct request *rq)
        struct blk_queue_tag *bqt = q->queue_tags;
        int tag;
 
-       if (unlikely((rq->flags & REQ_QUEUED))) {
+       if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
                printk(KERN_ERR 
                       "%s: request %p for device [%s] already tagged %d",
                       __FUNCTION__, rq,
@@ -1168,13 +1171,18 @@ int blk_queue_start_tag(request_queue_t *q, struct request *rq)
                BUG();
        }
 
-       tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);
-       if (tag >= bqt->max_depth)
-               return 1;
+       /*
+        * Protect against shared tag maps, as we may not have exclusive
+        * access to the tag map.
+        */
+       do {
+               tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);
+               if (tag >= bqt->max_depth)
+                       return 1;
 
-       __set_bit(tag, bqt->tag_map);
+       } while (test_and_set_bit(tag, bqt->tag_map));
 
-       rq->flags |= REQ_QUEUED;
+       rq->cmd_flags |= REQ_QUEUED;
        rq->tag = tag;
        bqt->tag_index[tag] = rq;
        blkdev_dequeue_request(rq);
@@ -1210,65 +1218,31 @@ void blk_queue_invalidate_tags(request_queue_t *q)
                        printk(KERN_ERR
                               "%s: bad tag found on list\n", __FUNCTION__);
                        list_del_init(&rq->queuelist);
-                       rq->flags &= ~REQ_QUEUED;
+                       rq->cmd_flags &= ~REQ_QUEUED;
                } else
                        blk_queue_end_tag(q, rq);
 
-               rq->flags &= ~REQ_STARTED;
+               rq->cmd_flags &= ~REQ_STARTED;
                __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
        }
 }
 
 EXPORT_SYMBOL(blk_queue_invalidate_tags);
 
-static const char * const rq_flags[] = {
-       "REQ_RW",
-       "REQ_FAILFAST",
-       "REQ_SORTED",
-       "REQ_SOFTBARRIER",
-       "REQ_HARDBARRIER",
-       "REQ_FUA",
-       "REQ_CMD",
-       "REQ_NOMERGE",
-       "REQ_STARTED",
-       "REQ_DONTPREP",
-       "REQ_QUEUED",
-       "REQ_ELVPRIV",
-       "REQ_PC",
-       "REQ_BLOCK_PC",
-       "REQ_SENSE",
-       "REQ_FAILED",
-       "REQ_QUIET",
-       "REQ_SPECIAL",
-       "REQ_DRIVE_CMD",
-       "REQ_DRIVE_TASK",
-       "REQ_DRIVE_TASKFILE",
-       "REQ_PREEMPT",
-       "REQ_PM_SUSPEND",
-       "REQ_PM_RESUME",
-       "REQ_PM_SHUTDOWN",
-       "REQ_ORDERED_COLOR",
-};
-
 void blk_dump_rq_flags(struct request *rq, char *msg)
 {
        int bit;
 
-       printk("%s: dev %s: flags = ", msg,
-               rq->rq_disk ? rq->rq_disk->disk_name : "?");
-       bit = 0;
-       do {
-               if (rq->flags & (1 << bit))
-                       printk("%s ", rq_flags[bit]);
-               bit++;
-       } while (bit < __REQ_NR_BITS);
+       printk("%s: dev %s: type=%x, flags=%x\n", msg,
+               rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
+               rq->cmd_flags);
 
        printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector,
                                                       rq->nr_sectors,
                                                       rq->current_nr_sectors);
        printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len);
 
-       if (rq->flags & (REQ_BLOCK_PC | REQ_PC)) {
+       if (blk_pc_request(rq)) {
                printk("cdb: ");
                for (bit = 0; bit < sizeof(rq->cmd); bit++)
                        printk("%02x ", rq->cmd[bit]);
@@ -1441,7 +1415,7 @@ static inline int ll_new_mergeable(request_queue_t *q,
        int nr_phys_segs = bio_phys_segments(q, bio);
 
        if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
-               req->flags |= REQ_NOMERGE;
+               req->cmd_flags |= REQ_NOMERGE;
                if (req == q->last_merge)
                        q->last_merge = NULL;
                return 0;
@@ -1464,7 +1438,7 @@ static inline int ll_new_hw_segment(request_queue_t *q,
 
        if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
            || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
-               req->flags |= REQ_NOMERGE;
+               req->cmd_flags |= REQ_NOMERGE;
                if (req == q->last_merge)
                        q->last_merge = NULL;
                return 0;
@@ -1491,7 +1465,7 @@ static int ll_back_merge_fn(request_queue_t *q, struct request *req,
                max_sectors = q->max_sectors;
 
        if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
-               req->flags |= REQ_NOMERGE;
+               req->cmd_flags |= REQ_NOMERGE;
                if (req == q->last_merge)
                        q->last_merge = NULL;
                return 0;
@@ -1530,7 +1504,7 @@ static int ll_front_merge_fn(request_queue_t *q, struct request *req,
 
 
        if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
-               req->flags |= REQ_NOMERGE;
+               req->cmd_flags |= REQ_NOMERGE;
                if (req == q->last_merge)
                        q->last_merge = NULL;
                return 0;
@@ -2029,14 +2003,13 @@ EXPORT_SYMBOL(blk_get_queue);
 
 static inline void blk_free_request(request_queue_t *q, struct request *rq)
 {
-       if (rq->flags & REQ_ELVPRIV)
+       if (rq->cmd_flags & REQ_ELVPRIV)
                elv_put_request(q, rq);
        mempool_free(rq, q->rq.rq_pool);
 }
 
-static inline struct request *
-blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
-                 int priv, gfp_t gfp_mask)
+static struct request *
+blk_alloc_request(request_queue_t *q, int rw, int priv, gfp_t gfp_mask)
 {
        struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
@@ -2044,17 +2017,17 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
                return NULL;
 
        /*
-        * first three bits are identical in rq->flags and bio->bi_rw,
+        * first three bits are identical in rq->cmd_flags and bio->bi_rw,
         * see bio.h and blkdev.h
         */
-       rq->flags = rw;
+       rq->cmd_flags = rw | REQ_ALLOCED;
 
        if (priv) {
-               if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
+               if (unlikely(elv_set_request(q, rq, gfp_mask))) {
                        mempool_free(rq, q->rq.rq_pool);
                        return NULL;
                }
-               rq->flags |= REQ_ELVPRIV;
+               rq->cmd_flags |= REQ_ELVPRIV;
        }
 
        return rq;
@@ -2141,13 +2114,13 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
        struct io_context *ioc = NULL;
        int may_queue, priv;
 
-       may_queue = elv_may_queue(q, rw, bio);
+       may_queue = elv_may_queue(q, rw);
        if (may_queue == ELV_MQUEUE_NO)
                goto rq_starved;
 
        if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
                if (rl->count[rw]+1 >= q->nr_requests) {
-                       ioc = current_io_context(GFP_ATOMIC);
+                       ioc = current_io_context(GFP_ATOMIC, q->node);
                        /*
                         * The queue will fill after this allocation, so set
                         * it as full, and mark this process as "batching".
@@ -2189,7 +2162,7 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
 
        spin_unlock_irq(q->queue_lock);
 
-       rq = blk_alloc_request(q, rw, bio, priv, gfp_mask);
+       rq = blk_alloc_request(q, rw, priv, gfp_mask);
        if (unlikely(!rq)) {
                /*
                 * Allocation failed presumably due to memory. Undo anything
@@ -2225,7 +2198,6 @@ rq_starved:
                ioc->nr_batch_requests--;
        
        rq_init(q, rq);
-       rq->rl = rl;
 
        blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
 out:
@@ -2268,7 +2240,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw,
                         * up to a big batch of them for a small period time.
                         * See ioc_batching, ioc_set_batching
                         */
-                       ioc = current_io_context(GFP_NOIO);
+                       ioc = current_io_context(GFP_NOIO, q->node);
                        ioc_set_batching(q, ioc);
 
                        spin_lock_irq(q->queue_lock);
@@ -2299,6 +2271,25 @@ struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(blk_get_request);
 
+/**
+ * blk_start_queueing - initiate dispatch of requests to device
+ * @q:         request queue to kick into gear
+ *
+ * This is basically a helper to remove the need to know whether a queue
+ * is plugged or not if someone just wants to initiate dispatch of requests
+ * for this queue.
+ *
+ * The queue lock must be held with interrupts disabled.
+ */
+void blk_start_queueing(request_queue_t *q)
+{
+       if (!blk_queue_plugged(q))
+               q->request_fn(q);
+       else
+               __generic_unplug_device(q);
+}
+EXPORT_SYMBOL(blk_start_queueing);
+
 /**
  * blk_requeue_request - put a request back on queue
  * @q:         request queue where request should be inserted
@@ -2351,7 +2342,8 @@ void blk_insert_request(request_queue_t *q, struct request *rq,
         * must not attempt merges on this) and that it acts as a soft
         * barrier
         */
-       rq->flags |= REQ_SPECIAL | REQ_SOFTBARRIER;
+       rq->cmd_type = REQ_TYPE_SPECIAL;
+       rq->cmd_flags |= REQ_SOFTBARRIER;
 
        rq->special = data;
 
@@ -2365,11 +2357,7 @@ void blk_insert_request(request_queue_t *q, struct request *rq,
 
        drive_stat_acct(rq, rq->nr_sectors, 1);
        __elv_add_request(q, rq, where, 0);
-
-       if (blk_queue_plugged(q))
-               __generic_unplug_device(q);
-       else
-               q->request_fn(q);
+       blk_start_queueing(q);
        spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
@@ -2558,7 +2546,7 @@ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
        int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
 
        rq->rq_disk = bd_disk;
-       rq->flags |= REQ_NOMERGE;
+       rq->cmd_flags |= REQ_NOMERGE;
        rq->end_io = done;
        WARN_ON(irqs_disabled());
        spin_lock_irq(q->queue_lock);
@@ -2598,10 +2586,9 @@ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
                rq->sense_len = 0;
        }
 
-       rq->waiting = &wait;
+       rq->end_io_data = &wait;
        blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
        wait_for_completion(&wait);
-       rq->waiting = NULL;
 
        if (rq->errors)
                err = -EIO;
@@ -2710,8 +2697,6 @@ EXPORT_SYMBOL_GPL(disk_round_stats);
  */
 void __blk_put_request(request_queue_t *q, struct request *req)
 {
-       struct request_list *rl = req->rl;
-
        if (unlikely(!q))
                return;
        if (unlikely(--req->ref_count))
@@ -2719,18 +2704,16 @@ void __blk_put_request(request_queue_t *q, struct request *req)
 
        elv_completed_request(q, req);
 
-       req->rq_status = RQ_INACTIVE;
-       req->rl = NULL;
-
        /*
         * Request may not have originated from ll_rw_blk. if not,
         * it didn't come out of our reserved rq pools
         */
-       if (rl) {
+       if (req->cmd_flags & REQ_ALLOCED) {
                int rw = rq_data_dir(req);
-               int priv = req->flags & REQ_ELVPRIV;
+               int priv = req->cmd_flags & REQ_ELVPRIV;
 
                BUG_ON(!list_empty(&req->queuelist));
+               BUG_ON(!hlist_unhashed(&req->hash));
 
                blk_free_request(q, req);
                freed_request(q, rw, priv);
@@ -2764,9 +2747,9 @@ EXPORT_SYMBOL(blk_put_request);
  */
 void blk_end_sync_rq(struct request *rq, int error)
 {
-       struct completion *waiting = rq->waiting;
+       struct completion *waiting = rq->end_io_data;
 
-       rq->waiting = NULL;
+       rq->end_io_data = NULL;
        __blk_put_request(rq->q, rq);
 
        /*
@@ -2829,7 +2812,7 @@ static int attempt_merge(request_queue_t *q, struct request *req,
 
        if (rq_data_dir(req) != rq_data_dir(next)
            || req->rq_disk != next->rq_disk
-           || next->waiting || next->special)
+           || next->special)
                return 0;
 
        /*
@@ -2890,22 +2873,24 @@ static inline int attempt_front_merge(request_queue_t *q, struct request *rq)
 
 static void init_request_from_bio(struct request *req, struct bio *bio)
 {
-       req->flags |= REQ_CMD;
+       req->cmd_type = REQ_TYPE_FS;
 
        /*
         * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
         */
        if (bio_rw_ahead(bio) || bio_failfast(bio))
-               req->flags |= REQ_FAILFAST;
+               req->cmd_flags |= REQ_FAILFAST;
 
        /*
         * REQ_BARRIER implies no merging, but lets make it explicit
         */
        if (unlikely(bio_barrier(bio)))
-               req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
+               req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
 
        if (bio_sync(bio))
-               req->flags |= REQ_RW_SYNC;
+               req->cmd_flags |= REQ_RW_SYNC;
+       if (bio_rw_meta(bio))
+               req->cmd_flags |= REQ_RW_META;
 
        req->errors = 0;
        req->hard_sector = req->sector = bio->bi_sector;
@@ -2914,7 +2899,6 @@ static void init_request_from_bio(struct request *req, struct bio *bio)
        req->nr_phys_segments = bio_phys_segments(req->q, bio);
        req->nr_hw_segments = bio_hw_segments(req->q, bio);
        req->buffer = bio_data(bio);    /* see ->buffer comment above */
-       req->waiting = NULL;
        req->bio = req->biotail = bio;
        req->ioprio = bio_prio(bio);
        req->rq_disk = bio->bi_bdev->bd_disk;
@@ -2924,17 +2908,11 @@ static void init_request_from_bio(struct request *req, struct bio *bio)
 static int __make_request(request_queue_t *q, struct bio *bio)
 {
        struct request *req;
-       int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync;
-       unsigned short prio;
-       sector_t sector;
+       int el_ret, nr_sectors, barrier, err;
+       const unsigned short prio = bio_prio(bio);
+       const int sync = bio_sync(bio);
 
-       sector = bio->bi_sector;
        nr_sectors = bio_sectors(bio);
-       cur_nr_sectors = bio_cur_sectors(bio);
-       prio = bio_prio(bio);
-
-       rw = bio_data_dir(bio);
-       sync = bio_sync(bio);
 
        /*
         * low level driver can indicate that it wants pages above a
@@ -2943,8 +2921,6 @@ static int __make_request(request_queue_t *q, struct bio *bio)
         */
        blk_queue_bounce(q, &bio);
 
-       spin_lock_prefetch(q->queue_lock);
-
        barrier = bio_barrier(bio);
        if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
                err = -EOPNOTSUPP;
@@ -2972,7 +2948,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
                        req->ioprio = ioprio_best(req->ioprio, prio);
                        drive_stat_acct(req, nr_sectors, 0);
                        if (!attempt_back_merge(q, req))
-                               elv_merged_request(q, req);
+                               elv_merged_request(q, req, el_ret);
                        goto out;
 
                case ELEVATOR_FRONT_MERGE:
@@ -2992,14 +2968,14 @@ static int __make_request(request_queue_t *q, struct bio *bio)
                         * not touch req->buffer either...
                         */
                        req->buffer = bio_data(bio);
-                       req->current_nr_sectors = cur_nr_sectors;
-                       req->hard_cur_sectors = cur_nr_sectors;
-                       req->sector = req->hard_sector = sector;
+                       req->current_nr_sectors = bio_cur_sectors(bio);
+                       req->hard_cur_sectors = req->current_nr_sectors;
+                       req->sector = req->hard_sector = bio->bi_sector;
                        req->nr_sectors = req->hard_nr_sectors += nr_sectors;
                        req->ioprio = ioprio_best(req->ioprio, prio);
                        drive_stat_acct(req, nr_sectors, 0);
                        if (!attempt_front_merge(q, req))
-                               elv_merged_request(q, req);
+                               elv_merged_request(q, req, el_ret);
                        goto out;
 
                /* ELV_NO_MERGE: elevator says don't/can't merge. */
@@ -3012,7 +2988,7 @@ get_rq:
         * Grab a free request. This is might sleep but can not fail.
         * Returns with the queue unlocked.
         */
-       req = get_request_wait(q, rw, bio);
+       req = get_request_wait(q, bio_data_dir(bio), bio);
 
        /*
         * After dropping the lock and possibly sleeping here, our request
@@ -3306,7 +3282,7 @@ static int __end_that_request_first(struct request *req, int uptodate,
                req->errors = 0;
 
        if (!uptodate) {
-               if (blk_fs_request(req) && !(req->flags & REQ_QUIET))
+               if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))
                        printk("end_request: I/O error, dev %s, sector %llu\n",
                                req->rq_disk ? req->rq_disk->disk_name : "?",
                                (unsigned long long)req->sector);
@@ -3569,8 +3545,8 @@ EXPORT_SYMBOL(end_request);
 
 void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio)
 {
-       /* first two bits are identical in rq->flags and bio->bi_rw */
-       rq->flags |= (bio->bi_rw & 3);
+       /* first two bits are identical in rq->cmd_flags and bio->bi_rw */
+       rq->cmd_flags |= (bio->bi_rw & 3);
 
        rq->nr_phys_segments = bio_phys_segments(q, bio);
        rq->nr_hw_segments = bio_hw_segments(q, bio);
@@ -3658,25 +3634,22 @@ EXPORT_SYMBOL(put_io_context);
 /* Called by the exitting task */
 void exit_io_context(void)
 {
-       unsigned long flags;
        struct io_context *ioc;
        struct cfq_io_context *cic;
 
-       local_irq_save(flags);
        task_lock(current);
        ioc = current->io_context;
        current->io_context = NULL;
-       ioc->task = NULL;
        task_unlock(current);
-       local_irq_restore(flags);
 
+       ioc->task = NULL;
        if (ioc->aic && ioc->aic->exit)
                ioc->aic->exit(ioc->aic);
        if (ioc->cic_root.rb_node != NULL) {
                cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node);
                cic->exit(ioc);
        }
+
        put_io_context(ioc);
 }
 
@@ -3688,7 +3661,7 @@ void exit_io_context(void)
  * but since the current task itself holds a reference, the context can be
  * used in general code, so long as it stays within `current` context.
  */
-struct io_context *current_io_context(gfp_t gfp_flags)
+static struct io_context *current_io_context(gfp_t gfp_flags, int node)
 {
        struct task_struct *tsk = current;
        struct io_context *ret;
@@ -3697,11 +3670,11 @@ struct io_context *current_io_context(gfp_t gfp_flags)
        if (likely(ret))
                return ret;
 
-       ret = kmem_cache_alloc(iocontext_cachep, gfp_flags);
+       ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
        if (ret) {
                atomic_set(&ret->refcount, 1);
                ret->task = current;
-               ret->set_ioprio = NULL;
+               ret->ioprio_changed = 0;
                ret->last_waited = jiffies; /* doesn't matter... */
                ret->nr_batch_requests = 0; /* because this is 0 */
                ret->aic = NULL;
@@ -3721,10 +3694,10 @@ EXPORT_SYMBOL(current_io_context);
  *
  * This is always called in the context of the task which submitted the I/O.
  */
-struct io_context *get_io_context(gfp_t gfp_flags)
+struct io_context *get_io_context(gfp_t gfp_flags, int node)
 {
        struct io_context *ret;
-       ret = current_io_context(gfp_flags);
+       ret = current_io_context(gfp_flags, node);
        if (likely(ret))
                atomic_inc(&ret->refcount);
        return ret;
@@ -3837,9 +3810,6 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count)
        ssize_t ret = queue_var_store(&ra_kb, page, count);
 
        spin_lock_irq(q->queue_lock);
-       if (ra_kb > (q->max_sectors >> 1))
-               ra_kb = (q->max_sectors >> 1);
-
        q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
        spin_unlock_irq(q->queue_lock);
 
index 56a7c620574f86c0338a431354344d149314ce0f..79af431794213867e5ac1c457103d62b0d74cb43 100644 (file)
@@ -69,7 +69,7 @@ static void *noop_init_queue(request_queue_t *q, elevator_t *e)
 {
        struct noop_data *nd;
 
-       nd = kmalloc(sizeof(*nd), GFP_KERNEL);
+       nd = kmalloc_node(sizeof(*nd), GFP_KERNEL, q->node);
        if (!nd)
                return NULL;
        INIT_LIST_HEAD(&nd->queue);
index b33eda26e2053e84f85473a13cd657cf920e3fe0..2dc326421a24af9c745e9f4eb34f7a80f1a41d0a 100644 (file)
@@ -294,7 +294,7 @@ static int sg_io(struct file *file, request_queue_t *q,
        rq->sense = sense;
        rq->sense_len = 0;
 
-       rq->flags |= REQ_BLOCK_PC;
+       rq->cmd_type = REQ_TYPE_BLOCK_PC;
        bio = rq->bio;
 
        /*
@@ -470,7 +470,7 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q,
        memset(sense, 0, sizeof(sense));
        rq->sense = sense;
        rq->sense_len = 0;
-       rq->flags |= REQ_BLOCK_PC;
+       rq->cmd_type = REQ_TYPE_BLOCK_PC;
 
        blk_execute_rq(q, disk, rq, 0);
 
@@ -502,7 +502,7 @@ static int __blk_send_generic(request_queue_t *q, struct gendisk *bd_disk, int c
        int err;
 
        rq = blk_get_request(q, WRITE, __GFP_WAIT);
-       rq->flags |= REQ_BLOCK_PC;
+       rq->cmd_type = REQ_TYPE_BLOCK_PC;
        rq->data = NULL;
        rq->data_len = 0;
        rq->timeout = BLK_DEFAULT_TIMEOUT;
index a360215dbce79cc1ff85f4b03651000ae17436b0..2568640430fb06e927d41f67918eb29a46d7163d 100644 (file)
@@ -3331,7 +3331,7 @@ static int DAC960_process_queue(DAC960_Controller_T *Controller, struct request_
                Command->DmaDirection = PCI_DMA_TODEVICE;
                Command->CommandType = DAC960_WriteCommand;
        }
-       Command->Completion = Request->waiting;
+       Command->Completion = Request->end_io_data;
        Command->LogicalDriveNumber = (long)Request->rq_disk->private_data;
        Command->BlockNumber = Request->sector;
        Command->BlockCount = Request->nr_sectors;
index b5382cedf0c09cad0376df575c32b194d5ba7c0d..422e31d5f8e5c4627c15c88ecc38ff8627276069 100644 (file)
@@ -2,6 +2,8 @@
 # Block device driver configuration
 #
 
+if BLOCK
+
 menu "Block devices"
 
 config BLK_DEV_FD
@@ -468,3 +470,5 @@ config ATA_OVER_ETH
        devices like the Coraid EtherDrive (R) Storage Blade.
 
 endmenu
+
+endif
index 2cd3391ff8783dcb8497600d833f4cedf4a24eb4..c211065ad8294cc337711aea863f4df6f8d1bbc0 100644 (file)
@@ -1229,7 +1229,6 @@ static inline void complete_buffers(struct bio *bio, int status)
                int nr_sectors = bio_sectors(bio);
 
                bio->bi_next = NULL;
-               blk_finished_io(len);
                bio_endio(bio, nr_sectors << 9, status ? 0 : -EIO);
                bio = xbh;
        }
index 78082edc14b4f6fdaec8652f12b45e3a921c3223..4abc193314ee6c34f7a6e93f5e5faaaca8c3761f 100644 (file)
@@ -989,7 +989,6 @@ static inline void complete_buffers(struct bio *bio, int ok)
                xbh = bio->bi_next;
                bio->bi_next = NULL;
                
-               blk_finished_io(nr_sectors);
                bio_endio(bio, nr_sectors << 9, ok ? 0 : -EIO);
 
                bio = xbh;
index ad1d7065a1b20ef10028c8723bf63fab817ade4a..629c5769d994e3a45aa9063cfd5cf08532f2c494 100644 (file)
@@ -2991,8 +2991,8 @@ static void do_fd_request(request_queue_t * q)
        if (usage_count == 0) {
                printk("warning: usage count=0, current_req=%p exiting\n",
                       current_req);
-               printk("sect=%ld flags=%lx\n", (long)current_req->sector,
-                      current_req->flags);
+               printk("sect=%ld type=%x flags=%x\n", (long)current_req->sector,
+                      current_req->cmd_type, current_req->cmd_flags);
                return;
        }
        if (test_bit(0, &fdc_busy)) {
index 68b0471ad5a64e52a2b53135ab58523507fb700d..d6bb8da955a213c0c192dd3272ea435393a0a180 100644 (file)
@@ -66,6 +66,7 @@
 #include <linux/swap.h>
 #include <linux/slab.h>
 #include <linux/loop.h>
+#include <linux/compat.h>
 #include <linux/suspend.h>
 #include <linux/writeback.h>
 #include <linux/buffer_head.h>         /* for invalidate_bdev() */
@@ -1165,6 +1166,162 @@ static int lo_ioctl(struct inode * inode, struct file * file,
        return err;
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_loop_info {
+       compat_int_t    lo_number;      /* ioctl r/o */
+       compat_dev_t    lo_device;      /* ioctl r/o */
+       compat_ulong_t  lo_inode;       /* ioctl r/o */
+       compat_dev_t    lo_rdevice;     /* ioctl r/o */
+       compat_int_t    lo_offset;
+       compat_int_t    lo_encrypt_type;
+       compat_int_t    lo_encrypt_key_size;    /* ioctl w/o */
+       compat_int_t    lo_flags;       /* ioctl r/o */
+       char            lo_name[LO_NAME_SIZE];
+       unsigned char   lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
+       compat_ulong_t  lo_init[2];
+       char            reserved[4];
+};
+
+/*
+ * Transfer 32-bit compatibility structure in userspace to 64-bit loop info
+ * - noinlined to reduce stack space usage in main part of driver
+ */
+static noinline int
+loop_info64_from_compat(const struct compat_loop_info *arg,
+                       struct loop_info64 *info64)
+{
+       struct compat_loop_info info;
+
+       if (copy_from_user(&info, arg, sizeof(info)))
+               return -EFAULT;
+
+       memset(info64, 0, sizeof(*info64));
+       info64->lo_number = info.lo_number;
+       info64->lo_device = info.lo_device;
+       info64->lo_inode = info.lo_inode;
+       info64->lo_rdevice = info.lo_rdevice;
+       info64->lo_offset = info.lo_offset;
+       info64->lo_sizelimit = 0;
+       info64->lo_encrypt_type = info.lo_encrypt_type;
+       info64->lo_encrypt_key_size = info.lo_encrypt_key_size;
+       info64->lo_flags = info.lo_flags;
+       info64->lo_init[0] = info.lo_init[0];
+       info64->lo_init[1] = info.lo_init[1];
+       if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
+               memcpy(info64->lo_crypt_name, info.lo_name, LO_NAME_SIZE);
+       else
+               memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE);
+       memcpy(info64->lo_encrypt_key, info.lo_encrypt_key, LO_KEY_SIZE);
+       return 0;
+}
+
+/*
+ * Transfer 64-bit loop info to 32-bit compatibility structure in userspace
+ * - noinlined to reduce stack space usage in main part of driver
+ */
+static noinline int
+loop_info64_to_compat(const struct loop_info64 *info64,
+                     struct compat_loop_info __user *arg)
+{
+       struct compat_loop_info info;
+
+       memset(&info, 0, sizeof(info));
+       info.lo_number = info64->lo_number;
+       info.lo_device = info64->lo_device;
+       info.lo_inode = info64->lo_inode;
+       info.lo_rdevice = info64->lo_rdevice;
+       info.lo_offset = info64->lo_offset;
+       info.lo_encrypt_type = info64->lo_encrypt_type;
+       info.lo_encrypt_key_size = info64->lo_encrypt_key_size;
+       info.lo_flags = info64->lo_flags;
+       info.lo_init[0] = info64->lo_init[0];
+       info.lo_init[1] = info64->lo_init[1];
+       if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
+               memcpy(info.lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
+       else
+               memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE);
+       memcpy(info.lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);
+
+       /* error in case values were truncated */
+       if (info.lo_device != info64->lo_device ||
+           info.lo_rdevice != info64->lo_rdevice ||
+           info.lo_inode != info64->lo_inode ||
+           info.lo_offset != info64->lo_offset ||
+           info.lo_init[0] != info64->lo_init[0] ||
+           info.lo_init[1] != info64->lo_init[1])
+               return -EOVERFLOW;
+
+       if (copy_to_user(arg, &info, sizeof(info)))
+               return -EFAULT;
+       return 0;
+}
+
+static int
+loop_set_status_compat(struct loop_device *lo,
+                      const struct compat_loop_info __user *arg)
+{
+       struct loop_info64 info64;
+       int ret;
+
+       ret = loop_info64_from_compat(arg, &info64);
+       if (ret < 0)
+               return ret;
+       return loop_set_status(lo, &info64);
+}
+
+static int
+loop_get_status_compat(struct loop_device *lo,
+                      struct compat_loop_info __user *arg)
+{
+       struct loop_info64 info64;
+       int err = 0;
+
+       if (!arg)
+               err = -EINVAL;
+       if (!err)
+               err = loop_get_status(lo, &info64);
+       if (!err)
+               err = loop_info64_to_compat(&info64, arg);
+       return err;
+}
+
+static long lo_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+       struct inode *inode = file->f_dentry->d_inode;
+       struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
+       int err;
+
+       lock_kernel();
+       switch(cmd) {
+       case LOOP_SET_STATUS:
+               mutex_lock(&lo->lo_ctl_mutex);
+               err = loop_set_status_compat(
+                       lo, (const struct compat_loop_info __user *) arg);
+               mutex_unlock(&lo->lo_ctl_mutex);
+               break;
+       case LOOP_GET_STATUS:
+               mutex_lock(&lo->lo_ctl_mutex);
+               err = loop_get_status_compat(
+                       lo, (struct compat_loop_info __user *) arg);
+               mutex_unlock(&lo->lo_ctl_mutex);
+               break;
+       case LOOP_CLR_FD:
+       case LOOP_GET_STATUS64:
+       case LOOP_SET_STATUS64:
+               arg = (unsigned long) compat_ptr(arg);
+       case LOOP_SET_FD:
+       case LOOP_CHANGE_FD:
+               err = lo_ioctl(inode, file, cmd, arg);
+               break;
+       default:
+               err = -ENOIOCTLCMD;
+               break;
+       }
+       unlock_kernel();
+       return err;
+}
+#endif
+
 static int lo_open(struct inode *inode, struct file *file)
 {
        struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
@@ -1192,6 +1349,9 @@ static struct block_device_operations lo_fops = {
        .open =         lo_open,
        .release =      lo_release,
        .ioctl =        lo_ioctl,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl = lo_compat_ioctl,
+#endif
 };
 
 /*
index bdbade9a5cf50a99e3b2e3fa3f860d71f36bfe82..9d1035e8d9d8c713148aa589abbf2c738022c197 100644 (file)
@@ -407,10 +407,10 @@ static void do_nbd_request(request_queue_t * q)
                struct nbd_device *lo;
 
                blkdev_dequeue_request(req);
-               dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%lx)\n",
-                               req->rq_disk->disk_name, req, req->flags);
+               dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n",
+                               req->rq_disk->disk_name, req, req->cmd_type);
 
-               if (!(req->flags & REQ_CMD))
+               if (!blk_fs_request(req))
                        goto error_out;
 
                lo = req->rq_disk->private_data;
@@ -489,7 +489,7 @@ static int nbd_ioctl(struct inode *inode, struct file *file,
        switch (cmd) {
        case NBD_DISCONNECT:
                printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name);
-               sreq.flags = REQ_SPECIAL;
+               sreq.cmd_type = REQ_TYPE_SPECIAL;
                nbd_cmd(&sreq) = NBD_CMD_DISC;
                /*
                 * Set these to sane values in case server implementation
index 2403721f9db107509cc4ff8b628795a70290597e..38578b9dbfd1a2bf137dab3c76b6d6235148a82f 100644 (file)
@@ -437,7 +437,7 @@ static char *pd_buf;                /* buffer for request in progress */
 
 static enum action do_pd_io_start(void)
 {
-       if (pd_req->flags & REQ_SPECIAL) {
+       if (blk_special_request(pd_req)) {
                phase = pd_special;
                return pd_special();
        }
@@ -719,14 +719,12 @@ static int pd_special_command(struct pd_unit *disk,
 
        memset(&rq, 0, sizeof(rq));
        rq.errors = 0;
-       rq.rq_status = RQ_ACTIVE;
        rq.rq_disk = disk->gd;
        rq.ref_count = 1;
-       rq.waiting = &wait;
+       rq.end_io_data = &wait;
        rq.end_io = blk_end_sync_rq;
        blk_insert_request(disk->gd->queue, &rq, 0, func);
        wait_for_completion(&wait);
-       rq.waiting = NULL;
        if (rq.errors)
                err = -EIO;
        blk_put_request(&rq);
index 451b996bba91e53bc934cc54048951015acc9868..888d1aceeeff4084e1b48eff355cac3bdabedcd7 100644 (file)
@@ -365,17 +365,17 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
        rq->sense = sense;
        memset(sense, 0, sizeof(sense));
        rq->sense_len = 0;
-       rq->flags |= REQ_BLOCK_PC | REQ_HARDBARRIER;
+       rq->cmd_type = REQ_TYPE_BLOCK_PC;
+       rq->cmd_flags |= REQ_HARDBARRIER;
        if (cgc->quiet)
-               rq->flags |= REQ_QUIET;
+               rq->cmd_flags |= REQ_QUIET;
        memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE);
        if (sizeof(rq->cmd) > CDROM_PACKET_SIZE)
                memset(rq->cmd + CDROM_PACKET_SIZE, 0, sizeof(rq->cmd) - CDROM_PACKET_SIZE);
        rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
 
        rq->ref_count++;
-       rq->flags |= REQ_NOMERGE;
-       rq->waiting = &wait;
+       rq->end_io_data = &wait;
        rq->end_io = blk_end_sync_rq;
        elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
        generic_unplug_device(q);
index cc42e762396f28fccff473f32b9cc1c73ef862fb..f2305ee792a142b32d700473222ffe530742f31b 100644 (file)
@@ -319,8 +319,8 @@ static void start_request(struct floppy_state *fs)
                printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%ld buf=%p\n",
                       req->rq_disk->disk_name, req->cmd,
                       (long)req->sector, req->nr_sectors, req->buffer);
-               printk("           rq_status=%d errors=%d current_nr_sectors=%ld\n",
-                      req->rq_status, req->errors, req->current_nr_sectors);
+               printk("           errors=%d current_nr_sectors=%ld\n",
+                      req->errors, req->current_nr_sectors);
 #endif
 
                if (req->sector < 0 || req->sector >= fs->total_secs) {
index 89e3c2f8b77681bacd376ba480732d4e84b97b9d..dfda796eba563df8fbfa53b476f7abc0f48d9372 100644 (file)
@@ -529,8 +529,8 @@ static void start_request(struct floppy_state *fs)
                printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%ld buf=%p\n",
                       CURRENT->rq_disk->disk_name, CURRENT->cmd,
                       CURRENT->sector, CURRENT->nr_sectors, CURRENT->buffer);
-               printk("           rq_status=%d errors=%d current_nr_sectors=%ld\n",
-                      CURRENT->rq_status, CURRENT->errors, CURRENT->current_nr_sectors);
+               printk("           errors=%d current_nr_sectors=%ld\n",
+                     CURRENT->errors, CURRENT->current_nr_sectors);
 #endif
 
                if (CURRENT->sector < 0 || CURRENT->sector >= fs->total_secs) {
index e828e4cbd3e1122a0ca3abb48e7ed2de51a6bf81..ebf3025721d148866aeb0f5be68fbd4e6fdfb7ea 100644 (file)
@@ -313,7 +313,7 @@ static void do_xd_request (request_queue_t * q)
                int res = 0;
                int retry;
 
-               if (!(req->flags & REQ_CMD)) {
+               if (!blk_fs_request(req)) {
                        end_request(req, 0);
                        continue;
                }
index ff5652d40619e018a83a29b7bed703531e37e85f..4b12e9031fb3cecbf6259c457aa77c8a394a3674 100644 (file)
@@ -3,7 +3,7 @@
 #
 
 menu "Old CD-ROM drivers (not SCSI, not IDE)"
-       depends on ISA
+       depends on ISA && BLOCK
 
 config CD_NO_IDESCSI
        bool "Support non-SCSI/IDE/ATAPI CDROM drives"
index d239cf8b20bd1e769a84612959e55eccfe8e1510..b38c84a7a8e3e30e7d8080fafa209562d7e7f673 100644 (file)
@@ -2129,7 +2129,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
                rq->cmd[9] = 0xf8;
 
                rq->cmd_len = 12;
-               rq->flags |= REQ_BLOCK_PC;
+               rq->cmd_type = REQ_TYPE_BLOCK_PC;
                rq->timeout = 60 * HZ;
                bio = rq->bio;
 
index 37bdb0163f0d1b47acf2df88392b318de984480a..ccd91c1a84bd10d2e0c2ca42a7cbd161f93146af 100644 (file)
@@ -1338,8 +1338,10 @@ static void do_cdu31a_request(request_queue_t * q)
                }
 
                /* WTF??? */
-               if (!(req->flags & REQ_CMD))
+               if (!blk_fs_request(req)) {
+                       end_request(req, 0);
                        continue;
+               }
                if (rq_data_dir(req) == WRITE) {
                        end_request(req, 0);
                        continue;
index 4cc619edf42466e02f08079f186338a7eb7a5959..bde1c665d9f4b1cf6e70cfa9ba32ec7bd765d3cc 100644 (file)
@@ -1006,6 +1006,7 @@ config GPIO_VR41XX
 
 config RAW_DRIVER
        tristate "RAW driver (/dev/raw/rawN) (OBSOLETE)"
+       depends on BLOCK
        help
          The raw driver permits block devices to be bound to /dev/raw/rawN. 
          Once bound, I/O against /dev/raw/rawN uses efficient zero-copy I/O. 
index 4c3a5ca9d8f7a98f882cdbeed12f0949af940a55..b430a12eb8190d9266ccd0de322c243021a1086e 100644 (file)
@@ -655,6 +655,7 @@ void add_interrupt_randomness(int irq)
        add_timer_randomness(irq_timer_state[irq], 0x100 + irq);
 }
 
+#ifdef CONFIG_BLOCK
 void add_disk_randomness(struct gendisk *disk)
 {
        if (!disk || !disk->random)
@@ -667,6 +668,7 @@ void add_disk_randomness(struct gendisk *disk)
 }
 
 EXPORT_SYMBOL(add_disk_randomness);
+#endif
 
 #define EXTRACT_SIZE 10
 
@@ -918,6 +920,7 @@ void rand_initialize_irq(int irq)
        }
 }
 
+#ifdef CONFIG_BLOCK
 void rand_initialize_disk(struct gendisk *disk)
 {
        struct timer_rand_state *state;
@@ -932,6 +935,7 @@ void rand_initialize_disk(struct gendisk *disk)
                disk->random = state;
        }
 }
+#endif
 
 static ssize_t
 random_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos)
index 1a159e8843ca47dd27778937d350df400448c48f..22d17474755f73ac86660c499e486750132f3c60 100644 (file)
@@ -974,7 +974,6 @@ int fcp_scsi_dev_reset(Scsi_Cmnd *SCpnt)
         */
 
        fc->rst_pkt->device->host->eh_action = &sem;
-       fc->rst_pkt->request->rq_status = RQ_SCSI_BUSY;
 
        fc->rst_pkt->done = fcp_scsi_reset_done;
 
index b6fb167e20f681980a54c9baa7ef14a0de4fea83..69d627bd537a8cb7ca1095a7915291995c9dfbc6 100644 (file)
@@ -4,6 +4,8 @@
 # Andre Hedrick <andre@linux-ide.org>
 #
 
+if BLOCK
+
 menu "ATA/ATAPI/MFM/RLL support"
 
 config IDE
@@ -1082,3 +1084,5 @@ config BLK_DEV_HD
 endif
 
 endmenu
+
+endif
index 654d4cd09847cebdcad66b77fc5c9f0167f78149..69bbb6206a00ef5673638c0ccfb4a59bfa272b1d 100644 (file)
@@ -372,7 +372,7 @@ static int cdrom_log_sense(ide_drive_t *drive, struct request *rq,
 {
        int log = 0;
 
-       if (!sense || !rq || (rq->flags & REQ_QUIET))
+       if (!sense || !rq || (rq->cmd_flags & REQ_QUIET))
                return 0;
 
        switch (sense->sense_key) {
@@ -597,7 +597,7 @@ static void cdrom_prepare_request(ide_drive_t *drive, struct request *rq)
        struct cdrom_info *cd = drive->driver_data;
 
        ide_init_drive_cmd(rq);
-       rq->flags = REQ_PC;
+       rq->cmd_type = REQ_TYPE_BLOCK_PC;
        rq->rq_disk = cd->disk;
 }
 
@@ -617,7 +617,7 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
        rq->cmd[0] = GPCMD_REQUEST_SENSE;
        rq->cmd[4] = rq->data_len = 18;
 
-       rq->flags = REQ_SENSE;
+       rq->cmd_type = REQ_TYPE_SENSE;
 
        /* NOTE! Save the failed command in "rq->buffer" */
        rq->buffer = (void *) failed_command;
@@ -630,10 +630,10 @@ static void cdrom_end_request (ide_drive_t *drive, int uptodate)
        struct request *rq = HWGROUP(drive)->rq;
        int nsectors = rq->hard_cur_sectors;
 
-       if ((rq->flags & REQ_SENSE) && uptodate) {
+       if (blk_sense_request(rq) && uptodate) {
                /*
-                * For REQ_SENSE, "rq->buffer" points to the original failed
-                * request
+                * For REQ_TYPE_SENSE, "rq->buffer" points to the original
+                * failed request
                 */
                struct request *failed = (struct request *) rq->buffer;
                struct cdrom_info *info = drive->driver_data;
@@ -706,17 +706,17 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
                return 1;
        }
 
-       if (rq->flags & REQ_SENSE) {
+       if (blk_sense_request(rq)) {
                /* We got an error trying to get sense info
                   from the drive (probably while trying
                   to recover from a former error).  Just give up. */
 
-               rq->flags |= REQ_FAILED;
+               rq->cmd_flags |= REQ_FAILED;
                cdrom_end_request(drive, 0);
                ide_error(drive, "request sense failure", stat);
                return 1;
 
-       } else if (rq->flags & (REQ_PC | REQ_BLOCK_PC)) {
+       } else if (blk_pc_request(rq)) {
                /* All other functions, except for READ. */
                unsigned long flags;
 
@@ -724,7 +724,7 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
                 * if we have an error, pass back CHECK_CONDITION as the
                 * scsi status byte
                 */
-               if ((rq->flags & REQ_BLOCK_PC) && !rq->errors)
+               if (!rq->errors)
                        rq->errors = SAM_STAT_CHECK_CONDITION;
 
                /* Check for tray open. */
@@ -735,12 +735,12 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
                        cdrom_saw_media_change (drive);
                        /*printk("%s: media changed\n",drive->name);*/
                        return 0;
-               } else if (!(rq->flags & REQ_QUIET)) {
+               } else if (!(rq->cmd_flags & REQ_QUIET)) {
                        /* Otherwise, print an error. */
                        ide_dump_status(drive, "packet command error", stat);
                }
                
-               rq->flags |= REQ_FAILED;
+               rq->cmd_flags |= REQ_FAILED;
 
                /*
                 * instead of playing games with moving completions around,
@@ -881,7 +881,7 @@ static int cdrom_timer_expiry(ide_drive_t *drive)
                        wait = ATAPI_WAIT_PC;
                        break;
                default:
-                       if (!(rq->flags & REQ_QUIET))
+                       if (!(rq->cmd_flags & REQ_QUIET))
                                printk(KERN_INFO "ide-cd: cmd 0x%x timed out\n", rq->cmd[0]);
                        wait = 0;
                        break;
@@ -1124,7 +1124,7 @@ static ide_startstop_t cdrom_read_intr (ide_drive_t *drive)
                if (rq->current_nr_sectors > 0) {
                        printk (KERN_ERR "%s: cdrom_read_intr: data underrun (%d blocks)\n",
                                drive->name, rq->current_nr_sectors);
-                       rq->flags |= REQ_FAILED;
+                       rq->cmd_flags |= REQ_FAILED;
                        cdrom_end_request(drive, 0);
                } else
                        cdrom_end_request(drive, 1);
@@ -1456,7 +1456,7 @@ static ide_startstop_t cdrom_pc_intr (ide_drive_t *drive)
                        printk ("%s: cdrom_pc_intr: data underrun %d\n",
                                drive->name, pc->buflen);
                        */
-                       rq->flags |= REQ_FAILED;
+                       rq->cmd_flags |= REQ_FAILED;
                        cdrom_end_request(drive, 0);
                }
                return ide_stopped;
@@ -1509,7 +1509,7 @@ static ide_startstop_t cdrom_pc_intr (ide_drive_t *drive)
                rq->data += thislen;
                rq->data_len -= thislen;
 
-               if (rq->flags & REQ_SENSE)
+               if (blk_sense_request(rq))
                        rq->sense_len += thislen;
        } else {
 confused:
@@ -1517,7 +1517,7 @@ confused:
                        "appears confused (ireason = 0x%02x). "
                        "Trying to recover by ending request.\n",
                        drive->name, ireason);
-               rq->flags |= REQ_FAILED;
+               rq->cmd_flags |= REQ_FAILED;
                cdrom_end_request(drive, 0);
                return ide_stopped;
        }
@@ -1546,7 +1546,7 @@ static ide_startstop_t cdrom_do_packet_command (ide_drive_t *drive)
        struct cdrom_info *info = drive->driver_data;
 
        info->dma = 0;
-       rq->flags &= ~REQ_FAILED;
+       rq->cmd_flags &= ~REQ_FAILED;
        len = rq->data_len;
 
        /* Start sending the command to the drive. */
@@ -1558,7 +1558,7 @@ static int cdrom_queue_packet_command(ide_drive_t *drive, struct request *rq)
 {
        struct request_sense sense;
        int retries = 10;
-       unsigned int flags = rq->flags;
+       unsigned int flags = rq->cmd_flags;
 
        if (rq->sense == NULL)
                rq->sense = &sense;
@@ -1567,14 +1567,14 @@ static int cdrom_queue_packet_command(ide_drive_t *drive, struct request *rq)
        do {
                int error;
                unsigned long time = jiffies;
-               rq->flags = flags;
+               rq->cmd_flags = flags;
 
                error = ide_do_drive_cmd(drive, rq, ide_wait);
                time = jiffies - time;
 
                /* FIXME: we should probably abort/retry or something 
                 * in case of failure */
-               if (rq->flags & REQ_FAILED) {
+               if (rq->cmd_flags & REQ_FAILED) {
                        /* The request failed.  Retry if it was due to a unit
                           attention status
                           (usually means media was changed). */
@@ -1596,10 +1596,10 @@ static int cdrom_queue_packet_command(ide_drive_t *drive, struct request *rq)
                }
 
                /* End of retry loop. */
-       } while ((rq->flags & REQ_FAILED) && retries >= 0);
+       } while ((rq->cmd_flags & REQ_FAILED) && retries >= 0);
 
        /* Return an error if the command failed. */
-       return (rq->flags & REQ_FAILED) ? -EIO : 0;
+       return (rq->cmd_flags & REQ_FAILED) ? -EIO : 0;
 }
 
 /*
@@ -1963,7 +1963,7 @@ static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
 {
        struct cdrom_info *info = drive->driver_data;
 
-       rq->flags |= REQ_QUIET;
+       rq->cmd_flags |= REQ_QUIET;
 
        info->dma = 0;
 
@@ -2023,11 +2023,11 @@ ide_do_rw_cdrom (ide_drive_t *drive, struct request *rq, sector_t block)
                }
                info->last_block = block;
                return action;
-       } else if (rq->flags & (REQ_PC | REQ_SENSE)) {
+       } else if (rq->cmd_type == REQ_TYPE_SENSE) {
                return cdrom_do_packet_command(drive);
-       } else if (rq->flags & REQ_BLOCK_PC) {
+       } else if (blk_pc_request(rq)) {
                return cdrom_do_block_pc(drive, rq);
-       } else if (rq->flags & REQ_SPECIAL) {
+       } else if (blk_special_request(rq)) {
                /*
                 * right now this can only be a reset...
                 */
@@ -2105,7 +2105,7 @@ static int cdrom_check_status(ide_drive_t *drive, struct request_sense *sense)
 
        req.sense = sense;
        req.cmd[0] = GPCMD_TEST_UNIT_READY;
-       req.flags |= REQ_QUIET;
+       req.cmd_flags |= REQ_QUIET;
 
 #if ! STANDARD_ATAPI
         /* the Sanyo 3 CD changer uses byte 7 of TEST_UNIT_READY to 
@@ -2207,7 +2207,7 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
        req.cmd[0] = GPCMD_READ_CDVD_CAPACITY;
        req.data = (char *)&capbuf;
        req.data_len = sizeof(capbuf);
-       req.flags |= REQ_QUIET;
+       req.cmd_flags |= REQ_QUIET;
 
        stat = cdrom_queue_packet_command(drive, &req);
        if (stat == 0) {
@@ -2230,7 +2230,7 @@ static int cdrom_read_tocentry(ide_drive_t *drive, int trackno, int msf_flag,
        req.sense = sense;
        req.data =  buf;
        req.data_len = buflen;
-       req.flags |= REQ_QUIET;
+       req.cmd_flags |= REQ_QUIET;
        req.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
        req.cmd[6] = trackno;
        req.cmd[7] = (buflen >> 8);
@@ -2531,7 +2531,7 @@ static int ide_cdrom_packet(struct cdrom_device_info *cdi,
        req.timeout = cgc->timeout;
 
        if (cgc->quiet)
-               req.flags |= REQ_QUIET;
+               req.cmd_flags |= REQ_QUIET;
 
        req.sense = cgc->sense;
        cgc->stat = cdrom_queue_packet_command(drive, &req);
@@ -2629,7 +2629,8 @@ int ide_cdrom_reset (struct cdrom_device_info *cdi)
        int ret;
 
        cdrom_prepare_request(drive, &req);
-       req.flags = REQ_SPECIAL | REQ_QUIET;
+       req.cmd_type = REQ_TYPE_SPECIAL;
+       req.cmd_flags = REQ_QUIET;
        ret = ide_do_drive_cmd(drive, &req, ide_wait);
 
        /*
@@ -3116,9 +3117,9 @@ static int ide_cdrom_prep_pc(struct request *rq)
 
 static int ide_cdrom_prep_fn(request_queue_t *q, struct request *rq)
 {
-       if (rq->flags & REQ_CMD)
+       if (blk_fs_request(rq))
                return ide_cdrom_prep_fs(q, rq);
-       else if (rq->flags & REQ_BLOCK_PC)
+       else if (blk_pc_request(rq))
                return ide_cdrom_prep_pc(rq);
 
        return 0;
index 7cf3eb02352140d8f8558533aee4107c8fd1a445..0a05a377d66ac54a77056fd67cf484cf5bdf8fec 100644 (file)
@@ -699,7 +699,8 @@ static void idedisk_prepare_flush(request_queue_t *q, struct request *rq)
                rq->cmd[0] = WIN_FLUSH_CACHE;
 
 
-       rq->flags |= REQ_DRIVE_TASK;
+       rq->cmd_type = REQ_TYPE_ATA_TASK;
+       rq->cmd_flags |= REQ_SOFTBARRIER;
        rq->buffer = rq->cmd;
 }
 
@@ -740,7 +741,7 @@ static int set_multcount(ide_drive_t *drive, int arg)
        if (drive->special.b.set_multmode)
                return -EBUSY;
        ide_init_drive_cmd (&rq);
-       rq.flags = REQ_DRIVE_CMD;
+       rq.cmd_type = REQ_TYPE_ATA_CMD;
        drive->mult_req = arg;
        drive->special.b.set_multmode = 1;
        (void) ide_do_drive_cmd (drive, &rq, ide_wait);
index 7c3a13e1cf647a4cbca6ee39c327c36f9de8d422..c3546fe9af63dbc718447523d116ed75ede19083 100644 (file)
@@ -205,7 +205,7 @@ int ide_build_sglist(ide_drive_t *drive, struct request *rq)
        ide_hwif_t *hwif = HWIF(drive);
        struct scatterlist *sg = hwif->sg_table;
 
-       BUG_ON((rq->flags & REQ_DRIVE_TASKFILE) && rq->nr_sectors > 256);
+       BUG_ON((rq->cmd_type == REQ_TYPE_ATA_TASKFILE) && rq->nr_sectors > 256);
 
        ide_map_sg(drive, rq);
 
index adbe9f76a50533c64355078e77bf1ce564de746e..8ccee9c769f8ad54ee80f539a16eaa3a15615684 100644 (file)
@@ -588,7 +588,7 @@ static int idefloppy_do_end_request(ide_drive_t *drive, int uptodate, int nsecs)
        /* Why does this happen? */
        if (!rq)
                return 0;
-       if (!(rq->flags & REQ_SPECIAL)) { //if (!IDEFLOPPY_RQ_CMD (rq->cmd)) {
+       if (!blk_special_request(rq)) {
                /* our real local end request function */
                ide_end_request(drive, uptodate, nsecs);
                return 0;
@@ -689,7 +689,7 @@ static void idefloppy_queue_pc_head (ide_drive_t *drive,idefloppy_pc_t *pc,struc
 
        ide_init_drive_cmd(rq);
        rq->buffer = (char *) pc;
-       rq->flags = REQ_SPECIAL;        //rq->cmd = IDEFLOPPY_PC_RQ;
+       rq->cmd_type = REQ_TYPE_SPECIAL;
        rq->rq_disk = floppy->disk;
        (void) ide_do_drive_cmd(drive, rq, ide_preempt);
 }
@@ -1250,7 +1250,7 @@ static void idefloppy_create_rw_cmd (idefloppy_floppy_t *floppy, idefloppy_pc_t
        pc->callback = &idefloppy_rw_callback;
        pc->rq = rq;
        pc->b_count = cmd == READ ? 0 : rq->bio->bi_size;
-       if (rq->flags & REQ_RW)
+       if (rq->cmd_flags & REQ_RW)
                set_bit(PC_WRITING, &pc->flags);
        pc->buffer = NULL;
        pc->request_transfer = pc->buffer_size = blocks * floppy->block_size;
@@ -1281,8 +1281,7 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request
        idefloppy_pc_t *pc;
        unsigned long block = (unsigned long)block_s;
 
-       debug_log(KERN_INFO "rq_status: %d, dev: %s, flags: %lx, errors: %d\n",
-                       rq->rq_status,
+       debug_log(KERN_INFO "dev: %s, flags: %lx, errors: %d\n",
                        rq->rq_disk ? rq->rq_disk->disk_name : "?",
                        rq->flags, rq->errors);
        debug_log(KERN_INFO "sector: %ld, nr_sectors: %ld, "
@@ -1303,7 +1302,7 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request
                idefloppy_do_end_request(drive, 0, 0);
                return ide_stopped;
        }
-       if (rq->flags & REQ_CMD) {
+       if (blk_fs_request(rq)) {
                if (((long)rq->sector % floppy->bs_factor) ||
                    (rq->nr_sectors % floppy->bs_factor)) {
                        printk("%s: unsupported r/w request size\n",
@@ -1313,9 +1312,9 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request
                }
                pc = idefloppy_next_pc_storage(drive);
                idefloppy_create_rw_cmd(floppy, pc, rq, block);
-       } else if (rq->flags & REQ_SPECIAL) {
+       } else if (blk_special_request(rq)) {
                pc = (idefloppy_pc_t *) rq->buffer;
-       } else if (rq->flags & REQ_BLOCK_PC) {
+       } else if (blk_pc_request(rq)) {
                pc = idefloppy_next_pc_storage(drive);
                if (idefloppy_blockpc_cmd(floppy, pc, rq)) {
                        idefloppy_do_end_request(drive, 0, 0);
@@ -1343,7 +1342,7 @@ static int idefloppy_queue_pc_tail (ide_drive_t *drive,idefloppy_pc_t *pc)
 
        ide_init_drive_cmd (&rq);
        rq.buffer = (char *) pc;
-       rq.flags = REQ_SPECIAL;         //      rq.cmd = IDEFLOPPY_PC_RQ;
+       rq.cmd_type = REQ_TYPE_SPECIAL;
        rq.rq_disk = floppy->disk;
 
        return ide_do_drive_cmd(drive, &rq, ide_wait);
index fb6795236e76c677c9bf990a48dc62ec675da8da..38479a29d3e10a4e07236c0466fc0b81b4ba4a3b 100644 (file)
@@ -59,7 +59,7 @@ static int __ide_end_request(ide_drive_t *drive, struct request *rq,
 {
        int ret = 1;
 
-       BUG_ON(!(rq->flags & REQ_STARTED));
+       BUG_ON(!blk_rq_started(rq));
 
        /*
         * if failfast is set on a request, override number of sectors and
@@ -141,7 +141,7 @@ enum {
 
 static void ide_complete_power_step(ide_drive_t *drive, struct request *rq, u8 stat, u8 error)
 {
-       struct request_pm_state *pm = rq->end_io_data;
+       struct request_pm_state *pm = rq->data;
 
        if (drive->media != ide_disk)
                return;
@@ -164,7 +164,7 @@ static void ide_complete_power_step(ide_drive_t *drive, struct request *rq, u8 s
 
 static ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
 {
-       struct request_pm_state *pm = rq->end_io_data;
+       struct request_pm_state *pm = rq->data;
        ide_task_t *args = rq->special;
 
        memset(args, 0, sizeof(*args));
@@ -244,7 +244,7 @@ int ide_end_dequeued_request(ide_drive_t *drive, struct request *rq,
 
        spin_lock_irqsave(&ide_lock, flags);
 
-       BUG_ON(!(rq->flags & REQ_STARTED));
+       BUG_ON(!blk_rq_started(rq));
 
        /*
         * if failfast is set on a request, override number of sectors and
@@ -366,7 +366,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
        rq = HWGROUP(drive)->rq;
        spin_unlock_irqrestore(&ide_lock, flags);
 
-       if (rq->flags & REQ_DRIVE_CMD) {
+       if (rq->cmd_type == REQ_TYPE_ATA_CMD) {
                u8 *args = (u8 *) rq->buffer;
                if (rq->errors == 0)
                        rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
@@ -376,7 +376,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
                        args[1] = err;
                        args[2] = hwif->INB(IDE_NSECTOR_REG);
                }
-       } else if (rq->flags & REQ_DRIVE_TASK) {
+       } else if (rq->cmd_type == REQ_TYPE_ATA_TASK) {
                u8 *args = (u8 *) rq->buffer;
                if (rq->errors == 0)
                        rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
@@ -390,7 +390,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
                        args[5] = hwif->INB(IDE_HCYL_REG);
                        args[6] = hwif->INB(IDE_SELECT_REG);
                }
-       } else if (rq->flags & REQ_DRIVE_TASKFILE) {
+       } else if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
                ide_task_t *args = (ide_task_t *) rq->special;
                if (rq->errors == 0)
                        rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
@@ -421,7 +421,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
                        }
                }
        } else if (blk_pm_request(rq)) {
-               struct request_pm_state *pm = rq->end_io_data;
+               struct request_pm_state *pm = rq->data;
 #ifdef DEBUG_PM
                printk("%s: complete_power_step(step: %d, stat: %x, err: %x)\n",
                        drive->name, rq->pm->pm_step, stat, err);
@@ -587,7 +587,7 @@ ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, u8 stat)
                return ide_stopped;
 
        /* retry only "normal" I/O: */
-       if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK | REQ_DRIVE_TASKFILE)) {
+       if (!blk_fs_request(rq)) {
                rq->errors = 1;
                ide_end_drive_cmd(drive, stat, err);
                return ide_stopped;
@@ -638,7 +638,7 @@ ide_startstop_t ide_abort(ide_drive_t *drive, const char *msg)
                return ide_stopped;
 
        /* retry only "normal" I/O: */
-       if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK | REQ_DRIVE_TASKFILE)) {
+       if (!blk_fs_request(rq)) {
                rq->errors = 1;
                ide_end_drive_cmd(drive, BUSY_STAT, 0);
                return ide_stopped;
@@ -808,7 +808,7 @@ void ide_map_sg(ide_drive_t *drive, struct request *rq)
        if (hwif->sg_mapped)    /* needed by ide-scsi */
                return;
 
-       if ((rq->flags & REQ_DRIVE_TASKFILE) == 0) {
+       if (rq->cmd_type != REQ_TYPE_ATA_TASKFILE) {
                hwif->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
        } else {
                sg_init_one(sg, rq->buffer, rq->nr_sectors * SECTOR_SIZE);
@@ -844,7 +844,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
                struct request *rq)
 {
        ide_hwif_t *hwif = HWIF(drive);
-       if (rq->flags & REQ_DRIVE_TASKFILE) {
+       if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
                ide_task_t *args = rq->special;
  
                if (!args)
@@ -866,7 +866,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
                if (args->tf_out_flags.all != 0) 
                        return flagged_taskfile(drive, args);
                return do_rw_taskfile(drive, args);
-       } else if (rq->flags & REQ_DRIVE_TASK) {
+       } else if (rq->cmd_type == REQ_TYPE_ATA_TASK) {
                u8 *args = rq->buffer;
                u8 sel;
  
@@ -892,7 +892,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
                hwif->OUTB(sel, IDE_SELECT_REG);
                ide_cmd(drive, args[0], args[2], &drive_cmd_intr);
                return ide_started;
-       } else if (rq->flags & REQ_DRIVE_CMD) {
+       } else if (rq->cmd_type == REQ_TYPE_ATA_CMD) {
                u8 *args = rq->buffer;
 
                if (!args)
@@ -933,7 +933,7 @@ done:
 
 static void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
 {
-       struct request_pm_state *pm = rq->end_io_data;
+       struct request_pm_state *pm = rq->data;
 
        if (blk_pm_suspend_request(rq) &&
            pm->pm_step == ide_pm_state_start_suspend)
@@ -980,7 +980,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
        ide_startstop_t startstop;
        sector_t block;
 
-       BUG_ON(!(rq->flags & REQ_STARTED));
+       BUG_ON(!blk_rq_started(rq));
 
 #ifdef DEBUG
        printk("%s: start_request: current=0x%08lx\n",
@@ -1013,12 +1013,12 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
        if (!drive->special.all) {
                ide_driver_t *drv;
 
-               if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK))
-                       return execute_drive_cmd(drive, rq);
-               else if (rq->flags & REQ_DRIVE_TASKFILE)
+               if (rq->cmd_type == REQ_TYPE_ATA_CMD ||
+                   rq->cmd_type == REQ_TYPE_ATA_TASK ||
+                   rq->cmd_type == REQ_TYPE_ATA_TASKFILE)
                        return execute_drive_cmd(drive, rq);
                else if (blk_pm_request(rq)) {
-                       struct request_pm_state *pm = rq->end_io_data;
+                       struct request_pm_state *pm = rq->data;
 #ifdef DEBUG_PM
                        printk("%s: start_power_step(step: %d)\n",
                                drive->name, rq->pm->pm_step);
@@ -1264,7 +1264,7 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
                 * We count how many times we loop here to make sure we service
                 * all drives in the hwgroup without looping for ever
                 */
-               if (drive->blocked && !blk_pm_request(rq) && !(rq->flags & REQ_PREEMPT)) {
+               if (drive->blocked && !blk_pm_request(rq) && !(rq->cmd_flags & REQ_PREEMPT)) {
                        drive = drive->next ? drive->next : hwgroup->drive;
                        if (loops++ < 4 && !blk_queue_plugged(drive->queue))
                                goto again;
@@ -1670,7 +1670,7 @@ irqreturn_t ide_intr (int irq, void *dev_id, struct pt_regs *regs)
 void ide_init_drive_cmd (struct request *rq)
 {
        memset(rq, 0, sizeof(*rq));
-       rq->flags = REQ_DRIVE_CMD;
+       rq->cmd_type = REQ_TYPE_ATA_CMD;
        rq->ref_count = 1;
 }
 
@@ -1710,7 +1710,6 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
        int must_wait = (action == ide_wait || action == ide_head_wait);
 
        rq->errors = 0;
-       rq->rq_status = RQ_ACTIVE;
 
        /*
         * we need to hold an extra reference to request for safe inspection
@@ -1718,7 +1717,7 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
         */
        if (must_wait) {
                rq->ref_count++;
-               rq->waiting = &wait;
+               rq->end_io_data = &wait;
                rq->end_io = blk_end_sync_rq;
        }
 
@@ -1727,7 +1726,7 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
                hwgroup->rq = NULL;
        if (action == ide_preempt || action == ide_head_wait) {
                where = ELEVATOR_INSERT_FRONT;
-               rq->flags |= REQ_PREEMPT;
+               rq->cmd_flags |= REQ_PREEMPT;
        }
        __elv_add_request(drive->queue, rq, where, 0);
        ide_do_request(hwgroup, IDE_NO_IRQ);
@@ -1736,7 +1735,6 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
        err = 0;
        if (must_wait) {
                wait_for_completion(&wait);
-               rq->waiting = NULL;
                if (rq->errors)
                        err = -EIO;
 
index 1feff23487d44f515574809e153744a615c7ad26..850ef63cc986ba695ba643f5b13483c49a430a6c 100644 (file)
@@ -456,13 +456,14 @@ static void ide_dump_opcode(ide_drive_t *drive)
        spin_unlock(&ide_lock);
        if (!rq)
                return;
-       if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK)) {
+       if (rq->cmd_type == REQ_TYPE_ATA_CMD ||
+           rq->cmd_type == REQ_TYPE_ATA_TASK) {
                char *args = rq->buffer;
                if (args) {
                        opcode = args[0];
                        found = 1;
                }
-       } else if (rq->flags & REQ_DRIVE_TASKFILE) {
+       } else if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
                ide_task_t *args = rq->special;
                if (args) {
                        task_struct_t *tf = (task_struct_t *) args->tfRegister;
index 7067ab997927d5515f32f2b09c8a48af1cc34cdf..2ebc3760f261dc7ed15f819f0c50492cb87bdca4 100644 (file)
@@ -1776,7 +1776,7 @@ static void idetape_create_request_sense_cmd (idetape_pc_t *pc)
 static void idetape_init_rq(struct request *rq, u8 cmd)
 {
        memset(rq, 0, sizeof(*rq));
-       rq->flags = REQ_SPECIAL;
+       rq->cmd_type = REQ_TYPE_SPECIAL;
        rq->cmd[0] = cmd;
 }
 
@@ -2423,8 +2423,8 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 #if IDETAPE_DEBUG_LOG
 #if 0
        if (tape->debug_level >= 5)
-               printk(KERN_INFO "ide-tape: rq_status: %d, "
-                       "dev: %s, cmd: %ld, errors: %d\n", rq->rq_status,
+               printk(KERN_INFO "ide-tape:  %d, "
+                       "dev: %s, cmd: %ld, errors: %d\n",
                         rq->rq_disk->disk_name, rq->cmd[0], rq->errors);
 #endif
        if (tape->debug_level >= 2)
@@ -2433,12 +2433,12 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
                        rq->sector, rq->nr_sectors, rq->current_nr_sectors);
 #endif /* IDETAPE_DEBUG_LOG */
 
-       if ((rq->flags & REQ_SPECIAL) == 0) {
+       if (!blk_special_request(rq)) {
                /*
                 * We do not support buffer cache originated requests.
                 */
                printk(KERN_NOTICE "ide-tape: %s: Unsupported request in "
-                       "request queue (%ld)\n", drive->name, rq->flags);
+                       "request queue (%d)\n", drive->name, rq->cmd_type);
                ide_end_request(drive, 0, 0);
                return ide_stopped;
        }
@@ -2768,12 +2768,12 @@ static void idetape_wait_for_request (ide_drive_t *drive, struct request *rq)
        idetape_tape_t *tape = drive->driver_data;
 
 #if IDETAPE_DEBUG_BUGS
-       if (rq == NULL || (rq->flags & REQ_SPECIAL) == 0) {
+       if (rq == NULL || !blk_special_request(rq)) {
                printk (KERN_ERR "ide-tape: bug: Trying to sleep on non-valid request\n");
                return;
        }
 #endif /* IDETAPE_DEBUG_BUGS */
-       rq->waiting = &wait;
+       rq->end_io_data = &wait;
        rq->end_io = blk_end_sync_rq;
        spin_unlock_irq(&tape->spinlock);
        wait_for_completion(&wait);
index 97a9244312fc0492fc813e2effa99cbbe2542b55..1d0470c1f9579d262f0e93c2fcebab6dd89ad399 100644 (file)
@@ -363,7 +363,7 @@ static ide_startstop_t task_error(ide_drive_t *drive, struct request *rq,
 
 static void task_end_request(ide_drive_t *drive, struct request *rq, u8 stat)
 {
-       if (rq->flags & REQ_DRIVE_TASKFILE) {
+       if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
                ide_task_t *task = rq->special;
 
                if (task->tf_out_flags.all) {
@@ -474,7 +474,7 @@ static int ide_diag_taskfile(ide_drive_t *drive, ide_task_t *args, unsigned long
        struct request rq;
 
        memset(&rq, 0, sizeof(rq));
-       rq.flags = REQ_DRIVE_TASKFILE;
+       rq.cmd_type = REQ_TYPE_ATA_TASKFILE;
        rq.buffer = buf;
 
        /*
@@ -499,7 +499,7 @@ static int ide_diag_taskfile(ide_drive_t *drive, ide_task_t *args, unsigned long
                rq.hard_cur_sectors = rq.current_nr_sectors = rq.nr_sectors;
 
                if (args->command_type == IDE_DRIVE_TASK_RAW_WRITE)
-                       rq.flags |= REQ_RW;
+                       rq.cmd_flags |= REQ_RW;
        }
 
        rq.special = args;
@@ -737,7 +737,7 @@ static int ide_wait_cmd_task(ide_drive_t *drive, u8 *buf)
        struct request rq;
 
        ide_init_drive_cmd(&rq);
-       rq.flags = REQ_DRIVE_TASK;
+       rq.cmd_type = REQ_TYPE_ATA_TASK;
        rq.buffer = buf;
        return ide_do_drive_cmd(drive, &rq, ide_wait);
 }
index 9c8468de1a7521aa62c179cd6bf952fa461a2a4c..2b1a1389c31880b16d492f8145ddfe5ac96acd90 100644 (file)
@@ -1217,9 +1217,9 @@ static int generic_ide_suspend(struct device *dev, pm_message_t mesg)
        memset(&rq, 0, sizeof(rq));
        memset(&rqpm, 0, sizeof(rqpm));
        memset(&args, 0, sizeof(args));
-       rq.flags = REQ_PM_SUSPEND;
+       rq.cmd_type = REQ_TYPE_PM_SUSPEND;
        rq.special = &args;
-       rq.end_io_data = &rqpm;
+       rq.data = &rqpm;
        rqpm.pm_step = ide_pm_state_start_suspend;
        if (mesg.event == PM_EVENT_PRETHAW)
                mesg.event = PM_EVENT_FREEZE;
@@ -1238,9 +1238,9 @@ static int generic_ide_resume(struct device *dev)
        memset(&rq, 0, sizeof(rq));
        memset(&rqpm, 0, sizeof(rqpm));
        memset(&args, 0, sizeof(args));
-       rq.flags = REQ_PM_RESUME;
+       rq.cmd_type = REQ_TYPE_PM_RESUME;
        rq.special = &args;
-       rq.end_io_data = &rqpm;
+       rq.data = &rqpm;
        rqpm.pm_step = ide_pm_state_start_resume;
        rqpm.pm_state = PM_EVENT_ON;
 
index aebecd8f51ccd7be2119164ff3bf3d58a9525b46..4ab93114567389122323e22a413280216c5729d1 100644 (file)
@@ -626,7 +626,7 @@ repeat:
                req->rq_disk->disk_name, (req->cmd == READ)?"read":"writ",
                cyl, head, sec, nsect, req->buffer);
 #endif
-       if (req->flags & REQ_CMD) {
+       if (blk_fs_request(req)) {
                switch (rq_data_dir(req)) {
                case READ:
                        hd_out(disk,nsect,sec,head,cyl,WIN_READ,&read_intr);
index bf869ed03eed33c9b61c7c4c46d78e621b0e1b0e..6dd31a291d8436af9821d0b465866208daec3cd5 100644 (file)
@@ -2,6 +2,8 @@
 # Block device driver configuration
 #
 
+if BLOCK
+
 menu "Multi-device support (RAID and LVM)"
 
 config MD
@@ -251,3 +253,4 @@ config DM_MULTIPATH_EMC
 
 endmenu
 
+endif
index 2a374ccb30ddc0dcb165c7736c5d3abe6eab1fb0..2b2d45d7baaaaf55f9cfb45ff9b755587fc83d61 100644 (file)
@@ -126,7 +126,8 @@ static struct request *get_failover_req(struct emc_handler *h,
        memset(&rq->cmd, 0, BLK_MAX_CDB);
 
        rq->timeout = EMC_FAILOVER_TIMEOUT;
-       rq->flags |= (REQ_BLOCK_PC | REQ_FAILFAST | REQ_NOMERGE);
+       rq->cmd_type = REQ_TYPE_BLOCK_PC;
+       rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE;
 
        return rq;
 }
index fef677103880b5e94a80c84bf52dcb692fbc4340..6443392bffff17402d59b2ff39f3700a7229e7fe 100644 (file)
@@ -88,7 +88,7 @@ config I2O_BUS
 
 config I2O_BLOCK
        tristate "I2O Block OSM"
-       depends on I2O
+       depends on I2O && BLOCK
        ---help---
          Include support for the I2O Block OSM. The Block OSM presents disk
          and other structured block devices to the operating system. If you
index 1ddc2fb429d5668a2f653c2c3e5d7561e5a4a92d..eaba81bf2ecad7a023d6e1ae22563ed9900bfcab 100644 (file)
@@ -390,9 +390,9 @@ static int i2o_block_prep_req_fn(struct request_queue *q, struct request *req)
        }
 
        /* request is already processed by us, so return */
-       if (req->flags & REQ_SPECIAL) {
+       if (blk_special_request(req)) {
                osm_debug("REQ_SPECIAL already set!\n");
-               req->flags |= REQ_DONTPREP;
+               req->cmd_flags |= REQ_DONTPREP;
                return BLKPREP_OK;
        }
 
@@ -411,7 +411,8 @@ static int i2o_block_prep_req_fn(struct request_queue *q, struct request *req)
                ireq = req->special;
 
        /* do not come back here */
-       req->flags |= REQ_DONTPREP | REQ_SPECIAL;
+       req->cmd_type = REQ_TYPE_SPECIAL;
+       req->cmd_flags |= REQ_DONTPREP;
 
        return BLKPREP_OK;
 };
index 45bcf098e762c500e3777facb3a85c8f841ab385..f540bd88dc5a23aa12216079cf7ad675b2eff092 100644 (file)
@@ -21,7 +21,7 @@ config MMC_DEBUG
 
 config MMC_BLOCK
        tristate "MMC block device driver"
-       depends on MMC
+       depends on MMC && BLOCK
        default y
        help
          Say Y here to enable the MMC block device driver support.
index d2957e35cc6f2d40c6a9031c98a2a88aad3ba0ef..b1f6e03e7aa9cd216a6a12ccc9518956f0cb2fbb 100644 (file)
@@ -24,7 +24,8 @@ obj-$(CONFIG_MMC_AU1X)                += au1xmmc.o
 obj-$(CONFIG_MMC_OMAP)         += omap.o
 obj-$(CONFIG_MMC_AT91RM9200)   += at91_mci.o
 
-mmc_core-y := mmc.o mmc_queue.o mmc_sysfs.o
+mmc_core-y := mmc.o mmc_sysfs.o
+mmc_core-$(CONFIG_BLOCK) += mmc_queue.o
 
 ifeq ($(CONFIG_MMC_DEBUG),y)
 EXTRA_CFLAGS += -DDEBUG
index 74f8cdeeff0f8501f379d01037144ecd404c6ff4..4ccdd82b680f8f22ed78e27a89a7d9c418df859a 100644 (file)
@@ -28,7 +28,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req)
        struct mmc_queue *mq = q->queuedata;
        int ret = BLKPREP_KILL;
 
-       if (req->flags & REQ_SPECIAL) {
+       if (blk_special_request(req)) {
                /*
                 * Special commands already have the command
                 * blocks already setup in req->special.
@@ -36,7 +36,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req)
                BUG_ON(!req->special);
 
                ret = BLKPREP_OK;
-       } else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
+       } else if (blk_fs_request(req) || blk_pc_request(req)) {
                /*
                 * Block I/O requests need translating according
                 * to the protocol.
@@ -50,7 +50,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req)
        }
 
        if (ret == BLKPREP_OK)
-               req->flags |= REQ_DONTPREP;
+               req->cmd_flags |= REQ_DONTPREP;
 
        return ret;
 }
index a03e862851db65bdd9783685b55e2cf4ec37ce2c..a304b34c2632f32c1713a9d337c02a4e27453bdd 100644 (file)
@@ -166,7 +166,7 @@ config MTD_CHAR
 
 config MTD_BLOCK
        tristate "Caching block device access to MTD devices"
-       depends on MTD
+       depends on MTD && BLOCK
        ---help---
          Although most flash chips have an erase size too large to be useful
          as block devices, it is possible to use MTD devices which are based
@@ -188,7 +188,7 @@ config MTD_BLOCK
 
 config MTD_BLOCK_RO
        tristate "Readonly block device access to MTD devices"
-       depends on MTD_BLOCK!=y && MTD
+       depends on MTD_BLOCK!=y && MTD && BLOCK
        help
          This allows you to mount read-only file systems (such as cramfs)
          from an MTD device, without the overhead (and danger) of the caching
@@ -199,7 +199,7 @@ config MTD_BLOCK_RO
 
 config FTL
        tristate "FTL (Flash Translation Layer) support"
-       depends on MTD
+       depends on MTD && BLOCK
        ---help---
          This provides support for the original Flash Translation Layer which
          is part of the PCMCIA specification. It uses a kind of pseudo-
@@ -215,7 +215,7 @@ config FTL
 
 config NFTL
        tristate "NFTL (NAND Flash Translation Layer) support"
-       depends on MTD
+       depends on MTD && BLOCK
        ---help---
          This provides support for the NAND Flash Translation Layer which is
          used on M-Systems' DiskOnChip devices. It uses a kind of pseudo-
@@ -238,7 +238,7 @@ config NFTL_RW
 
 config INFTL
        tristate "INFTL (Inverse NAND Flash Translation Layer) support"
-       depends on MTD
+       depends on MTD && BLOCK
        ---help---
          This provides support for the Inverse NAND Flash Translation
          Layer which is used on M-Systems' newer DiskOnChip devices. It
@@ -255,7 +255,7 @@ config INFTL
 
 config RFD_FTL
         tristate "Resident Flash Disk (Flash Translation Layer) support"
-       depends on MTD
+       depends on MTD && BLOCK
        ---help---
          This provides support for the flash translation layer known
          as the Resident Flash Disk (RFD), as used by the Embedded BIOS
index 16c02b5ccf7ec30c994a7fb5a938e4fcb2d8278b..440f6851da6997ac296be09773ada5d54a00bb4c 100644 (file)
@@ -136,7 +136,7 @@ config MTDRAM_ABS_POS
 
 config MTD_BLOCK2MTD
        tristate "MTD using block device"
-       depends on MTD
+       depends on MTD && BLOCK
        help
          This driver allows a block device to appear as an MTD. It would
          generally be used in the following cases:
index 458d3c8ae1eee3904fa34cbd414d44e6a28633b3..6baf5fe142305cfa1131a0be93a838e3e803a9a4 100644 (file)
@@ -46,7 +46,7 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
        nsect = req->current_nr_sectors;
        buf = req->buffer;
 
-       if (!(req->flags & REQ_CMD))
+       if (!blk_fs_request(req))
                return 0;
 
        if (block + nsect > get_capacity(req->rq_disk))
index 929d6fff6152a642b929d8854e515d3f8152bc1d..b250c53545033cba2a019ee4e5973bf1bac04551 100644 (file)
@@ -1,4 +1,4 @@
-if S390
+if S390 && BLOCK
 
 comment "S/390 block device drivers"
        depends on S390
index 9d051e5687ea433cb6852f1748f5ed216ff22b6c..222a8a71a5e8b439dd11df7e31ee389270a1dab9 100644 (file)
@@ -529,7 +529,7 @@ dasd_diag_build_cp(struct dasd_device * device, struct request *req)
        }
        cqr->retries = DIAG_MAX_RETRIES;
        cqr->buildclk = get_clock();
-       if (req->flags & REQ_FAILFAST)
+       if (req->cmd_flags & REQ_FAILFAST)
                set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
        cqr->device = device;
        cqr->expires = DIAG_TIMEOUT;
index b7a7fac3f7c3a05ae3448e5d172db199578a4dc6..5ecea3e4fdefd3a5bdaa8ef0cebc0711dc91028f 100644 (file)
@@ -1266,7 +1266,7 @@ dasd_eckd_build_cp(struct dasd_device * device, struct request *req)
                        recid++;
                }
        }
-       if (req->flags & REQ_FAILFAST)
+       if (req->cmd_flags & REQ_FAILFAST)
                set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
        cqr->device = device;
        cqr->expires = 5 * 60 * HZ;     /* 5 minutes */
index e85015be109bb918366a62c83fec616512a6bb30..80926c5482281293644f52e06888060e804a9f2a 100644 (file)
@@ -344,7 +344,7 @@ dasd_fba_build_cp(struct dasd_device * device, struct request *req)
                        recid++;
                }
        }
-       if (req->flags & REQ_FAILFAST)
+       if (req->cmd_flags & REQ_FAILFAST)
                set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
        cqr->device = device;
        cqr->expires = 5 * 60 * HZ;     /* 5 minutes */
index c4dfcc91dddaa07ea474037fd60f9245ae962d98..dab082002e6fe572f10e9e46f4cafecba2f3dbc7 100644 (file)
@@ -3,11 +3,13 @@ menu "SCSI device support"
 config RAID_ATTRS
        tristate "RAID Transport Class"
        default n
+       depends on BLOCK
        ---help---
          Provides RAID
 
 config SCSI
        tristate "SCSI device support"
+       depends on BLOCK
        ---help---
          If you want to use a SCSI hard disk, SCSI tape drive, SCSI CD-ROM or
          any other SCSI device under Linux, say Y and make sure that you know
index 5dcef48d414faeaf9ce636c093e924a719d9eea0..10353379a0741e5f4f4f784ad230c4cba6249192 100644 (file)
@@ -2862,7 +2862,7 @@ aic7xxx_done(struct aic7xxx_host *p, struct aic7xxx_scb *scb)
       aic_dev->r_total++;
       ptr = aic_dev->r_bins;
     }
-    if(cmd->device->simple_tags && cmd->request->flags & REQ_HARDBARRIER)
+    if(cmd->device->simple_tags && cmd->request->cmd_flags & REQ_HARDBARRIER)
     {
       aic_dev->barrier_total++;
       if(scb->tag_action == MSG_ORDERED_Q_TAG)
@@ -10158,7 +10158,7 @@ aic7xxx_buildscb(struct aic7xxx_host *p, Scsi_Cmnd *cmd,
     /* We always force TEST_UNIT_READY to untagged */
     if (cmd->cmnd[0] != TEST_UNIT_READY && sdptr->simple_tags)
     {
-      if (req->flags & REQ_HARDBARRIER)
+      if (req->cmd_flags & REQ_HARDBARRIER)
       {
        if(sdptr->ordered_tags)
        {
index 94d1de55607f2dc00a05605259b0d8e864937062..1427a41e844104149a7e71c4a3e639f01711718b 100644 (file)
@@ -344,7 +344,7 @@ static int idescsi_check_condition(ide_drive_t *drive, struct request *failed_co
        pc->buffer = buf;
        pc->c[0] = REQUEST_SENSE;
        pc->c[4] = pc->request_transfer = pc->buffer_size = SCSI_SENSE_BUFFERSIZE;
-       rq->flags = REQ_SENSE;
+       rq->cmd_type = REQ_TYPE_SENSE;
        pc->timeout = jiffies + WAIT_READY;
        /* NOTE! Save the failed packet command in "rq->buffer" */
        rq->buffer = (void *) failed_command->special;
@@ -398,12 +398,12 @@ static int idescsi_end_request (ide_drive_t *drive, int uptodate, int nrsecs)
        int errors = rq->errors;
        unsigned long flags;
 
-       if (!(rq->flags & (REQ_SPECIAL|REQ_SENSE))) {
+       if (!blk_special_request(rq) && !blk_sense_request(rq)) {
                ide_end_request(drive, uptodate, nrsecs);
                return 0;
        }
        ide_end_drive_cmd (drive, 0, 0);
-       if (rq->flags & REQ_SENSE) {
+       if (blk_sense_request(rq)) {
                idescsi_pc_t *opc = (idescsi_pc_t *) rq->buffer;
                if (log) {
                        printk ("ide-scsi: %s: wrap up check %lu, rst = ", drive->name, opc->scsi_cmd->serial_number);
@@ -708,11 +708,11 @@ static ide_startstop_t idescsi_issue_pc (ide_drive_t *drive, idescsi_pc_t *pc)
 static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *rq, sector_t block)
 {
 #if IDESCSI_DEBUG_LOG
-       printk (KERN_INFO "rq_status: %d, dev: %s, cmd: %x, errors: %d\n",rq->rq_status, rq->rq_disk->disk_name,rq->cmd[0],rq->errors);
+       printk (KERN_INFO "dev: %s, cmd: %x, errors: %d\n", rq->rq_disk->disk_name,rq->cmd[0],rq->errors);
        printk (KERN_INFO "sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n",rq->sector,rq->nr_sectors,rq->current_nr_sectors);
 #endif /* IDESCSI_DEBUG_LOG */
 
-       if (rq->flags & (REQ_SPECIAL|REQ_SENSE)) {
+       if (blk_sense_request(rq) || blk_special_request(rq)) {
                return idescsi_issue_pc (drive, (idescsi_pc_t *) rq->special);
        }
        blk_dump_rq_flags(rq, "ide-scsi: unsup command");
@@ -938,7 +938,7 @@ static int idescsi_queue (struct scsi_cmnd *cmd,
 
        ide_init_drive_cmd (rq);
        rq->special = (char *) pc;
-       rq->flags = REQ_SPECIAL;
+       rq->cmd_type = REQ_TYPE_SPECIAL;
        spin_unlock_irq(host->host_lock);
        rq->rq_disk = scsi->disk;
        (void) ide_do_drive_cmd (drive, rq, ide_end);
@@ -992,7 +992,7 @@ static int idescsi_eh_abort (struct scsi_cmnd *cmd)
                 */
                printk (KERN_ERR "ide-scsi: cmd aborted!\n");
 
-               if (scsi->pc->rq->flags & REQ_SENSE)
+               if (blk_sense_request(scsi->pc->rq))
                        kfree(scsi->pc->buffer);
                kfree(scsi->pc->rq);
                kfree(scsi->pc);
@@ -1042,7 +1042,7 @@ static int idescsi_eh_reset (struct scsi_cmnd *cmd)
        /* kill current request */
        blkdev_dequeue_request(req);
        end_that_request_last(req, 0);
-       if (req->flags & REQ_SENSE)
+       if (blk_sense_request(req))
                kfree(scsi->pc->buffer);
        kfree(scsi->pc);
        scsi->pc = NULL;
index 0bd9c60e6455b879b831c4b7a64854422213ee8c..aa60a5f1fbc3b5b639cf0123900b0ba862a36518 100644 (file)
@@ -67,7 +67,6 @@ static void __init pluto_detect_done(Scsi_Cmnd *SCpnt)
 
 static void __init pluto_detect_scsi_done(Scsi_Cmnd *SCpnt)
 {
-       SCpnt->request->rq_status = RQ_SCSI_DONE;
        PLND(("Detect done %08lx\n", (long)SCpnt))
        if (atomic_dec_and_test (&fcss))
                up(&fc_sem);
@@ -166,7 +165,7 @@ int __init pluto_detect(struct scsi_host_template *tpnt)
                
                SCpnt->cmd_len = COMMAND_SIZE(INQUIRY);
        
-               SCpnt->request->rq_status = RQ_SCSI_BUSY;
+               SCpnt->request->cmd_flags &= ~REQ_STARTED;
                
                SCpnt->done = pluto_detect_done;
                SCpnt->request_bufflen = 256;
@@ -178,7 +177,8 @@ int __init pluto_detect(struct scsi_host_template *tpnt)
        for (retry = 0; retry < 5; retry++) {
                for (i = 0; i < fcscount; i++) {
                        if (!fcs[i].fc) break;
-                       if (fcs[i].cmd.request->rq_status != RQ_SCSI_DONE) {
+                       if (!(fcs[i].cmd.request->cmd_flags & REQ_STARTED)) {
+                               fcs[i].cmd.request->cmd_flags |= REQ_STARTED;
                                disable_irq(fcs[i].fc->irq);
                                PLND(("queuecommand %d %d\n", retry, i))
                                fcp_scsi_queuecommand (&(fcs[i].cmd), 
index 7a054f9d1ee38230a7b59344e3e791d217f46d79..da95bce907dd3a23257637510bb12e3613bc6a6c 100644 (file)
@@ -592,12 +592,6 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
        return rtn;
 }
 
-
-/*
- * Per-CPU I/O completion queue.
- */
-static DEFINE_PER_CPU(struct list_head, scsi_done_q);
-
 /**
  * scsi_req_abort_cmd -- Request command recovery for the specified command
  * cmd: pointer to the SCSI command of interest
@@ -1065,7 +1059,7 @@ int scsi_device_cancel(struct scsi_device *sdev, int recovery)
 
        spin_lock_irqsave(&sdev->list_lock, flags);
        list_for_each_entry(scmd, &sdev->cmd_list, list) {
-               if (scmd->request && scmd->request->rq_status != RQ_INACTIVE) {
+               if (scmd->request) {
                        /*
                         * If we are unable to remove the timer, it means
                         * that the command has already timed out or
@@ -1102,7 +1096,7 @@ MODULE_PARM_DESC(scsi_logging_level, "a bit mask of logging levels");
 
 static int __init init_scsi(void)
 {
-       int error, i;
+       int error;
 
        error = scsi_init_queue();
        if (error)
@@ -1123,9 +1117,6 @@ static int __init init_scsi(void)
        if (error)
                goto cleanup_sysctl;
 
-       for_each_possible_cpu(i)
-               INIT_LIST_HEAD(&per_cpu(scsi_done_q, i));
-
        scsi_netlink_init();
 
        printk(KERN_NOTICE "SCSI subsystem initialized\n");
index d6743b959a72b151f1d6ade6d70b6d3b70c44d92..71084728eb42322462af7a6f0980d627af54e725 100644 (file)
@@ -82,7 +82,7 @@ static void scsi_unprep_request(struct request *req)
 {
        struct scsi_cmnd *cmd = req->special;
 
-       req->flags &= ~REQ_DONTPREP;
+       req->cmd_flags &= ~REQ_DONTPREP;
        req->special = NULL;
 
        scsi_put_command(cmd);
@@ -196,7 +196,8 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
        req->sense_len = 0;
        req->retries = retries;
        req->timeout = timeout;
-       req->flags |= flags | REQ_BLOCK_PC | REQ_SPECIAL | REQ_QUIET;
+       req->cmd_type = REQ_TYPE_BLOCK_PC;
+       req->cmd_flags |= flags | REQ_QUIET | REQ_PREEMPT;
 
        /*
         * head injection *required* here otherwise quiesce won't work
@@ -397,7 +398,8 @@ int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd,
        req = blk_get_request(sdev->request_queue, write, gfp);
        if (!req)
                goto free_sense;
-       req->flags |= REQ_BLOCK_PC | REQ_QUIET;
+       req->cmd_type = REQ_TYPE_BLOCK_PC;
+       req->cmd_flags |= REQ_QUIET;
 
        if (use_sg)
                err = scsi_req_map_sg(req, buffer, use_sg, bufflen, gfp);
@@ -933,7 +935,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
                                        break;
                                }
                        }
-                       if (!(req->flags & REQ_QUIET)) {
+                       if (!(req->cmd_flags & REQ_QUIET)) {
                                scmd_printk(KERN_INFO, cmd,
                                            "Device not ready: ");
                                scsi_print_sense_hdr("", &sshdr);
@@ -941,7 +943,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
                        scsi_end_request(cmd, 0, this_count, 1);
                        return;
                case VOLUME_OVERFLOW:
-                       if (!(req->flags & REQ_QUIET)) {
+                       if (!(req->cmd_flags & REQ_QUIET)) {
                                scmd_printk(KERN_INFO, cmd,
                                            "Volume overflow, CDB: ");
                                __scsi_print_command(cmd->cmnd);
@@ -963,7 +965,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
                return;
        }
        if (result) {
-               if (!(req->flags & REQ_QUIET)) {
+               if (!(req->cmd_flags & REQ_QUIET)) {
                        scmd_printk(KERN_INFO, cmd,
                                    "SCSI error: return code = 0x%08x\n",
                                    result);
@@ -995,7 +997,7 @@ static int scsi_init_io(struct scsi_cmnd *cmd)
        /*
         * if this is a rq->data based REQ_BLOCK_PC, setup for a non-sg xfer
         */
-       if ((req->flags & REQ_BLOCK_PC) && !req->bio) {
+       if (blk_pc_request(req) && !req->bio) {
                cmd->request_bufflen = req->data_len;
                cmd->request_buffer = req->data;
                req->buffer = req->data;
@@ -1139,13 +1141,12 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
         * these two cases differently.  We differentiate by looking
         * at request->cmd, as this tells us the real story.
         */
-       if (req->flags & REQ_SPECIAL && req->special) {
+       if (blk_special_request(req) && req->special)
                cmd = req->special;
-       } else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
-
-               if(unlikely(specials_only) && !(req->flags & REQ_SPECIAL)) {
-                       if(specials_only == SDEV_QUIESCE ||
-                                       specials_only == SDEV_BLOCK)
+       else if (blk_pc_request(req) || blk_fs_request(req)) {
+               if (unlikely(specials_only) && !(req->cmd_flags & REQ_PREEMPT)){
+                       if (specials_only == SDEV_QUIESCE ||
+                           specials_only == SDEV_BLOCK)
                                goto defer;
                        
                        sdev_printk(KERN_ERR, sdev,
@@ -1153,7 +1154,6 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
                        goto kill;
                }
                        
-                       
                /*
                 * Now try and find a command block that we can use.
                 */
@@ -1184,7 +1184,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
         * lock.  We hope REQ_STARTED prevents anything untoward from
         * happening now.
         */
-       if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
+       if (blk_fs_request(req) || blk_pc_request(req)) {
                int ret;
 
                /*
@@ -1216,7 +1216,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
                /*
                 * Initialize the actual SCSI command for this request.
                 */
-               if (req->flags & REQ_BLOCK_PC) {
+               if (blk_pc_request(req)) {
                        scsi_setup_blk_pc_cmnd(cmd);
                } else if (req->rq_disk) {
                        struct scsi_driver *drv;
@@ -1233,7 +1233,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
        /*
         * The request is now prepped, no need to come back here
         */
-       req->flags |= REQ_DONTPREP;
+       req->cmd_flags |= REQ_DONTPREP;
        return BLKPREP_OK;
 
  defer:
@@ -1454,8 +1454,9 @@ static void scsi_request_fn(struct request_queue *q)
                if (unlikely(cmd == NULL)) {
                        printk(KERN_CRIT "impossible request in %s.\n"
                                         "please mail a stack trace to "
-                                        "linux-scsi@vger.kernel.org",
+                                        "linux-scsi@vger.kernel.org\n",
                                         __FUNCTION__);
+                       blk_dump_rq_flags(req, "foo");
                        BUG();
                }
                spin_lock(shost->host_lock);
index 638cff41d436777fa85afc31dfd6213f227b2637..10bc99c911faf55744dc226f04b4fe84fc210800 100644 (file)
@@ -443,8 +443,7 @@ static int sd_init_command(struct scsi_cmnd * SCpnt)
                SCpnt->cmnd[0] = READ_6;
                SCpnt->sc_data_direction = DMA_FROM_DEVICE;
        } else {
-               printk(KERN_ERR "sd: Unknown command %lx\n", rq->flags);
-/* overkill    panic("Unknown sd command %lx\n", rq->flags); */
+               printk(KERN_ERR "sd: Unknown command %x\n", rq->cmd_flags);
                return 0;
        }
 
@@ -840,7 +839,7 @@ static int sd_issue_flush(struct device *dev, sector_t *error_sector)
 static void sd_prepare_flush(request_queue_t *q, struct request *rq)
 {
        memset(rq->cmd, 0, sizeof(rq->cmd));
-       rq->flags |= REQ_BLOCK_PC;
+       rq->cmd_type = REQ_TYPE_BLOCK_PC;
        rq->timeout = SD_TIMEOUT;
        rq->cmd[0] = SYNCHRONIZE_CACHE;
        rq->cmd_len = 10;
index 2f8073b73bf30d1d5924498ec899c8984c0549ff..7f9bcef6adfa941cfa07cc3301a57b62177cd7ca 100644 (file)
@@ -2017,7 +2017,7 @@ static void NCR5380_information_transfer (struct Scsi_Host *instance)
                if((count > SUN3_DMA_MINSIZE) && (sun3_dma_setup_done
                                                  != cmd))
                {
-                       if(cmd->request->flags & REQ_CMD) {
+                       if(blk_fs_request(cmd->request)) {
                                sun3scsi_dma_setup(d, count,
                                                   rq_data_dir(cmd->request));
                                sun3_dma_setup_done = cmd;
index 837173415d4c8442ae7bb036be84452b7c1d615e..44a99aeb818046355a17a4fbb9b94b34506878f7 100644 (file)
@@ -524,7 +524,7 @@ static inline unsigned long sun3scsi_dma_residual(struct Scsi_Host *instance)
 static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted, Scsi_Cmnd *cmd,
                                    int write_flag)
 {
-       if(cmd->request->flags & REQ_CMD)
+       if(blk_fs_request(cmd->request))
                return wanted;
        else
                return 0;
index 008a82ab8521e9142bc4845e0f03298a7645ca75..f5742b84b27aa28bf4b6ed0e16f23a4c20cc1745 100644 (file)
@@ -458,7 +458,7 @@ static inline unsigned long sun3scsi_dma_residual(struct Scsi_Host *instance)
 static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted, Scsi_Cmnd *cmd,
                                    int write_flag)
 {
-       if(cmd->request->flags & REQ_CMD)
+       if(blk_fs_request(cmd->request))
                return wanted;
        else
                return 0;
index 86e48c42d6af3cf955c2cb4320bb33eeda29f77e..422a4b288e346f589687aada8950d294dfbf469a 100644 (file)
@@ -8,8 +8,7 @@ comment "may also be needed; see USB_STORAGE Help for more information"
 
 config USB_STORAGE
        tristate "USB Mass Storage support"
-       depends on USB
-       select SCSI
+       depends on USB && SCSI
        ---help---
          Say Y here if you want to connect USB mass storage devices to your
          computer's USB port. This is the driver you need for USB
@@ -18,7 +17,7 @@ config USB_STORAGE
          similar devices. This driver may also be used for some cameras
          and card readers.
 
-         This option 'selects' (turns on, enables) 'SCSI', but you
+         This option depends on 'SCSI' support being enabled, but you
          probably also need 'SCSI device support: SCSI disk support'
          (BLK_DEV_SD) for most USB storage devices.
 
index 4fd9efac29abad9fc5d1f3f75f862f8283f6a59c..1453d2d164f7c2d5a94e5da75d41dbba9d94a66e 100644 (file)
@@ -4,6 +4,8 @@
 
 menu "File systems"
 
+if BLOCK
+
 config EXT2_FS
        tristate "Second extended fs support"
        help
@@ -399,6 +401,8 @@ config ROMFS_FS
          If you don't know whether you need it, then you don't need it:
          answer N.
 
+endif
+
 config INOTIFY
        bool "Inotify file change notification support"
        default y
@@ -530,6 +534,7 @@ config FUSE_FS
          If you want to develop a userspace FS, or if you want to use
          a filesystem based on FUSE, answer Y or M.
 
+if BLOCK
 menu "CD-ROM/DVD Filesystems"
 
 config ISO9660_FS
@@ -597,7 +602,9 @@ config UDF_NLS
        depends on (UDF_FS=m && NLS) || (UDF_FS=y && NLS=y)
 
 endmenu
+endif
 
+if BLOCK
 menu "DOS/FAT/NT Filesystems"
 
 config FAT_FS
@@ -782,6 +789,7 @@ config NTFS_RW
          It is perfectly safe to say N here.
 
 endmenu
+endif
 
 menu "Pseudo filesystems"
 
@@ -939,7 +947,7 @@ menu "Miscellaneous filesystems"
 
 config ADFS_FS
        tristate "ADFS file system support (EXPERIMENTAL)"
-       depends on EXPERIMENTAL
+       depends on BLOCK && EXPERIMENTAL
        help
          The Acorn Disc Filing System is the standard file system of the
          RiscOS operating system which runs on Acorn's ARM-based Risc PC
@@ -967,7 +975,7 @@ config ADFS_FS_RW
 
 config AFFS_FS
        tristate "Amiga FFS file system support (EXPERIMENTAL)"
-       depends on EXPERIMENTAL
+       depends on BLOCK && EXPERIMENTAL
        help
          The Fast File System (FFS) is the common file system used on hard
          disks by Amiga(tm) systems since AmigaOS Version 1.3 (34.20).  Say Y
@@ -989,7 +997,7 @@ config AFFS_FS
 
 config HFS_FS
        tristate "Apple Macintosh file system support (EXPERIMENTAL)"
-       depends on EXPERIMENTAL
+       depends on BLOCK && EXPERIMENTAL
        select NLS
        help
          If you say Y here, you will be able to mount Macintosh-formatted
@@ -1002,6 +1010,7 @@ config HFS_FS
 
 config HFSPLUS_FS
        tristate "Apple Extended HFS file system support"
+       depends on BLOCK
        select NLS
        select NLS_UTF8
        help
@@ -1015,7 +1024,7 @@ config HFSPLUS_FS
 
 config BEFS_FS
        tristate "BeOS file system (BeFS) support (read only) (EXPERIMENTAL)"
-       depends on EXPERIMENTAL
+       depends on BLOCK && EXPERIMENTAL
        select NLS
        help
          The BeOS File System (BeFS) is the native file system of Be, Inc's
@@ -1042,7 +1051,7 @@ config BEFS_DEBUG
 
 config BFS_FS
        tristate "BFS file system support (EXPERIMENTAL)"
-       depends on EXPERIMENTAL
+       depends on BLOCK && EXPERIMENTAL
        help
          Boot File System (BFS) is a file system used under SCO UnixWare to
          allow the bootloader access to the kernel image and other important
@@ -1064,7 +1073,7 @@ config BFS_FS
 
 config EFS_FS
        tristate "EFS file system support (read only) (EXPERIMENTAL)"
-       depends on EXPERIMENTAL
+       depends on BLOCK && EXPERIMENTAL
        help
          EFS is an older file system used for non-ISO9660 CD-ROMs and hard
          disk partitions by SGI's IRIX operating system (IRIX 6.0 and newer
@@ -1079,7 +1088,7 @@ config EFS_FS
 
 config JFFS_FS
        tristate "Journalling Flash File System (JFFS) support"
-       depends on MTD
+       depends on MTD && BLOCK
        help
          JFFS is the Journaling Flash File System developed by Axis
          Communications in Sweden, aimed at providing a crash/powerdown-safe
@@ -1264,6 +1273,7 @@ endchoice
 
 config CRAMFS
        tristate "Compressed ROM file system support (cramfs)"
+       depends on BLOCK
        select ZLIB_INFLATE
        help
          Saying Y here includes support for CramFs (Compressed ROM File
@@ -1283,6 +1293,7 @@ config CRAMFS
 
 config VXFS_FS
        tristate "FreeVxFS file system support (VERITAS VxFS(TM) compatible)"
+       depends on BLOCK
        help
          FreeVxFS is a file system driver that support the VERITAS VxFS(TM)
          file system format.  VERITAS VxFS(TM) is the standard file system
@@ -1300,6 +1311,7 @@ config VXFS_FS
 
 config HPFS_FS
        tristate "OS/2 HPFS file system support"
+       depends on BLOCK
        help
          OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS
          is the file system used for organizing files on OS/2 hard disk
@@ -1316,6 +1328,7 @@ config HPFS_FS
 
 config QNX4FS_FS
        tristate "QNX4 file system support (read only)"
+       depends on BLOCK
        help
          This is the file system used by the real-time operating systems
          QNX 4 and QNX 6 (the latter is also called QNX RTP).
@@ -1343,6 +1356,7 @@ config QNX4FS_RW
 
 config SYSV_FS
        tristate "System V/Xenix/V7/Coherent file system support"
+       depends on BLOCK
        help
          SCO, Xenix and Coherent are commercial Unix systems for Intel
          machines, and Version 7 was used on the DEC PDP-11. Saying Y
@@ -1381,6 +1395,7 @@ config SYSV_FS
 
 config UFS_FS
        tristate "UFS file system support (read only)"
+       depends on BLOCK
        help
          BSD and derivate versions of Unix (such as SunOS, FreeBSD, NetBSD,
          OpenBSD and NeXTstep) use a file system called UFS. Some System V
@@ -1959,11 +1974,13 @@ config GENERIC_ACL
 
 endmenu
 
+if BLOCK
 menu "Partition Types"
 
 source "fs/partitions/Kconfig"
 
 endmenu
+endif
 
 source "fs/nls/Kconfig"
 
index 46b8cfe497b2206e66fd986617e5b554116174a9..a503e6ce0f329944fd658e1e049662ab8d27f3cb 100644 (file)
@@ -5,12 +5,18 @@
 # Rewritten to use lists instead of if-statements.
 # 
 
-obj-y :=       open.o read_write.o file_table.o buffer.o  bio.o super.o \
-               block_dev.o char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
+obj-y :=       open.o read_write.o file_table.o super.o \
+               char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
                ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
                attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
-               seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
-               ioprio.o pnode.o drop_caches.o splice.o sync.o
+               seq_file.o xattr.o libfs.o fs-writeback.o \
+               pnode.o drop_caches.o splice.o sync.o
+
+ifeq ($(CONFIG_BLOCK),y)
+obj-y +=       buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
+else
+obj-y +=       no-block.o
+endif
 
 obj-$(CONFIG_INOTIFY)          += inotify.o
 obj-$(CONFIG_INOTIFY_USER)     += inotify_user.o
index 67d6634101fdcc81e75787c87d8084f5c13256c9..2e8c42639eaa54803887b441758d6e176b4cb58b 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
-#include <linux/buffer_head.h>
 #include "volume.h"
 #include "vnode.h"
 #include <rxrpc/call.h>
@@ -37,7 +36,6 @@ struct inode_operations afs_file_inode_operations = {
 
 const struct address_space_operations afs_fs_aops = {
        .readpage       = afs_file_readpage,
-       .sync_page      = block_sync_page,
        .set_page_dirty = __set_page_dirty_nobuffers,
        .releasepage    = afs_file_releasepage,
        .invalidatepage = afs_file_invalidatepage,
index 6eb48e1446ecb4fd7c239f2cdca17e7faef89c28..bad52433de69dc01cc836292938607fa02628e20 100644 (file)
@@ -46,7 +46,6 @@
 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
 static int load_elf_library(struct file *);
 static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
-extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
 
 #ifndef elf_addr_t
 #define elf_addr_t unsigned long
index 6a0b9ad8f8c9d031d4c32fb32e1c53e0677d1ef2..8f93e939f21375abe2c205fd2783c319098cc87f 100644 (file)
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2001 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -1142,7 +1142,7 @@ static int biovec_create_pools(struct bio_set *bs, int pool_entries, int scale)
                struct biovec_slab *bp = bvec_slabs + i;
                mempool_t **bvp = bs->bvec_pools + i;
 
-               if (i >= scale)
+               if (pool_entries > 1 && i >= scale)
                        pool_entries >>= 1;
 
                *bvp = mempool_create_slab_pool(pool_entries, bp->slab);
index 4346468139e80247e6db85d389a072cedf0d52a0..0c361ea7e5a6e13811da039c8e235b0acb670340 100644 (file)
 #include <linux/module.h>
 #include <linux/blkpg.h>
 #include <linux/buffer_head.h>
+#include <linux/writeback.h>
 #include <linux/mpage.h>
 #include <linux/mount.h>
 #include <linux/uio.h>
 #include <linux/namei.h>
 #include <asm/uaccess.h>
+#include "internal.h"
 
 struct bdev_inode {
        struct block_device bdev;
@@ -1313,3 +1315,24 @@ void close_bdev_excl(struct block_device *bdev)
 }
 
 EXPORT_SYMBOL(close_bdev_excl);
+
+int __invalidate_device(struct block_device *bdev)
+{
+       struct super_block *sb = get_super(bdev);
+       int res = 0;
+
+       if (sb) {
+               /*
+                * no need to lock the super, get_super holds the
+                * read mutex so the filesystem cannot go away
+                * under us (->put_super runs with the write lock
+                * hold).
+                */
+               shrink_dcache_sb(sb);
+               res = invalidate_inodes(sb);
+               drop_super(sb);
+       }
+       invalidate_bdev(bdev, 0);
+       return res;
+}
+EXPORT_SYMBOL(__invalidate_device);
index 3b6d701073e7f09a311f10297a58d8c06e715df5..16cfbcd254f15fa05eb452eb512297c439605035 100644 (file)
@@ -159,31 +159,6 @@ int sync_blockdev(struct block_device *bdev)
 }
 EXPORT_SYMBOL(sync_blockdev);
 
-static void __fsync_super(struct super_block *sb)
-{
-       sync_inodes_sb(sb, 0);
-       DQUOT_SYNC(sb);
-       lock_super(sb);
-       if (sb->s_dirt && sb->s_op->write_super)
-               sb->s_op->write_super(sb);
-       unlock_super(sb);
-       if (sb->s_op->sync_fs)
-               sb->s_op->sync_fs(sb, 1);
-       sync_blockdev(sb->s_bdev);
-       sync_inodes_sb(sb, 1);
-}
-
-/*
- * Write out and wait upon all dirty data associated with this
- * superblock.  Filesystem data as well as the underlying block
- * device.  Takes the superblock lock.
- */
-int fsync_super(struct super_block *sb)
-{
-       __fsync_super(sb);
-       return sync_blockdev(sb->s_bdev);
-}
-
 /*
  * Write out and wait upon all dirty data associated with this
  * device.   Filesystem data as well as the underlying block
@@ -259,118 +234,6 @@ void thaw_bdev(struct block_device *bdev, struct super_block *sb)
 }
 EXPORT_SYMBOL(thaw_bdev);
 
-/*
- * sync everything.  Start out by waking pdflush, because that writes back
- * all queues in parallel.
- */
-static void do_sync(unsigned long wait)
-{
-       wakeup_pdflush(0);
-       sync_inodes(0);         /* All mappings, inodes and their blockdevs */
-       DQUOT_SYNC(NULL);
-       sync_supers();          /* Write the superblocks */
-       sync_filesystems(0);    /* Start syncing the filesystems */
-       sync_filesystems(wait); /* Waitingly sync the filesystems */
-       sync_inodes(wait);      /* Mappings, inodes and blockdevs, again. */
-       if (!wait)
-               printk("Emergency Sync complete\n");
-       if (unlikely(laptop_mode))
-               laptop_sync_completion();
-}
-
-asmlinkage long sys_sync(void)
-{
-       do_sync(1);
-       return 0;
-}
-
-void emergency_sync(void)
-{
-       pdflush_operation(do_sync, 0);
-}
-
-/*
- * Generic function to fsync a file.
- *
- * filp may be NULL if called via the msync of a vma.
- */
-int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
-{
-       struct inode * inode = dentry->d_inode;
-       struct super_block * sb;
-       int ret, err;
-
-       /* sync the inode to buffers */
-       ret = write_inode_now(inode, 0);
-
-       /* sync the superblock to buffers */
-       sb = inode->i_sb;
-       lock_super(sb);
-       if (sb->s_op->write_super)
-               sb->s_op->write_super(sb);
-       unlock_super(sb);
-
-       /* .. finally sync the buffers to disk */
-       err = sync_blockdev(sb->s_bdev);
-       if (!ret)
-               ret = err;
-       return ret;
-}
-
-long do_fsync(struct file *file, int datasync)
-{
-       int ret;
-       int err;
-       struct address_space *mapping = file->f_mapping;
-
-       if (!file->f_op || !file->f_op->fsync) {
-               /* Why?  We can still call filemap_fdatawrite */
-               ret = -EINVAL;
-               goto out;
-       }
-
-       ret = filemap_fdatawrite(mapping);
-
-       /*
-        * We need to protect against concurrent writers, which could cause
-        * livelocks in fsync_buffers_list().
-        */
-       mutex_lock(&mapping->host->i_mutex);
-       err = file->f_op->fsync(file, file->f_dentry, datasync);
-       if (!ret)
-               ret = err;
-       mutex_unlock(&mapping->host->i_mutex);
-       err = filemap_fdatawait(mapping);
-       if (!ret)
-               ret = err;
-out:
-       return ret;
-}
-
-static long __do_fsync(unsigned int fd, int datasync)
-{
-       struct file *file;
-       int ret = -EBADF;
-
-       file = fget(fd);
-       if (file) {
-               ret = do_fsync(file, datasync);
-               fput(file);
-       }
-       return ret;
-}
-
-asmlinkage long sys_fsync(unsigned int fd)
-{
-       return __do_fsync(fd, 0);
-}
-
-asmlinkage long sys_fdatasync(unsigned int fd)
-{
-       return __do_fsync(fd, 1);
-}
-
 /*
  * Various filesystems appear to want __find_get_block to be non-blocking.
  * But it's the page lock which protects the buffers.  To get around this,
@@ -1550,35 +1413,6 @@ static void discard_buffer(struct buffer_head * bh)
        unlock_buffer(bh);
 }
 
-/**
- * try_to_release_page() - release old fs-specific metadata on a page
- *
- * @page: the page which the kernel is trying to free
- * @gfp_mask: memory allocation flags (and I/O mode)
- *
- * The address_space is to try to release any data against the page
- * (presumably at page->private).  If the release was successful, return `1'.
- * Otherwise return zero.
- *
- * The @gfp_mask argument specifies whether I/O may be performed to release
- * this page (__GFP_IO), and whether the call may block (__GFP_WAIT).
- *
- * NOTE: @gfp_mask may go away, and this function may become non-blocking.
- */
-int try_to_release_page(struct page *page, gfp_t gfp_mask)
-{
-       struct address_space * const mapping = page->mapping;
-
-       BUG_ON(!PageLocked(page));
-       if (PageWriteback(page))
-               return 0;
-       
-       if (mapping && mapping->a_ops->releasepage)
-               return mapping->a_ops->releasepage(page, gfp_mask);
-       return try_to_free_buffers(page);
-}
-EXPORT_SYMBOL(try_to_release_page);
-
 /**
  * block_invalidatepage - invalidate part of all of a buffer-backed page
  *
@@ -1630,14 +1464,6 @@ out:
 }
 EXPORT_SYMBOL(block_invalidatepage);
 
-void do_invalidatepage(struct page *page, unsigned long offset)
-{
-       void (*invalidatepage)(struct page *, unsigned long);
-       invalidatepage = page->mapping->a_ops->invalidatepage ? :
-               block_invalidatepage;
-       (*invalidatepage)(page, offset);
-}
-
 /*
  * We attach and possibly dirty the buffers atomically wrt
  * __set_page_dirty_buffers() via private_lock.  try_to_free_buffers
index 1f3285affa39c9d06da9fb3af7bee91094d894f9..a885f46ca001f117a9b8bc3bd4ac0e140b5c9e3e 100644 (file)
@@ -24,6 +24,7 @@
 #ifdef CONFIG_KMOD
 #include <linux/kmod.h>
 #endif
+#include "internal.h"
 
 /*
  * capabilities for /dev/mem, /dev/kmem and similar directly mappable character
index ddb012a68023fe0818d50670f2fb81a990dbab56..976a691c5a680561a97b1aede8840e292b23ce73 100644 (file)
@@ -25,7 +25,6 @@
 #include <linux/backing-dev.h>
 #include <linux/stat.h>
 #include <linux/fcntl.h>
-#include <linux/mpage.h>
 #include <linux/pagemap.h>
 #include <linux/pagevec.h>
 #include <linux/smp_lock.h>
index b88147c1dc27f0df435b6251376a73b7dfc531cb..05f874c7441bbd388087dd0be9a1197628144373 100644 (file)
@@ -19,7 +19,6 @@
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #include <linux/fs.h>
-#include <linux/buffer_head.h>
 #include <linux/stat.h>
 #include <linux/pagemap.h>
 #include <asm/div64.h>
index b0ea6687ab55e8116a4790a52682e24c71146c34..e34c7db00f6feccaba099b9604352e7745322a03 100644 (file)
@@ -22,7 +22,6 @@
  */
 
 #include <linux/fs.h>
-#include <linux/ext2_fs.h>
 #include "cifspdu.h"
 #include "cifsglob.h"
 #include "cifsproto.h"
@@ -74,7 +73,7 @@ int cifs_ioctl (struct inode * inode, struct file * filep,
                        }
                        break;
 #ifdef CONFIG_CIFS_POSIX
-               case EXT2_IOC_GETFLAGS:
+               case FS_IOC_GETFLAGS:
                        if(CIFS_UNIX_EXTATTR_CAP & caps) {
                                if (pSMBFile == NULL)
                                        break;
@@ -82,12 +81,12 @@ int cifs_ioctl (struct inode * inode, struct file * filep,
                                        &ExtAttrBits, &ExtAttrMask);
                                if(rc == 0)
                                        rc = put_user(ExtAttrBits &
-                                               EXT2_FL_USER_VISIBLE,
+                                               FS_FL_USER_VISIBLE,
                                                (int __user *)arg);
                        }
                        break;
 
-               case EXT2_IOC_SETFLAGS:
+               case FS_IOC_SETFLAGS:
                        if(CIFS_UNIX_EXTATTR_CAP & caps) {
                                if(get_user(ExtAttrBits,(int __user *)arg)) {
                                        rc = -EFAULT;
index ce982f6e8c80270be3fd23f8f9553c486626986e..122b4e3992b54b322d40ef84814c95048ca00472 100644 (file)
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/ioctls.h>
-
-extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
+#include "internal.h"
 
 int compat_log = 1;
 
+extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
+
 int compat_printk(const char *fmt, ...)
 {
        va_list ap;
@@ -313,9 +314,6 @@ out:
 #define IOCTL_HASHSIZE 256
 static struct ioctl_trans *ioctl32_hash_table[IOCTL_HASHSIZE];
 
-extern struct ioctl_trans ioctl_start[];
-extern int ioctl_table_size;
-
 static inline unsigned long ioctl32_hash(unsigned long cmd)
 {
        return (((cmd >> 6) ^ (cmd >> 4) ^ cmd)) % IOCTL_HASHSIZE;
@@ -838,8 +836,6 @@ static int do_nfs4_super_data_conv(void *raw_data)
        return 0;
 }
 
-extern int copy_mount_options (const void __user *, unsigned long *);
-
 #define SMBFS_NAME      "smbfs"
 #define NCPFS_NAME      "ncpfs"
 #define NFS4_NAME      "nfs4"
index 4063a939697768d990faa971a4488efe21f80dcb..64b34533edea67e2ed28f0c85fa406d3135de23c 100644 (file)
 #include <linux/if_pppox.h>
 #include <linux/mtio.h>
 #include <linux/cdrom.h>
-#include <linux/loop.h>
 #include <linux/auto_fs.h>
 #include <linux/auto_fs4.h>
 #include <linux/tty.h>
 #include <linux/vt_kern.h>
 #include <linux/fb.h>
-#include <linux/ext2_fs.h>
-#include <linux/ext3_jbd.h>
-#include <linux/ext3_fs.h>
 #include <linux/videodev.h>
 #include <linux/netdevice.h>
 #include <linux/raw.h>
@@ -60,7 +56,6 @@
 #include <linux/pci.h>
 #include <linux/module.h>
 #include <linux/serial.h>
-#include <linux/reiserfs_fs.h>
 #include <linux/if_tun.h>
 #include <linux/ctype.h>
 #include <linux/ioctl32.h>
 #include <linux/nbd.h>
 #include <linux/random.h>
 #include <linux/filter.h>
-#include <linux/msdos_fs.h>
 #include <linux/pktcdvd.h>
 
 #include <linux/hiddev.h>
 #include <linux/dvb/video.h>
 #include <linux/lp.h>
 
-/* Aiee. Someone does not find a difference between int and long */
-#define EXT2_IOC32_GETFLAGS               _IOR('f', 1, int)
-#define EXT2_IOC32_SETFLAGS               _IOW('f', 2, int)
-#define EXT3_IOC32_GETVERSION             _IOR('f', 3, int)
-#define EXT3_IOC32_SETVERSION             _IOW('f', 4, int)
-#define EXT3_IOC32_GETRSVSZ               _IOR('f', 5, int)
-#define EXT3_IOC32_SETRSVSZ               _IOW('f', 6, int)
-#define EXT3_IOC32_GROUP_EXTEND           _IOW('f', 7, unsigned int)
-#ifdef CONFIG_JBD_DEBUG
-#define EXT3_IOC32_WAIT_FOR_READONLY      _IOR('f', 99, int)
-#endif
-
-#define EXT2_IOC32_GETVERSION             _IOR('v', 1, int)
-#define EXT2_IOC32_SETVERSION             _IOW('v', 2, int)
-
 static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd,
                              unsigned long arg, struct file *f)
 {
@@ -176,34 +155,6 @@ static int rw_long(unsigned int fd, unsigned int cmd, unsigned long arg)
        return err;
 }
 
-static int do_ext2_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
-       /* These are just misnamed, they actually get/put from/to user an int */
-       switch (cmd) {
-       case EXT2_IOC32_GETFLAGS: cmd = EXT2_IOC_GETFLAGS; break;
-       case EXT2_IOC32_SETFLAGS: cmd = EXT2_IOC_SETFLAGS; break;
-       case EXT2_IOC32_GETVERSION: cmd = EXT2_IOC_GETVERSION; break;
-       case EXT2_IOC32_SETVERSION: cmd = EXT2_IOC_SETVERSION; break;
-       }
-       return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg));
-}
-
-static int do_ext3_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
-       /* These are just misnamed, they actually get/put from/to user an int */
-       switch (cmd) {
-       case EXT3_IOC32_GETVERSION: cmd = EXT3_IOC_GETVERSION; break;
-       case EXT3_IOC32_SETVERSION: cmd = EXT3_IOC_SETVERSION; break;
-       case EXT3_IOC32_GETRSVSZ: cmd = EXT3_IOC_GETRSVSZ; break;
-       case EXT3_IOC32_SETRSVSZ: cmd = EXT3_IOC_SETRSVSZ; break;
-       case EXT3_IOC32_GROUP_EXTEND: cmd = EXT3_IOC_GROUP_EXTEND; break;
-#ifdef CONFIG_JBD_DEBUG
-       case EXT3_IOC32_WAIT_FOR_READONLY: cmd = EXT3_IOC_WAIT_FOR_READONLY; break;
-#endif
-       }
-       return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg));
-}
-
 struct compat_video_event {
        int32_t         type;
        compat_time_t   timestamp;
@@ -694,6 +645,7 @@ out:
 }
 #endif
 
+#ifdef CONFIG_BLOCK
 struct hd_geometry32 {
        unsigned char heads;
        unsigned char sectors;
@@ -918,6 +870,7 @@ static int sg_grt_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
        }
        return err;
 }
+#endif /* CONFIG_BLOCK */
 
 struct sock_fprog32 {
        unsigned short  len;
@@ -1041,6 +994,7 @@ static int ppp_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
 }
 
 
+#ifdef CONFIG_BLOCK
 struct mtget32 {
        compat_long_t   mt_type;
        compat_long_t   mt_resid;
@@ -1213,73 +1167,7 @@ static int cdrom_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long ar
 
        return err;
 }
-
-struct loop_info32 {
-       compat_int_t    lo_number;      /* ioctl r/o */
-       compat_dev_t    lo_device;      /* ioctl r/o */
-       compat_ulong_t  lo_inode;       /* ioctl r/o */
-       compat_dev_t    lo_rdevice;     /* ioctl r/o */
-       compat_int_t    lo_offset;
-       compat_int_t    lo_encrypt_type;
-       compat_int_t    lo_encrypt_key_size;    /* ioctl w/o */
-       compat_int_t    lo_flags;       /* ioctl r/o */
-       char            lo_name[LO_NAME_SIZE];
-       unsigned char   lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
-       compat_ulong_t  lo_init[2];
-       char            reserved[4];
-};
-
-static int loop_status(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
-       mm_segment_t old_fs = get_fs();
-       struct loop_info l;
-       struct loop_info32 __user *ul;
-       int err = -EINVAL;
-
-       ul = compat_ptr(arg);
-       switch(cmd) {
-       case LOOP_SET_STATUS:
-               err = get_user(l.lo_number, &ul->lo_number);
-               err |= __get_user(l.lo_device, &ul->lo_device);
-               err |= __get_user(l.lo_inode, &ul->lo_inode);
-               err |= __get_user(l.lo_rdevice, &ul->lo_rdevice);
-               err |= __copy_from_user(&l.lo_offset, &ul->lo_offset,
-                       8 + (unsigned long)l.lo_init - (unsigned long)&l.lo_offset);
-               if (err) {
-                       err = -EFAULT;
-               } else {
-                       set_fs (KERNEL_DS);
-                       err = sys_ioctl (fd, cmd, (unsigned long)&l);
-                       set_fs (old_fs);
-               }
-               break;
-       case LOOP_GET_STATUS:
-               set_fs (KERNEL_DS);
-               err = sys_ioctl (fd, cmd, (unsigned long)&l);
-               set_fs (old_fs);
-               if (!err) {
-                       err = put_user(l.lo_number, &ul->lo_number);
-                       err |= __put_user(l.lo_device, &ul->lo_device);
-                       err |= __put_user(l.lo_inode, &ul->lo_inode);
-                       err |= __put_user(l.lo_rdevice, &ul->lo_rdevice);
-                       err |= __copy_to_user(&ul->lo_offset, &l.lo_offset,
-                               (unsigned long)l.lo_init - (unsigned long)&l.lo_offset);
-                       if (err)
-                               err = -EFAULT;
-               }
-               break;
-       default: {
-               static int count;
-               if (++count <= 20)
-                       printk("%s: Unknown loop ioctl cmd, fd(%d) "
-                              "cmd(%08x) arg(%08lx)\n",
-                              __FUNCTION__, fd, cmd, arg);
-       }
-       }
-       return err;
-}
-
-extern int tty_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg);
+#endif /* CONFIG_BLOCK */
 
 #ifdef CONFIG_VT
 
@@ -1607,6 +1495,7 @@ ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg)
        return -EINVAL;
 }
 
+#ifdef CONFIG_BLOCK
 static int broken_blkgetsize(unsigned int fd, unsigned int cmd, unsigned long arg)
 {
        /* The mkswap binary hard codes it to Intel value :-((( */
@@ -1641,12 +1530,14 @@ static int blkpg_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long ar
 
        return sys_ioctl(fd, cmd, (unsigned long)a);
 }
+#endif
 
 static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg)
 {
        return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, arg);
 }
 
+#ifdef CONFIG_BLOCK
 /* Fix sizeof(sizeof()) breakage */
 #define BLKBSZGET_32   _IOR(0x12,112,int)
 #define BLKBSZSET_32   _IOW(0x12,113,int)
@@ -1667,6 +1558,7 @@ static int do_blkgetsize64(unsigned int fd, unsigned int cmd,
 {
        return sys_ioctl(fd, BLKGETSIZE64, (unsigned long)compat_ptr(arg));
 }
+#endif
 
 /* Bluetooth ioctls */
 #define HCIUARTSETPROTO        _IOW('U', 200, int)
@@ -1687,6 +1579,7 @@ static int do_blkgetsize64(unsigned int fd, unsigned int cmd,
 #define HIDPGETCONNLIST        _IOR('H', 210, int)
 #define HIDPGETCONNINFO        _IOR('H', 211, int)
 
+#ifdef CONFIG_BLOCK
 struct floppy_struct32 {
        compat_uint_t   size;
        compat_uint_t   sect;
@@ -2011,6 +1904,7 @@ out:
        kfree(karg);
        return err;
 }
+#endif
 
 struct mtd_oob_buf32 {
        u_int32_t start;
@@ -2052,61 +1946,7 @@ static int mtd_rw_oob(unsigned int fd, unsigned int cmd, unsigned long arg)
        return err;
 }      
 
-#define        VFAT_IOCTL_READDIR_BOTH32       _IOR('r', 1, struct compat_dirent[2])
-#define        VFAT_IOCTL_READDIR_SHORT32      _IOR('r', 2, struct compat_dirent[2])
-
-static long
-put_dirent32 (struct dirent *d, struct compat_dirent __user *d32)
-{
-        if (!access_ok(VERIFY_WRITE, d32, sizeof(struct compat_dirent)))
-                return -EFAULT;
-
-        __put_user(d->d_ino, &d32->d_ino);
-        __put_user(d->d_off, &d32->d_off);
-        __put_user(d->d_reclen, &d32->d_reclen);
-        if (__copy_to_user(d32->d_name, d->d_name, d->d_reclen))
-               return -EFAULT;
-
-        return 0;
-}
-
-static int vfat_ioctl32(unsigned fd, unsigned cmd, unsigned long arg)
-{
-       struct compat_dirent __user *p = compat_ptr(arg);
-       int ret;
-       mm_segment_t oldfs = get_fs();
-       struct dirent d[2];
-
-       switch(cmd)
-       {
-               case VFAT_IOCTL_READDIR_BOTH32:
-                       cmd = VFAT_IOCTL_READDIR_BOTH;
-                       break;
-               case VFAT_IOCTL_READDIR_SHORT32:
-                       cmd = VFAT_IOCTL_READDIR_SHORT;
-                       break;
-       }
-
-       set_fs(KERNEL_DS);
-       ret = sys_ioctl(fd,cmd,(unsigned long)&d);
-       set_fs(oldfs);
-       if (ret >= 0) {
-               ret |= put_dirent32(&d[0], p);
-               ret |= put_dirent32(&d[1], p + 1);
-       }
-       return ret;
-}
-
-#define REISERFS_IOC_UNPACK32               _IOW(0xCD,1,int)
-
-static int reiserfs_ioctl32(unsigned fd, unsigned cmd, unsigned long ptr)
-{
-        if (cmd == REISERFS_IOC_UNPACK32)
-                cmd = REISERFS_IOC_UNPACK;
-
-        return sys_ioctl(fd,cmd,ptr);
-}
-
+#ifdef CONFIG_BLOCK
 struct raw32_config_request
 {
         compat_int_t    raw_minor;
@@ -2171,6 +2011,7 @@ static int raw_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
         }
         return ret;
 }
+#endif /* CONFIG_BLOCK */
 
 struct serial_struct32 {
         compat_int_t    type;
@@ -2777,6 +2618,7 @@ HANDLE_IOCTL(SIOCBRDELIF, dev_ifsioc)
 HANDLE_IOCTL(SIOCRTMSG, ret_einval)
 HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp)
 #endif
+#ifdef CONFIG_BLOCK
 HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo)
 HANDLE_IOCTL(BLKRAGET, w_long)
 HANDLE_IOCTL(BLKGETSIZE, w_long)
@@ -2802,16 +2644,17 @@ HANDLE_IOCTL(FDGETFDCSTAT32, fd_ioctl_trans)
 HANDLE_IOCTL(FDWERRORGET32, fd_ioctl_trans)
 HANDLE_IOCTL(SG_IO,sg_ioctl_trans)
 HANDLE_IOCTL(SG_GET_REQUEST_TABLE, sg_grt_trans)
+#endif
 HANDLE_IOCTL(PPPIOCGIDLE32, ppp_ioctl_trans)
 HANDLE_IOCTL(PPPIOCSCOMPRESS32, ppp_ioctl_trans)
 HANDLE_IOCTL(PPPIOCSPASS32, ppp_sock_fprog_ioctl_trans)
 HANDLE_IOCTL(PPPIOCSACTIVE32, ppp_sock_fprog_ioctl_trans)
+#ifdef CONFIG_BLOCK
 HANDLE_IOCTL(MTIOCGET32, mt_ioctl_trans)
 HANDLE_IOCTL(MTIOCPOS32, mt_ioctl_trans)
 HANDLE_IOCTL(CDROMREADAUDIO, cdrom_ioctl_trans)
 HANDLE_IOCTL(CDROM_SEND_PACKET, cdrom_ioctl_trans)
-HANDLE_IOCTL(LOOP_SET_STATUS, loop_status)
-HANDLE_IOCTL(LOOP_GET_STATUS, loop_status)
+#endif
 #define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int)
 HANDLE_IOCTL(AUTOFS_IOC_SETTIMEOUT32, ioc_settimeout)
 #ifdef CONFIG_VT
@@ -2821,19 +2664,6 @@ HANDLE_IOCTL(PIO_UNIMAP, do_unimap_ioctl)
 HANDLE_IOCTL(GIO_UNIMAP, do_unimap_ioctl)
 HANDLE_IOCTL(KDFONTOP, do_kdfontop_ioctl)
 #endif
-HANDLE_IOCTL(EXT2_IOC32_GETFLAGS, do_ext2_ioctl)
-HANDLE_IOCTL(EXT2_IOC32_SETFLAGS, do_ext2_ioctl)
-HANDLE_IOCTL(EXT2_IOC32_GETVERSION, do_ext2_ioctl)
-HANDLE_IOCTL(EXT2_IOC32_SETVERSION, do_ext2_ioctl)
-HANDLE_IOCTL(EXT3_IOC32_GETVERSION, do_ext3_ioctl)
-HANDLE_IOCTL(EXT3_IOC32_SETVERSION, do_ext3_ioctl)
-HANDLE_IOCTL(EXT3_IOC32_GETRSVSZ, do_ext3_ioctl)
-HANDLE_IOCTL(EXT3_IOC32_SETRSVSZ, do_ext3_ioctl)
-HANDLE_IOCTL(EXT3_IOC32_GROUP_EXTEND, do_ext3_ioctl)
-COMPATIBLE_IOCTL(EXT3_IOC_GROUP_ADD)
-#ifdef CONFIG_JBD_DEBUG
-HANDLE_IOCTL(EXT3_IOC32_WAIT_FOR_READONLY, do_ext3_ioctl)
-#endif
 /* One SMB ioctl needs translations. */
 #define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t)
 HANDLE_IOCTL(SMB_IOC_GETMOUNTUID_32, do_smb_getmountuid)
@@ -2863,16 +2693,14 @@ HANDLE_IOCTL(SONET_SETFRAMING, do_atm_ioctl)
 HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl)
 HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl)
 /* block stuff */
+#ifdef CONFIG_BLOCK
 HANDLE_IOCTL(BLKBSZGET_32, do_blkbszget)
 HANDLE_IOCTL(BLKBSZSET_32, do_blkbszset)
 HANDLE_IOCTL(BLKGETSIZE64_32, do_blkgetsize64)
-/* vfat */
-HANDLE_IOCTL(VFAT_IOCTL_READDIR_BOTH32, vfat_ioctl32)
-HANDLE_IOCTL(VFAT_IOCTL_READDIR_SHORT32, vfat_ioctl32)
-HANDLE_IOCTL(REISERFS_IOC_UNPACK32, reiserfs_ioctl32)
 /* Raw devices */
 HANDLE_IOCTL(RAW_SETBIND, raw_ioctl)
 HANDLE_IOCTL(RAW_GETBIND, raw_ioctl)
+#endif
 /* Serial */
 HANDLE_IOCTL(TIOCGSERIAL, serial_struct_ioctl)
 HANDLE_IOCTL(TIOCSSERIAL, serial_struct_ioctl)
index 17b392a2049ebfb9b39103ecedbd9fe6ec884828..fc2faa44f8d185b6be9b8600a4ffb9991d5b980f 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/seqlock.h>
 #include <linux/swap.h>
 #include <linux/bootmem.h>
+#include "internal.h"
 
 
 int sysctl_vfs_cache_pressure __read_mostly = 100;
@@ -1877,9 +1878,6 @@ kmem_cache_t *filp_cachep __read_mostly;
 
 EXPORT_SYMBOL(d_genocide);
 
-extern void bdev_cache_init(void);
-extern void chrdev_init(void);
-
 void __init vfs_caches_init_early(void)
 {
        dcache_init_early();
index 92ea8265d7d5248e046954369da84b86bb858753..3e7a84a1e509a4aad71656e7bfeb8432d8a61460 100644 (file)
@@ -661,5 +661,8 @@ const struct file_operations ext2_dir_operations = {
        .read           = generic_read_dir,
        .readdir        = ext2_readdir,
        .ioctl          = ext2_ioctl,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl   = ext2_compat_ioctl,
+#endif
        .fsync          = ext2_sync_file,
 };
index e65a019fc7a58b87f80c64fcaa7944b06bd30242..c19ac153f56b72c38d564de76a4616a4f599d9e6 100644 (file)
@@ -137,6 +137,7 @@ extern void ext2_set_inode_flags(struct inode *inode);
 /* ioctl.c */
 extern int ext2_ioctl (struct inode *, struct file *, unsigned int,
                       unsigned long);
+extern long ext2_compat_ioctl(struct file *, unsigned int, unsigned long);
 
 /* namei.c */
 struct dentry *ext2_get_parent(struct dentry *child);
index 23e2c7ccec1d15794c2679c4215a917b8f0edb25..e8bbed9dd2680d094b550228694fe66136b7ea80 100644 (file)
@@ -46,6 +46,9 @@ const struct file_operations ext2_file_operations = {
        .aio_read       = generic_file_aio_read,
        .aio_write      = generic_file_aio_write,
        .ioctl          = ext2_ioctl,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl   = ext2_compat_ioctl,
+#endif
        .mmap           = generic_file_mmap,
        .open           = generic_file_open,
        .release        = ext2_release_file,
@@ -63,6 +66,9 @@ const struct file_operations ext2_xip_file_operations = {
        .read           = xip_file_read,
        .write          = xip_file_write,
        .ioctl          = ext2_ioctl,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl   = ext2_compat_ioctl,
+#endif
        .mmap           = xip_file_mmap,
        .open           = generic_file_open,
        .release        = ext2_release_file,
index 3ca9afdf713d579d92f2037cc5ee4f69db9ab3da..1dfba77eab10dc348e0ac3c4fa4e23d2448ac281 100644 (file)
@@ -11,6 +11,8 @@
 #include <linux/capability.h>
 #include <linux/time.h>
 #include <linux/sched.h>
+#include <linux/compat.h>
+#include <linux/smp_lock.h>
 #include <asm/current.h>
 #include <asm/uaccess.h>
 
@@ -80,3 +82,33 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
                return -ENOTTY;
        }
 }
+
+#ifdef CONFIG_COMPAT
+long ext2_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+       struct inode *inode = file->f_dentry->d_inode;
+       int ret;
+
+       /* These are just misnamed, they actually get/put from/to user an int */
+       switch (cmd) {
+       case EXT2_IOC32_GETFLAGS:
+               cmd = EXT2_IOC_GETFLAGS;
+               break;
+       case EXT2_IOC32_SETFLAGS:
+               cmd = EXT2_IOC_SETFLAGS;
+               break;
+       case EXT2_IOC32_GETVERSION:
+               cmd = EXT2_IOC_GETVERSION;
+               break;
+       case EXT2_IOC32_SETVERSION:
+               cmd = EXT2_IOC_SETVERSION;
+               break;
+       default:
+               return -ENOIOCTLCMD;
+       }
+       lock_kernel();
+       ret = ext2_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
+       unlock_kernel();
+       return ret;
+}
+#endif
index 429acbb4e064bd21352f8019e133a781523169fb..d0b54f30b914e5304f367252e5f6a3bd96ac8d38 100644 (file)
@@ -44,6 +44,9 @@ const struct file_operations ext3_dir_operations = {
        .read           = generic_read_dir,
        .readdir        = ext3_readdir,         /* we take BKL. needed?*/
        .ioctl          = ext3_ioctl,           /* BKL held */
+#ifdef CONFIG_COMPAT
+       .compat_ioctl   = ext3_compat_ioctl,
+#endif
        .fsync          = ext3_sync_file,       /* BKL held */
 #ifdef CONFIG_EXT3_INDEX
        .release        = ext3_release_dir,
index 994efd189f4e3c7bd667e866d359670189f7ea65..74ff20f9d09b927f982c0f5331f7b339286a1ad3 100644 (file)
@@ -114,6 +114,9 @@ const struct file_operations ext3_file_operations = {
        .readv          = generic_file_readv,
        .writev         = generic_file_writev,
        .ioctl          = ext3_ioctl,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl   = ext3_compat_ioctl,
+#endif
        .mmap           = generic_file_mmap,
        .open           = generic_file_open,
        .release        = ext3_release_file,
index dcf4f1dd108b21acd60c4ea3c88b0d8780b319c3..03ba5bcab18633725372096cd94c9e52b6aa83cc 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/writeback.h>
 #include <linux/mpage.h>
 #include <linux/uio.h>
+#include <linux/bio.h>
 #include "xattr.h"
 #include "acl.h"
 
@@ -1073,7 +1074,7 @@ struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode,
                return bh;
        if (buffer_uptodate(bh))
                return bh;
-       ll_rw_block(READ, 1, &bh);
+       ll_rw_block(READ_META, 1, &bh);
        wait_on_buffer(bh);
        if (buffer_uptodate(bh))
                return bh;
@@ -2540,7 +2541,7 @@ make_io:
                 */
                get_bh(bh);
                bh->b_end_io = end_buffer_read_sync;
-               submit_bh(READ, bh);
+               submit_bh(READ_META, bh);
                wait_on_buffer(bh);
                if (!buffer_uptodate(bh)) {
                        ext3_error(inode->i_sb, "ext3_get_inode_loc",
index 3a6b012d120cedae637169ee747ec43b36b25e47..12daa68695721ffaf4a06bfa087d2018fceeb845 100644 (file)
 #include <linux/ext3_fs.h>
 #include <linux/ext3_jbd.h>
 #include <linux/time.h>
+#include <linux/compat.h>
+#include <linux/smp_lock.h>
 #include <asm/uaccess.h>
 
-
 int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
                unsigned long arg)
 {
@@ -252,3 +253,55 @@ flags_err:
                return -ENOTTY;
        }
 }
+
+#ifdef CONFIG_COMPAT
+long ext3_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+       struct inode *inode = file->f_dentry->d_inode;
+       int ret;
+
+       /* These are just misnamed, they actually get/put from/to user an int */
+       switch (cmd) {
+       case EXT3_IOC32_GETFLAGS:
+               cmd = EXT3_IOC_GETFLAGS;
+               break;
+       case EXT3_IOC32_SETFLAGS:
+               cmd = EXT3_IOC_SETFLAGS;
+               break;
+       case EXT3_IOC32_GETVERSION:
+               cmd = EXT3_IOC_GETVERSION;
+               break;
+       case EXT3_IOC32_SETVERSION:
+               cmd = EXT3_IOC_SETVERSION;
+               break;
+       case EXT3_IOC32_GROUP_EXTEND:
+               cmd = EXT3_IOC_GROUP_EXTEND;
+               break;
+       case EXT3_IOC32_GETVERSION_OLD:
+               cmd = EXT3_IOC_GETVERSION_OLD;
+               break;
+       case EXT3_IOC32_SETVERSION_OLD:
+               cmd = EXT3_IOC_SETVERSION_OLD;
+               break;
+#ifdef CONFIG_JBD_DEBUG
+       case EXT3_IOC32_WAIT_FOR_READONLY:
+               cmd = EXT3_IOC_WAIT_FOR_READONLY;
+               break;
+#endif
+       case EXT3_IOC32_GETRSVSZ:
+               cmd = EXT3_IOC_GETRSVSZ;
+               break;
+       case EXT3_IOC32_SETRSVSZ:
+               cmd = EXT3_IOC_SETRSVSZ;
+               break;
+       case EXT3_IOC_GROUP_ADD:
+               break;
+       default:
+               return -ENOIOCTLCMD;
+       }
+       lock_kernel();
+       ret = ext3_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
+       unlock_kernel();
+       return ret;
+}
+#endif
index 85d132c37ee0f1a8a8c840432d7839f4d9b97af6..235e77b52ea59b2610c2ed5cf4af464f4cd12359 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/string.h>
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
+#include <linux/bio.h>
 #include <linux/smp_lock.h>
 
 #include "namei.h"
@@ -870,7 +871,7 @@ restart:
                                bh = ext3_getblk(NULL, dir, b++, 0, &err);
                                bh_use[ra_max] = bh;
                                if (bh)
-                                       ll_rw_block(READ, 1, &bh);
+                                       ll_rw_block(READ_META, 1, &bh);
                        }
                }
                if ((bh = bh_use[ra_ptr++]) == NULL)
index 698b85bb1dd45855a709635eb6239a4c9bbef0d6..3e50a41662834834538886e5cd0aa4d48d0c3b08 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/dirent.h>
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
+#include <linux/compat.h>
 #include <asm/uaccess.h>
 
 static inline loff_t fat_make_i_pos(struct super_block *sb,
@@ -741,10 +742,65 @@ static int fat_dir_ioctl(struct inode * inode, struct file * filp,
        return ret;
 }
 
+#ifdef CONFIG_COMPAT
+#define        VFAT_IOCTL_READDIR_BOTH32       _IOR('r', 1, struct compat_dirent[2])
+#define        VFAT_IOCTL_READDIR_SHORT32      _IOR('r', 2, struct compat_dirent[2])
+
+static long fat_compat_put_dirent32(struct dirent *d,
+                                   struct compat_dirent __user *d32)
+{
+        if (!access_ok(VERIFY_WRITE, d32, sizeof(struct compat_dirent)))
+                return -EFAULT;
+
+        __put_user(d->d_ino, &d32->d_ino);
+        __put_user(d->d_off, &d32->d_off);
+        __put_user(d->d_reclen, &d32->d_reclen);
+        if (__copy_to_user(d32->d_name, d->d_name, d->d_reclen))
+               return -EFAULT;
+
+        return 0;
+}
+
+static long fat_compat_dir_ioctl(struct file *file, unsigned cmd,
+                                unsigned long arg)
+{
+       struct compat_dirent __user *p = compat_ptr(arg);
+       int ret;
+       mm_segment_t oldfs = get_fs();
+       struct dirent d[2];
+
+       switch (cmd) {
+       case VFAT_IOCTL_READDIR_BOTH32:
+               cmd = VFAT_IOCTL_READDIR_BOTH;
+               break;
+       case VFAT_IOCTL_READDIR_SHORT32:
+               cmd = VFAT_IOCTL_READDIR_SHORT;
+               break;
+       default:
+               return -ENOIOCTLCMD;
+       }
+
+       set_fs(KERNEL_DS);
+       lock_kernel();
+       ret = fat_dir_ioctl(file->f_dentry->d_inode, file,
+                           cmd, (unsigned long) &d);
+       unlock_kernel();
+       set_fs(oldfs);
+       if (ret >= 0) {
+               ret |= fat_compat_put_dirent32(&d[0], p);
+               ret |= fat_compat_put_dirent32(&d[1], p + 1);
+       }
+       return ret;
+}
+#endif /* CONFIG_COMPAT */
+
 const struct file_operations fat_dir_operations = {
        .read           = generic_read_dir,
        .readdir        = fat_readdir,
        .ioctl          = fat_dir_ioctl,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl   = fat_compat_dir_ioctl,
+#endif
        .fsync          = file_fsync,
 };
 
index 892643dc9af10a1a6fbf7c858fd430ab209a5724..c403b66ec83c6525532c25decbe923c32356649b 100644 (file)
@@ -22,8 +22,7 @@
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
-
-extern struct super_block *blockdev_superblock;
+#include "internal.h"
 
 /**
  *     __mark_inode_dirty -    internal function
@@ -320,7 +319,7 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
 
                if (!bdi_cap_writeback_dirty(bdi)) {
                        list_move(&inode->i_list, &sb->s_dirty);
-                       if (sb == blockdev_superblock) {
+                       if (sb_is_blkdev_sb(sb)) {
                                /*
                                 * Dirty memory-backed blockdev: the ramdisk
                                 * driver does this.  Skip just this inode
@@ -337,14 +336,14 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
 
                if (wbc->nonblocking && bdi_write_congested(bdi)) {
                        wbc->encountered_congestion = 1;
-                       if (sb != blockdev_superblock)
+                       if (!sb_is_blkdev_sb(sb))
                                break;          /* Skip a congested fs */
                        list_move(&inode->i_list, &sb->s_dirty);
                        continue;               /* Skip a congested blockdev */
                }
 
                if (wbc->bdi && bdi != wbc->bdi) {
-                       if (sb != blockdev_superblock)
+                       if (!sb_is_blkdev_sb(sb))
                                break;          /* fs has the wrong queue */
                        list_move(&inode->i_list, &sb->s_dirty);
                        continue;               /* blockdev has wrong queue */
index 8a1ca5ef7ada1f26468d031ee0882d61e350ca36..3915635b4470bcaa29ce094acb10b9620bf8d2c9 100644 (file)
@@ -246,12 +246,8 @@ struct hfsplus_readdir_data {
 
 /* ext2 ioctls (EXT2_IOC_GETFLAGS and EXT2_IOC_SETFLAGS) to support
  * chattr/lsattr */
-#define HFSPLUS_IOC_EXT2_GETFLAGS      _IOR('f', 1, long)
-#define HFSPLUS_IOC_EXT2_SETFLAGS      _IOW('f', 2, long)
-
-#define EXT2_FLAG_IMMUTABLE            0x00000010 /* Immutable file */
-#define EXT2_FLAG_APPEND               0x00000020 /* writes to file may only append */
-#define EXT2_FLAG_NODUMP               0x00000040 /* do not dump file */
+#define HFSPLUS_IOC_EXT2_GETFLAGS      FS_IOC_GETFLAGS
+#define HFSPLUS_IOC_EXT2_SETFLAGS      FS_IOC_SETFLAGS
 
 
 /*
index 13cf848ac8330ef03ebfbce82e3ce3ba07d5bc2c..79fd10402ea3479d3aaad8b7f6b780fa6a362216 100644 (file)
@@ -28,11 +28,11 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
        case HFSPLUS_IOC_EXT2_GETFLAGS:
                flags = 0;
                if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_IMMUTABLE)
-                       flags |= EXT2_FLAG_IMMUTABLE; /* EXT2_IMMUTABLE_FL */
+                       flags |= FS_IMMUTABLE_FL; /* EXT2_IMMUTABLE_FL */
                if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_APPEND)
-                       flags |= EXT2_FLAG_APPEND; /* EXT2_APPEND_FL */
+                       flags |= FS_APPEND_FL; /* EXT2_APPEND_FL */
                if (HFSPLUS_I(inode).userflags & HFSPLUS_FLG_NODUMP)
-                       flags |= EXT2_FLAG_NODUMP; /* EXT2_NODUMP_FL */
+                       flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */
                return put_user(flags, (int __user *)arg);
        case HFSPLUS_IOC_EXT2_SETFLAGS: {
                if (IS_RDONLY(inode))
@@ -44,32 +44,31 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
                if (get_user(flags, (int __user *)arg))
                        return -EFAULT;
 
-               if (flags & (EXT2_FLAG_IMMUTABLE|EXT2_FLAG_APPEND) ||
+               if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) ||
                    HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) {
                        if (!capable(CAP_LINUX_IMMUTABLE))
                                return -EPERM;
                }
 
                /* don't silently ignore unsupported ext2 flags */
-               if (flags & ~(EXT2_FLAG_IMMUTABLE|EXT2_FLAG_APPEND|
-                             EXT2_FLAG_NODUMP))
+               if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL))
                        return -EOPNOTSUPP;
 
-               if (flags & EXT2_FLAG_IMMUTABLE) { /* EXT2_IMMUTABLE_FL */
+               if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */
                        inode->i_flags |= S_IMMUTABLE;
                        HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE;
                } else {
                        inode->i_flags &= ~S_IMMUTABLE;
                        HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_IMMUTABLE;
                }
-               if (flags & EXT2_FLAG_APPEND) { /* EXT2_APPEND_FL */
+               if (flags & FS_APPEND_FL) { /* EXT2_APPEND_FL */
                        inode->i_flags |= S_APPEND;
                        HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_APPEND;
                } else {
                        inode->i_flags &= ~S_APPEND;
                        HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_APPEND;
                }
-               if (flags & EXT2_FLAG_NODUMP) /* EXT2_NODUMP_FL */
+               if (flags & FS_NODUMP_FL) /* EXT2_NODUMP_FL */
                        HFSPLUS_I(inode).userflags |= HFSPLUS_FLG_NODUMP;
                else
                        HFSPLUS_I(inode).userflags &= ~HFSPLUS_FLG_NODUMP;
index abf77471e6c4a68701421dc6df6994de6e7000eb..ada7643104e14dee4bcc8a5dc5bd87b7398aad5f 100644 (file)
@@ -362,27 +362,6 @@ int invalidate_inodes(struct super_block * sb)
 }
 
 EXPORT_SYMBOL(invalidate_inodes);
-int __invalidate_device(struct block_device *bdev)
-{
-       struct super_block *sb = get_super(bdev);
-       int res = 0;
-
-       if (sb) {
-               /*
-                * no need to lock the super, get_super holds the
-                * read mutex so the filesystem cannot go away
-                * under us (->put_super runs with the write lock
-                * hold).
-                */
-               shrink_dcache_sb(sb);
-               res = invalidate_inodes(sb);
-               drop_super(sb);
-       }
-       invalidate_bdev(bdev, 0);
-       return res;
-}
-EXPORT_SYMBOL(__invalidate_device);
 
 static int can_unuse(struct inode *inode)
 {
diff --git a/fs/internal.h b/fs/internal.h
new file mode 100644 (file)
index 0000000..ea00126
--- /dev/null
@@ -0,0 +1,55 @@
+/* fs/ internal definitions
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/ioctl32.h>
+
+struct super_block;
+
+/*
+ * block_dev.c
+ */
+#ifdef CONFIG_BLOCK
+extern struct super_block *blockdev_superblock;
+extern void __init bdev_cache_init(void);
+
+static inline int sb_is_blkdev_sb(struct super_block *sb)
+{
+       return sb == blockdev_superblock;
+}
+
+#else
+static inline void bdev_cache_init(void)
+{
+}
+
+static inline int sb_is_blkdev_sb(struct super_block *sb)
+{
+       return 0;
+}
+#endif
+
+/*
+ * char_dev.c
+ */
+extern void __init chrdev_init(void);
+
+/*
+ * compat_ioctl.c
+ */
+#ifdef CONFIG_COMPAT
+extern struct ioctl_trans ioctl_start[];
+extern int ioctl_table_size;
+#endif
+
+/*
+ * namespace.c
+ */
+extern int copy_mount_options(const void __user *, unsigned long *);
index 78b1deae3fa2e1a9aaa4142fa83050b434840b90..6dc6721d9e822d159fbb7e68cfc047aafe6c4e34 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * fs/ioprio.c
  *
- * Copyright (C) 2004 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2004 Jens Axboe <axboe@kernel.dk>
  *
  * Helper functions for setting/querying io priorities of processes. The
  * system calls closely mimmick getpriority/setpriority, see the man page for
@@ -47,8 +47,8 @@ static int set_task_ioprio(struct task_struct *task, int ioprio)
        /* see wmb() in current_io_context() */
        smp_read_barrier_depends();
 
-       if (ioc && ioc->set_ioprio)
-               ioc->set_ioprio(ioc, ioprio);
+       if (ioc)
+               ioc->ioprio_changed = 1;
 
        task_unlock(task);
        return 0;
@@ -81,7 +81,12 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
        }
 
        ret = -ESRCH;
-       read_lock_irq(&tasklist_lock);
+       /*
+        * We want IOPRIO_WHO_PGRP/IOPRIO_WHO_USER to be "atomic",
+        * so we can't use rcu_read_lock(). See re-copy of ->ioprio
+        * in copy_process().
+        */
+       read_lock(&tasklist_lock);
        switch (which) {
                case IOPRIO_WHO_PROCESS:
                        if (!who)
@@ -124,7 +129,7 @@ free_uid:
                        ret = -EINVAL;
        }
 
-       read_unlock_irq(&tasklist_lock);
+       read_unlock(&tasklist_lock);
        return ret;
 }
 
@@ -170,7 +175,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
        int ret = -ESRCH;
        int tmpio;
 
-       read_lock_irq(&tasklist_lock);
+       read_lock(&tasklist_lock);
        switch (which) {
                case IOPRIO_WHO_PROCESS:
                        if (!who)
@@ -221,7 +226,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
                        ret = -EINVAL;
        }
 
-       read_unlock_irq(&tasklist_lock);
+       read_unlock(&tasklist_lock);
        return ret;
 }
 
index 67b3774820eb663086675ef4a057066e56e93e94..37db524882628dd3b4a4eed92b7a3243d6e61b60 100644 (file)
@@ -6,7 +6,6 @@
  */
 
 #include <linux/fs.h>
-#include <linux/ext2_fs.h>
 #include <linux/ctype.h>
 #include <linux/capability.h>
 #include <linux/time.h>
@@ -22,13 +21,13 @@ static struct {
        long jfs_flag;
        long ext2_flag;
 } jfs_map[] = {
-       {JFS_NOATIME_FL, EXT2_NOATIME_FL},
-       {JFS_DIRSYNC_FL, EXT2_DIRSYNC_FL},
-       {JFS_SYNC_FL, EXT2_SYNC_FL},
-       {JFS_SECRM_FL, EXT2_SECRM_FL},
-       {JFS_UNRM_FL, EXT2_UNRM_FL},
-       {JFS_APPEND_FL, EXT2_APPEND_FL},
-       {JFS_IMMUTABLE_FL, EXT2_IMMUTABLE_FL},
+       {JFS_NOATIME_FL,        FS_NOATIME_FL},
+       {JFS_DIRSYNC_FL,        FS_DIRSYNC_FL},
+       {JFS_SYNC_FL,           FS_SYNC_FL},
+       {JFS_SECRM_FL,          FS_SECRM_FL},
+       {JFS_UNRM_FL,           FS_UNRM_FL},
+       {JFS_APPEND_FL,         FS_APPEND_FL},
+       {JFS_IMMUTABLE_FL,      FS_IMMUTABLE_FL},
        {0, 0},
 };
 
index 1e4598247d0b962f02eca7f390bf6683c9df3e4e..692a3e578fc8f1102823548dbc5d49a1a1850953 100644 (file)
@@ -693,6 +693,8 @@ out:
  * the call was made get new I/O started against them.  If wbc->sync_mode is
  * WB_SYNC_ALL then we were called for data integrity and we must wait for
  * existing IO to complete.
+ *
+ * If you fix this you should check generic_writepages() also!
  */
 int
 mpage_writepages(struct address_space *mapping,
index 6ede3a539ed82acb0eca0781ce78ba2fd86db1df..66d921e14feebaa532bed7e6a041ba08e5057a8c 100644 (file)
 #include <linux/namei.h>
 #include <linux/security.h>
 #include <linux/mount.h>
+#include <linux/ramfs.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include "pnode.h"
 
-extern int __init init_rootfs(void);
-
 /* spinlock for vfsmount related operations, inplace of dcache_lock */
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
 
index b674462793d3375cf2adab2d642a8526e956fa0d..f6675d2c386c29127a9e6bad4223f47a1dbd3e12 100644 (file)
@@ -51,7 +51,6 @@
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/file.h>
-#include <linux/mpage.h>
 #include <linux/writeback.h>
 
 #include <linux/sunrpc/clnt.h>
diff --git a/fs/no-block.c b/fs/no-block.c
new file mode 100644 (file)
index 0000000..d269a93
--- /dev/null
@@ -0,0 +1,22 @@
+/* no-block.c: implementation of routines required for non-BLOCK configuration
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+
+static int no_blkdev_open(struct inode * inode, struct file * filp)
+{
+       return -ENODEV;
+}
+
+const struct file_operations def_blk_fops = {
+       .open           = no_blkdev_open,
+};
index d713ce6b3e12935891bda7e21d301ec3d53038f8..67e665fdb7fccbde923238532d49f192931d43f9 100644 (file)
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-obj-y := check.o
+obj-$(CONFIG_BLOCK) := check.o
 
 obj-$(CONFIG_ACORN_PARTITION) += acorn.o
 obj-$(CONFIG_AMIGA_PARTITION) += amiga.o
index 5bbd60896050545759f4aa603e12cfee9cd63295..66bc425f2f3db467344dd81c841442c5806b985e 100644 (file)
@@ -277,12 +277,15 @@ static int devinfo_show(struct seq_file *f, void *v)
                if (i == 0)
                        seq_printf(f, "Character devices:\n");
                chrdev_show(f, i);
-       } else {
+       }
+#ifdef CONFIG_BLOCK
+       else {
                i -= CHRDEV_MAJOR_HASH_SIZE;
                if (i == 0)
                        seq_printf(f, "\nBlock devices:\n");
                blkdev_show(f, i);
        }
+#endif
        return 0;
 }
 
@@ -355,6 +358,7 @@ static int stram_read_proc(char *page, char **start, off_t off,
 }
 #endif
 
+#ifdef CONFIG_BLOCK
 extern struct seq_operations partitions_op;
 static int partitions_open(struct inode *inode, struct file *file)
 {
@@ -378,6 +382,7 @@ static struct file_operations proc_diskstats_operations = {
        .llseek         = seq_lseek,
        .release        = seq_release,
 };
+#endif
 
 #ifdef CONFIG_MODULES
 extern struct seq_operations modules_op;
@@ -695,7 +700,9 @@ void __init proc_misc_init(void)
                entry->proc_fops = &proc_kmsg_operations;
        create_seq_entry("devices", 0, &proc_devinfo_operations);
        create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations);
+#ifdef CONFIG_BLOCK
        create_seq_entry("partitions", 0, &proc_partitions_operations);
+#endif
        create_seq_entry("stat", 0, &proc_stat_operations);
        create_seq_entry("interrupts", 0, &proc_interrupts_operations);
 #ifdef CONFIG_SLAB
@@ -707,7 +714,9 @@ void __init proc_misc_init(void)
        create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations);
        create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations);
        create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations);
+#ifdef CONFIG_BLOCK
        create_seq_entry("diskstats", 0, &proc_diskstats_operations);
+#endif
 #ifdef CONFIG_MODULES
        create_seq_entry("modules", 0, &proc_modules_operations);
 #endif
index d6a2be826e29880abd0f6873caa2344f88f2c5f8..b9dae76a0b6e2212083efa9ca0281e31661c0375 100644 (file)
@@ -337,6 +337,34 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, void
        return 0;
 }
 
+/*
+ * look up a superblock on which quota ops will be performed
+ * - use the name of a block device to find the superblock thereon
+ */
+static inline struct super_block *quotactl_block(const char __user *special)
+{
+#ifdef CONFIG_BLOCK
+       struct block_device *bdev;
+       struct super_block *sb;
+       char *tmp = getname(special);
+
+       if (IS_ERR(tmp))
+               return ERR_PTR(PTR_ERR(tmp));
+       bdev = lookup_bdev(tmp);
+       putname(tmp);
+       if (IS_ERR(bdev))
+               return ERR_PTR(PTR_ERR(bdev));
+       sb = get_super(bdev);
+       bdput(bdev);
+       if (!sb)
+               return ERR_PTR(-ENODEV);
+
+       return sb;
+#else
+       return ERR_PTR(-ENODEV);
+#endif
+}
+
 /*
  * This is the system call interface. This communicates with
  * the user-level programs. Currently this only supports diskquota
@@ -347,25 +375,15 @@ asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, qid_t
 {
        uint cmds, type;
        struct super_block *sb = NULL;
-       struct block_device *bdev;
-       char *tmp;
        int ret;
 
        cmds = cmd >> SUBCMDSHIFT;
        type = cmd & SUBCMDMASK;
 
        if (cmds != Q_SYNC || special) {
-               tmp = getname(special);
-               if (IS_ERR(tmp))
-                       return PTR_ERR(tmp);
-               bdev = lookup_bdev(tmp);
-               putname(tmp);
-               if (IS_ERR(bdev))
-                       return PTR_ERR(bdev);
-               sb = get_super(bdev);
-               bdput(bdev);
-               if (!sb)
-                       return -ENODEV;
+               sb = quotactl_block(special);
+               if (IS_ERR(sb))
+                       return PTR_ERR(sb);
        }
 
        ret = check_quotactl_valid(sb, type, cmds, id);
index 9aabcc0ccd2d002401e7611a93f5f6b8458136c0..657050ad7430e68777a326c18fad5a4d8a04cefb 100644 (file)
@@ -22,6 +22,9 @@ const struct file_operations reiserfs_dir_operations = {
        .readdir = reiserfs_readdir,
        .fsync = reiserfs_dir_fsync,
        .ioctl = reiserfs_ioctl,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl = reiserfs_compat_ioctl,
+#endif
 };
 
 static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry,
index 1cfbe857ba27216c03243c6eea5b07ab9d72b1f4..3e08f7161a3ddafa86440d11865eedf09053ab49 100644 (file)
@@ -2,6 +2,7 @@
  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
  */
 
+#include <linux/config.h>
 #include <linux/time.h>
 #include <linux/reiserfs_fs.h>
 #include <linux/reiserfs_acl.h>
@@ -1568,6 +1569,9 @@ const struct file_operations reiserfs_file_operations = {
        .read = generic_file_read,
        .write = reiserfs_file_write,
        .ioctl = reiserfs_ioctl,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl = reiserfs_compat_ioctl,
+#endif
        .mmap = generic_file_mmap,
        .release = reiserfs_file_release,
        .fsync = reiserfs_sync_file,
index a986b5e1e288c8dd47eca5ea525c9cfe2fc26b77..9c57578cb831a2f869c7e277896b04b06a369c80 100644 (file)
@@ -9,6 +9,7 @@
 #include <asm/uaccess.h>
 #include <linux/pagemap.h>
 #include <linux/smp_lock.h>
+#include <linux/compat.h>
 
 static int reiserfs_unpack(struct inode *inode, struct file *filp);
 
@@ -94,6 +95,40 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
        }
 }
 
+#ifdef CONFIG_COMPAT
+long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
+                               unsigned long arg)
+{
+       struct inode *inode = file->f_dentry->d_inode;
+       int ret;
+
+       /* These are just misnamed, they actually get/put from/to user an int */
+       switch (cmd) {
+       case REISERFS_IOC32_UNPACK:
+               cmd = REISERFS_IOC_UNPACK;
+               break;
+       case REISERFS_IOC32_GETFLAGS:
+               cmd = REISERFS_IOC_GETFLAGS;
+               break;
+       case REISERFS_IOC32_SETFLAGS:
+               cmd = REISERFS_IOC_SETFLAGS;
+               break;
+       case REISERFS_IOC32_GETVERSION:
+               cmd = REISERFS_IOC_GETVERSION;
+               break;
+       case REISERFS_IOC32_SETVERSION:
+               cmd = REISERFS_IOC_SETVERSION;
+               break;
+       default:
+               return -ENOIOCTLCMD;
+       }
+       lock_kernel();
+       ret = reiserfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
+       unlock_kernel();
+       return ret;
+}
+#endif
+
 /*
 ** reiserfs_unpack
 ** Function try to convert tail from direct item into indirect.
index 684bca3d3a107c021715f8394a1f1d23a1913aaa..13e92dd19fbb1b9165f6cd05e02128c2a0a39cd8 100644 (file)
@@ -12,7 +12,7 @@
  * Jens to support splicing to files, network, direct splicing, etc and
  * fixing lots of bugs.
  *
- * Copyright (C) 2005-2006 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2005-2006 Jens Axboe <axboe@kernel.dk>
  * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org>
  * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu>
  *
index 6987824d0dce0ec94713d3f394e8d2c8535c04aa..aec99ddbe53f726a526d4cd0b02abb9f55179e0b 100644 (file)
@@ -220,6 +220,37 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
        return 0;
 }
 
+/*
+ * Write out and wait upon all dirty data associated with this
+ * superblock.  Filesystem data as well as the underlying block
+ * device.  Takes the superblock lock.  Requires a second blkdev
+ * flush by the caller to complete the operation.
+ */
+void __fsync_super(struct super_block *sb)
+{
+       sync_inodes_sb(sb, 0);
+       DQUOT_SYNC(sb);
+       lock_super(sb);
+       if (sb->s_dirt && sb->s_op->write_super)
+               sb->s_op->write_super(sb);
+       unlock_super(sb);
+       if (sb->s_op->sync_fs)
+               sb->s_op->sync_fs(sb, 1);
+       sync_blockdev(sb->s_bdev);
+       sync_inodes_sb(sb, 1);
+}
+
+/*
+ * Write out and wait upon all dirty data associated with this
+ * superblock.  Filesystem data as well as the underlying block
+ * device.  Takes the superblock lock.
+ */
+int fsync_super(struct super_block *sb)
+{
+       __fsync_super(sb);
+       return sync_blockdev(sb->s_bdev);
+}
+
 /**
  *     generic_shutdown_super  -       common helper for ->kill_sb()
  *     @sb: superblock to kill
@@ -540,8 +571,10 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 {
        int retval;
        
+#ifdef CONFIG_BLOCK
        if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev))
                return -EACCES;
+#endif
        if (flags & MS_RDONLY)
                acct_auto_close(sb);
        shrink_dcache_sb(sb);
@@ -661,6 +694,7 @@ void kill_litter_super(struct super_block *sb)
 
 EXPORT_SYMBOL(kill_litter_super);
 
+#ifdef CONFIG_BLOCK
 static int set_bdev_super(struct super_block *s, void *data)
 {
        s->s_bdev = data;
@@ -756,6 +790,7 @@ void kill_block_super(struct super_block *sb)
 }
 
 EXPORT_SYMBOL(kill_block_super);
+#endif
 
 int get_sb_nodev(struct file_system_type *fs_type,
        int flags, void *data,
index 955aef04da289fecb80f51981ae98f538e6bcdc2..1de747b5ddb9dcaf12ac0dc164f9a614db7b6d13 100644 (file)
--- a/fs/sync.c
+++ b/fs/sync.c
 #include <linux/syscalls.h>
 #include <linux/linkage.h>
 #include <linux/pagemap.h>
+#include <linux/quotaops.h>
+#include <linux/buffer_head.h>
 
 #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
                        SYNC_FILE_RANGE_WAIT_AFTER)
 
+/*
+ * sync everything.  Start out by waking pdflush, because that writes back
+ * all queues in parallel.
+ */
+static void do_sync(unsigned long wait)
+{
+       wakeup_pdflush(0);
+       sync_inodes(0);         /* All mappings, inodes and their blockdevs */
+       DQUOT_SYNC(NULL);
+       sync_supers();          /* Write the superblocks */
+       sync_filesystems(0);    /* Start syncing the filesystems */
+       sync_filesystems(wait); /* Waitingly sync the filesystems */
+       sync_inodes(wait);      /* Mappings, inodes and blockdevs, again. */
+       if (!wait)
+               printk("Emergency Sync complete\n");
+       if (unlikely(laptop_mode))
+               laptop_sync_completion();
+}
+
+asmlinkage long sys_sync(void)
+{
+       do_sync(1);
+       return 0;
+}
+
+void emergency_sync(void)
+{
+       pdflush_operation(do_sync, 0);
+}
+
+/*
+ * Generic function to fsync a file.
+ *
+ * filp may be NULL if called via the msync of a vma.
+ */
+int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
+{
+       struct inode * inode = dentry->d_inode;
+       struct super_block * sb;
+       int ret, err;
+
+       /* sync the inode to buffers */
+       ret = write_inode_now(inode, 0);
+
+       /* sync the superblock to buffers */
+       sb = inode->i_sb;
+       lock_super(sb);
+       if (sb->s_op->write_super)
+               sb->s_op->write_super(sb);
+       unlock_super(sb);
+
+       /* .. finally sync the buffers to disk */
+       err = sync_blockdev(sb->s_bdev);
+       if (!ret)
+               ret = err;
+       return ret;
+}
+
+long do_fsync(struct file *file, int datasync)
+{
+       int ret;
+       int err;
+       struct address_space *mapping = file->f_mapping;
+
+       if (!file->f_op || !file->f_op->fsync) {
+               /* Why?  We can still call filemap_fdatawrite */
+               ret = -EINVAL;
+               goto out;
+       }
+
+       ret = filemap_fdatawrite(mapping);
+
+       /*
+        * We need to protect against concurrent writers, which could cause
+        * livelocks in fsync_buffers_list().
+        */
+       mutex_lock(&mapping->host->i_mutex);
+       err = file->f_op->fsync(file, file->f_dentry, datasync);
+       if (!ret)
+               ret = err;
+       mutex_unlock(&mapping->host->i_mutex);
+       err = filemap_fdatawait(mapping);
+       if (!ret)
+               ret = err;
+out:
+       return ret;
+}
+
+static long __do_fsync(unsigned int fd, int datasync)
+{
+       struct file *file;
+       int ret = -EBADF;
+
+       file = fget(fd);
+       if (file) {
+               ret = do_fsync(file, datasync);
+               fput(file);
+       }
+       return ret;
+}
+
+asmlinkage long sys_fsync(unsigned int fd)
+{
+       return __do_fsync(fd, 0);
+}
+
+asmlinkage long sys_fdatasync(unsigned int fd)
+{
+       return __do_fsync(fd, 1);
+}
+
 /*
  * sys_sync_file_range() permits finely controlled syncing over a segment of
  * a file in the range offset .. (offset+nbytes-1) inclusive.  If nbytes is
index 26b364c9d62c67226de54101ae6f7faa31f33251..35115bca036e01d5363c6103100a875c232784e0 100644 (file)
@@ -1,5 +1,6 @@
 config XFS_FS
        tristate "XFS filesystem support"
+       depends on BLOCK
        help
          XFS is a high performance journaling filesystem which originated
          on the SGI IRIX platform.  It is completely multi-threaded, can
index 76bdaeab6f622240fe8f898a9caf0815770f7c38..711c321a70119f07b6bc1d6192caec4f64aa017c 100644 (file)
@@ -148,6 +148,7 @@ struct bio {
 #define BIO_RW_BARRIER 2
 #define BIO_RW_FAILFAST        3
 #define BIO_RW_SYNC    4
+#define BIO_RW_META    5
 
 /*
  * upper 16 bits of bi_rw define the io priority of this bio
@@ -178,6 +179,7 @@ struct bio {
 #define bio_sync(bio)          ((bio)->bi_rw & (1 << BIO_RW_SYNC))
 #define bio_failfast(bio)      ((bio)->bi_rw & (1 << BIO_RW_FAILFAST))
 #define bio_rw_ahead(bio)      ((bio)->bi_rw & (1 << BIO_RW_AHEAD))
+#define bio_rw_meta(bio)       ((bio)->bi_rw & (1 << BIO_RW_META))
 
 /*
  * will die
index cfde8b3ee9199293b17c883bd7a2292ecbad44c2..1d79b8d4ca6dcc21f2e9f80775883350eb389547 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef _LINUX_BLKDEV_H
 #define _LINUX_BLKDEV_H
 
+#include <linux/sched.h>
 #include <linux/major.h>
 #include <linux/genhd.h>
 #include <linux/list.h>
 
 #include <asm/scatterlist.h>
 
+#ifdef CONFIG_LBD
+# include <asm/div64.h>
+# define sector_div(a, b) do_div(a, b)
+#else
+# define sector_div(n, b)( \
+{ \
+       int _res; \
+       _res = (n) % (b); \
+       (n) /= (b); \
+       _res; \
+} \
+)
+#endif
+
+#ifdef CONFIG_BLOCK
+
 struct scsi_ioctl_command;
 
 struct request_queue;
@@ -90,7 +107,7 @@ struct io_context {
        atomic_t refcount;
        struct task_struct *task;
 
-       int (*set_ioprio)(struct io_context *, unsigned int);
+       unsigned int ioprio_changed;
 
        /*
         * For request batching
@@ -104,8 +121,7 @@ struct io_context {
 
 void put_io_context(struct io_context *ioc);
 void exit_io_context(void);
-struct io_context *current_io_context(gfp_t gfp_flags);
-struct io_context *get_io_context(gfp_t gfp_flags);
+struct io_context *get_io_context(gfp_t gfp_flags, int node);
 void copy_io_context(struct io_context **pdst, struct io_context **psrc);
 void swap_io_context(struct io_context **ioc1, struct io_context **ioc2);
 
@@ -120,6 +136,90 @@ struct request_list {
        wait_queue_head_t wait[2];
 };
 
+/*
+ * request command types
+ */
+enum rq_cmd_type_bits {
+       REQ_TYPE_FS             = 1,    /* fs request */
+       REQ_TYPE_BLOCK_PC,              /* scsi command */
+       REQ_TYPE_SENSE,                 /* sense request */
+       REQ_TYPE_PM_SUSPEND,            /* suspend request */
+       REQ_TYPE_PM_RESUME,             /* resume request */
+       REQ_TYPE_PM_SHUTDOWN,           /* shutdown request */
+       REQ_TYPE_FLUSH,                 /* flush request */
+       REQ_TYPE_SPECIAL,               /* driver defined type */
+       REQ_TYPE_LINUX_BLOCK,           /* generic block layer message */
+       /*
+        * for ATA/ATAPI devices. this really doesn't belong here, ide should
+        * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver
+        * private REQ_LB opcodes to differentiate what type of request this is
+        */
+       REQ_TYPE_ATA_CMD,
+       REQ_TYPE_ATA_TASK,
+       REQ_TYPE_ATA_TASKFILE,
+};
+
+/*
+ * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being
+ * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a
+ * SCSI cdb.
+ *
+ * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need,
+ * typically to differentiate REQ_TYPE_SPECIAL requests.
+ *
+ */
+enum {
+       /*
+        * just examples for now
+        */
+       REQ_LB_OP_EJECT = 0x40,         /* eject request */
+       REQ_LB_OP_FLUSH = 0x41,         /* flush device */
+};
+
+/*
+ * request type modified bits. first three bits match BIO_RW* bits, important
+ */
+enum rq_flag_bits {
+       __REQ_RW,               /* not set, read. set, write */
+       __REQ_FAILFAST,         /* no low level driver retries */
+       __REQ_SORTED,           /* elevator knows about this request */
+       __REQ_SOFTBARRIER,      /* may not be passed by ioscheduler */
+       __REQ_HARDBARRIER,      /* may not be passed by drive either */
+       __REQ_FUA,              /* forced unit access */
+       __REQ_NOMERGE,          /* don't touch this for merging */
+       __REQ_STARTED,          /* drive already may have started this one */
+       __REQ_DONTPREP,         /* don't call prep for this one */
+       __REQ_QUEUED,           /* uses queueing */
+       __REQ_ELVPRIV,          /* elevator private data attached */
+       __REQ_FAILED,           /* set if the request failed */
+       __REQ_QUIET,            /* don't worry about errors */
+       __REQ_PREEMPT,          /* set for "ide_preempt" requests */
+       __REQ_ORDERED_COLOR,    /* is before or after barrier */
+       __REQ_RW_SYNC,          /* request is sync (O_DIRECT) */
+       __REQ_ALLOCED,          /* request came from our alloc pool */
+       __REQ_RW_META,          /* metadata io request */
+       __REQ_NR_BITS,          /* stops here */
+};
+
+#define REQ_RW         (1 << __REQ_RW)
+#define REQ_FAILFAST   (1 << __REQ_FAILFAST)
+#define REQ_SORTED     (1 << __REQ_SORTED)
+#define REQ_SOFTBARRIER        (1 << __REQ_SOFTBARRIER)
+#define REQ_HARDBARRIER        (1 << __REQ_HARDBARRIER)
+#define REQ_FUA                (1 << __REQ_FUA)
+#define REQ_NOMERGE    (1 << __REQ_NOMERGE)
+#define REQ_STARTED    (1 << __REQ_STARTED)
+#define REQ_DONTPREP   (1 << __REQ_DONTPREP)
+#define REQ_QUEUED     (1 << __REQ_QUEUED)
+#define REQ_ELVPRIV    (1 << __REQ_ELVPRIV)
+#define REQ_FAILED     (1 << __REQ_FAILED)
+#define REQ_QUIET      (1 << __REQ_QUIET)
+#define REQ_PREEMPT    (1 << __REQ_PREEMPT)
+#define REQ_ORDERED_COLOR      (1 << __REQ_ORDERED_COLOR)
+#define REQ_RW_SYNC    (1 << __REQ_RW_SYNC)
+#define REQ_ALLOCED    (1 << __REQ_ALLOCED)
+#define REQ_RW_META    (1 << __REQ_RW_META)
+
 #define BLK_MAX_CDB    16
 
 /*
@@ -129,30 +229,46 @@ struct request {
        struct list_head queuelist;
        struct list_head donelist;
 
-       unsigned long flags;            /* see REQ_ bits below */
+       request_queue_t *q;
+
+       unsigned int cmd_flags;
+       enum rq_cmd_type_bits cmd_type;
 
        /* Maintain bio traversal state for part by part I/O submission.
         * hard_* are block layer internals, no driver should touch them!
         */
 
        sector_t sector;                /* next sector to submit */
+       sector_t hard_sector;           /* next sector to complete */
        unsigned long nr_sectors;       /* no. of sectors left to submit */
+       unsigned long hard_nr_sectors;  /* no. of sectors left to complete */
        /* no. of sectors left to submit in the current segment */
        unsigned int current_nr_sectors;
 
-       sector_t hard_sector;           /* next sector to complete */
-       unsigned long hard_nr_sectors;  /* no. of sectors left to complete */
        /* no. of sectors left to complete in the current segment */
        unsigned int hard_cur_sectors;
 
        struct bio *bio;
        struct bio *biotail;
 
+       struct hlist_node hash; /* merge hash */
+       /*
+        * The rb_node is only used inside the io scheduler, requests
+        * are pruned when moved to the dispatch queue. So let the
+        * completion_data share space with the rb_node.
+        */
+       union {
+               struct rb_node rb_node; /* sort/lookup */
+               void *completion_data;
+       };
+
+       /*
+        * two pointers are available for the IO schedulers, if they need
+        * more they have to dynamically allocate it.
+        */
        void *elevator_private;
-       void *completion_data;
+       void *elevator_private2;
 
-       int rq_status;  /* should split this into a few status bits */
-       int errors;
        struct gendisk *rq_disk;
        unsigned long start_time;
 
@@ -170,15 +286,13 @@ struct request {
 
        unsigned short ioprio;
 
+       void *special;
+       char *buffer;
+
        int tag;
+       int errors;
 
        int ref_count;
-       request_queue_t *q;
-       struct request_list *rl;
-
-       struct completion *waiting;
-       void *special;
-       char *buffer;
 
        /*
         * when request is used as a packet command carrier
@@ -195,80 +309,14 @@ struct request {
        int retries;
 
        /*
-        * completion callback. end_io_data should be folded in with waiting
+        * completion callback.
         */
        rq_end_io_fn *end_io;
        void *end_io_data;
 };
 
 /*
- * first three bits match BIO_RW* bits, important
- */
-enum rq_flag_bits {
-       __REQ_RW,               /* not set, read. set, write */
-       __REQ_FAILFAST,         /* no low level driver retries */
-       __REQ_SORTED,           /* elevator knows about this request */
-       __REQ_SOFTBARRIER,      /* may not be passed by ioscheduler */
-       __REQ_HARDBARRIER,      /* may not be passed by drive either */
-       __REQ_FUA,              /* forced unit access */
-       __REQ_CMD,              /* is a regular fs rw request */
-       __REQ_NOMERGE,          /* don't touch this for merging */
-       __REQ_STARTED,          /* drive already may have started this one */
-       __REQ_DONTPREP,         /* don't call prep for this one */
-       __REQ_QUEUED,           /* uses queueing */
-       __REQ_ELVPRIV,          /* elevator private data attached */
-       /*
-        * for ATA/ATAPI devices
-        */
-       __REQ_PC,               /* packet command (special) */
-       __REQ_BLOCK_PC,         /* queued down pc from block layer */
-       __REQ_SENSE,            /* sense retrival */
-
-       __REQ_FAILED,           /* set if the request failed */
-       __REQ_QUIET,            /* don't worry about errors */
-       __REQ_SPECIAL,          /* driver suplied command */
-       __REQ_DRIVE_CMD,
-       __REQ_DRIVE_TASK,
-       __REQ_DRIVE_TASKFILE,
-       __REQ_PREEMPT,          /* set for "ide_preempt" requests */
-       __REQ_PM_SUSPEND,       /* suspend request */
-       __REQ_PM_RESUME,        /* resume request */
-       __REQ_PM_SHUTDOWN,      /* shutdown request */
-       __REQ_ORDERED_COLOR,    /* is before or after barrier */
-       __REQ_RW_SYNC,          /* request is sync (O_DIRECT) */
-       __REQ_NR_BITS,          /* stops here */
-};
-
-#define REQ_RW         (1 << __REQ_RW)
-#define REQ_FAILFAST   (1 << __REQ_FAILFAST)
-#define REQ_SORTED     (1 << __REQ_SORTED)
-#define REQ_SOFTBARRIER        (1 << __REQ_SOFTBARRIER)
-#define REQ_HARDBARRIER        (1 << __REQ_HARDBARRIER)
-#define REQ_FUA                (1 << __REQ_FUA)
-#define REQ_CMD                (1 << __REQ_CMD)
-#define REQ_NOMERGE    (1 << __REQ_NOMERGE)
-#define REQ_STARTED    (1 << __REQ_STARTED)
-#define REQ_DONTPREP   (1 << __REQ_DONTPREP)
-#define REQ_QUEUED     (1 << __REQ_QUEUED)
-#define REQ_ELVPRIV    (1 << __REQ_ELVPRIV)
-#define REQ_PC         (1 << __REQ_PC)
-#define REQ_BLOCK_PC   (1 << __REQ_BLOCK_PC)
-#define REQ_SENSE      (1 << __REQ_SENSE)
-#define REQ_FAILED     (1 << __REQ_FAILED)
-#define REQ_QUIET      (1 << __REQ_QUIET)
-#define REQ_SPECIAL    (1 << __REQ_SPECIAL)
-#define REQ_DRIVE_CMD  (1 << __REQ_DRIVE_CMD)
-#define REQ_DRIVE_TASK (1 << __REQ_DRIVE_TASK)
-#define REQ_DRIVE_TASKFILE     (1 << __REQ_DRIVE_TASKFILE)
-#define REQ_PREEMPT    (1 << __REQ_PREEMPT)
-#define REQ_PM_SUSPEND (1 << __REQ_PM_SUSPEND)
-#define REQ_PM_RESUME  (1 << __REQ_PM_RESUME)
-#define REQ_PM_SHUTDOWN        (1 << __REQ_PM_SHUTDOWN)
-#define REQ_ORDERED_COLOR      (1 << __REQ_ORDERED_COLOR)
-#define REQ_RW_SYNC    (1 << __REQ_RW_SYNC)
-
-/*
- * State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME
+ * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME
  * requests. Some step values could eventually be made generic.
  */
 struct request_pm_state
@@ -432,9 +480,6 @@ struct request_queue
        struct mutex            sysfs_lock;
 };
 
-#define RQ_INACTIVE            (-1)
-#define RQ_ACTIVE              1
-
 #define QUEUE_FLAG_CLUSTER     0       /* cluster several segments into 1 */
 #define QUEUE_FLAG_QUEUED      1       /* uses generic tag queueing */
 #define QUEUE_FLAG_STOPPED     2       /* queue is stopped */
@@ -490,25 +535,34 @@ enum {
 #define blk_queue_stopped(q)   test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_flushing(q)  ((q)->ordseq)
 
-#define blk_fs_request(rq)     ((rq)->flags & REQ_CMD)
-#define blk_pc_request(rq)     ((rq)->flags & REQ_BLOCK_PC)
-#define blk_noretry_request(rq)        ((rq)->flags & REQ_FAILFAST)
-#define blk_rq_started(rq)     ((rq)->flags & REQ_STARTED)
+#define blk_fs_request(rq)     ((rq)->cmd_type == REQ_TYPE_FS)
+#define blk_pc_request(rq)     ((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
+#define blk_special_request(rq)        ((rq)->cmd_type == REQ_TYPE_SPECIAL)
+#define blk_sense_request(rq)  ((rq)->cmd_type == REQ_TYPE_SENSE)
+
+#define blk_noretry_request(rq)        ((rq)->cmd_flags & REQ_FAILFAST)
+#define blk_rq_started(rq)     ((rq)->cmd_flags & REQ_STARTED)
 
 #define blk_account_rq(rq)     (blk_rq_started(rq) && blk_fs_request(rq))
 
-#define blk_pm_suspend_request(rq)     ((rq)->flags & REQ_PM_SUSPEND)
-#define blk_pm_resume_request(rq)      ((rq)->flags & REQ_PM_RESUME)
+#define blk_pm_suspend_request(rq)     ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND)
+#define blk_pm_resume_request(rq)      ((rq)->cmd_type == REQ_TYPE_PM_RESUME)
 #define blk_pm_request(rq)     \
-       ((rq)->flags & (REQ_PM_SUSPEND | REQ_PM_RESUME))
+       (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq))
 
-#define blk_sorted_rq(rq)      ((rq)->flags & REQ_SORTED)
-#define blk_barrier_rq(rq)     ((rq)->flags & REQ_HARDBARRIER)
-#define blk_fua_rq(rq)         ((rq)->flags & REQ_FUA)
+#define blk_sorted_rq(rq)      ((rq)->cmd_flags & REQ_SORTED)
+#define blk_barrier_rq(rq)     ((rq)->cmd_flags & REQ_HARDBARRIER)
+#define blk_fua_rq(rq)         ((rq)->cmd_flags & REQ_FUA)
 
 #define list_entry_rq(ptr)     list_entry((ptr), struct request, queuelist)
 
-#define rq_data_dir(rq)                ((rq)->flags & 1)
+#define rq_data_dir(rq)                ((rq)->cmd_flags & 1)
+
+/*
+ * We regard a request as sync, if it's a READ or a SYNC write.
+ */
+#define rq_is_sync(rq)         (rq_data_dir((rq)) == READ || (rq)->cmd_flags & REQ_RW_SYNC)
+#define rq_is_meta(rq)         ((rq)->cmd_flags & REQ_RW_META)
 
 static inline int blk_queue_full(struct request_queue *q, int rw)
 {
@@ -541,13 +595,7 @@ static inline void blk_clear_queue_full(struct request_queue *q, int rw)
 #define RQ_NOMERGE_FLAGS       \
        (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
 #define rq_mergeable(rq)       \
-       (!((rq)->flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq)))
-
-/*
- * noop, requests are automagically marked as active/inactive by I/O
- * scheduler -- see elv_next_request
- */
-#define blk_queue_headactive(q, head_active)
+       (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq)))
 
 /*
  * q->prep_rq_fn return values
@@ -586,11 +634,6 @@ static inline void blk_queue_bounce(request_queue_t *q, struct bio **bio)
        if ((rq->bio))                  \
                for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)
 
-struct sec_size {
-       unsigned block_size;
-       unsigned block_size_bits;
-};
-
 extern int blk_register_queue(struct gendisk *disk);
 extern void blk_unregister_queue(struct gendisk *disk);
 extern void register_disk(struct gendisk *dev);
@@ -612,6 +655,7 @@ extern void blk_stop_queue(request_queue_t *q);
 extern void blk_sync_queue(struct request_queue *q);
 extern void __blk_stop_queue(request_queue_t *q);
 extern void blk_run_queue(request_queue_t *);
+extern void blk_start_queueing(request_queue_t *);
 extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *);
 extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int);
 extern int blk_rq_unmap_user(struct bio *, unsigned int);
@@ -655,16 +699,6 @@ extern void end_that_request_last(struct request *, int);
 extern void end_request(struct request *req, int uptodate);
 extern void blk_complete_request(struct request *);
 
-static inline int rq_all_done(struct request *rq, unsigned int nr_bytes)
-{
-       if (blk_fs_request(rq))
-               return (nr_bytes >= (rq->hard_nr_sectors << 9));
-       else if (blk_pc_request(rq))
-               return nr_bytes >= rq->data_len;
-
-       return 0;
-}
-
 /*
  * end_that_request_first/chunk() takes an uptodate argument. we account
  * any value <= as an io error. 0 means -EIO for compatability reasons,
@@ -678,21 +712,6 @@ static inline void blkdev_dequeue_request(struct request *req)
        elv_dequeue_request(req->q, req);
 }
 
-/*
- * This should be in elevator.h, but that requires pulling in rq and q
- */
-static inline void elv_dispatch_add_tail(struct request_queue *q,
-                                        struct request *rq)
-{
-       if (q->last_merge == rq)
-               q->last_merge = NULL;
-       q->nr_sorted--;
-
-       q->end_sector = rq_end_sector(rq);
-       q->boundary_rq = rq;
-       list_add_tail(&rq->queuelist, &q->queue_head);
-}
-
 /*
  * Access functions for manipulating queue properties
  */
@@ -737,7 +756,7 @@ extern void blk_put_queue(request_queue_t *);
  */
 #define blk_queue_tag_depth(q)         ((q)->queue_tags->busy)
 #define blk_queue_tag_queue(q)         ((q)->queue_tags->busy < (q)->queue_tags->max_depth)
-#define blk_rq_tagged(rq)              ((rq)->flags & REQ_QUEUED)
+#define blk_rq_tagged(rq)              ((rq)->cmd_flags & REQ_QUEUED)
 extern int blk_queue_start_tag(request_queue_t *, struct request *);
 extern struct request *blk_queue_find_tag(request_queue_t *, int);
 extern void blk_queue_end_tag(request_queue_t *, struct request *);
@@ -787,14 +806,6 @@ static inline int queue_dma_alignment(request_queue_t *q)
        return retval;
 }
 
-static inline int bdev_dma_aligment(struct block_device *bdev)
-{
-       return queue_dma_alignment(bdev_get_queue(bdev));
-}
-
-#define blk_finished_io(nsects)        do { } while (0)
-#define blk_started_io(nsects) do { } while (0)
-
 /* assumes size > 256 */
 static inline unsigned int blksize_bits(unsigned int size)
 {
@@ -824,24 +835,32 @@ struct work_struct;
 int kblockd_schedule_work(struct work_struct *work);
 void kblockd_flush(void);
 
-#ifdef CONFIG_LBD
-# include <asm/div64.h>
-# define sector_div(a, b) do_div(a, b)
-#else
-# define sector_div(n, b)( \
-{ \
-       int _res; \
-       _res = (n) % (b); \
-       (n) /= (b); \
-       _res; \
-} \
-)
-#endif 
-
 #define MODULE_ALIAS_BLOCKDEV(major,minor) \
        MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
 #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
        MODULE_ALIAS("block-major-" __stringify(major) "-*")
 
 
+#else /* CONFIG_BLOCK */
+/*
+ * stubs for when the block layer is configured out
+ */
+#define buffer_heads_over_limit 0
+
+static inline long blk_congestion_wait(int rw, long timeout)
+{
+       return io_schedule_timeout(timeout);
+}
+
+static inline long nr_blockdev_pages(void)
+{
+       return 0;
+}
+
+static inline void exit_io_context(void)
+{
+}
+
+#endif /* CONFIG_BLOCK */
+
 #endif
index 7520cc1ff9e2b9d3947f3cb1ace868b329394555..b99a714fcac67adf74e85650abcfa7ecaf6ef7ab 100644 (file)
@@ -20,6 +20,7 @@ enum blktrace_cat {
        BLK_TC_PC       = 1 << 9,       /* pc requests */
        BLK_TC_NOTIFY   = 1 << 10,      /* special message */
        BLK_TC_AHEAD    = 1 << 11,      /* readahead */
+       BLK_TC_META     = 1 << 12,      /* metadata */
 
        BLK_TC_END      = 1 << 15,      /* only 16-bits, reminder */
 };
@@ -148,7 +149,7 @@ static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq,
                                    u32 what)
 {
        struct blk_trace *bt = q->blk_trace;
-       int rw = rq->flags & 0x03;
+       int rw = rq->cmd_flags & 0x03;
 
        if (likely(!bt))
                return;
index 737e407d0cd11e7479e6d38d3cf48562eec4f596..131ffd37e716fb7ac4a386eaea9ff58c26bf639d 100644 (file)
@@ -14,6 +14,8 @@
 #include <linux/wait.h>
 #include <asm/atomic.h>
 
+#ifdef CONFIG_BLOCK
+
 enum bh_state_bits {
        BH_Uptodate,    /* Contains valid data */
        BH_Dirty,       /* Is dirty */
@@ -190,9 +192,7 @@ extern int buffer_heads_over_limit;
  * Generic address_space_operations implementations for buffer_head-backed
  * address_spaces.
  */
-int try_to_release_page(struct page * page, gfp_t gfp_mask);
 void block_invalidatepage(struct page *page, unsigned long offset);
-void do_invalidatepage(struct page *page, unsigned long offset);
 int block_write_full_page(struct page *page, get_block_t *get_block,
                                struct writeback_control *wbc);
 int block_read_full_page(struct page*, get_block_t*);
@@ -302,4 +302,19 @@ static inline void lock_buffer(struct buffer_head *bh)
                __lock_buffer(bh);
 }
 
+extern int __set_page_dirty_buffers(struct page *page);
+
+#else /* CONFIG_BLOCK */
+
+static inline void buffer_init(void) {}
+static inline int try_to_free_buffers(struct page *page) { return 1; }
+static inline int sync_blockdev(struct block_device *bdev) { return 0; }
+static inline int inode_has_buffers(struct inode *inode) { return 0; }
+static inline void invalidate_inode_buffers(struct inode *inode) {}
+static inline int remove_inode_buffers(struct inode *inode) { return 1; }
+static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
+static inline void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers) {}
+
+
+#endif /* CONFIG_BLOCK */
 #endif /* _LINUX_BUFFER_HEAD_H */
index bea0255196c435c5c1ba986c7c9361034f36ad86..d61ef59515381310ccc2b1d1d7126e0a271ba4cc 100644 (file)
@@ -90,6 +90,7 @@ COMPATIBLE_IOCTL(FDTWADDLE)
 COMPATIBLE_IOCTL(FDFMTTRK)
 COMPATIBLE_IOCTL(FDRAWCMD)
 /* 0x12 */
+#ifdef CONFIG_BLOCK
 COMPATIBLE_IOCTL(BLKRASET)
 COMPATIBLE_IOCTL(BLKROSET)
 COMPATIBLE_IOCTL(BLKROGET)
@@ -103,6 +104,7 @@ COMPATIBLE_IOCTL(BLKTRACESETUP)
 COMPATIBLE_IOCTL(BLKTRACETEARDOWN)
 ULONG_IOCTL(BLKRASET)
 ULONG_IOCTL(BLKFRASET)
+#endif
 /* RAID */
 COMPATIBLE_IOCTL(RAID_VERSION)
 COMPATIBLE_IOCTL(GET_ARRAY_INFO)
@@ -395,12 +397,6 @@ COMPATIBLE_IOCTL(DVD_WRITE_STRUCT)
 COMPATIBLE_IOCTL(DVD_AUTH)
 /* pktcdvd */
 COMPATIBLE_IOCTL(PACKET_CTRL_CMD)
-/* Big L */
-ULONG_IOCTL(LOOP_SET_FD)
-ULONG_IOCTL(LOOP_CHANGE_FD)
-COMPATIBLE_IOCTL(LOOP_CLR_FD)
-COMPATIBLE_IOCTL(LOOP_GET_STATUS64)
-COMPATIBLE_IOCTL(LOOP_SET_STATUS64)
 /* Big A */
 /* sparc only */
 /* Big Q for sound/OSS */
index 1713ace808bfb4d581ccfff53aa4b0c42ebfa9ef..b3370ef5164d0589d3300e91162c0b5599045de1 100644 (file)
@@ -1,12 +1,16 @@
 #ifndef _LINUX_ELEVATOR_H
 #define _LINUX_ELEVATOR_H
 
+#include <linux/percpu.h>
+
+#ifdef CONFIG_BLOCK
+
 typedef int (elevator_merge_fn) (request_queue_t *, struct request **,
                                 struct bio *);
 
 typedef void (elevator_merge_req_fn) (request_queue_t *, struct request *, struct request *);
 
-typedef void (elevator_merged_fn) (request_queue_t *, struct request *);
+typedef void (elevator_merged_fn) (request_queue_t *, struct request *, int);
 
 typedef int (elevator_dispatch_fn) (request_queue_t *, int);
 
@@ -14,9 +18,9 @@ typedef void (elevator_add_req_fn) (request_queue_t *, struct request *);
 typedef int (elevator_queue_empty_fn) (request_queue_t *);
 typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *);
 typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *);
-typedef int (elevator_may_queue_fn) (request_queue_t *, int, struct bio *);
+typedef int (elevator_may_queue_fn) (request_queue_t *, int);
 
-typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, struct bio *, gfp_t);
+typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, gfp_t);
 typedef void (elevator_put_req_fn) (request_queue_t *, struct request *);
 typedef void (elevator_activate_req_fn) (request_queue_t *, struct request *);
 typedef void (elevator_deactivate_req_fn) (request_queue_t *, struct request *);
@@ -82,19 +86,21 @@ struct elevator_queue
        struct kobject kobj;
        struct elevator_type *elevator_type;
        struct mutex sysfs_lock;
+       struct hlist_head *hash;
 };
 
 /*
  * block elevator interface
  */
 extern void elv_dispatch_sort(request_queue_t *, struct request *);
+extern void elv_dispatch_add_tail(request_queue_t *, struct request *);
 extern void elv_add_request(request_queue_t *, struct request *, int, int);
 extern void __elv_add_request(request_queue_t *, struct request *, int, int);
 extern void elv_insert(request_queue_t *, struct request *, int);
 extern int elv_merge(request_queue_t *, struct request **, struct bio *);
 extern void elv_merge_requests(request_queue_t *, struct request *,
                               struct request *);
-extern void elv_merged_request(request_queue_t *, struct request *);
+extern void elv_merged_request(request_queue_t *, struct request *, int);
 extern void elv_dequeue_request(request_queue_t *, struct request *);
 extern void elv_requeue_request(request_queue_t *, struct request *);
 extern int elv_queue_empty(request_queue_t *);
@@ -103,9 +109,9 @@ extern struct request *elv_former_request(request_queue_t *, struct request *);
 extern struct request *elv_latter_request(request_queue_t *, struct request *);
 extern int elv_register_queue(request_queue_t *q);
 extern void elv_unregister_queue(request_queue_t *q);
-extern int elv_may_queue(request_queue_t *, int, struct bio *);
+extern int elv_may_queue(request_queue_t *, int);
 extern void elv_completed_request(request_queue_t *, struct request *);
-extern int elv_set_request(request_queue_t *, struct request *, struct bio *, gfp_t);
+extern int elv_set_request(request_queue_t *, struct request *, gfp_t);
 extern void elv_put_request(request_queue_t *, struct request *);
 
 /*
@@ -124,6 +130,19 @@ extern int elevator_init(request_queue_t *, char *);
 extern void elevator_exit(elevator_t *);
 extern int elv_rq_merge_ok(struct request *, struct bio *);
 
+/*
+ * Helper functions.
+ */
+extern struct request *elv_rb_former_request(request_queue_t *, struct request *);
+extern struct request *elv_rb_latter_request(request_queue_t *, struct request *);
+
+/*
+ * rb support functions.
+ */
+extern struct request *elv_rb_add(struct rb_root *, struct request *);
+extern void elv_rb_del(struct rb_root *, struct request *);
+extern struct request *elv_rb_find(struct rb_root *, sector_t);
+
 /*
  * Return values from elevator merger
  */
@@ -149,5 +168,42 @@ enum {
 };
 
 #define rq_end_sector(rq)      ((rq)->sector + (rq)->nr_sectors)
+#define rb_entry_rq(node)      rb_entry((node), struct request, rb_node)
+
+/*
+ * Hack to reuse the donelist list_head as the fifo time holder while
+ * the request is in the io scheduler. Saves an unsigned long in rq.
+ */
+#define rq_fifo_time(rq)       ((unsigned long) (rq)->donelist.next)
+#define rq_set_fifo_time(rq,exp)       ((rq)->donelist.next = (void *) (exp))
+#define rq_entry_fifo(ptr)     list_entry((ptr), struct request, queuelist)
+#define rq_fifo_clear(rq)      do {            \
+       list_del_init(&(rq)->queuelist);        \
+       INIT_LIST_HEAD(&(rq)->donelist);        \
+       } while (0)
 
+/*
+ * io context count accounting
+ */
+#define elv_ioc_count_mod(name, __val)                         \
+       do {                                                    \
+               preempt_disable();                              \
+               __get_cpu_var(name) += (__val);                 \
+               preempt_enable();                               \
+       } while (0)
+
+#define elv_ioc_count_inc(name)        elv_ioc_count_mod(name, 1)
+#define elv_ioc_count_dec(name)        elv_ioc_count_mod(name, -1)
+
+#define elv_ioc_count_read(name)                               \
+({                                                             \
+       unsigned long __val = 0;                                \
+       int __cpu;                                              \
+       smp_wmb();                                              \
+       for_each_possible_cpu(__cpu)                            \
+               __val += per_cpu(name, __cpu);                  \
+       __val;                                                  \
+})
+
+#endif /* CONFIG_BLOCK */
 #endif
index 33a1aa10732916e222347728888b979b69c98b07..153d755376a42344d046482e7c7606dcea4bb373 100644 (file)
@@ -165,41 +165,49 @@ struct ext2_group_desc
 #define        EXT2_N_BLOCKS                   (EXT2_TIND_BLOCK + 1)
 
 /*
- * Inode flags
+ * Inode flags (GETFLAGS/SETFLAGS)
  */
-#define        EXT2_SECRM_FL                   0x00000001 /* Secure deletion */
-#define        EXT2_UNRM_FL                    0x00000002 /* Undelete */
-#define        EXT2_COMPR_FL                   0x00000004 /* Compress file */
-#define EXT2_SYNC_FL                   0x00000008 /* Synchronous updates */
-#define EXT2_IMMUTABLE_FL              0x00000010 /* Immutable file */
-#define EXT2_APPEND_FL                 0x00000020 /* writes to file may only append */
-#define EXT2_NODUMP_FL                 0x00000040 /* do not dump file */
-#define EXT2_NOATIME_FL                        0x00000080 /* do not update atime */
+#define        EXT2_SECRM_FL                   FS_SECRM_FL     /* Secure deletion */
+#define        EXT2_UNRM_FL                    FS_UNRM_FL      /* Undelete */
+#define        EXT2_COMPR_FL                   FS_COMPR_FL     /* Compress file */
+#define EXT2_SYNC_FL                   FS_SYNC_FL      /* Synchronous updates */
+#define EXT2_IMMUTABLE_FL              FS_IMMUTABLE_FL /* Immutable file */
+#define EXT2_APPEND_FL                 FS_APPEND_FL    /* writes to file may only append */
+#define EXT2_NODUMP_FL                 FS_NODUMP_FL    /* do not dump file */
+#define EXT2_NOATIME_FL                        FS_NOATIME_FL   /* do not update atime */
 /* Reserved for compression usage... */
-#define EXT2_DIRTY_FL                  0x00000100
-#define EXT2_COMPRBLK_FL               0x00000200 /* One or more compressed clusters */
-#define EXT2_NOCOMP_FL                 0x00000400 /* Don't compress */
-#define EXT2_ECOMPR_FL                 0x00000800 /* Compression error */
+#define EXT2_DIRTY_FL                  FS_DIRTY_FL
+#define EXT2_COMPRBLK_FL               FS_COMPRBLK_FL  /* One or more compressed clusters */
+#define EXT2_NOCOMP_FL                 FS_NOCOMP_FL    /* Don't compress */
+#define EXT2_ECOMPR_FL                 FS_ECOMPR_FL    /* Compression error */
 /* End compression flags --- maybe not all used */     
-#define EXT2_BTREE_FL                  0x00001000 /* btree format dir */
-#define EXT2_INDEX_FL                  0x00001000 /* hash-indexed directory */
-#define EXT2_IMAGIC_FL                 0x00002000 /* AFS directory */
-#define EXT2_JOURNAL_DATA_FL           0x00004000 /* Reserved for ext3 */
-#define EXT2_NOTAIL_FL                 0x00008000 /* file tail should not be merged */
-#define EXT2_DIRSYNC_FL                        0x00010000 /* dirsync behaviour (directories only) */
-#define EXT2_TOPDIR_FL                 0x00020000 /* Top of directory hierarchies*/
-#define EXT2_RESERVED_FL               0x80000000 /* reserved for ext2 lib */
-
-#define EXT2_FL_USER_VISIBLE           0x0003DFFF /* User visible flags */
-#define EXT2_FL_USER_MODIFIABLE                0x000380FF /* User modifiable flags */
+#define EXT2_BTREE_FL                  FS_BTREE_FL     /* btree format dir */
+#define EXT2_INDEX_FL                  FS_INDEX_FL     /* hash-indexed directory */
+#define EXT2_IMAGIC_FL                 FS_IMAGIC_FL    /* AFS directory */
+#define EXT2_JOURNAL_DATA_FL           FS_JOURNAL_DATA_FL /* Reserved for ext3 */
+#define EXT2_NOTAIL_FL                 FS_NOTAIL_FL    /* file tail should not be merged */
+#define EXT2_DIRSYNC_FL                        FS_DIRSYNC_FL   /* dirsync behaviour (directories only) */
+#define EXT2_TOPDIR_FL                 FS_TOPDIR_FL    /* Top of directory hierarchies*/
+#define EXT2_RESERVED_FL               FS_RESERVED_FL  /* reserved for ext2 lib */
+
+#define EXT2_FL_USER_VISIBLE           FS_FL_USER_VISIBLE      /* User visible flags */
+#define EXT2_FL_USER_MODIFIABLE                FS_FL_USER_MODIFIABLE   /* User modifiable flags */
 
 /*
  * ioctl commands
  */
-#define        EXT2_IOC_GETFLAGS               _IOR('f', 1, long)
-#define        EXT2_IOC_SETFLAGS               _IOW('f', 2, long)
-#define        EXT2_IOC_GETVERSION             _IOR('v', 1, long)
-#define        EXT2_IOC_SETVERSION             _IOW('v', 2, long)
+#define        EXT2_IOC_GETFLAGS               FS_IOC_GETFLAGS
+#define        EXT2_IOC_SETFLAGS               FS_IOC_SETFLAGS
+#define        EXT2_IOC_GETVERSION             FS_IOC_GETVERSION
+#define        EXT2_IOC_SETVERSION             FS_IOC_SETVERSION
+
+/*
+ * ioctl commands in 32 bit emulation
+ */
+#define EXT2_IOC32_GETFLAGS            FS_IOC32_GETFLAGS
+#define EXT2_IOC32_SETFLAGS            FS_IOC32_SETFLAGS
+#define EXT2_IOC32_GETVERSION          FS_IOC32_GETVERSION
+#define EXT2_IOC32_SETVERSION          FS_IOC32_SETVERSION
 
 /*
  * Structure of an inode on the disk
index cc08f56750da7cf7ed68c1349db063fd01a6be68..11cca1bdc0c79840ab69c12b2a538d2a9770fb65 100644 (file)
@@ -216,20 +216,37 @@ struct ext3_new_group_data {
 /*
  * ioctl commands
  */
-#define        EXT3_IOC_GETFLAGS               _IOR('f', 1, long)
-#define        EXT3_IOC_SETFLAGS               _IOW('f', 2, long)
+#define        EXT3_IOC_GETFLAGS               FS_IOC_GETFLAGS
+#define        EXT3_IOC_SETFLAGS               FS_IOC_SETFLAGS
 #define        EXT3_IOC_GETVERSION             _IOR('f', 3, long)
 #define        EXT3_IOC_SETVERSION             _IOW('f', 4, long)
 #define EXT3_IOC_GROUP_EXTEND          _IOW('f', 7, unsigned long)
 #define EXT3_IOC_GROUP_ADD             _IOW('f', 8,struct ext3_new_group_input)
-#define        EXT3_IOC_GETVERSION_OLD         _IOR('v', 1, long)
-#define        EXT3_IOC_SETVERSION_OLD         _IOW('v', 2, long)
+#define        EXT3_IOC_GETVERSION_OLD         FS_IOC_GETVERSION
+#define        EXT3_IOC_SETVERSION_OLD         FS_IOC_SETVERSION
 #ifdef CONFIG_JBD_DEBUG
 #define EXT3_IOC_WAIT_FOR_READONLY     _IOR('f', 99, long)
 #endif
 #define EXT3_IOC_GETRSVSZ              _IOR('f', 5, long)
 #define EXT3_IOC_SETRSVSZ              _IOW('f', 6, long)
 
+/*
+ * ioctl commands in 32 bit emulation
+ */
+#define EXT3_IOC32_GETFLAGS            FS_IOC32_GETFLAGS
+#define EXT3_IOC32_SETFLAGS            FS_IOC32_SETFLAGS
+#define EXT3_IOC32_GETVERSION          _IOR('f', 3, int)
+#define EXT3_IOC32_SETVERSION          _IOW('f', 4, int)
+#define EXT3_IOC32_GETRSVSZ            _IOR('f', 5, int)
+#define EXT3_IOC32_SETRSVSZ            _IOW('f', 6, int)
+#define EXT3_IOC32_GROUP_EXTEND                _IOW('f', 7, unsigned int)
+#ifdef CONFIG_JBD_DEBUG
+#define EXT3_IOC32_WAIT_FOR_READONLY   _IOR('f', 99, int)
+#endif
+#define EXT3_IOC32_GETVERSION_OLD      FS_IOC32_GETVERSION
+#define EXT3_IOC32_SETVERSION_OLD      FS_IOC32_SETVERSION
+
+
 /*
  *  Mount options
  */
@@ -812,6 +829,7 @@ extern void ext3_set_aops(struct inode *inode);
 /* ioctl.c */
 extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
                       unsigned long);
+extern long ext3_compat_ioctl (struct file *, unsigned int, unsigned long);
 
 /* namei.c */
 extern int ext3_orphan_add(handle_t *, struct inode *);
index 6eafbe309483a56c91ec3ae4176dc9525321d457..5baf3a153403ce9ffe8975acc108203d9b5d0dc5 100644 (file)
@@ -79,8 +79,8 @@ extern int dir_notify_enable;
 #define WRITE 1
 #define READA 2                /* read-ahead  - don't block if no resources */
 #define SWRITE 3       /* for ll_rw_block() - wait for buffer lock */
-#define SPECIAL 4      /* For non-blockdevice requests in request queue */
 #define READ_SYNC      (READ | (1 << BIO_RW_SYNC))
+#define READ_META      (READ | (1 << BIO_RW_META))
 #define WRITE_SYNC     (WRITE | (1 << BIO_RW_SYNC))
 #define WRITE_BARRIER  ((1 << BIO_RW) | (1 << BIO_RW_BARRIER))
 
@@ -217,6 +217,45 @@ extern int dir_notify_enable;
 #define FIBMAP    _IO(0x00,1)  /* bmap access */
 #define FIGETBSZ   _IO(0x00,2) /* get the block size used for bmap */
 
+#define        FS_IOC_GETFLAGS                 _IOR('f', 1, long)
+#define        FS_IOC_SETFLAGS                 _IOW('f', 2, long)
+#define        FS_IOC_GETVERSION               _IOR('v', 1, long)
+#define        FS_IOC_SETVERSION               _IOW('v', 2, long)
+#define FS_IOC32_GETFLAGS              _IOR('f', 1, int)
+#define FS_IOC32_SETFLAGS              _IOW('f', 2, int)
+#define FS_IOC32_GETVERSION            _IOR('v', 1, int)
+#define FS_IOC32_SETVERSION            _IOW('v', 2, int)
+
+/*
+ * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
+ */
+#define        FS_SECRM_FL                     0x00000001 /* Secure deletion */
+#define        FS_UNRM_FL                      0x00000002 /* Undelete */
+#define        FS_COMPR_FL                     0x00000004 /* Compress file */
+#define FS_SYNC_FL                     0x00000008 /* Synchronous updates */
+#define FS_IMMUTABLE_FL                        0x00000010 /* Immutable file */
+#define FS_APPEND_FL                   0x00000020 /* writes to file may only append */
+#define FS_NODUMP_FL                   0x00000040 /* do not dump file */
+#define FS_NOATIME_FL                  0x00000080 /* do not update atime */
+/* Reserved for compression usage... */
+#define FS_DIRTY_FL                    0x00000100
+#define FS_COMPRBLK_FL                 0x00000200 /* One or more compressed clusters */
+#define FS_NOCOMP_FL                   0x00000400 /* Don't compress */
+#define FS_ECOMPR_FL                   0x00000800 /* Compression error */
+/* End compression flags --- maybe not all used */
+#define FS_BTREE_FL                    0x00001000 /* btree format dir */
+#define FS_INDEX_FL                    0x00001000 /* hash-indexed directory */
+#define FS_IMAGIC_FL                   0x00002000 /* AFS directory */
+#define FS_JOURNAL_DATA_FL             0x00004000 /* Reserved for ext3 */
+#define FS_NOTAIL_FL                   0x00008000 /* file tail should not be merged */
+#define FS_DIRSYNC_FL                  0x00010000 /* dirsync behaviour (directories only) */
+#define FS_TOPDIR_FL                   0x00020000 /* Top of directory hierarchies*/
+#define FS_RESERVED_FL                 0x80000000 /* reserved for ext2 lib */
+
+#define FS_FL_USER_VISIBLE             0x0003DFFF /* User visible flags */
+#define FS_FL_USER_MODIFIABLE          0x000380FF /* User modifiable flags */
+
+
 #define SYNC_FILE_RANGE_WAIT_BEFORE    1
 #define SYNC_FILE_RANGE_WRITE          2
 #define SYNC_FILE_RANGE_WAIT_AFTER     4
@@ -1443,6 +1482,7 @@ extern void __init vfs_caches_init(unsigned long);
 extern void putname(const char *name);
 #endif
 
+#ifdef CONFIG_BLOCK
 extern int register_blkdev(unsigned int, const char *);
 extern int unregister_blkdev(unsigned int, const char *);
 extern struct block_device *bdget(dev_t);
@@ -1451,11 +1491,15 @@ extern void bd_forget(struct inode *inode);
 extern void bdput(struct block_device *);
 extern struct block_device *open_by_devnum(dev_t, unsigned);
 extern struct block_device *open_partition_by_devnum(dev_t, unsigned);
-extern const struct file_operations def_blk_fops;
 extern const struct address_space_operations def_blk_aops;
+#else
+static inline void bd_forget(struct inode *inode) {}
+#endif
+extern const struct file_operations def_blk_fops;
 extern const struct file_operations def_chr_fops;
 extern const struct file_operations bad_sock_fops;
 extern const struct file_operations def_fifo_fops;
+#ifdef CONFIG_BLOCK
 extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
 extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long);
 extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
@@ -1471,6 +1515,7 @@ extern void bd_release_from_disk(struct block_device *, struct gendisk *);
 #define bd_claim_by_disk(bdev, holder, disk)   bd_claim(bdev, holder)
 #define bd_release_from_disk(bdev, disk)       bd_release(bdev)
 #endif
+#endif
 
 /* fs/char_dev.c */
 #define CHRDEV_MAJOR_HASH_SIZE 255
@@ -1484,14 +1529,19 @@ extern int chrdev_open(struct inode *, struct file *);
 extern void chrdev_show(struct seq_file *,off_t);
 
 /* fs/block_dev.c */
-#define BLKDEV_MAJOR_HASH_SIZE 255
 #define BDEVNAME_SIZE  32      /* Largest string for a blockdev identifier */
+
+#ifdef CONFIG_BLOCK
+#define BLKDEV_MAJOR_HASH_SIZE 255
 extern const char *__bdevname(dev_t, char *buffer);
 extern const char *bdevname(struct block_device *bdev, char *buffer);
 extern struct block_device *lookup_bdev(const char *);
 extern struct block_device *open_bdev_excl(const char *, int, void *);
 extern void close_bdev_excl(struct block_device *);
 extern void blkdev_show(struct seq_file *,off_t);
+#else
+#define BLKDEV_MAJOR_HASH_SIZE 0
+#endif
 
 extern void init_special_inode(struct inode *, umode_t, dev_t);
 
@@ -1505,6 +1555,7 @@ extern const struct file_operations rdwr_fifo_fops;
 
 extern int fs_may_remount_ro(struct super_block *);
 
+#ifdef CONFIG_BLOCK
 /*
  * return READ, READA, or WRITE
  */
@@ -1516,9 +1567,10 @@ extern int fs_may_remount_ro(struct super_block *);
 #define bio_data_dir(bio)      ((bio)->bi_rw & 1)
 
 extern int check_disk_change(struct block_device *);
-extern int invalidate_inodes(struct super_block *);
 extern int __invalidate_device(struct block_device *);
 extern int invalidate_partition(struct gendisk *, int);
+#endif
+extern int invalidate_inodes(struct super_block *);
 unsigned long invalidate_mapping_pages(struct address_space *mapping,
                                        pgoff_t start, pgoff_t end);
 unsigned long invalidate_inode_pages(struct address_space *mapping);
@@ -1546,11 +1598,14 @@ extern int __filemap_fdatawrite_range(struct address_space *mapping,
 extern long do_fsync(struct file *file, int datasync);
 extern void sync_supers(void);
 extern void sync_filesystems(int wait);
+extern void __fsync_super(struct super_block *sb);
 extern void emergency_sync(void);
 extern void emergency_remount(void);
 extern int do_remount_sb(struct super_block *sb, int flags,
                         void *data, int force);
+#ifdef CONFIG_BLOCK
 extern sector_t bmap(struct inode *, sector_t);
+#endif
 extern int notify_change(struct dentry *, struct iattr *);
 extern int permission(struct inode *, int, struct nameidata *);
 extern int generic_permission(struct inode *, int,
@@ -1633,9 +1688,11 @@ static inline void insert_inode_hash(struct inode *inode) {
 extern struct file * get_empty_filp(void);
 extern void file_move(struct file *f, struct list_head *list);
 extern void file_kill(struct file *f);
+#ifdef CONFIG_BLOCK
 struct bio;
 extern void submit_bio(int, struct bio *);
 extern int bdev_read_only(struct block_device *);
+#endif
 extern int set_blocksize(struct block_device *, int);
 extern int sb_set_blocksize(struct super_block *, int);
 extern int sb_min_blocksize(struct super_block *, int);
@@ -1716,6 +1773,7 @@ static inline void do_generic_file_read(struct file * filp, loff_t *ppos,
                                actor);
 }
 
+#ifdef CONFIG_BLOCK
 ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
        struct block_device *bdev, const struct iovec *iov, loff_t offset,
        unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
@@ -1753,6 +1811,7 @@ static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb,
        return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
                                nr_segs, get_block, end_io, DIO_OWN_LOCKING);
 }
+#endif
 
 extern const struct file_operations generic_ro_fops;
 
index e4af57e87c178ab0bff22b97b2e710e72a870428..41f276fdd185f6b16b4edb280041a23fa642440c 100644 (file)
@@ -11,6 +11,8 @@
 
 #include <linux/types.h>
 
+#ifdef CONFIG_BLOCK
+
 enum {
 /* These three have identical behaviour; use the second one if DOS FDISK gets
    confused about extended/logical partitions starting past cylinder 1023. */
@@ -420,3 +422,5 @@ static inline struct block_device *bdget_disk(struct gendisk *disk, int index)
 #endif
 
 #endif
+
+#endif
index 7b703b6d43582dd918a1c8c397a2a3932accf72c..4edf1934e5cae2911ccf0b0917d7072fd6f672c4 100644 (file)
@@ -743,7 +743,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long
                int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
 void print_bad_pte(struct vm_area_struct *, pte_t, unsigned long);
 
-int __set_page_dirty_buffers(struct page *page);
+extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
+extern void do_invalidatepage(struct page *page, unsigned long offset);
+
 int __set_page_dirty_nobuffers(struct page *page);
 int redirty_page_for_writepage(struct writeback_control *wbc,
                                struct page *page);
index 3ca880463c47426029bad70eb02f30ad3ab41015..cc5fb75af78a25e22c55ba868e4612e09331fbe1 100644 (file)
@@ -9,6 +9,7 @@
  * (And no, it doesn't do the #ifdef __MPAGE_H thing, and it doesn't do
  * nested includes.  Get it right in the .c file).
  */
+#ifdef CONFIG_BLOCK
 
 struct writeback_control;
 typedef int (writepage_t)(struct page *page, struct writeback_control *wbc);
@@ -21,8 +22,4 @@ int mpage_writepages(struct address_space *mapping,
 int mpage_writepage(struct page *page, get_block_t *get_block,
                struct writeback_control *wbc);
 
-static inline int
-generic_writepages(struct address_space *mapping, struct writeback_control *wbc)
-{
-       return mpage_writepages(mapping, wbc, NULL);
-}
+#endif
index eb3e547c8fee37798521949cc4f341df352a6d6d..c588709acbbc90b868681dda826605fa14afb0a5 100644 (file)
@@ -53,6 +53,8 @@
 #include <linux/raid/md_u.h>
 #include <linux/raid/md_k.h>
 
+#ifdef CONFIG_MD
+
 /*
  * Different major versions are not compatible.
  * Different minor versions are only downward compatible.
@@ -95,5 +97,6 @@ extern void md_new_event(mddev_t *mddev);
 
 extern void md_update_sb(mddev_t * mddev);
 
+#endif /* CONFIG_MD */
 #endif 
 
index d28890295852ce4f9be9f6bf0c6529f7b81041e6..920b94fe31fa2ed5184f8cdb4d41683efeeb15f4 100644 (file)
@@ -18,6 +18,8 @@
 /* and dm-bio-list.h is not under include/linux because.... ??? */
 #include "../../../drivers/md/dm-bio-list.h"
 
+#ifdef CONFIG_BLOCK
+
 #define        LEVEL_MULTIPATH         (-4)
 #define        LEVEL_LINEAR            (-1)
 #define        LEVEL_FAULTY            (-5)
@@ -362,5 +364,6 @@ static inline void safe_put_page(struct page *p)
        if (p) put_page(p);
 }
 
+#endif /* CONFIG_BLOCK */
 #endif
 
index 00b340ba66127131b5f4a3790d649b1bc33c5256..b160fb18e8d6ce6f724827fe19fd31dfe748ddb8 100644 (file)
@@ -17,5 +17,6 @@ extern int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma);
 
 extern const struct file_operations ramfs_file_operations;
 extern struct vm_operations_struct generic_file_vm_ops;
+extern int __init init_rootfs(void);
 
 #endif
index 8d5382e62c08e8503f5e43e2e62b7215df2dd26b..344bc3495ddbd12548c32ebaf57cd3c6ee7e8be9 100644 (file)
@@ -133,7 +133,7 @@ static inline void rb_set_color(struct rb_node *rb, int color)
 #define        rb_entry(ptr, type, member) container_of(ptr, type, member)
 
 #define RB_EMPTY_ROOT(root)    ((root)->rb_node == NULL)
-#define RB_EMPTY_NODE(node)    (rb_parent(node) != node)
+#define RB_EMPTY_NODE(node)    (rb_parent(node) == node)
 #define RB_CLEAR_NODE(node)    (rb_set_parent(node, node))
 
 extern void rb_insert_color(struct rb_node *, struct rb_root *);
index 28493ffaafe7b26e1dfec49fab09333658702d3e..9c63abffd7b298fec7ca9431a73ac328415da264 100644 (file)
@@ -807,21 +807,19 @@ struct stat_data_v1 {
 #define set_sd_v1_first_direct_byte(sdp,v) \
                                 ((sdp)->sd_first_direct_byte = cpu_to_le32(v))
 
-#include <linux/ext2_fs.h>
-
 /* inode flags stored in sd_attrs (nee sd_reserved) */
 
 /* we want common flags to have the same values as in ext2,
    so chattr(1) will work without problems */
-#define REISERFS_IMMUTABLE_FL EXT2_IMMUTABLE_FL
-#define REISERFS_APPEND_FL    EXT2_APPEND_FL
-#define REISERFS_SYNC_FL      EXT2_SYNC_FL
-#define REISERFS_NOATIME_FL   EXT2_NOATIME_FL
-#define REISERFS_NODUMP_FL    EXT2_NODUMP_FL
-#define REISERFS_SECRM_FL     EXT2_SECRM_FL
-#define REISERFS_UNRM_FL      EXT2_UNRM_FL
-#define REISERFS_COMPR_FL     EXT2_COMPR_FL
-#define REISERFS_NOTAIL_FL    EXT2_NOTAIL_FL
+#define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL
+#define REISERFS_APPEND_FL    FS_APPEND_FL
+#define REISERFS_SYNC_FL      FS_SYNC_FL
+#define REISERFS_NOATIME_FL   FS_NOATIME_FL
+#define REISERFS_NODUMP_FL    FS_NODUMP_FL
+#define REISERFS_SECRM_FL     FS_SECRM_FL
+#define REISERFS_UNRM_FL      FS_UNRM_FL
+#define REISERFS_COMPR_FL     FS_COMPR_FL
+#define REISERFS_NOTAIL_FL    FS_NOTAIL_FL
 
 /* persistent flags that file inherits from the parent directory */
 #define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL |        \
@@ -2163,15 +2161,24 @@ __u32 r5_hash(const signed char *msg, int len);
 /* prototypes from ioctl.c */
 int reiserfs_ioctl(struct inode *inode, struct file *filp,
                   unsigned int cmd, unsigned long arg);
+long reiserfs_compat_ioctl(struct file *filp,
+                  unsigned int cmd, unsigned long arg);
 
 /* ioctl's command */
 #define REISERFS_IOC_UNPACK            _IOW(0xCD,1,long)
 /* define following flags to be the same as in ext2, so that chattr(1),
    lsattr(1) will work with us. */
-#define REISERFS_IOC_GETFLAGS          EXT2_IOC_GETFLAGS
-#define REISERFS_IOC_SETFLAGS          EXT2_IOC_SETFLAGS
-#define REISERFS_IOC_GETVERSION                EXT2_IOC_GETVERSION
-#define REISERFS_IOC_SETVERSION                EXT2_IOC_SETVERSION
+#define REISERFS_IOC_GETFLAGS          FS_IOC_GETFLAGS
+#define REISERFS_IOC_SETFLAGS          FS_IOC_SETFLAGS
+#define REISERFS_IOC_GETVERSION                FS_IOC_GETVERSION
+#define REISERFS_IOC_SETVERSION                FS_IOC_SETVERSION
+
+/* the 32 bit compat definitions with int argument */
+#define REISERFS_IOC32_UNPACK          _IOW(0xCD, 1, int)
+#define REISERFS_IOC32_GETFLAGS                FS_IOC32_GETFLAGS
+#define REISERFS_IOC32_SETFLAGS                FS_IOC32_SETFLAGS
+#define REISERFS_IOC32_GETVERSION      FS_IOC32_GETVERSION
+#define REISERFS_IOC32_SETVERSION      FS_IOC32_SETVERSION
 
 /* Locking primitives */
 /* Right now we are still falling back to (un)lock_kernel, but eventually that
index a06fc89cf6e5b8778c93ca53ebbb733948448948..fc4a9873ec10fb5cdb603671fc0d266f9ecd1646 100644 (file)
@@ -710,7 +710,6 @@ extern unsigned int max_cache_size;
 
 
 struct io_context;                     /* See blkdev.h */
-void exit_io_context(void);
 struct cpuset;
 
 #define NGROUPS_SMALL          32
index ea4c2605f8da8836128fd2e2f4fe7e712d536ee5..44091c0db0b46b473234172a91d983319648e7ce 100644 (file)
@@ -307,6 +307,9 @@ extern void tty_ldisc_put(int);
 extern void tty_wakeup(struct tty_struct *tty);
 extern void tty_ldisc_flush(struct tty_struct *tty);
 
+extern int tty_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
+                    unsigned long arg);
+
 extern struct mutex tty_mutex;
 
 /* n_tty.c */
index 9d4074ecd0cdc95248fb42dd4dde5b421f152518..4f4d98addb448afb61cd3d49b4a2a8cc3a00cbea 100644 (file)
@@ -111,6 +111,8 @@ balance_dirty_pages_ratelimited(struct address_space *mapping)
 }
 
 int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0);
+extern int generic_writepages(struct address_space *mapping,
+                             struct writeback_control *wbc);
 int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
 int sync_page_range(struct inode *inode, struct address_space *mapping,
                        loff_t pos, loff_t count);
index d04d05adfa9b68ca1ccd08e231a81996a316c714..c247a28259bc77fa453994b3492a457f8b99bb7c 100644 (file)
@@ -6,7 +6,6 @@
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
 
-
 #define MSG_SIMPLE_TAG 0x20
 #define MSG_HEAD_TAG   0x21
 #define MSG_ORDERED_TAG        0x22
@@ -14,6 +13,7 @@
 #define SCSI_NO_TAG    (-1)    /* identify no tag in use */
 
 
+#ifdef CONFIG_BLOCK
 
 /**
  * scsi_get_tag_type - get the type of tag the device supports
@@ -100,7 +100,7 @@ static inline int scsi_populate_tag_msg(struct scsi_cmnd *cmd, char *msg)
        struct scsi_device *sdev = cmd->device;
 
         if (blk_rq_tagged(req)) {
-               if (sdev->ordered_tags && req->flags & REQ_HARDBARRIER)
+               if (sdev->ordered_tags && req->cmd_flags & REQ_HARDBARRIER)
                        *msg++ = MSG_ORDERED_TAG;
                else
                        *msg++ = MSG_SIMPLE_TAG;
@@ -144,4 +144,5 @@ static inline int scsi_init_shared_tag_map(struct Scsi_Host *shost, int depth)
        return shost->bqt ? 0 : -ENOMEM;
 }
 
+#endif /* CONFIG_BLOCK */
 #endif /* _SCSI_SCSI_TCQ_H */
index 4381006dd666aaba6f9d1e29913d65fe5c3e5a8c..d2eb7a84a264f0330d0aab1792f3ac34bbba23b2 100644 (file)
@@ -92,7 +92,7 @@ config LOCALVERSION_AUTO
 
 config SWAP
        bool "Support for paging of anonymous memory (swap)"
-       depends on MMU
+       depends on MMU && BLOCK
        default y
        help
          This option allows you to choose whether you want to have support
index b290aadb1d3f2a0b1448a06df90bb05f75aed42f..dc1ec0803ef9b93b76c27b9db3a9d3e741f98280 100644 (file)
@@ -285,7 +285,11 @@ void __init mount_block_root(char *name, int flags)
 {
        char *fs_names = __getname();
        char *p;
+#ifdef CONFIG_BLOCK
        char b[BDEVNAME_SIZE];
+#else
+       const char *b = name;
+#endif
 
        get_fs_names(fs_names);
 retry:
@@ -304,7 +308,9 @@ retry:
                 * Allow the user to distinguish between failed sys_open
                 * and bad superblock on root device.
                 */
+#ifdef CONFIG_BLOCK
                __bdevname(ROOT_DEV, b);
+#endif
                printk("VFS: Cannot open root device \"%s\" or %s\n",
                                root_device_name, b);
                printk("Please append a correct \"root=\" boot option\n");
@@ -316,7 +322,10 @@ retry:
        for (p = fs_names; *p; p += strlen(p)+1)
                printk(" %s", p);
        printk("\n");
-       panic("VFS: Unable to mount root fs on %s", __bdevname(ROOT_DEV, b));
+#ifdef CONFIG_BLOCK
+       __bdevname(ROOT_DEV, b);
+#endif
+       panic("VFS: Unable to mount root fs on %s", b);
 out:
        putname(fs_names);
 }
@@ -387,8 +396,10 @@ void __init mount_root(void)
                        change_floppy("root floppy");
        }
 #endif
+#ifdef CONFIG_BLOCK
        create_dev("/dev/root", ROOT_DEV);
        mount_block_root("/dev/root", root_mountflags);
+#endif
 }
 
 /*
index 75573e5d27b05c92dd1a11aaf483415935032624..b4fbd838cd775ac9007af6e7d566652e1ad629de 100644 (file)
@@ -26,6 +26,8 @@
 
 #include <asm/uaccess.h>
 
+extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
+
 int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts)
 {
        return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) ||
index 2e4c13cba95ae7af79bc373a859ecac42a340225..c189de2927ab6b55ab47baf199d347d65481e906 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/pipe_fs_i.h>
 #include <linux/audit.h> /* for audit_free() */
 #include <linux/resource.h>
+#include <linux/blkdev.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
index 6991bece67e8e7609e5d18920183246e0e007143..7a3b2e75f0402122ced8b15d5488a6b9de49ee8e 100644 (file)
@@ -134,3 +134,8 @@ cond_syscall(sys_madvise);
 cond_syscall(sys_mremap);
 cond_syscall(sys_remap_file_pages);
 cond_syscall(compat_sys_move_pages);
+
+/* block-layer dependent */
+cond_syscall(sys_bdflush);
+cond_syscall(sys_ioprio_set);
+cond_syscall(sys_ioprio_get);
index 1e55ba1c2edfac510c41c87e47e7849f99a5f3b3..48499c2d88ccf16d65c8891e3fc5a23e8cdf283a 100644 (file)
@@ -322,6 +322,9 @@ struct rb_node *rb_next(struct rb_node *node)
 {
        struct rb_node *parent;
 
+       if (rb_parent(node) == node)
+               return NULL;
+
        /* If we have a right-hand child, go down and then left as far
           as we can. */
        if (node->rb_right) {
@@ -348,6 +351,9 @@ struct rb_node *rb_prev(struct rb_node *node)
 {
        struct rb_node *parent;
 
+       if (rb_parent(node) == node)
+               return NULL;
+
        /* If we have a left-hand child, go down and then right as far
           as we can. */
        if (node->rb_left) {
index 6200c6d6afd20812acd95af4409b2467dd3a1f6f..12b3a4eee88d56a3b4d4fe6c4255fa4a1ca81352 100644 (file)
@@ -12,6 +12,9 @@ obj-y                 := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
                           readahead.o swap.o truncate.o vmscan.o \
                           prio_tree.o util.o mmzone.o vmstat.o $(mmu-y)
 
+ifeq ($(CONFIG_MMU)$(CONFIG_BLOCK),yy)
+obj-y                  += bounce.o
+endif
 obj-$(CONFIG_SWAP)     += page_io.o swap_state.o swapfile.o thrash.o
 obj-$(CONFIG_HUGETLBFS)        += hugetlb.o
 obj-$(CONFIG_NUMA)     += mempolicy.o
diff --git a/mm/bounce.c b/mm/bounce.c
new file mode 100644 (file)
index 0000000..e4b62d2
--- /dev/null
@@ -0,0 +1,302 @@
+/* bounce buffer handling for block devices
+ *
+ * - Split from highmem.c
+ */
+
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/swap.h>
+#include <linux/bio.h>
+#include <linux/pagemap.h>
+#include <linux/mempool.h>
+#include <linux/blkdev.h>
+#include <linux/init.h>
+#include <linux/hash.h>
+#include <linux/highmem.h>
+#include <linux/blktrace_api.h>
+#include <asm/tlbflush.h>
+
+#define POOL_SIZE      64
+#define ISA_POOL_SIZE  16
+
+static mempool_t *page_pool, *isa_page_pool;
+
+#ifdef CONFIG_HIGHMEM
+static __init int init_emergency_pool(void)
+{
+       struct sysinfo i;
+       si_meminfo(&i);
+       si_swapinfo(&i);
+
+       if (!i.totalhigh)
+               return 0;
+
+       page_pool = mempool_create_page_pool(POOL_SIZE, 0);
+       BUG_ON(!page_pool);
+       printk("highmem bounce pool size: %d pages\n", POOL_SIZE);
+
+       return 0;
+}
+
+__initcall(init_emergency_pool);
+
+/*
+ * highmem version, map in to vec
+ */
+static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
+{
+       unsigned long flags;
+       unsigned char *vto;
+
+       local_irq_save(flags);
+       vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ);
+       memcpy(vto + to->bv_offset, vfrom, to->bv_len);
+       kunmap_atomic(vto, KM_BOUNCE_READ);
+       local_irq_restore(flags);
+}
+
+#else /* CONFIG_HIGHMEM */
+
+#define bounce_copy_vec(to, vfrom)     \
+       memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len)
+
+#endif /* CONFIG_HIGHMEM */
+
+/*
+ * allocate pages in the DMA region for the ISA pool
+ */
+static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
+{
+       return mempool_alloc_pages(gfp_mask | GFP_DMA, data);
+}
+
+/*
+ * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA
+ * as the max address, so check if the pool has already been created.
+ */
+int init_emergency_isa_pool(void)
+{
+       if (isa_page_pool)
+               return 0;
+
+       isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa,
+                                      mempool_free_pages, (void *) 0);
+       BUG_ON(!isa_page_pool);
+
+       printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE);
+       return 0;
+}
+
+/*
+ * Simple bounce buffer support for highmem pages. Depending on the
+ * queue gfp mask set, *to may or may not be a highmem page. kmap it
+ * always, it will do the Right Thing
+ */
+static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
+{
+       unsigned char *vfrom;
+       struct bio_vec *tovec, *fromvec;
+       int i;
+
+       __bio_for_each_segment(tovec, to, i, 0) {
+               fromvec = from->bi_io_vec + i;
+
+               /*
+                * not bounced
+                */
+               if (tovec->bv_page == fromvec->bv_page)
+                       continue;
+
+               /*
+                * fromvec->bv_offset and fromvec->bv_len might have been
+                * modified by the block layer, so use the original copy,
+                * bounce_copy_vec already uses tovec->bv_len
+                */
+               vfrom = page_address(fromvec->bv_page) + tovec->bv_offset;
+
+               flush_dcache_page(tovec->bv_page);
+               bounce_copy_vec(tovec, vfrom);
+       }
+}
+
+static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
+{
+       struct bio *bio_orig = bio->bi_private;
+       struct bio_vec *bvec, *org_vec;
+       int i;
+
+       if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
+               set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags);
+
+       /*
+        * free up bounce indirect pages used
+        */
+       __bio_for_each_segment(bvec, bio, i, 0) {
+               org_vec = bio_orig->bi_io_vec + i;
+               if (bvec->bv_page == org_vec->bv_page)
+                       continue;
+
+               dec_zone_page_state(bvec->bv_page, NR_BOUNCE);
+               mempool_free(bvec->bv_page, pool);
+       }
+
+       bio_endio(bio_orig, bio_orig->bi_size, err);
+       bio_put(bio);
+}
+
+static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
+{
+       if (bio->bi_size)
+               return 1;
+
+       bounce_end_io(bio, page_pool, err);
+       return 0;
+}
+
+static int bounce_end_io_write_isa(struct bio *bio, unsigned int bytes_done, int err)
+{
+       if (bio->bi_size)
+               return 1;
+
+       bounce_end_io(bio, isa_page_pool, err);
+       return 0;
+}
+
+static void __bounce_end_io_read(struct bio *bio, mempool_t *pool, int err)
+{
+       struct bio *bio_orig = bio->bi_private;
+
+       if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+               copy_to_high_bio_irq(bio_orig, bio);
+
+       bounce_end_io(bio, pool, err);
+}
+
+static int bounce_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
+{
+       if (bio->bi_size)
+               return 1;
+
+       __bounce_end_io_read(bio, page_pool, err);
+       return 0;
+}
+
+static int bounce_end_io_read_isa(struct bio *bio, unsigned int bytes_done, int err)
+{
+       if (bio->bi_size)
+               return 1;
+
+       __bounce_end_io_read(bio, isa_page_pool, err);
+       return 0;
+}
+
+static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
+                              mempool_t *pool)
+{
+       struct page *page;
+       struct bio *bio = NULL;
+       int i, rw = bio_data_dir(*bio_orig);
+       struct bio_vec *to, *from;
+
+       bio_for_each_segment(from, *bio_orig, i) {
+               page = from->bv_page;
+
+               /*
+                * is destination page below bounce pfn?
+                */
+               if (page_to_pfn(page) < q->bounce_pfn)
+                       continue;
+
+               /*
+                * irk, bounce it
+                */
+               if (!bio)
+                       bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt);
+
+               to = bio->bi_io_vec + i;
+
+               to->bv_page = mempool_alloc(pool, q->bounce_gfp);
+               to->bv_len = from->bv_len;
+               to->bv_offset = from->bv_offset;
+               inc_zone_page_state(to->bv_page, NR_BOUNCE);
+
+               if (rw == WRITE) {
+                       char *vto, *vfrom;
+
+                       flush_dcache_page(from->bv_page);
+                       vto = page_address(to->bv_page) + to->bv_offset;
+                       vfrom = kmap(from->bv_page) + from->bv_offset;
+                       memcpy(vto, vfrom, to->bv_len);
+                       kunmap(from->bv_page);
+               }
+       }
+
+       /*
+        * no pages bounced
+        */
+       if (!bio)
+               return;
+
+       /*
+        * at least one page was bounced, fill in possible non-highmem
+        * pages
+        */
+       __bio_for_each_segment(from, *bio_orig, i, 0) {
+               to = bio_iovec_idx(bio, i);
+               if (!to->bv_page) {
+                       to->bv_page = from->bv_page;
+                       to->bv_len = from->bv_len;
+                       to->bv_offset = from->bv_offset;
+               }
+       }
+
+       bio->bi_bdev = (*bio_orig)->bi_bdev;
+       bio->bi_flags |= (1 << BIO_BOUNCED);
+       bio->bi_sector = (*bio_orig)->bi_sector;
+       bio->bi_rw = (*bio_orig)->bi_rw;
+
+       bio->bi_vcnt = (*bio_orig)->bi_vcnt;
+       bio->bi_idx = (*bio_orig)->bi_idx;
+       bio->bi_size = (*bio_orig)->bi_size;
+
+       if (pool == page_pool) {
+               bio->bi_end_io = bounce_end_io_write;
+               if (rw == READ)
+                       bio->bi_end_io = bounce_end_io_read;
+       } else {
+               bio->bi_end_io = bounce_end_io_write_isa;
+               if (rw == READ)
+                       bio->bi_end_io = bounce_end_io_read_isa;
+       }
+
+       bio->bi_private = *bio_orig;
+       *bio_orig = bio;
+}
+
+void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
+{
+       mempool_t *pool;
+
+       /*
+        * for non-isa bounce case, just check if the bounce pfn is equal
+        * to or bigger than the highest pfn in the system -- in that case,
+        * don't waste time iterating over bio segments
+        */
+       if (!(q->bounce_gfp & GFP_DMA)) {
+               if (q->bounce_pfn >= blk_max_pfn)
+                       return;
+               pool = page_pool;
+       } else {
+               BUG_ON(!isa_page_pool);
+               pool = isa_page_pool;
+       }
+
+       blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
+
+       /*
+        * slow path
+        */
+       __blk_queue_bounce(q, bio_orig, pool);
+}
+
+EXPORT_SYMBOL(blk_queue_bounce);
index 3277f3b2352441eb6d0a5e4d8caf8437bab16460..c4fe97f5ace0851d3023d1305d8de5c7ad2b1dd5 100644 (file)
@@ -2020,6 +2020,7 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i
                if (unlikely(*pos + *count > inode->i_sb->s_maxbytes))
                        *count = inode->i_sb->s_maxbytes - *pos;
        } else {
+#ifdef CONFIG_BLOCK
                loff_t isize;
                if (bdev_read_only(I_BDEV(inode)))
                        return -EPERM;
@@ -2031,6 +2032,9 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i
 
                if (*pos + *count > isize)
                        *count = isize - *pos;
+#else
+               return -EPERM;
+#endif
        }
        return 0;
 }
@@ -2491,3 +2495,33 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
        }
        return retval;
 }
+
+/**
+ * try_to_release_page() - release old fs-specific metadata on a page
+ *
+ * @page: the page which the kernel is trying to free
+ * @gfp_mask: memory allocation flags (and I/O mode)
+ *
+ * The address_space is to try to release any data against the page
+ * (presumably at page->private).  If the release was successful, return `1'.
+ * Otherwise return zero.
+ *
+ * The @gfp_mask argument specifies whether I/O may be performed to release
+ * this page (__GFP_IO), and whether the call may block (__GFP_WAIT).
+ *
+ * NOTE: @gfp_mask may go away, and this function may become non-blocking.
+ */
+int try_to_release_page(struct page *page, gfp_t gfp_mask)
+{
+       struct address_space * const mapping = page->mapping;
+
+       BUG_ON(!PageLocked(page));
+       if (PageWriteback(page))
+               return 0;
+
+       if (mapping && mapping->a_ops->releasepage)
+               return mapping->a_ops->releasepage(page, gfp_mask);
+       return try_to_free_buffers(page);
+}
+
+EXPORT_SYMBOL(try_to_release_page);
index ee5519b176ee9a2fc704e9af51f4f26836b53df1..0206e7e5018c8998c87349706fe1fcfb5644933c 100644 (file)
 #include <linux/blktrace_api.h>
 #include <asm/tlbflush.h>
 
-static mempool_t *page_pool, *isa_page_pool;
-
-static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
-{
-       return mempool_alloc_pages(gfp_mask | GFP_DMA, data);
-}
-
 /*
  * Virtual_count is not a pure "count".
  *  0 means that it is not mapped, and has not been mapped
@@ -217,282 +210,8 @@ void fastcall kunmap_high(struct page *page)
 }
 
 EXPORT_SYMBOL(kunmap_high);
-
-#define POOL_SIZE      64
-
-static __init int init_emergency_pool(void)
-{
-       struct sysinfo i;
-       si_meminfo(&i);
-       si_swapinfo(&i);
-        
-       if (!i.totalhigh)
-               return 0;
-
-       page_pool = mempool_create_page_pool(POOL_SIZE, 0);
-       BUG_ON(!page_pool);
-       printk("highmem bounce pool size: %d pages\n", POOL_SIZE);
-
-       return 0;
-}
-
-__initcall(init_emergency_pool);
-
-/*
- * highmem version, map in to vec
- */
-static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
-{
-       unsigned long flags;
-       unsigned char *vto;
-
-       local_irq_save(flags);
-       vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ);
-       memcpy(vto + to->bv_offset, vfrom, to->bv_len);
-       kunmap_atomic(vto, KM_BOUNCE_READ);
-       local_irq_restore(flags);
-}
-
-#else /* CONFIG_HIGHMEM */
-
-#define bounce_copy_vec(to, vfrom)     \
-       memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len)
-
 #endif
 
-#define ISA_POOL_SIZE  16
-
-/*
- * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA
- * as the max address, so check if the pool has already been created.
- */
-int init_emergency_isa_pool(void)
-{
-       if (isa_page_pool)
-               return 0;
-
-       isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa,
-                                      mempool_free_pages, (void *) 0);
-       BUG_ON(!isa_page_pool);
-
-       printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE);
-       return 0;
-}
-
-/*
- * Simple bounce buffer support for highmem pages. Depending on the
- * queue gfp mask set, *to may or may not be a highmem page. kmap it
- * always, it will do the Right Thing
- */
-static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
-{
-       unsigned char *vfrom;
-       struct bio_vec *tovec, *fromvec;
-       int i;
-
-       __bio_for_each_segment(tovec, to, i, 0) {
-               fromvec = from->bi_io_vec + i;
-
-               /*
-                * not bounced
-                */
-               if (tovec->bv_page == fromvec->bv_page)
-                       continue;
-
-               /*
-                * fromvec->bv_offset and fromvec->bv_len might have been
-                * modified by the block layer, so use the original copy,
-                * bounce_copy_vec already uses tovec->bv_len
-                */
-               vfrom = page_address(fromvec->bv_page) + tovec->bv_offset;
-
-               flush_dcache_page(tovec->bv_page);
-               bounce_copy_vec(tovec, vfrom);
-       }
-}
-
-static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
-{
-       struct bio *bio_orig = bio->bi_private;
-       struct bio_vec *bvec, *org_vec;
-       int i;
-
-       if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
-               set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags);
-
-       /*
-        * free up bounce indirect pages used
-        */
-       __bio_for_each_segment(bvec, bio, i, 0) {
-               org_vec = bio_orig->bi_io_vec + i;
-               if (bvec->bv_page == org_vec->bv_page)
-                       continue;
-
-               dec_zone_page_state(bvec->bv_page, NR_BOUNCE);
-               mempool_free(bvec->bv_page, pool);
-       }
-
-       bio_endio(bio_orig, bio_orig->bi_size, err);
-       bio_put(bio);
-}
-
-static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
-{
-       if (bio->bi_size)
-               return 1;
-
-       bounce_end_io(bio, page_pool, err);
-       return 0;
-}
-
-static int bounce_end_io_write_isa(struct bio *bio, unsigned int bytes_done, int err)
-{
-       if (bio->bi_size)
-               return 1;
-
-       bounce_end_io(bio, isa_page_pool, err);
-       return 0;
-}
-
-static void __bounce_end_io_read(struct bio *bio, mempool_t *pool, int err)
-{
-       struct bio *bio_orig = bio->bi_private;
-
-       if (test_bit(BIO_UPTODATE, &bio->bi_flags))
-               copy_to_high_bio_irq(bio_orig, bio);
-
-       bounce_end_io(bio, pool, err);
-}
-
-static int bounce_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
-{
-       if (bio->bi_size)
-               return 1;
-
-       __bounce_end_io_read(bio, page_pool, err);
-       return 0;
-}
-
-static int bounce_end_io_read_isa(struct bio *bio, unsigned int bytes_done, int err)
-{
-       if (bio->bi_size)
-               return 1;
-
-       __bounce_end_io_read(bio, isa_page_pool, err);
-       return 0;
-}
-
-static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
-                              mempool_t *pool)
-{
-       struct page *page;
-       struct bio *bio = NULL;
-       int i, rw = bio_data_dir(*bio_orig);
-       struct bio_vec *to, *from;
-
-       bio_for_each_segment(from, *bio_orig, i) {
-               page = from->bv_page;
-
-               /*
-                * is destination page below bounce pfn?
-                */
-               if (page_to_pfn(page) < q->bounce_pfn)
-                       continue;
-
-               /*
-                * irk, bounce it
-                */
-               if (!bio)
-                       bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt);
-
-               to = bio->bi_io_vec + i;
-
-               to->bv_page = mempool_alloc(pool, q->bounce_gfp);
-               to->bv_len = from->bv_len;
-               to->bv_offset = from->bv_offset;
-               inc_zone_page_state(to->bv_page, NR_BOUNCE);
-
-               if (rw == WRITE) {
-                       char *vto, *vfrom;
-
-                       flush_dcache_page(from->bv_page);
-                       vto = page_address(to->bv_page) + to->bv_offset;
-                       vfrom = kmap(from->bv_page) + from->bv_offset;
-                       memcpy(vto, vfrom, to->bv_len);
-                       kunmap(from->bv_page);
-               }
-       }
-
-       /*
-        * no pages bounced
-        */
-       if (!bio)
-               return;
-
-       /*
-        * at least one page was bounced, fill in possible non-highmem
-        * pages
-        */
-       __bio_for_each_segment(from, *bio_orig, i, 0) {
-               to = bio_iovec_idx(bio, i);
-               if (!to->bv_page) {
-                       to->bv_page = from->bv_page;
-                       to->bv_len = from->bv_len;
-                       to->bv_offset = from->bv_offset;
-               }
-       }
-
-       bio->bi_bdev = (*bio_orig)->bi_bdev;
-       bio->bi_flags |= (1 << BIO_BOUNCED);
-       bio->bi_sector = (*bio_orig)->bi_sector;
-       bio->bi_rw = (*bio_orig)->bi_rw;
-
-       bio->bi_vcnt = (*bio_orig)->bi_vcnt;
-       bio->bi_idx = (*bio_orig)->bi_idx;
-       bio->bi_size = (*bio_orig)->bi_size;
-
-       if (pool == page_pool) {
-               bio->bi_end_io = bounce_end_io_write;
-               if (rw == READ)
-                       bio->bi_end_io = bounce_end_io_read;
-       } else {
-               bio->bi_end_io = bounce_end_io_write_isa;
-               if (rw == READ)
-                       bio->bi_end_io = bounce_end_io_read_isa;
-       }
-
-       bio->bi_private = *bio_orig;
-       *bio_orig = bio;
-}
-
-void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
-{
-       mempool_t *pool;
-
-       /*
-        * for non-isa bounce case, just check if the bounce pfn is equal
-        * to or bigger than the highest pfn in the system -- in that case,
-        * don't waste time iterating over bio segments
-        */
-       if (!(q->bounce_gfp & GFP_DMA)) {
-               if (q->bounce_pfn >= blk_max_pfn)
-                       return;
-               pool = page_pool;
-       } else {
-               BUG_ON(!isa_page_pool);
-               pool = isa_page_pool;
-       }
-
-       blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
-
-       /*
-        * slow path
-        */
-       __blk_queue_bounce(q, bio_orig, pool);
-}
-
-EXPORT_SYMBOL(blk_queue_bounce);
-
 #if defined(HASHED_PAGE_VIRTUAL)
 
 #define PA_HASH_ORDER  7
index 20a8c2687b1efea5b7bb1ab23d6b9195a2e97453..ba2453f9483dfe4b03b3df1c089607da36ebb60c 100644 (file)
@@ -409,6 +409,7 @@ int migrate_page(struct address_space *mapping,
 }
 EXPORT_SYMBOL(migrate_page);
 
+#ifdef CONFIG_BLOCK
 /*
  * Migration function for pages with buffers. This function can only be used
  * if the underlying filesystem guarantees that no other references to "page"
@@ -466,6 +467,7 @@ int buffer_migrate_page(struct address_space *mapping,
        return 0;
 }
 EXPORT_SYMBOL(buffer_migrate_page);
+#endif
 
 /*
  * Writeback a page to clean the dirty state
@@ -525,7 +527,7 @@ static int fallback_migrate_page(struct address_space *mapping,
         * Buffers may be managed in a filesystem specific way.
         * We must have no buffers or drop them.
         */
-       if (page_has_buffers(page) &&
+       if (PagePrivate(page) &&
            !try_to_release_page(page, GFP_KERNEL))
                return -EAGAIN;
 
index 488b7088557c549840b7cf6cf9daba1efc38bd44..c0d4ce144dec41130e552723c5b3ecd4dca1876a 100644 (file)
@@ -30,6 +30,8 @@
 #include <linux/sysctl.h>
 #include <linux/cpu.h>
 #include <linux/syscalls.h>
+#include <linux/buffer_head.h>
+#include <linux/pagevec.h>
 
 /*
  * The maximum number of pages to writeout in a single bdflush/kupdate
@@ -550,6 +552,139 @@ void __init page_writeback_init(void)
        register_cpu_notifier(&ratelimit_nb);
 }
 
+/**
+ * generic_writepages - walk the list of dirty pages of the given
+ *                      address space and writepage() all of them.
+ *
+ * @mapping: address space structure to write
+ * @wbc: subtract the number of written pages from *@wbc->nr_to_write
+ *
+ * This is a library function, which implements the writepages()
+ * address_space_operation.
+ *
+ * If a page is already under I/O, generic_writepages() skips it, even
+ * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
+ * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
+ * and msync() need to guarantee that all the data which was dirty at the time
+ * the call was made get new I/O started against them.  If wbc->sync_mode is
+ * WB_SYNC_ALL then we were called for data integrity and we must wait for
+ * existing IO to complete.
+ *
+ * Derived from mpage_writepages() - if you fix this you should check that
+ * also!
+ */
+int generic_writepages(struct address_space *mapping,
+                      struct writeback_control *wbc)
+{
+       struct backing_dev_info *bdi = mapping->backing_dev_info;
+       int ret = 0;
+       int done = 0;
+       int (*writepage)(struct page *page, struct writeback_control *wbc);
+       struct pagevec pvec;
+       int nr_pages;
+       pgoff_t index;
+       pgoff_t end;            /* Inclusive */
+       int scanned = 0;
+       int range_whole = 0;
+
+       if (wbc->nonblocking && bdi_write_congested(bdi)) {
+               wbc->encountered_congestion = 1;
+               return 0;
+       }
+
+       writepage = mapping->a_ops->writepage;
+
+       /* deal with chardevs and other special file */
+       if (!writepage)
+               return 0;
+
+       pagevec_init(&pvec, 0);
+       if (wbc->range_cyclic) {
+               index = mapping->writeback_index; /* Start from prev offset */
+               end = -1;
+       } else {
+               index = wbc->range_start >> PAGE_CACHE_SHIFT;
+               end = wbc->range_end >> PAGE_CACHE_SHIFT;
+               if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+                       range_whole = 1;
+               scanned = 1;
+       }
+retry:
+       while (!done && (index <= end) &&
+              (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+                                             PAGECACHE_TAG_DIRTY,
+                                             min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+               unsigned i;
+
+               scanned = 1;
+               for (i = 0; i < nr_pages; i++) {
+                       struct page *page = pvec.pages[i];
+
+                       /*
+                        * At this point we hold neither mapping->tree_lock nor
+                        * lock on the page itself: the page may be truncated or
+                        * invalidated (changing page->mapping to NULL), or even
+                        * swizzled back from swapper_space to tmpfs file
+                        * mapping
+                        */
+                       lock_page(page);
+
+                       if (unlikely(page->mapping != mapping)) {
+                               unlock_page(page);
+                               continue;
+                       }
+
+                       if (!wbc->range_cyclic && page->index > end) {
+                               done = 1;
+                               unlock_page(page);
+                               continue;
+                       }
+
+                       if (wbc->sync_mode != WB_SYNC_NONE)
+                               wait_on_page_writeback(page);
+
+                       if (PageWriteback(page) ||
+                           !clear_page_dirty_for_io(page)) {
+                               unlock_page(page);
+                               continue;
+                       }
+
+                       ret = (*writepage)(page, wbc);
+                       if (ret) {
+                               if (ret == -ENOSPC)
+                                       set_bit(AS_ENOSPC, &mapping->flags);
+                               else
+                                       set_bit(AS_EIO, &mapping->flags);
+                       }
+
+                       if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE))
+                               unlock_page(page);
+                       if (ret || (--(wbc->nr_to_write) <= 0))
+                               done = 1;
+                       if (wbc->nonblocking && bdi_write_congested(bdi)) {
+                               wbc->encountered_congestion = 1;
+                               done = 1;
+                       }
+               }
+               pagevec_release(&pvec);
+               cond_resched();
+       }
+       if (!scanned && !done) {
+               /*
+                * We hit the last page and there is more work to be done: wrap
+                * back to the start of the file
+                */
+               scanned = 1;
+               index = 0;
+               goto retry;
+       }
+       if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+               mapping->writeback_index = index;
+       return ret;
+}
+
+EXPORT_SYMBOL(generic_writepages);
+
 int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
        int ret;
@@ -672,9 +807,11 @@ int fastcall set_page_dirty(struct page *page)
 
        if (likely(mapping)) {
                int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
-               if (spd)
-                       return (*spd)(page);
-               return __set_page_dirty_buffers(page);
+#ifdef CONFIG_BLOCK
+               if (!spd)
+                       spd = __set_page_dirty_buffers;
+#endif
+               return (*spd)(page);
        }
        if (!PageDirty(page)) {
                if (!TestSetPageDirty(page))
index a654928323dcc4459cf33b59f67be5868d5b105d..8fde6580657ed15c860d2f6e5bffa77c88965240 100644 (file)
                                   do_invalidatepage */
 
 
+/**
+ * do_invalidatepage - invalidate part of all of a page
+ * @page: the page which is affected
+ * @offset: the index of the truncation point
+ *
+ * do_invalidatepage() is called when all or part of the page has become
+ * invalidated by a truncate operation.
+ *
+ * do_invalidatepage() does not have to release all buffers, but it must
+ * ensure that no dirty buffer is left outside @offset and that no I/O
+ * is underway against any of the blocks which are outside the truncation
+ * point.  Because the caller is about to free (and possibly reuse) those
+ * blocks on-disk.
+ */
+void do_invalidatepage(struct page *page, unsigned long offset)
+{
+       void (*invalidatepage)(struct page *, unsigned long);
+       invalidatepage = page->mapping->a_ops->invalidatepage;
+#ifdef CONFIG_BLOCK
+       if (!invalidatepage)
+               invalidatepage = block_invalidatepage;
+#endif
+       if (invalidatepage)
+               (*invalidatepage)(page, offset);
+}
+
 static inline void truncate_partial_page(struct page *page, unsigned partial)
 {
        memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial);