mm: don't cap request size based on read-ahead setting

author Jens Axboe <axboe@fb.com>
Tue, 13 Dec 2016 00:43:26 +0000 (16:43 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 13 Dec 2016 02:55:08 +0000 (18:55 -0800)
diff --git a/block/blk-settings.c b/block/blk-settings.c

index f679ae12284351fdb53f8cfc80a2a662269c164f..65f16cf4f8509b094585e119e7bcc47a5ae45b64 100644 (file)
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -249,6 +249,7 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
         max_sectors = min_not_zero(max_hw_sectors, limits->max_dev_sectors);
         max_sectors = min_t(unsigned int, max_sectors, BLK_DEF_MAX_SECTORS);
         limits->max_sectors = max_sectors;
+       q->backing_dev_info.io_pages = max_sectors >> (PAGE_SHIFT - 9);
  }
  EXPORT_SYMBOL(blk_queue_max_hw_sectors);
  
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c

index 9cc8d7c5439a98422eb5a99a66ea79314ff7baa7..ea374e820775fc123cb8b74ad2038abd5d86feba 100644 (file)
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -212,6 +212,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
  
         spin_lock_irq(q->queue_lock);
         q->limits.max_sectors = max_sectors_kb << 1;
+       q->backing_dev_info.io_pages = max_sectors_kb >> (PAGE_SHIFT - 10);
         spin_unlock_irq(q->queue_lock);
  
         return ret;
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h

index c357f27d54835fdb25c977a014ea8b65acf75e92..b8144b2d59ce7f59349fdffbaebe2ad86bb9433f 100644 (file)
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -136,6 +136,7 @@ struct bdi_writeback {
  struct backing_dev_info {
         struct list_head bdi_list;
         unsigned long ra_pages; /* max readahead in PAGE_SIZE units */
+       unsigned long io_pages; /* max allowed IO size */
         unsigned int capabilities; /* Device capabilities */
         congested_fn *congested_fn; /* Function pointer if device is md/dm */
         void *congested_data;   /* Pointer to aux data for congested func */
diff --git a/mm/readahead.c b/mm/readahead.c

index c8a955b1297e0b60fb2efffdf3fc2bb71cfc4630..c4ca702392333288448cc7ae4ba6b410ddb93fef 100644 (file)
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -207,12 +207,21 @@ out:
   * memory at once.
   */
  int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
-               pgoff_t offset, unsigned long nr_to_read)
+                              pgoff_t offset, unsigned long nr_to_read)
  {
+       struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
+       struct file_ra_state *ra = &filp->f_ra;
+       unsigned long max_pages;
+
         if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages))
                 return -EINVAL;
  
-       nr_to_read = min(nr_to_read, inode_to_bdi(mapping->host)->ra_pages);
+       /*
+        * If the request exceeds the readahead window, allow the read to
+        * be up to the optimal hardware IO size
+        */
+       max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
+       nr_to_read = min(nr_to_read, max_pages);
         while (nr_to_read) {
                 int err;
  
@@ -369,9 +378,17 @@ ondemand_readahead(struct address_space *mapping,
                    bool hit_readahead_marker, pgoff_t offset,
                    unsigned long req_size)
  {
-       unsigned long max = ra->ra_pages;
+       struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
+       unsigned long max_pages = ra->ra_pages;
         pgoff_t prev_offset;
  
+       /*
+        * If the request exceeds the readahead window, allow the read to
+        * be up to the optimal hardware IO size
+        */
+       if (req_size > max_pages && bdi->io_pages > max_pages)
+               max_pages = min(req_size, bdi->io_pages);
+
         /*
          * start of file
          */
@@ -385,7 +402,7 @@ ondemand_readahead(struct address_space *mapping,
         if ((offset == (ra->start + ra->size - ra->async_size) ||
              offset == (ra->start + ra->size))) {
                 ra->start += ra->size;
-               ra->size = get_next_ra_size(ra, max);
+               ra->size = get_next_ra_size(ra, max_pages);
                 ra->async_size = ra->size;
                 goto readit;
         }
@@ -400,16 +417,16 @@ ondemand_readahead(struct address_space *mapping,
                 pgoff_t start;
  
                 rcu_read_lock();
-               start = page_cache_next_hole(mapping, offset + 1, max);
+               start = page_cache_next_hole(mapping, offset + 1, max_pages);
                 rcu_read_unlock();
  
-               if (!start || start - offset > max)
+               if (!start || start - offset > max_pages)
                         return 0;
  
                 ra->start = start;
                 ra->size = start - offset;      /* old async_size */
                 ra->size += req_size;
-               ra->size = get_next_ra_size(ra, max);
+               ra->size = get_next_ra_size(ra, max_pages);
                 ra->async_size = ra->size;
                 goto readit;
         }
@@ -417,7 +434,7 @@ ondemand_readahead(struct address_space *mapping,
         /*
          * oversize read
          */
-       if (req_size > max)
+       if (req_size > max_pages)
                 goto initial_readahead;
  
         /*
@@ -433,7 +450,7 @@ ondemand_readahead(struct address_space *mapping,
          * Query the page cache and look for the traces(cached history pages)
          * that a sequential stream would leave behind.
          */
-       if (try_context_readahead(mapping, ra, offset, req_size, max))
+       if (try_context_readahead(mapping, ra, offset, req_size, max_pages))
                 goto readit;
  
         /*
@@ -444,7 +461,7 @@ ondemand_readahead(struct address_space *mapping,
  
  initial_readahead:
         ra->start = offset;
-       ra->size = get_init_ra_size(req_size, max);
+       ra->size = get_init_ra_size(req_size, max_pages);
         ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;
  
  readit:
@@ -454,7 +471,7 @@ readit:
          * the resulted next readahead window into the current one.
          */
         if (offset == ra->start && ra->size == ra->async_size) {
-               ra->async_size = get_next_ra_size(ra, max);
+               ra->async_size = get_next_ra_size(ra, max_pages);
                 ra->size += ra->async_size;
         }
author	Jens Axboe <axboe@fb.com>
	Tue, 13 Dec 2016 00:43:26 +0000 (16:43 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 13 Dec 2016 02:55:08 +0000 (18:55 -0800)
block/blk-settings.c		patch \| blob \| blame \| history
block/blk-sysfs.c		patch \| blob \| blame \| history
include/linux/backing-dev-defs.h		patch \| blob \| blame \| history
mm/readahead.c		patch \| blob \| blame \| history