mm/writeback: discard NR_UNSTABLE_NFS, use NR_WRITEBACK instead
author NeilBrown <neilb@suse.de>
Tue, 2 Jun 2020 04:48:21 +0000 (21:48 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 2 Jun 2020 17:59:08 +0000 (10:59 -0700)
After an NFS page has been written it is considered "unstable" until a
COMMIT request succeeds.  If the COMMIT fails, the page will be
re-written.

These "unstable" pages are currently accounted as "reclaimable", either
in WB_RECLAIMABLE, or in NR_UNSTABLE_NFS which is included in a
'reclaimable' count.  This might have made sense when sending the COMMIT
required a separate action by the VFS/MM (e.g.  releasepage() used to
send a COMMIT).  However, now that all writes generated by ->writepages()
will automatically be followed by a COMMIT (since commit 919e3bd9a875
("NFS: Ensure we commit after writeback is complete")), it makes more
sense to treat them as writeback pages.

So this patch removes NR_UNSTABLE_NFS and accounts unstable pages in
NR_WRITEBACK and WB_WRITEBACK.
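
In condensed form (taken from the fs/nfs/internal.h and fs/nfs/write.c
hunks below, with comments added here purely for illustration), the two
halves of the counter's lifetime now look like this:

	/* page written, now waiting for a COMMIT: count it as writeback */
	static inline
	void nfs_mark_page_unstable(struct page *page, struct nfs_commit_info *cinfo)
	{
		if (!cinfo->dreq) {
			struct inode *inode = page_file_mapping(page)->host;

			inc_node_page_state(page, NR_WRITEBACK);              /* was NR_UNSTABLE_NFS */
			inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);  /* was WB_RECLAIMABLE */
			__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
		}
	}

	/* COMMIT succeeded, the page is stable: drop the same counters */
	static void nfs_clear_page_commit(struct page *page)
	{
		dec_node_page_state(page, NR_WRITEBACK);
		dec_wb_stat(&inode_to_bdi(page_file_mapping(page)->host)->wb,
			    WB_WRITEBACK);
	}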

A particular effect of this change is that when
wb_check_background_flush() calls wb_over_bg_thresh(), the latter will
report 'true' far less often, as the 'unstable' pages are no longer
considered 'dirty' (there is nothing that writeback can do about them
anyway).

Currently wb_check_background_flush() will trigger writeback to NFS even
when there are relatively few dirty pages (if there are lots of unstable
pages); this can result in small writes going to the server (tens of
kilobytes rather than a megabyte), which hurts throughput.  With this
patch there are fewer writes, and each is larger on average.
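
As a purely illustrative calculation (the page counts and threshold
below are made up, not taken from any measurement), the background
threshold check in wb_over_bg_thresh() changes like this:

	/* Illustrative only, not part of the patch: why the background
	 * flush fires less often once unstable pages stop counting as dirty.
	 */
	#include <stdio.h>

	int main(void)
	{
		unsigned long nr_file_dirty   = 100;	/* hypothetical */
		unsigned long nr_unstable_nfs = 900;	/* hypothetical */
		unsigned long bg_thresh       = 500;	/* hypothetical */

		/* before: unstable pages were counted towards 'dirty' */
		unsigned long dirty_before = nr_file_dirty + nr_unstable_nfs;	/* 1000 */
		/* after: only genuinely dirty file pages are counted */
		unsigned long dirty_after  = nr_file_dirty;			/*  100 */

		printf("before: over bg_thresh? %s\n",
		       dirty_before > bg_thresh ? "yes" : "no");  /* yes -> frequent small writes */
		printf("after:  over bg_thresh? %s\n",
		       dirty_after > bg_thresh ? "yes" : "no");   /* no  -> wait and batch larger writes */
		return 0;
	}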

Where the NR_UNSTABLE_NFS count was included in statistics virtual
files, the entry is retained, but the value is hard-coded as zero.
Static trace points and warning printks which mentioned this counter no
longer report it.

[akpm@linux-foundation.org: re-layout comment]
[akpm@linux-foundation.org: fix printk warning]
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Acked-by: Michal Hocko <mhocko@suse.com> [mm]
Cc: Christoph Hellwig <hch@lst.de>
Cc: Chuck Lever <chuck.lever@oracle.com>
Link: http://lkml.kernel.org/r/87d06j7gqa.fsf@notabene.neil.brown.name
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
12 files changed:
Documentation/filesystems/proc.rst
drivers/base/node.c
fs/fs-writeback.c
fs/nfs/internal.h
fs/nfs/write.c
fs/proc/meminfo.c
include/linux/mmzone.h
include/trace/events/writeback.h
mm/memcontrol.c
mm/page-writeback.c
mm/page_alloc.c
mm/vmstat.c

diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index 38b606991065b3df4f075bb3120dc9e5db09bf71..092b7b44d158e78c98f34e3afaff037352f65594 100644
@@ -1042,8 +1042,8 @@ PageTables
               amount of memory dedicated to the lowest level of page
               tables.
 NFS_Unstable
-              NFS pages sent to the server, but not yet committed to stable
-             storage
+              Always zero.  Previously counted pages which had been written to
+              the server, but had not been committed to stable storage.
 Bounce
               Memory used for block device "bounce buffers"
 WritebackTmp
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 10d7e818e118d32e9ca37429b10805099c467add..6012574913f79620980b51ccd93c967ec30b066f 100644
@@ -439,7 +439,7 @@ static ssize_t node_read_meminfo(struct device *dev,
                       nid, K(i.sharedram),
                       nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
                       nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
-                      nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
+                      nid, 0UL,
                       nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
                       nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
                       nid, K(sreclaimable +
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 76ac9c7d32ec7ee2e719e48d2956276676810d92..c5bdf46e3b4bc743002e5261a6182d78193e7c6f 100644
@@ -1070,7 +1070,6 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
 static unsigned long get_nr_dirty_pages(void)
 {
        return global_node_page_state(NR_FILE_DIRTY) +
-               global_node_page_state(NR_UNSTABLE_NFS) +
                get_nr_dirty_inodes();
 }
 
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 1f32a9fbfdafbb21c57db8d60097bc1593837499..6673a77884d9daeeee1f11f565ef541779e28fa7 100644
@@ -668,7 +668,8 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize)
 }
 
 /*
- * Record the page as unstable and mark its inode as dirty.
+ * Record the page as unstable (an extra writeback period) and mark its
+ * inode as dirty.
  */
 static inline
 void nfs_mark_page_unstable(struct page *page, struct nfs_commit_info *cinfo)
@@ -676,8 +677,11 @@ void nfs_mark_page_unstable(struct page *page, struct nfs_commit_info *cinfo)
        if (!cinfo->dreq) {
                struct inode *inode = page_file_mapping(page)->host;
 
-               inc_node_page_state(page, NR_UNSTABLE_NFS);
-               inc_wb_stat(&inode_to_bdi(inode)->wb, WB_RECLAIMABLE);
+               /* This page is really still in write-back - just that the
+                * writeback is happening on the server now.
+                */
+               inc_node_page_state(page, NR_WRITEBACK);
+               inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
                __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
        }
 }
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 1e767f779c498c3b57db16d904b4fa25bdb183f1..639c34fec04a8488dd3a5df5b1b1c9ba0f170753 100644
@@ -946,9 +946,9 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
 static void
 nfs_clear_page_commit(struct page *page)
 {
-       dec_node_page_state(page, NR_UNSTABLE_NFS);
+       dec_node_page_state(page, NR_WRITEBACK);
        dec_wb_stat(&inode_to_bdi(page_file_mapping(page)->host)->wb,
-                   WB_RECLAIMABLE);
+                   WB_WRITEBACK);
 }
 
 /* Called holding the request lock on @req */
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 8c1f1bb1a5ce3fd1d9e757a87805fb3df638206c..9bd94b5a96582ef77d462f00eaabbd2b0c77e2a7 100644
@@ -106,8 +106,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
        show_val_kb(m, "PageTables:     ",
                    global_zone_page_state(NR_PAGETABLE));
 
-       show_val_kb(m, "NFS_Unstable:   ",
-                   global_node_page_state(NR_UNSTABLE_NFS));
+       show_val_kb(m, "NFS_Unstable:   ", 0);
        show_val_kb(m, "Bounce:         ",
                    global_zone_page_state(NR_BOUNCE));
        show_val_kb(m, "WritebackTmp:   ",
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1b9de7d220fb7856b71fc54510bdabbfc947d096..a89f47515eb17c047a3945783dfc3d37e61f0679 100644
@@ -193,7 +193,6 @@ enum node_stat_item {
        NR_FILE_THPS,
        NR_FILE_PMDMAPPED,
        NR_ANON_THPS,
-       NR_UNSTABLE_NFS,        /* NFS unstable pages */
        NR_VMSCAN_WRITE,
        NR_VMSCAN_IMMEDIATE,    /* Prioritise for reclaim when writeback ends */
        NR_DIRTIED,             /* page dirtyings since bootup */
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 85a33bea76f1a776584ca36927e2481d88052daa..10f5d1fa73476a9cb4fb5e21b24d25f3c599500b 100644
@@ -541,7 +541,6 @@ TRACE_EVENT(global_dirty_state,
        TP_STRUCT__entry(
                __field(unsigned long,  nr_dirty)
                __field(unsigned long,  nr_writeback)
-               __field(unsigned long,  nr_unstable)
                __field(unsigned long,  background_thresh)
                __field(unsigned long,  dirty_thresh)
                __field(unsigned long,  dirty_limit)
@@ -552,7 +551,6 @@ TRACE_EVENT(global_dirty_state,
        TP_fast_assign(
                __entry->nr_dirty       = global_node_page_state(NR_FILE_DIRTY);
                __entry->nr_writeback   = global_node_page_state(NR_WRITEBACK);
-               __entry->nr_unstable    = global_node_page_state(NR_UNSTABLE_NFS);
                __entry->nr_dirtied     = global_node_page_state(NR_DIRTIED);
                __entry->nr_written     = global_node_page_state(NR_WRITTEN);
                __entry->background_thresh = background_thresh;
@@ -560,12 +558,11 @@ TRACE_EVENT(global_dirty_state,
                __entry->dirty_limit    = global_wb_domain.dirty_limit;
        ),
 
-       TP_printk("dirty=%lu writeback=%lu unstable=%lu "
+       TP_printk("dirty=%lu writeback=%lu "
                  "bg_thresh=%lu thresh=%lu limit=%lu "
                  "dirtied=%lu written=%lu",
                  __entry->nr_dirty,
                  __entry->nr_writeback,
-                 __entry->nr_unstable,
                  __entry->background_thresh,
                  __entry->dirty_thresh,
                  __entry->dirty_limit,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a3b97f10396654b18faa4642dafc4975507da45b..1db4b285c4075eb8d2391fa56bbed2a8b4763ec4 100644
@@ -4330,7 +4330,6 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
 
        *pdirty = memcg_exact_page_state(memcg, NR_FILE_DIRTY);
 
-       /* this should eventually include NR_UNSTABLE_NFS */
        *pwriteback = memcg_exact_page_state(memcg, NR_WRITEBACK);
        *pfilepages = memcg_exact_page_state(memcg, NR_INACTIVE_FILE) +
                        memcg_exact_page_state(memcg, NR_ACTIVE_FILE);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 7ff2290cf43d5d223902f62238d69f34d607c0f0..7185652662577d103d5d275484cf4d485dfbb9f3 100644
@@ -504,7 +504,6 @@ bool node_dirty_ok(struct pglist_data *pgdat)
        unsigned long nr_pages = 0;
 
        nr_pages += node_page_state(pgdat, NR_FILE_DIRTY);
-       nr_pages += node_page_state(pgdat, NR_UNSTABLE_NFS);
        nr_pages += node_page_state(pgdat, NR_WRITEBACK);
 
        return nr_pages <= limit;
@@ -758,7 +757,7 @@ static void mdtc_calc_avail(struct dirty_throttle_control *mdtc,
  * bounded by the bdi->min_ratio and/or bdi->max_ratio parameters, if set.
  *
  * Return: @wb's dirty limit in pages. The term "dirty" in the context of
- * dirty balancing includes all PG_dirty, PG_writeback and NFS unstable pages.
+ * dirty balancing includes all PG_dirty and PG_writeback pages.
  */
 static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
 {
@@ -1566,7 +1565,7 @@ static void balance_dirty_pages(struct bdi_writeback *wb,
        struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
                                                     &mdtc_stor : NULL;
        struct dirty_throttle_control *sdtc;
-       unsigned long nr_reclaimable;   /* = file_dirty + unstable_nfs */
+       unsigned long nr_reclaimable;   /* = file_dirty */
        long period;
        long pause;
        long max_pause;
@@ -1586,14 +1585,7 @@ static void balance_dirty_pages(struct bdi_writeback *wb,
                unsigned long m_thresh = 0;
                unsigned long m_bg_thresh = 0;
 
-               /*
-                * Unstable writes are a feature of certain networked
-                * filesystems (i.e. NFS) in which data may have been
-                * written to the server's write cache, but has not yet
-                * been flushed to permanent storage.
-                */
-               nr_reclaimable = global_node_page_state(NR_FILE_DIRTY) +
-                                       global_node_page_state(NR_UNSTABLE_NFS);
+               nr_reclaimable = global_node_page_state(NR_FILE_DIRTY);
                gdtc->avail = global_dirtyable_memory();
                gdtc->dirty = nr_reclaimable + global_node_page_state(NR_WRITEBACK);
 
@@ -1963,8 +1955,7 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
         * as we're trying to decide whether to put more under writeback.
         */
        gdtc->avail = global_dirtyable_memory();
-       gdtc->dirty = global_node_page_state(NR_FILE_DIRTY) +
-                     global_node_page_state(NR_UNSTABLE_NFS);
+       gdtc->dirty = global_node_page_state(NR_FILE_DIRTY);
        domain_dirty_limits(gdtc);
 
        if (gdtc->dirty > gdtc->bg_thresh)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 13cc653122b73278afaeb6054539c619b164d11a..cc406ee17ad9604722ba773d47ea24061b4cb5fc 100644
@@ -5319,7 +5319,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 
        printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
                " active_file:%lu inactive_file:%lu isolated_file:%lu\n"
-               " unevictable:%lu dirty:%lu writeback:%lu unstable:%lu\n"
+               " unevictable:%lu dirty:%lu writeback:%lu\n"
                " slab_reclaimable:%lu slab_unreclaimable:%lu\n"
                " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n"
                " free:%lu free_pcp:%lu free_cma:%lu\n",
@@ -5332,7 +5332,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                global_node_page_state(NR_UNEVICTABLE),
                global_node_page_state(NR_FILE_DIRTY),
                global_node_page_state(NR_WRITEBACK),
-               global_node_page_state(NR_UNSTABLE_NFS),
                global_node_page_state(NR_SLAB_RECLAIMABLE),
                global_node_page_state(NR_SLAB_UNRECLAIMABLE),
                global_node_page_state(NR_FILE_MAPPED),
@@ -5365,7 +5364,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                        " anon_thp: %lukB"
 #endif
                        " writeback_tmp:%lukB"
-                       " unstable:%lukB"
                        " all_unreclaimable? %s"
                        "\n",
                        pgdat->node_id,
@@ -5387,7 +5385,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                        K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR),
 #endif
                        K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
-                       K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
                        pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
                                "yes" : "no");
        }
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 96d21a792b57c35ad59f1f062b7efedf9f59a34c..b1582fdf757ce7f8a6d21462a23e8688cc1034aa 100644
@@ -1108,7 +1108,7 @@ int fragmentation_index(struct zone *zone, unsigned int order)
                                        TEXT_FOR_HIGHMEM(xx) xx "_movable",
 
 const char * const vmstat_text[] = {
-       /* enum zone_stat_item countes */
+       /* enum zone_stat_item counters */
        "nr_free_pages",
        "nr_zone_inactive_anon",
        "nr_zone_active_anon",
@@ -1162,7 +1162,6 @@ const char * const vmstat_text[] = {
        "nr_file_hugepages",
        "nr_file_pmdmapped",
        "nr_anon_transparent_hugepages",
-       "nr_unstable",
        "nr_vmscan_write",
        "nr_vmscan_immediate_reclaim",
        "nr_dirtied",
@@ -1723,6 +1722,14 @@ static int vmstat_show(struct seq_file *m, void *arg)
        seq_puts(m, vmstat_text[off]);
        seq_put_decimal_ull(m, " ", *l);
        seq_putc(m, '\n');
+
+       if (off == NR_VMSTAT_ITEMS - 1) {
+               /*
+                * We've come to the end - add any deprecated counters to avoid
+                * breaking userspace which might depend on them being present.
+                */
+               seq_puts(m, "nr_unstable 0\n");
+       }
        return 0;
 }
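
For completeness, a minimal userspace check (hypothetical, not part of
the patch) confirming that the deprecated counter is still present in
/proc/vmstat and now always reads zero:

	/* Hypothetical userspace check, not part of the patch: confirm that
	 * /proc/vmstat still carries an "nr_unstable" line (now always 0).
	 */
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char line[256];
		FILE *f = fopen("/proc/vmstat", "r");

		if (!f) {
			perror("/proc/vmstat");
			return 1;
		}
		while (fgets(line, sizeof(line), f)) {
			if (strncmp(line, "nr_unstable ", strlen("nr_unstable ")) == 0) {
				fputs(line, stdout);	/* expected output: "nr_unstable 0" */
				break;
			}
		}
		fclose(f);
		return 0;
	}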