diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 586f31261c8328e30106254e09e52fa6e93f410e..214f006503678e7b8bfcae79738b694df1bee116 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2149,6 +2149,13 @@ EXPORT_SYMBOL(tag_pages_for_writeback);
  * not miss some pages (e.g., because some other process has cleared TOWRITE
  * tag we set). The rule we follow is that TOWRITE tag can be cleared only
  * by the process clearing the DIRTY tag (and submitting the page for IO).
+ *
+ * To avoid deadlocks between range_cyclic writeback and callers that hold
+ * pages in PageWriteback to aggregate IO until write_cache_pages() returns,
+ * we do not loop back to the start of the file. Doing so causes a page
+ * lock/page writeback access order inversion - we should only ever lock
+ * multiple pages in ascending page->index order, and looping back to the start
+ * of the file violates that rule and causes deadlocks.
  */
 int write_cache_pages(struct address_space *mapping,
                      struct writeback_control *wbc, writepage_t writepage,
@@ -2156,13 +2163,13 @@ int write_cache_pages(struct address_space *mapping,
 {
        int ret = 0;
        int done = 0;
+       int error;
        struct pagevec pvec;
        int nr_pages;
        pgoff_t uninitialized_var(writeback_index);
        pgoff_t index;
        pgoff_t end;            /* Inclusive */
        pgoff_t done_index;
-       int cycled;
        int range_whole = 0;
        int tag;
 
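
A minimal illustration of the rule stated by the new comment above write_cache_pages() (the helper below is hypothetical and not part of this patch): whenever more than one page lock is held at once, the locks must be taken in ascending page->index order, which is exactly what wrapping back to index 0 while later pages are still held under PageWriteback would violate.

#include <linux/kernel.h>	/* swap() */
#include <linux/mm_types.h>	/* struct page */
#include <linux/pagemap.h>	/* lock_page() */

/*
 * Hypothetical helper, for illustration only: take two page locks in
 * the ascending page->index order the rule requires.
 */
static void lock_pages_in_index_order(struct page *a, struct page *b)
{
	if (a->index > b->index)
		swap(a, b);		/* make @a the lower index */
	lock_page(a);			/* lower page->index first */
	lock_page(b);			/* then the higher one */
}
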
@@ -2170,23 +2177,17 @@ int write_cache_pages(struct address_space *mapping,
        if (wbc->range_cyclic) {
                writeback_index = mapping->writeback_index; /* prev offset */
                index = writeback_index;
-               if (index == 0)
-                       cycled = 1;
-               else
-                       cycled = 0;
                end = -1;
        } else {
                index = wbc->range_start >> PAGE_SHIFT;
                end = wbc->range_end >> PAGE_SHIFT;
                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
                        range_whole = 1;
-               cycled = 1; /* ignore range_cyclic tests */
        }
        if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
                tag = PAGECACHE_TAG_TOWRITE;
        else
                tag = PAGECACHE_TAG_DIRTY;
-retry:
        if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
                tag_pages_for_writeback(mapping, index, end);
        done_index = index;
@@ -2236,25 +2237,31 @@ continue_unlock:
                                goto continue_unlock;
 
                        trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
-                       ret = (*writepage)(page, wbc, data);
-                       if (unlikely(ret)) {
-                               if (ret == AOP_WRITEPAGE_ACTIVATE) {
+                       error = (*writepage)(page, wbc, data);
+                       if (unlikely(error)) {
+                               /*
+                                * Handle errors according to the type of
+                                * writeback. There's no need to continue for
+                                * background writeback. Just push done_index
+                                * past this page so media errors won't choke
+                                * writeout for the entire file. For integrity
+                                * writeback, we must process the entire dirty
+                                * set regardless of errors because the fs may
+                                * still have state to clear for each page. In
+                                * that case we continue processing and return
+                                * the first error.
+                                */
+                               if (error == AOP_WRITEPAGE_ACTIVATE) {
                                        unlock_page(page);
-                                       ret = 0;
-                               } else {
-                                       /*
-                                        * done_index is set past this page,
-                                        * so media errors will not choke
-                                        * background writeout for the entire
-                                        * file. This has consequences for
-                                        * range_cyclic semantics (ie. it may
-                                        * not be suitable for data integrity
-                                        * writeout).
-                                        */
+                                       error = 0;
+                               } else if (wbc->sync_mode != WB_SYNC_ALL) {
+                                       ret = error;
                                        done_index = page->index + 1;
                                        done = 1;
                                        break;
                                }
+                               if (!ret)
+                                       ret = error;
                        }
 
                        /*
@@ -2272,17 +2279,14 @@ continue_unlock:
                pagevec_release(&pvec);
                cond_resched();
        }
-       if (!cycled && !done) {
-               /*
-                * range_cyclic:
-                * We hit the last page and there is more work to be done: wrap
-                * back to the start of the file
-                */
-               cycled = 1;
-               index = 0;
-               end = writeback_index - 1;
-               goto retry;
-       }
+
+       /*
+        * If we hit the last page and there is more work to be done, wrap
+        * the index back to the start of the file for the next time we
+        * are called.
+        */
+       if (wbc->range_cyclic && !done)
+               done_index = 0;
        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
                mapping->writeback_index = done_index;
 
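
From the caller's side, the error handling described in the comment added to the writepage call above works out as follows: with WB_SYNC_NONE the loop stops at the first ->writepage error but still pushes done_index past the failing page, while with WB_SYNC_ALL the whole dirty set is processed and the first error is returned. A hedged sketch of an integrity-writeback caller (the function name below is an assumption, not part of this patch):

#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/writeback.h>

/*
 * Hypothetical integrity-writeback caller: every dirty page is
 * attempted, and the first ->writepage error is what gets returned.
 */
static int example_write_whole_file(struct address_space *mapping,
				    writepage_t writepage, void *data)
{
	struct writeback_control wbc = {
		.sync_mode	= WB_SYNC_ALL,
		.nr_to_write	= LONG_MAX,
		.range_start	= 0,
		.range_end	= LLONG_MAX,
	};

	return write_cache_pages(mapping, &wbc, writepage, data);
}
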
@@ -2501,13 +2505,13 @@ void account_page_redirty(struct page *page)
        if (mapping && mapping_cap_account_dirty(mapping)) {
                struct inode *inode = mapping->host;
                struct bdi_writeback *wb;
-               bool locked;
+               struct wb_lock_cookie cookie = {};
 
-               wb = unlocked_inode_to_wb_begin(inode, &locked);
+               wb = unlocked_inode_to_wb_begin(inode, &cookie);
                current->nr_dirtied--;
                dec_node_page_state(page, NR_DIRTIED);
                dec_wb_stat(wb, WB_DIRTIED);
-               unlocked_inode_to_wb_end(inode, locked);
+               unlocked_inode_to_wb_end(inode, &cookie);
        }
 }
 EXPORT_SYMBOL(account_page_redirty);
@@ -2613,15 +2617,15 @@ void __cancel_dirty_page(struct page *page)
        if (mapping_cap_account_dirty(mapping)) {
                struct inode *inode = mapping->host;
                struct bdi_writeback *wb;
-               bool locked;
+               struct wb_lock_cookie cookie = {};
 
                lock_page_memcg(page);
-               wb = unlocked_inode_to_wb_begin(inode, &locked);
+               wb = unlocked_inode_to_wb_begin(inode, &cookie);
 
                if (TestClearPageDirty(page))
                        account_page_cleaned(page, mapping, wb);
 
-               unlocked_inode_to_wb_end(inode, locked);
+               unlocked_inode_to_wb_end(inode, &cookie);
                unlock_page_memcg(page);
        } else {
                ClearPageDirty(page);
@@ -2653,7 +2657,7 @@ int clear_page_dirty_for_io(struct page *page)
        if (mapping && mapping_cap_account_dirty(mapping)) {
                struct inode *inode = mapping->host;
                struct bdi_writeback *wb;
-               bool locked;
+               struct wb_lock_cookie cookie = {};
 
                /*
                 * Yes, Virginia, this is indeed insane.
@@ -2690,14 +2694,14 @@ int clear_page_dirty_for_io(struct page *page)
                 * always locked coming in here, so we get the desired
                 * exclusion.
                 */
-               wb = unlocked_inode_to_wb_begin(inode, &locked);
+               wb = unlocked_inode_to_wb_begin(inode, &cookie);
                if (TestClearPageDirty(page)) {
                        dec_lruvec_page_state(page, NR_FILE_DIRTY);
                        dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
                        dec_wb_stat(wb, WB_RECLAIMABLE);
                        ret = 1;
                }
-               unlocked_inode_to_wb_end(inode, locked);
+               unlocked_inode_to_wb_end(inode, &cookie);
                return ret;
        }
        return TestClearPageDirty(page);
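
The remaining hunks all apply the same conversion: unlocked_inode_to_wb_begin()/unlocked_inode_to_wb_end() now pass their locking state through a struct wb_lock_cookie instead of a bare bool. A minimal sketch of the resulting pattern (the function below is an assumed example, not taken from this patch):

#include <linux/backing-dev.h>
#include <linux/fs.h>
#include <linux/mm_types.h>

/*
 * Hypothetical stat update: pin the page's inode-to-writeback
 * association, record what was locked in the cookie, and hand the
 * same cookie back when done.
 */
static void example_wb_stat_update(struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct bdi_writeback *wb;
	struct wb_lock_cookie cookie = {};

	wb = unlocked_inode_to_wb_begin(inode, &cookie);
	/* ... update per-writeback statistics against @wb here ... */
	unlocked_inode_to_wb_end(inode, &cookie);
}
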