UBUNTU: Ubuntu-4.15.0-96.97

[mirror_ubuntu-bionic-kernel.git] / mm / page-writeback.c
diff --git a/mm/page-writeback.c b/mm/page-writeback.c

index 586f31261c8328e30106254e09e52fa6e93f410e..710ce835595ef1142d51a3ef1b0f89f02ddef2b8 100644 (file)
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -201,11 +201,11 @@ static void wb_min_max_ratio(struct bdi_writeback *wb,
         if (this_bw < tot_bw) {
                 if (min) {
                         min *= this_bw;
-                       do_div(min, tot_bw);
+                       min = div64_ul(min, tot_bw);
                 }
                 if (max < 100) {
                         max *= this_bw;
-                       do_div(max, tot_bw);
+                       max = div64_ul(max, tot_bw);
                 }
         }
  
@@ -2149,6 +2149,13 @@ EXPORT_SYMBOL(tag_pages_for_writeback);
   * not miss some pages (e.g., because some other process has cleared TOWRITE
   * tag we set). The rule we follow is that TOWRITE tag can be cleared only
   * by the process clearing the DIRTY tag (and submitting the page for IO).
+ *
+ * To avoid deadlocks between range_cyclic writeback and callers that hold
+ * pages in PageWriteback to aggregate IO until write_cache_pages() returns,
+ * we do not loop back to the start of the file. Doing so causes a page
+ * lock/page writeback access order inversion - we should only ever lock
+ * multiple pages in ascending page->index order, and looping back to the start
+ * of the file violates that rule and causes deadlocks.
   */
  int write_cache_pages(struct address_space *mapping,
                       struct writeback_control *wbc, writepage_t writepage,
@@ -2156,13 +2163,13 @@ int write_cache_pages(struct address_space *mapping,
  {
         int ret = 0;
         int done = 0;
+       int error;
         struct pagevec pvec;
         int nr_pages;
         pgoff_t uninitialized_var(writeback_index);
         pgoff_t index;
         pgoff_t end;            /* Inclusive */
         pgoff_t done_index;
-       int cycled;
         int range_whole = 0;
         int tag;
  
@@ -2170,23 +2177,17 @@ int write_cache_pages(struct address_space *mapping,
         if (wbc->range_cyclic) {
                 writeback_index = mapping->writeback_index; /* prev offset */
                 index = writeback_index;
-               if (index == 0)
-                       cycled = 1;
-               else
-                       cycled = 0;
                 end = -1;
         } else {
                 index = wbc->range_start >> PAGE_SHIFT;
                 end = wbc->range_end >> PAGE_SHIFT;
                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
                         range_whole = 1;
-               cycled = 1; /* ignore range_cyclic tests */
         }
         if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
                 tag = PAGECACHE_TAG_TOWRITE;
         else
                 tag = PAGECACHE_TAG_DIRTY;
-retry:
         if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
                 tag_pages_for_writeback(mapping, index, end);
         done_index = index;
@@ -2236,25 +2237,31 @@ continue_unlock:
                                 goto continue_unlock;
  
                         trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
-                       ret = (*writepage)(page, wbc, data);
-                       if (unlikely(ret)) {
-                               if (ret == AOP_WRITEPAGE_ACTIVATE) {
+                       error = (*writepage)(page, wbc, data);
+                       if (unlikely(error)) {
+                               /*
+                                * Handle errors according to the type of
+                                * writeback. There's no need to continue for
+                                * background writeback. Just push done_index
+                                * past this page so media errors won't choke
+                                * writeout for the entire file. For integrity
+                                * writeback, we must process the entire dirty
+                                * set regardless of errors because the fs may
+                                * still have state to clear for each page. In
+                                * that case we continue processing and return
+                                * the first error.
+                                */
+                               if (error == AOP_WRITEPAGE_ACTIVATE) {
                                         unlock_page(page);
-                                       ret = 0;
-                               } else {
-                                       /*
-                                        * done_index is set past this page,
-                                        * so media errors will not choke
-                                        * background writeout for the entire
-                                        * file. This has consequences for
-                                        * range_cyclic semantics (ie. it may
-                                        * not be suitable for data integrity
-                                        * writeout).
-                                        */
+                                       error = 0;
+                               } else if (wbc->sync_mode != WB_SYNC_ALL) {
+                                       ret = error;
                                         done_index = page->index + 1;
                                         done = 1;
                                         break;
                                 }
+                               if (!ret)
+                                       ret = error;
                         }
  
                         /*
@@ -2272,17 +2279,14 @@ continue_unlock:
                 pagevec_release(&pvec);
                 cond_resched();
         }
-       if (!cycled && !done) {
-               /*
-                * range_cyclic:
-                * We hit the last page and there is more work to be done: wrap
-                * back to the start of the file
-                */
-               cycled = 1;
-               index = 0;
-               end = writeback_index - 1;
-               goto retry;
-       }
+
+       /*
+        * If we hit the last page and there is more work to be done: wrap
+        * the index back to the start of the file for the next time we are
+        * called.
+        */
+       if (wbc->range_cyclic && !done)
+               done_index = 0;
         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
                 mapping->writeback_index = done_index;
  
@@ -2501,13 +2505,13 @@ void account_page_redirty(struct page *page)
         if (mapping && mapping_cap_account_dirty(mapping)) {
                 struct inode *inode = mapping->host;
                 struct bdi_writeback *wb;
-               bool locked;
+               struct wb_lock_cookie cookie = {};
  
-               wb = unlocked_inode_to_wb_begin(inode, &locked);
+               wb = unlocked_inode_to_wb_begin(inode, &cookie);
                 current->nr_dirtied--;
                 dec_node_page_state(page, NR_DIRTIED);
                 dec_wb_stat(wb, WB_DIRTIED);
-               unlocked_inode_to_wb_end(inode, locked);
+               unlocked_inode_to_wb_end(inode, &cookie);
         }
  }
  EXPORT_SYMBOL(account_page_redirty);
@@ -2613,15 +2617,15 @@ void __cancel_dirty_page(struct page *page)
         if (mapping_cap_account_dirty(mapping)) {
                 struct inode *inode = mapping->host;
                 struct bdi_writeback *wb;
-               bool locked;
+               struct wb_lock_cookie cookie = {};
  
                 lock_page_memcg(page);
-               wb = unlocked_inode_to_wb_begin(inode, &locked);
+               wb = unlocked_inode_to_wb_begin(inode, &cookie);
  
                 if (TestClearPageDirty(page))
                         account_page_cleaned(page, mapping, wb);
  
-               unlocked_inode_to_wb_end(inode, locked);
+               unlocked_inode_to_wb_end(inode, &cookie);
                 unlock_page_memcg(page);
         } else {
                 ClearPageDirty(page);
@@ -2653,7 +2657,7 @@ int clear_page_dirty_for_io(struct page *page)
         if (mapping && mapping_cap_account_dirty(mapping)) {
                 struct inode *inode = mapping->host;
                 struct bdi_writeback *wb;
-               bool locked;
+               struct wb_lock_cookie cookie = {};
  
                 /*
                  * Yes, Virginia, this is indeed insane.
@@ -2690,14 +2694,14 @@ int clear_page_dirty_for_io(struct page *page)
                  * always locked coming in here, so we get the desired
                  * exclusion.
                  */
-               wb = unlocked_inode_to_wb_begin(inode, &locked);
+               wb = unlocked_inode_to_wb_begin(inode, &cookie);
                 if (TestClearPageDirty(page)) {
                         dec_lruvec_page_state(page, NR_FILE_DIRTY);
                         dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
                         dec_wb_stat(wb, WB_RECLAIMABLE);
                         ret = 1;
                 }
-               unlocked_inode_to_wb_end(inode, locked);
+               unlocked_inode_to_wb_end(inode, &cookie);
                 return ret;
         }
         return TestClearPageDirty(page);