Merge tag 'extcon-fixes-for-4.6-rc3' of git://git.kernel.org/pub/scm/linux/kernel...

[mirror_ubuntu-bionic-kernel.git] / fs / btrfs / tree-log.c
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c

index 978c3a8108936381309de4681e90400aeb86874f..24d03c751149f56e484e32c3b1120dceb495b299 100644 (file)
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -26,6 +26,7 @@
  #include "print-tree.h"
  #include "backref.h"
  #include "hash.h"
+#include "compression.h"
  
  /* magic values for the inode_only field in btrfs_log_inode:
   *
@@ -1045,7 +1046,7 @@ again:
  
                 /*
                  * NOTE: we have searched root tree and checked the
-                * coresponding ref, it does not need to check again.
+                * corresponding ref, it does not need to check again.
                  */
                 *search_done = 1;
         }
@@ -4500,7 +4501,22 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
  
         mutex_lock(&BTRFS_I(inode)->log_mutex);
  
-       btrfs_get_logged_extents(inode, &logged_list, start, end);
+       /*
+        * Collect ordered extents only if we are logging data. This is to
+        * ensure a subsequent request to log this inode in LOG_INODE_ALL mode
+        * will process the ordered extents if they still exist at the time,
+        * because when we collect them we test and set for the flag
+        * BTRFS_ORDERED_LOGGED to prevent multiple log requests to process the
+        * same ordered extents. The consequence for the LOG_INODE_ALL log mode
+        * not processing the ordered extents is that we end up logging the
+        * corresponding file extent items, based on the extent maps in the
+        * inode's extent_map_tree's modified_list, without logging the
+        * respective checksums (since they may still be only attached to the
+        * ordered extents and have not been inserted in the csum tree by
+        * btrfs_finish_ordered_io() yet).
+        */
+       if (inode_only == LOG_INODE_ALL)
+               btrfs_get_logged_extents(inode, &logged_list, start, end);
  
         /*
          * a brute force approach to making sure we get the most uptodate
@@ -4771,6 +4787,42 @@ out_unlock:
         return err;
  }
  
+/*
+ * Check if we must fall back to a transaction commit when logging an inode.
+ * This must be called after logging the inode and is used only in the context
+ * when fsyncing an inode requires logging some other inode - in which
+ * case we can't lock the i_mutex of each other inode we need to log as that
+ * can lead to deadlocks with concurrent fsync against other inodes (as we can
+ * log inodes up or down in the hierarchy) or rename operations for example. So
+ * we take the log_mutex of the inode after we have logged it and then check for
+ * its last_unlink_trans value - this is safe because any task setting
+ * last_unlink_trans must take the log_mutex and it must do this before it does
+ * the actual unlink operation, so if we do this check before a concurrent task
+ * sets last_unlink_trans it means we've logged a consistent version/state of
+ * all the inode items, otherwise we are not sure and must do a transaction
+ * commit (the concurrent task might have only updated last_unlink_trans before
+ * we logged the inode or it might have also done the unlink).
+ */
+static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans,
+                                         struct inode *inode)
+{
+       struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+       bool ret = false;
+
+       mutex_lock(&BTRFS_I(inode)->log_mutex);
+       if (BTRFS_I(inode)->last_unlink_trans > fs_info->last_trans_committed) {
+               /*
+                * Make sure any commits to the log are forced to be full
+                * commits.
+                */
+               btrfs_set_log_full_commit(fs_info, trans);
+               ret = true;
+       }
+       mutex_unlock(&BTRFS_I(inode)->log_mutex);
+
+       return ret;
+}
+
  /*
   * follow the dentry parent pointers up the chain and see if any
   * of the directories in it require a full commit before they can
@@ -4784,7 +4836,6 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
                                                u64 last_committed)
  {
         int ret = 0;
-       struct btrfs_root *root;
         struct dentry *old_parent = NULL;
         struct inode *orig_inode = inode;
  
@@ -4816,14 +4867,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
                         BTRFS_I(inode)->logged_trans = trans->transid;
                 smp_mb();
  
-               if (BTRFS_I(inode)->last_unlink_trans > last_committed) {
-                       root = BTRFS_I(inode)->root;
-
-                       /*
-                        * make sure any commits to the log are forced
-                        * to be full commits
-                        */
-                       btrfs_set_log_full_commit(root->fs_info, trans);
+               if (btrfs_must_commit_transaction(trans, inode)) {
                         ret = 1;
                         break;
                 }
@@ -4982,6 +5026,9 @@ process_leaf:
                         btrfs_release_path(path);
                         ret = btrfs_log_inode(trans, root, di_inode,
                                               log_mode, 0, LLONG_MAX, ctx);
+                       if (!ret &&
+                           btrfs_must_commit_transaction(trans, di_inode))
+                               ret = 1;
                         iput(di_inode);
                         if (ret)
                                 goto next_dir_inode;
@@ -5096,6 +5143,9 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
  
                         ret = btrfs_log_inode(trans, root, dir_inode,
                                               LOG_INODE_ALL, 0, LLONG_MAX, ctx);
+                       if (!ret &&
+                           btrfs_must_commit_transaction(trans, dir_inode))
+                               ret = 1;
                         iput(dir_inode);
                         if (ret)
                                 goto out;
@@ -5447,6 +5497,9 @@ error:
   * They revolve around files there were unlinked from the directory, and
   * this function updates the parent directory so that a full commit is
   * properly done if it is fsync'd later after the unlinks are done.
+ *
+ * Must be called before the unlink operations (updates to the subvolume tree,
+ * inodes, etc) are done.
   */
  void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
                              struct inode *dir, struct inode *inode,
@@ -5462,8 +5515,11 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
          * into the file.  When the file is logged we check it and
          * don't log the parents if the file is fully on disk.
          */
-       if (S_ISREG(inode->i_mode))
+       if (S_ISREG(inode->i_mode)) {
+               mutex_lock(&BTRFS_I(inode)->log_mutex);
                 BTRFS_I(inode)->last_unlink_trans = trans->transid;
+               mutex_unlock(&BTRFS_I(inode)->log_mutex);
+       }
  
         /*
          * if this directory was already logged any new
@@ -5494,7 +5550,29 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
         return;
  
  record:
+       mutex_lock(&BTRFS_I(dir)->log_mutex);
+       BTRFS_I(dir)->last_unlink_trans = trans->transid;
+       mutex_unlock(&BTRFS_I(dir)->log_mutex);
+}
+
+/*
+ * Make sure that if someone attempts to fsync the parent directory of a deleted
+ * snapshot, it ends up triggering a transaction commit. This is to guarantee
+ * that after replaying the log tree of the parent directory's root we will not
+ * see the snapshot anymore and at log replay time we will not see any log tree
+ * corresponding to the deleted snapshot's root, which could lead to replaying
+ * it after replaying the log tree of the parent directory (which would replay
+ * the snapshot delete operation).
+ *
+ * Must be called before the actual snapshot destroy operation (updates to the
+ * parent root and tree of tree roots, etc) is done.
+ */
+void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
+                                  struct inode *dir)
+{
+       mutex_lock(&BTRFS_I(dir)->log_mutex);
         BTRFS_I(dir)->last_unlink_trans = trans->transid;
+       mutex_unlock(&BTRFS_I(dir)->log_mutex);
  }
  
  /*