git.proxmox.com Git - zfsonlinux.git/blame - zfs-patches/0056-Revert-Handle-zap_add-failures-in-mixed.patch
update ZFS submodule to debian/0.7.9-2
[zfsonlinux.git] / zfs-patches / 0056-Revert-Handle-zap_add-failures-in-mixed.patch
Commit Line Data
fd313b30
FG
1From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2From: Tony Hutter <hutter2@llnl.gov>
3Date: Mon, 9 Apr 2018 14:24:46 -0700
4Subject: [PATCH] Revert "Handle zap_add() failures in mixed ... "
5MIME-Version: 1.0
6Content-Type: text/plain; charset=UTF-8
7Content-Transfer-Encoding: 8bit
8
9This reverts commit cc63068e95ee725cce03b1b7ce50179825a6cda5.
10
11Under certain circumstances this change can result in an ENOSPC
12error when adding new files to a directory. See #7401 for full
13details.
14
15Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
16Signed-off-by: Tony Hutter <hutter2@llnl.gov>
17Issue #7401
18Closes #7416
19(cherry picked from commit 9a2e90c9fc469d377c14eb863952261f9ec12d2c)
20Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
21---
22 .../tests/functional/casenorm/Makefile.am | 1 -
23 include/sys/zap_leaf.h | 15 +--
24 module/zfs/zap.c | 25 +---
25 module/zfs/zap_leaf.c | 2 +-
26 module/zfs/zap_micro.c | 38 +-----
27 module/zfs/zfs_dir.c | 29 +----
28 module/zfs/zfs_vnops.c | 73 +++--------
29 tests/runfiles/linux.run | 2 +-
30 .../functional/casenorm/mixed_create_failure.ksh | 136 ---------------------
31 9 files changed, 32 insertions(+), 289 deletions(-)
32 delete mode 100755 tests/zfs-tests/tests/functional/casenorm/mixed_create_failure.ksh
33
34diff --git a/tests/zfs-tests/tests/functional/casenorm/Makefile.am b/tests/zfs-tests/tests/functional/casenorm/Makefile.am
35index 00cb59074..00a19c7ff 100644
36--- a/tests/zfs-tests/tests/functional/casenorm/Makefile.am
37+++ b/tests/zfs-tests/tests/functional/casenorm/Makefile.am
38@@ -9,7 +9,6 @@ dist_pkgdata_SCRIPTS = \
39 insensitive_formd_lookup.ksh \
40 insensitive_none_delete.ksh \
41 insensitive_none_lookup.ksh \
42- mixed_create_failure.ksh \
43 mixed_formd_delete.ksh \
44 mixed_formd_lookup_ci.ksh \
45 mixed_formd_lookup.ksh \
46diff --git a/include/sys/zap_leaf.h b/include/sys/zap_leaf.h
47index a3da1036a..e784c5963 100644
48--- a/include/sys/zap_leaf.h
49+++ b/include/sys/zap_leaf.h
50@@ -46,15 +46,10 @@ struct zap_stats;
51 * block size (1<<l->l_bs) - hash entry size (2) * number of hash
52 * entries - header space (2*chunksize)
53 */
54-#define ZAP_LEAF_NUMCHUNKS_BS(bs) \
55- (((1<<(bs)) - 2*ZAP_LEAF_HASH_NUMENTRIES_BS(bs)) / \
56+#define ZAP_LEAF_NUMCHUNKS(l) \
57+ (((1<<(l)->l_bs) - 2*ZAP_LEAF_HASH_NUMENTRIES(l)) / \
58 ZAP_LEAF_CHUNKSIZE - 2)
59
60-#define ZAP_LEAF_NUMCHUNKS(l) (ZAP_LEAF_NUMCHUNKS_BS(((l)->l_bs)))
61-
62-#define ZAP_LEAF_NUMCHUNKS_DEF \
63- (ZAP_LEAF_NUMCHUNKS_BS(fzap_default_block_shift))
64-
65 /*
66 * The amount of space within the chunk available for the array is:
67 * chunk size - space for type (1) - space for next pointer (2)
68@@ -79,10 +74,8 @@ struct zap_stats;
69 * which is less than block size / CHUNKSIZE (24) / minimum number of
70 * chunks per entry (3).
71 */
72-#define ZAP_LEAF_HASH_SHIFT_BS(bs) ((bs) - 5)
73-#define ZAP_LEAF_HASH_NUMENTRIES_BS(bs) (1 << ZAP_LEAF_HASH_SHIFT_BS(bs))
74-#define ZAP_LEAF_HASH_SHIFT(l) (ZAP_LEAF_HASH_SHIFT_BS(((l)->l_bs)))
75-#define ZAP_LEAF_HASH_NUMENTRIES(l) (ZAP_LEAF_HASH_NUMENTRIES_BS(((l)->l_bs)))
76+#define ZAP_LEAF_HASH_SHIFT(l) ((l)->l_bs - 5)
77+#define ZAP_LEAF_HASH_NUMENTRIES(l) (1 << ZAP_LEAF_HASH_SHIFT(l))
78
79 /*
80 * The chunks start immediately after the hash table. The end of the
81diff --git a/module/zfs/zap.c b/module/zfs/zap.c
82index 9843d8c50..ee9962bff 100644
83--- a/module/zfs/zap.c
84+++ b/module/zfs/zap.c
85@@ -819,19 +819,15 @@ fzap_lookup(zap_name_t *zn,
86 return (err);
87 }
88
89-#define MAX_EXPAND_RETRIES 2
90-
91 int
92 fzap_add_cd(zap_name_t *zn,
93 uint64_t integer_size, uint64_t num_integers,
94 const void *val, uint32_t cd, void *tag, dmu_tx_t *tx)
95 {
96 zap_leaf_t *l;
97- zap_leaf_t *prev_l = NULL;
98 int err;
99 zap_entry_handle_t zeh;
100 zap_t *zap = zn->zn_zap;
101- int expand_retries = 0;
102
103 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
104 ASSERT(!zap->zap_ismicro);
105@@ -855,29 +851,10 @@ retry:
106 if (err == 0) {
107 zap_increment_num_entries(zap, 1, tx);
108 } else if (err == EAGAIN) {
109- /*
110- * If the last two expansions did not help, there is no point
111- * trying to expand again
112- */
113- if (expand_retries > MAX_EXPAND_RETRIES && prev_l == l) {
114- err = SET_ERROR(ENOSPC);
115- goto out;
116- }
117-
118 err = zap_expand_leaf(zn, l, tag, tx, &l);
119 zap = zn->zn_zap; /* zap_expand_leaf() may change zap */
120- if (err == 0) {
121- prev_l = l;
122- expand_retries++;
123+ if (err == 0)
124 goto retry;
125- } else if (err == ENOSPC) {
126- /*
127- * If we failed to expand the leaf, then bailout
128- * as there is no point trying
129- * zap_put_leaf_maybe_grow_ptrtbl().
130- */
131- return (err);
132- }
133 }
134
135 out:
136diff --git a/module/zfs/zap_leaf.c b/module/zfs/zap_leaf.c
137index 526e46606..c342695c7 100644
138--- a/module/zfs/zap_leaf.c
139+++ b/module/zfs/zap_leaf.c
140@@ -53,7 +53,7 @@ static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry);
141 ((h) >> \
142 (64 - ZAP_LEAF_HASH_SHIFT(l) - zap_leaf_phys(l)->l_hdr.lh_prefix_len)))
143
144-#define LEAF_HASH_ENTPTR(l, h) (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)])
145+#define LEAF_HASH_ENTPTR(l, h) (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)])
146
147 extern inline zap_leaf_phys_t *zap_leaf_phys(zap_leaf_t *l);
148
149diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c
150index 34bef3e63..3ebf995c6 100644
151--- a/module/zfs/zap_micro.c
152+++ b/module/zfs/zap_micro.c
153@@ -363,41 +363,6 @@ mze_find_unused_cd(zap_t *zap, uint64_t hash)
154 return (cd);
155 }
156
157-/*
158- * Each mzap entry requires at max : 4 chunks
159- * 3 chunks for names + 1 chunk for value.
160- */
161-#define MZAP_ENT_CHUNKS (1 + ZAP_LEAF_ARRAY_NCHUNKS(MZAP_NAME_LEN) + \
162- ZAP_LEAF_ARRAY_NCHUNKS(sizeof (uint64_t)))
163-
164-/*
165- * Check if the current entry keeps the colliding entries under the fatzap leaf
166- * size.
167- */
168-static boolean_t
169-mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash)
170-{
171- zap_t *zap = zn->zn_zap;
172- mzap_ent_t mze_tofind;
173- mzap_ent_t *mze;
174- avl_index_t idx;
175- avl_tree_t *avl = &zap->zap_m.zap_avl;
176- uint32_t mzap_ents = 0;
177-
178- mze_tofind.mze_hash = hash;
179- mze_tofind.mze_cd = 0;
180-
181- for (mze = avl_find(avl, &mze_tofind, &idx);
182- mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
183- mzap_ents++;
184- }
185-
186- /* Include the new entry being added */
187- mzap_ents++;
188-
189- return (ZAP_LEAF_NUMCHUNKS_DEF > (mzap_ents * MZAP_ENT_CHUNKS));
190-}
191-
192 static void
193 mze_remove(zap_t *zap, mzap_ent_t *mze)
194 {
195@@ -1226,8 +1191,7 @@ zap_add_impl(zap_t *zap, const char *key,
196 err = fzap_add(zn, integer_size, num_integers, val, tag, tx);
197 zap = zn->zn_zap; /* fzap_add() may change zap */
198 } else if (integer_size != 8 || num_integers != 1 ||
199- strlen(key) >= MZAP_NAME_LEN ||
200- !mze_canfit_fzap_leaf(zn, zn->zn_hash)) {
201+ strlen(key) >= MZAP_NAME_LEN) {
202 err = mzap_upgrade(&zn->zn_zap, tag, tx, 0);
203 if (err == 0) {
204 err = fzap_add(zn, integer_size, num_integers, val,
205diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c
206index 6398a1d15..9a8bbccd9 100644
207--- a/module/zfs/zfs_dir.c
208+++ b/module/zfs/zfs_dir.c
209@@ -742,11 +742,7 @@ zfs_dirent(znode_t *zp, uint64_t mode)
210 }
211
212 /*
213- * Link zp into dl. Can fail in the following cases :
214- * - if zp has been unlinked.
215- * - if the number of entries with the same hash (aka. colliding entries)
216- * exceed the capacity of a leaf-block of fatzap and splitting of the
217- * leaf-block does not help.
218+ * Link zp into dl. Can only fail if zp has been unlinked.
219 */
220 int
221 zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
222@@ -780,24 +776,6 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
223 NULL, &links, sizeof (links));
224 }
225 }
226-
227- value = zfs_dirent(zp, zp->z_mode);
228- error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, 8, 1,
229- &value, tx);
230-
231- /*
232- * zap_add could fail to add the entry if it exceeds the capacity of the
233- * leaf-block and zap_leaf_split() failed to help.
234- * The caller of this routine is responsible for failing the transaction
235- * which will rollback the SA updates done above.
236- */
237- if (error != 0) {
238- if (!(flag & ZRENAMING) && !(flag & ZNEW))
239- drop_nlink(ZTOI(zp));
240- mutex_exit(&zp->z_lock);
241- return (error);
242- }
243-
244 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
245 &dzp->z_id, sizeof (dzp->z_id));
246 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
247@@ -835,6 +813,11 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
248 ASSERT(error == 0);
249 mutex_exit(&dzp->z_lock);
250
251+ value = zfs_dirent(zp, zp->z_mode);
252+ error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name,
253+ 8, 1, &value, tx);
254+ ASSERT(error == 0);
255+
256 return (0);
257 }
258
259diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
260index 8a7ad702c..6f6ce79db 100644
261--- a/module/zfs/zfs_vnops.c
262+++ b/module/zfs/zfs_vnops.c
263@@ -1443,22 +1443,10 @@ top:
264 }
265 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
266
267- error = zfs_link_create(dl, zp, tx, ZNEW);
268- if (error != 0) {
269- /*
270- * Since, we failed to add the directory entry for it,
271- * delete the newly created dnode.
272- */
273- zfs_znode_delete(zp, tx);
274- remove_inode_hash(ZTOI(zp));
275- zfs_acl_ids_free(&acl_ids);
276- dmu_tx_commit(tx);
277- goto out;
278- }
279-
280 if (fuid_dirtied)
281 zfs_fuid_sync(zfsvfs, tx);
282
283+ (void) zfs_link_create(dl, zp, tx, ZNEW);
284 txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
285 if (flag & FIGNORECASE)
286 txtype |= TX_CI;
287@@ -2049,18 +2037,13 @@ top:
288 */
289 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
290
291+ if (fuid_dirtied)
292+ zfs_fuid_sync(zfsvfs, tx);
293+
294 /*
295 * Now put new name in parent dir.
296 */
297- error = zfs_link_create(dl, zp, tx, ZNEW);
298- if (error != 0) {
299- zfs_znode_delete(zp, tx);
300- remove_inode_hash(ZTOI(zp));
301- goto out;
302- }
303-
304- if (fuid_dirtied)
305- zfs_fuid_sync(zfsvfs, tx);
306+ (void) zfs_link_create(dl, zp, tx, ZNEW);
307
308 *ipp = ZTOI(zp);
309
310@@ -2070,7 +2053,6 @@ top:
311 zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp,
312 acl_ids.z_fuidp, vap);
313
314-out:
315 zfs_acl_ids_free(&acl_ids);
316
317 dmu_tx_commit(tx);
318@@ -2080,14 +2062,10 @@ out:
319 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
320 zil_commit(zilog, 0);
321
322- if (error != 0) {
323- iput(ZTOI(zp));
324- } else {
325- zfs_inode_update(dzp);
326- zfs_inode_update(zp);
327- }
328+ zfs_inode_update(dzp);
329+ zfs_inode_update(zp);
330 ZFS_EXIT(zfsvfs);
331- return (error);
332+ return (0);
333 }
334
335 /*
336@@ -3705,13 +3683,6 @@ top:
337 VERIFY3U(zfs_link_destroy(tdl, szp, tx,
338 ZRENAMING, NULL), ==, 0);
339 }
340- } else {
341- /*
342- * If we had removed the existing target, subsequent
343- * call to zfs_link_create() to add back the same entry
344- * but, the new dnode (szp) should not fail.
345- */
346- ASSERT(tzp == NULL);
347 }
348 }
349
350@@ -3882,18 +3853,14 @@ top:
351 /*
352 * Insert the new object into the directory.
353 */
354- error = zfs_link_create(dl, zp, tx, ZNEW);
355- if (error != 0) {
356- zfs_znode_delete(zp, tx);
357- remove_inode_hash(ZTOI(zp));
358- } else {
359- if (flags & FIGNORECASE)
360- txtype |= TX_CI;
361- zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
362+ (void) zfs_link_create(dl, zp, tx, ZNEW);
363
364- zfs_inode_update(dzp);
365- zfs_inode_update(zp);
366- }
367+ if (flags & FIGNORECASE)
368+ txtype |= TX_CI;
369+ zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
370+
371+ zfs_inode_update(dzp);
372+ zfs_inode_update(zp);
373
374 zfs_acl_ids_free(&acl_ids);
375
376@@ -3901,14 +3868,10 @@ top:
377
378 zfs_dirent_unlock(dl);
379
380- if (error == 0) {
381- *ipp = ZTOI(zp);
382+ *ipp = ZTOI(zp);
383
384- if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
385- zil_commit(zilog, 0);
386- } else {
387- iput(ZTOI(zp));
388- }
389+ if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
390+ zil_commit(zilog, 0);
391
392 ZFS_EXIT(zfsvfs);
393 return (error);
394diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
395index 25ae3fe5e..4b2694202 100644
396--- a/tests/runfiles/linux.run
397+++ b/tests/runfiles/linux.run
398@@ -55,7 +55,7 @@ tags = ['functional', 'cachefile']
399 # 'mixed_none_lookup', 'mixed_none_lookup_ci', 'mixed_none_delete',
400 # 'mixed_formd_lookup', 'mixed_formd_lookup_ci', 'mixed_formd_delete']
401 [tests/functional/casenorm]
402-tests = ['case_all_values', 'norm_all_values', 'mixed_create_failure']
403+tests = ['case_all_values', 'norm_all_values']
404 tags = ['functional', 'casenorm']
405
406 [tests/functional/chattr]
407diff --git a/tests/zfs-tests/tests/functional/casenorm/mixed_create_failure.ksh b/tests/zfs-tests/tests/functional/casenorm/mixed_create_failure.ksh
408deleted file mode 100755
409index 51b5bb3f6..000000000
410--- a/tests/zfs-tests/tests/functional/casenorm/mixed_create_failure.ksh
411+++ /dev/null
412@@ -1,136 +0,0 @@
413-#!/bin/ksh -p
414-#
415-#
416-# This file and its contents are supplied under the terms of the
417-# Common Development and Distribution License ("CDDL"), version 1.0.
418-# You may only use this file in accordance with the terms of version
419-# 1.0 of the CDDL.
420-#
421-# A full copy of the text of the CDDL should have accompanied this
422-# source. A copy of the CDDL is also available via the Internet at
423-# http://www.illumos.org/license/CDDL.
424-#
425-#
426-# Copyright 2018 Nutanix Inc. All rights reserved.
427-#
428-
429-. $STF_SUITE/tests/functional/casenorm/casenorm.kshlib
430-
431-# DESCRIPTION:
432-# For the filesystem with casesensitivity=mixed, normalization=none,
433-# when multiple files with the same name (differing only in case) are created,
434-# the number of files is limited to what can fit in a fatzap leaf-block.
435-# And beyond that, it fails with ENOSPC.
436-#
437-# Ensure that the create/rename operations fail gracefully and not trigger an
438-# ASSERT.
439-#
440-# STRATEGY:
441-# Repeat the below steps for objects: files, directories, symlinks and hardlinks
442-# 1. Create objects with same name but varying in case.
443-# E.g. 'abcdefghijklmnop', 'Abcdefghijklmnop', 'ABcdefghijklmnop' etc.
444-# The create should fail with ENOSPC.
445-# 2. Create an object with name 'tmp_obj' and try to rename it to name that we
446-# failed to add in step 1 above.
447-# This should fail as well.
448-
449-verify_runnable "global"
450-
451-function cleanup
452-{
453- destroy_testfs
454-}
455-
456-log_onexit cleanup
457-log_assert "With mixed mode: ensure create fails with ENOSPC beyond a certain limit"
458-
459-create_testfs "-o casesensitivity=mixed -o normalization=none"
460-
461-# Different object types
462-obj_type=('file' 'dir' 'symlink' 'hardlink')
463-
464-# Commands to create different object types
465-typeset -A ops
466-ops['file']='touch'
467-ops['dir']='mkdir'
468-ops['symlink']='ln -s'
469-ops['hardlink']='ln'
470-
471-# This function tests the following for a give object type :
472-# - Create multiple objects with the same name (varying only in case).
473-# Ensure that it eventually fails once the leaf-block limit is exceeded.
474-# - Create another object with a different name. And attempt rename it to the
475-# name (for which the create had failed in the previous step).
476-# This should fail as well.
477-# Args :
478-# $1 - object type (file/dir/symlink/hardlink)
479-# $2 - test directory
480-#
481-function test_ops
482-{
483- typeset obj_type=$1
484- typeset testdir=$2
485-
486- target_obj='target-file'
487-
488- op="${ops[$obj_type]}"
489-
490- log_note "The op : $op"
491- log_note "testdir=$testdir obj_type=$obj_type"
492-
493- test_path="$testdir/$obj_type"
494- mkdir $test_path
495- log_note "Created test dir $test_path"
496-
497- if [[ $obj_type = "symlink" || $obj_type = "hardlink" ]]; then
498- touch $test_path/$target_obj
499- log_note "Created target: $test_path/$target_obj"
500- op="$op $test_path/$target_obj"
501- fi
502-
503- log_note "op : $op"
504- names='{a,A}{b,B}{c,C}{d,D}{e,E}{f,F}{g,G}{h,H}{i,I}{j,J}{k,K}{l,L}'
505- for name in $names; do
506- cmd="$op $test_path/$name"
507- out=$($cmd 2>&1)
508- ret=$?
509- log_note "cmd: $cmd ret: $ret out=$out"
510- if (($ret != 0)); then
511- if [[ $out = *@(No space left on device)* ]]; then
512- save_name="$test_path/$name"
513- break;
514- else
515- log_err "$cmd failed with unexpected error : $out"
516- fi
517- fi
518- done
519-
520- log_note 'Test rename \"sample_name\" rename'
521- TMP_OBJ="$test_path/tmp_obj"
522- cmd="$op $TMP_OBJ"
523- out=$($cmd 2>&1)
524- ret=$?
525- if (($ret != 0)); then
526- log_err "cmd:$cmd failed out:$out"
527- fi
528-
529- # Now, try to rename the tmp_obj to the name which we failed to add earlier.
530- # This should fail as well.
531- out=$(mv $TMP_OBJ $save_name 2>&1)
532- ret=$?
533- if (($ret != 0)); then
534- if [[ $out = *@(No space left on device)* ]]; then
535- log_note "$cmd failed as expected : $out"
536- else
537- log_err "$cmd failed with : $out"
538- fi
539- fi
540-}
541-
542-for obj_type in ${obj_type[*]};
543-do
544- log_note "Testing create of $obj_type"
545- test_ops $obj_type $TESTDIR
546-done
547-
548-log_pass "Mixed mode FS: Ops on large number of colliding names fail gracefully"
549--
5502.14.2
551