]>
Commit | Line | Data |
---|---|---|
1da177e4 | 1 | /* |
3e57ecf6 | 2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. |
7b718769 | 3 | * All Rights Reserved. |
1da177e4 | 4 | * |
7b718769 NS |
5 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU General Public License as | |
1da177e4 LT |
7 | * published by the Free Software Foundation. |
8 | * | |
7b718769 NS |
9 | * This program is distributed in the hope that it would be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * GNU General Public License for more details. | |
1da177e4 | 13 | * |
7b718769 NS |
14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write the Free Software Foundation, | |
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
1da177e4 | 17 | */ |
1da177e4 | 18 | #include "xfs.h" |
a844f451 | 19 | #include "xfs_fs.h" |
1da177e4 | 20 | #include "xfs_types.h" |
a844f451 | 21 | #include "xfs_bit.h" |
1da177e4 | 22 | #include "xfs_log.h" |
a844f451 | 23 | #include "xfs_inum.h" |
1da177e4 LT |
24 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | |
a844f451 | 26 | #include "xfs_ag.h" |
1da177e4 | 27 | #include "xfs_mount.h" |
1da177e4 | 28 | #include "xfs_bmap_btree.h" |
1da177e4 | 29 | #include "xfs_dinode.h" |
1da177e4 | 30 | #include "xfs_inode.h" |
a844f451 | 31 | #include "xfs_inode_item.h" |
1da177e4 | 32 | #include "xfs_bmap.h" |
1da177e4 LT |
33 | #include "xfs_itable.h" |
34 | #include "xfs_dfrag.h" | |
35 | #include "xfs_error.h" | |
739bfb2a | 36 | #include "xfs_vnodeops.h" |
0b1b213f | 37 | #include "xfs_trace.h" |
1da177e4 | 38 | |
6bded0f3 DC |
39 | |
40 | static int xfs_swap_extents( | |
41 | xfs_inode_t *ip, /* target inode */ | |
42 | xfs_inode_t *tip, /* tmp inode */ | |
43 | xfs_swapext_t *sxp); | |
44 | ||
1da177e4 | 45 | /* |
6bded0f3 | 46 | * ioctl interface for swapext |
1da177e4 LT |
47 | */ |
48 | int | |
49 | xfs_swapext( | |
743bb465 | 50 | xfs_swapext_t *sxp) |
1da177e4 | 51 | { |
35fec8df | 52 | xfs_inode_t *ip, *tip; |
6bded0f3 | 53 | struct file *file, *tmp_file; |
1da177e4 | 54 | int error = 0; |
1da177e4 | 55 | |
1da177e4 | 56 | /* Pull information for the target fd */ |
35fec8df CH |
57 | file = fget((int)sxp->sx_fdtarget); |
58 | if (!file) { | |
1da177e4 | 59 | error = XFS_ERROR(EINVAL); |
ac12b4e2 | 60 | goto out; |
1da177e4 LT |
61 | } |
62 | ||
1817176a DR |
63 | if (!(file->f_mode & FMODE_WRITE) || |
64 | !(file->f_mode & FMODE_READ) || | |
65 | (file->f_flags & O_APPEND)) { | |
f6aa7f21 CH |
66 | error = XFS_ERROR(EBADF); |
67 | goto out_put_file; | |
68 | } | |
69 | ||
6bded0f3 DC |
70 | tmp_file = fget((int)sxp->sx_fdtmp); |
71 | if (!tmp_file) { | |
1da177e4 | 72 | error = XFS_ERROR(EINVAL); |
35fec8df | 73 | goto out_put_file; |
1da177e4 LT |
74 | } |
75 | ||
6bded0f3 | 76 | if (!(tmp_file->f_mode & FMODE_WRITE) || |
1817176a | 77 | !(tmp_file->f_mode & FMODE_READ) || |
6bded0f3 | 78 | (tmp_file->f_flags & O_APPEND)) { |
f6aa7f21 | 79 | error = XFS_ERROR(EBADF); |
6bded0f3 | 80 | goto out_put_tmp_file; |
f6aa7f21 CH |
81 | } |
82 | ||
7c8f7af6 | 83 | if (IS_SWAPFILE(file->f_path.dentry->d_inode) || |
6bded0f3 | 84 | IS_SWAPFILE(tmp_file->f_path.dentry->d_inode)) { |
7c8f7af6 | 85 | error = XFS_ERROR(EINVAL); |
6bded0f3 | 86 | goto out_put_tmp_file; |
7c8f7af6 CH |
87 | } |
88 | ||
35fec8df | 89 | ip = XFS_I(file->f_path.dentry->d_inode); |
6bded0f3 | 90 | tip = XFS_I(tmp_file->f_path.dentry->d_inode); |
1da177e4 LT |
91 | |
92 | if (ip->i_mount != tip->i_mount) { | |
35fec8df | 93 | error = XFS_ERROR(EINVAL); |
6bded0f3 | 94 | goto out_put_tmp_file; |
1da177e4 LT |
95 | } |
96 | ||
97 | if (ip->i_ino == tip->i_ino) { | |
35fec8df | 98 | error = XFS_ERROR(EINVAL); |
6bded0f3 | 99 | goto out_put_tmp_file; |
1da177e4 LT |
100 | } |
101 | ||
35fec8df CH |
102 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
103 | error = XFS_ERROR(EIO); | |
6bded0f3 | 104 | goto out_put_tmp_file; |
1da177e4 LT |
105 | } |
106 | ||
541d7d3c | 107 | error = xfs_swap_extents(ip, tip, sxp); |
3e57ecf6 | 108 | |
6bded0f3 DC |
109 | out_put_tmp_file: |
110 | fput(tmp_file); | |
35fec8df CH |
111 | out_put_file: |
112 | fput(file); | |
35fec8df | 113 | out: |
3e57ecf6 OW |
114 | return error; |
115 | } | |
116 | ||
e09f9860 DC |
117 | /* |
118 | * We need to check that the format of the data fork in the temporary inode is | |
119 | * valid for the target inode before doing the swap. This is not a problem with | |
120 | * attr1 because of the fixed fork offset, but attr2 has a dynamically sized | |
121 | * data fork depending on the space the attribute fork is taking so we can get | |
122 | * invalid formats on the target inode. | |
123 | * | |
124 | * E.g. target has space for 7 extents in extent format, temp inode only has | |
125 | * space for 6. If we defragment down to 7 extents, then the tmp format is a | |
126 | * btree, but when swapped it needs to be in extent format. Hence we can't just | |
127 | * blindly swap data forks on attr2 filesystems. | |
128 | * | |
129 | * Note that we check the swap in both directions so that we don't end up with | |
130 | * a corrupt temporary inode, either. | |
131 | * | |
132 | * Note that fixing the way xfs_fsr sets up the attribute fork in the source | |
133 | * inode will prevent this situation from occurring, so all we do here is | |
134 | * reject and log the attempt. basically we are putting the responsibility on | |
135 | * userspace to get this right. | |
136 | */ | |
137 | static int | |
138 | xfs_swap_extents_check_format( | |
139 | xfs_inode_t *ip, /* target inode */ | |
140 | xfs_inode_t *tip) /* tmp inode */ | |
141 | { | |
142 | ||
143 | /* Should never get a local format */ | |
144 | if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL || | |
145 | tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) | |
146 | return EINVAL; | |
147 | ||
148 | /* | |
149 | * if the target inode has less extents that then temporary inode then | |
150 | * why did userspace call us? | |
151 | */ | |
152 | if (ip->i_d.di_nextents < tip->i_d.di_nextents) | |
153 | return EINVAL; | |
154 | ||
155 | /* | |
156 | * if the target inode is in extent form and the temp inode is in btree | |
157 | * form then we will end up with the target inode in the wrong format | |
158 | * as we already know there are less extents in the temp inode. | |
159 | */ | |
160 | if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && | |
161 | tip->i_d.di_format == XFS_DINODE_FMT_BTREE) | |
162 | return EINVAL; | |
163 | ||
164 | /* Check temp in extent form to max in target */ | |
165 | if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && | |
166 | XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > ip->i_df.if_ext_max) | |
167 | return EINVAL; | |
168 | ||
169 | /* Check target in extent form to max in temp */ | |
170 | if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && | |
171 | XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max) | |
172 | return EINVAL; | |
173 | ||
dd77ef92 DC |
174 | /* |
175 | * If we are in a btree format, check that the temp root block will fit | |
176 | * in the target and that it has enough extents to be in btree format | |
177 | * in the target. | |
178 | * | |
179 | * Note that we have to be careful to allow btree->extent conversions | |
180 | * (a common defrag case) which will occur when the temp inode is in | |
181 | * extent format... | |
182 | */ | |
e09f9860 | 183 | if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE && |
dd77ef92 DC |
184 | ((XFS_IFORK_BOFF(ip) && |
185 | tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) || | |
186 | XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= ip->i_df.if_ext_max)) | |
e09f9860 DC |
187 | return EINVAL; |
188 | ||
dd77ef92 | 189 | /* Reciprocal target->temp btree format checks */ |
e09f9860 | 190 | if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE && |
dd77ef92 DC |
191 | ((XFS_IFORK_BOFF(tip) && |
192 | ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) || | |
193 | XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= tip->i_df.if_ext_max)) | |
e09f9860 DC |
194 | return EINVAL; |
195 | ||
196 | return 0; | |
197 | } | |
198 | ||
6bded0f3 | 199 | static int |
3e57ecf6 | 200 | xfs_swap_extents( |
e09f9860 DC |
201 | xfs_inode_t *ip, /* target inode */ |
202 | xfs_inode_t *tip, /* tmp inode */ | |
3e57ecf6 OW |
203 | xfs_swapext_t *sxp) |
204 | { | |
205 | xfs_mount_t *mp; | |
3e57ecf6 OW |
206 | xfs_trans_t *tp; |
207 | xfs_bstat_t *sbp = &sxp->sx_stat; | |
3e57ecf6 OW |
208 | xfs_ifork_t *tempifp, *ifp, *tifp; |
209 | int ilf_fields, tilf_fields; | |
3e57ecf6 OW |
210 | int error = 0; |
211 | int aforkblks = 0; | |
212 | int taforkblks = 0; | |
213 | __uint64_t tmp; | |
3e57ecf6 OW |
214 | |
215 | mp = ip->i_mount; | |
216 | ||
217 | tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); | |
218 | if (!tempifp) { | |
219 | error = XFS_ERROR(ENOMEM); | |
ef8f7fc5 | 220 | goto out; |
3e57ecf6 OW |
221 | } |
222 | ||
223 | sbp = &sxp->sx_stat; | |
1da177e4 | 224 | |
f9114eba DC |
225 | /* |
226 | * we have to do two separate lock calls here to keep lockdep | |
227 | * happy. If we try to get all the locks in one call, lock will | |
228 | * report false positives when we drop the ILOCK and regain them | |
229 | * below. | |
230 | */ | |
231 | xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); | |
232 | xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); | |
1da177e4 | 233 | |
1da177e4 LT |
234 | /* Verify that both files have the same format */ |
235 | if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { | |
236 | error = XFS_ERROR(EINVAL); | |
ef8f7fc5 | 237 | goto out_unlock; |
1da177e4 LT |
238 | } |
239 | ||
240 | /* Verify both files are either real-time or non-realtime */ | |
71ddabb9 | 241 | if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) { |
1da177e4 | 242 | error = XFS_ERROR(EINVAL); |
ef8f7fc5 | 243 | goto out_unlock; |
1da177e4 LT |
244 | } |
245 | ||
df80c933 | 246 | if (VN_CACHED(VFS_I(tip)) != 0) { |
739bfb2a CH |
247 | error = xfs_flushinval_pages(tip, 0, -1, |
248 | FI_REMAPF_LOCKED); | |
d3cf2094 | 249 | if (error) |
ef8f7fc5 | 250 | goto out_unlock; |
bd5a876a | 251 | } |
1da177e4 LT |
252 | |
253 | /* Verify O_DIRECT for ftmp */ | |
df80c933 | 254 | if (VN_CACHED(VFS_I(tip)) != 0) { |
1da177e4 | 255 | error = XFS_ERROR(EINVAL); |
ef8f7fc5 | 256 | goto out_unlock; |
1da177e4 LT |
257 | } |
258 | ||
259 | /* Verify all data are being swapped */ | |
d0cfb373 ES |
260 | if (sxp->sx_offset != 0 || |
261 | sxp->sx_length != ip->i_d.di_size || | |
262 | sxp->sx_length != tip->i_d.di_size) { | |
1da177e4 | 263 | error = XFS_ERROR(EFAULT); |
ef8f7fc5 | 264 | goto out_unlock; |
1da177e4 LT |
265 | } |
266 | ||
3a85cd96 DC |
267 | trace_xfs_swap_extent_before(ip, 0); |
268 | trace_xfs_swap_extent_before(tip, 1); | |
269 | ||
e09f9860 DC |
270 | /* check inode formats now that data is flushed */ |
271 | error = xfs_swap_extents_check_format(ip, tip); | |
272 | if (error) { | |
273 | xfs_fs_cmn_err(CE_NOTE, mp, | |
274 | "%s: inode 0x%llx format is incompatible for exchanging.", | |
275 | __FILE__, ip->i_ino); | |
ef8f7fc5 | 276 | goto out_unlock; |
1da177e4 LT |
277 | } |
278 | ||
279 | /* | |
280 | * Compare the current change & modify times with that | |
281 | * passed in. If they differ, we abort this swap. | |
282 | * This is the mechanism used to ensure the calling | |
283 | * process that the file was not changed out from | |
284 | * under it. | |
285 | */ | |
f9581b14 CH |
286 | if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) || |
287 | (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) || | |
288 | (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) || | |
289 | (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) { | |
1da177e4 | 290 | error = XFS_ERROR(EBUSY); |
ef8f7fc5 | 291 | goto out_unlock; |
1da177e4 LT |
292 | } |
293 | ||
294 | /* We need to fail if the file is memory mapped. Once we have tossed | |
295 | * all existing pages, the page fault will have no option | |
296 | * but to go to the filesystem for pages. By making the page fault call | |
67fcaa73 | 297 | * vop_read (or write in the case of autogrow) they block on the iolock |
1da177e4 LT |
298 | * until we have switched the extents. |
299 | */ | |
df80c933 | 300 | if (VN_MAPPED(VFS_I(ip))) { |
1da177e4 | 301 | error = XFS_ERROR(EBUSY); |
ef8f7fc5 | 302 | goto out_unlock; |
1da177e4 LT |
303 | } |
304 | ||
305 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | |
306 | xfs_iunlock(tip, XFS_ILOCK_EXCL); | |
307 | ||
308 | /* | |
309 | * There is a race condition here since we gave up the | |
310 | * ilock. However, the data fork will not change since | |
311 | * we have the iolock (locked for truncation too) so we | |
312 | * are safe. We don't really care if non-io related | |
313 | * fields change. | |
314 | */ | |
315 | ||
739bfb2a | 316 | xfs_tosspages(ip, 0, -1, FI_REMAPF); |
1da177e4 LT |
317 | |
318 | tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); | |
319 | if ((error = xfs_trans_reserve(tp, 0, | |
320 | XFS_ICHANGE_LOG_RES(mp), 0, | |
321 | 0, 0))) { | |
322 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | |
323 | xfs_iunlock(tip, XFS_IOLOCK_EXCL); | |
324 | xfs_trans_cancel(tp, 0); | |
ef8f7fc5 | 325 | goto out; |
1da177e4 | 326 | } |
e1cccd91 | 327 | xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); |
1da177e4 LT |
328 | |
329 | /* | |
330 | * Count the number of extended attribute blocks | |
331 | */ | |
332 | if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) && | |
333 | (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { | |
334 | error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks); | |
ef8f7fc5 JJS |
335 | if (error) |
336 | goto out_trans_cancel; | |
1da177e4 LT |
337 | } |
338 | if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) && | |
339 | (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { | |
340 | error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, | |
341 | &taforkblks); | |
ef8f7fc5 JJS |
342 | if (error) |
343 | goto out_trans_cancel; | |
1da177e4 LT |
344 | } |
345 | ||
346 | /* | |
347 | * Swap the data forks of the inodes | |
348 | */ | |
349 | ifp = &ip->i_df; | |
350 | tifp = &tip->i_df; | |
d0cfb373 ES |
351 | *tempifp = *ifp; /* struct copy */ |
352 | *ifp = *tifp; /* struct copy */ | |
353 | *tifp = *tempifp; /* struct copy */ | |
1da177e4 | 354 | |
e09f9860 DC |
355 | /* |
356 | * Fix the in-memory data fork values that are dependent on the fork | |
357 | * offset in the inode. We can't assume they remain the same as attr2 | |
358 | * has dynamic fork offsets. | |
359 | */ | |
360 | ifp->if_ext_max = XFS_IFORK_SIZE(ip, XFS_DATA_FORK) / | |
361 | (uint)sizeof(xfs_bmbt_rec_t); | |
362 | tifp->if_ext_max = XFS_IFORK_SIZE(tip, XFS_DATA_FORK) / | |
363 | (uint)sizeof(xfs_bmbt_rec_t); | |
364 | ||
1da177e4 LT |
365 | /* |
366 | * Fix the on-disk inode values | |
367 | */ | |
368 | tmp = (__uint64_t)ip->i_d.di_nblocks; | |
369 | ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks; | |
370 | tip->i_d.di_nblocks = tmp + taforkblks - aforkblks; | |
371 | ||
372 | tmp = (__uint64_t) ip->i_d.di_nextents; | |
373 | ip->i_d.di_nextents = tip->i_d.di_nextents; | |
374 | tip->i_d.di_nextents = tmp; | |
375 | ||
376 | tmp = (__uint64_t) ip->i_d.di_format; | |
377 | ip->i_d.di_format = tip->i_d.di_format; | |
378 | tip->i_d.di_format = tmp; | |
379 | ||
380 | ilf_fields = XFS_ILOG_CORE; | |
381 | ||
382 | switch(ip->i_d.di_format) { | |
383 | case XFS_DINODE_FMT_EXTENTS: | |
384 | /* If the extents fit in the inode, fix the | |
385 | * pointer. Otherwise it's already NULL or | |
386 | * pointing to the extent. | |
387 | */ | |
388 | if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) { | |
389 | ifp->if_u1.if_extents = | |
390 | ifp->if_u2.if_inline_ext; | |
391 | } | |
392 | ilf_fields |= XFS_ILOG_DEXT; | |
393 | break; | |
394 | case XFS_DINODE_FMT_BTREE: | |
395 | ilf_fields |= XFS_ILOG_DBROOT; | |
396 | break; | |
397 | } | |
398 | ||
399 | tilf_fields = XFS_ILOG_CORE; | |
400 | ||
401 | switch(tip->i_d.di_format) { | |
402 | case XFS_DINODE_FMT_EXTENTS: | |
403 | /* If the extents fit in the inode, fix the | |
404 | * pointer. Otherwise it's already NULL or | |
405 | * pointing to the extent. | |
406 | */ | |
407 | if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) { | |
408 | tifp->if_u1.if_extents = | |
409 | tifp->if_u2.if_inline_ext; | |
410 | } | |
411 | tilf_fields |= XFS_ILOG_DEXT; | |
412 | break; | |
413 | case XFS_DINODE_FMT_BTREE: | |
414 | tilf_fields |= XFS_ILOG_DBROOT; | |
415 | break; | |
416 | } | |
417 | ||
1da177e4 | 418 | |
898621d5 CH |
419 | xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); |
420 | xfs_trans_ijoin_ref(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | |
1da177e4 LT |
421 | |
422 | xfs_trans_log_inode(tp, ip, ilf_fields); | |
423 | xfs_trans_log_inode(tp, tip, tilf_fields); | |
424 | ||
425 | /* | |
426 | * If this is a synchronous mount, make sure that the | |
427 | * transaction goes to disk before returning to the user. | |
428 | */ | |
ef8f7fc5 | 429 | if (mp->m_flags & XFS_MOUNT_WSYNC) |
1da177e4 | 430 | xfs_trans_set_sync(tp); |
1da177e4 | 431 | |
1c72bf90 | 432 | error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT); |
1da177e4 | 433 | |
3a85cd96 DC |
434 | trace_xfs_swap_extent_after(ip, 0); |
435 | trace_xfs_swap_extent_after(tip, 1); | |
ef8f7fc5 JJS |
436 | out: |
437 | kmem_free(tempifp); | |
1da177e4 | 438 | return error; |
ef8f7fc5 | 439 | |
1f23920d FB |
440 | out_unlock: |
441 | xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | |
442 | xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | |
443 | goto out; | |
444 | ||
ef8f7fc5 JJS |
445 | out_trans_cancel: |
446 | xfs_trans_cancel(tp, 0); | |
447 | goto out_unlock; | |
1da177e4 | 448 | } |