]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - fs/xfs/xfs_inode_item.c
xfs: refactor xfs_inode_item_size
[mirror_ubuntu-artful-kernel.git] / fs / xfs / xfs_inode_item.c
CommitLineData
1da177e4 1/*
7b718769
NS
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
1da177e4 4 *
7b718769
NS
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
1da177e4
LT
7 * published by the Free Software Foundation.
8 *
7b718769
NS
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
1da177e4 13 *
7b718769
NS
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1da177e4 17 */
1da177e4 18#include "xfs.h"
a844f451 19#include "xfs_fs.h"
a4fbe6ab 20#include "xfs_format.h"
239880ef
DC
21#include "xfs_log_format.h"
22#include "xfs_trans_resv.h"
1da177e4 23#include "xfs_sb.h"
a844f451 24#include "xfs_ag.h"
1da177e4 25#include "xfs_mount.h"
1da177e4 26#include "xfs_inode.h"
239880ef 27#include "xfs_trans.h"
a844f451 28#include "xfs_inode_item.h"
db7a19f2 29#include "xfs_error.h"
0b1b213f 30#include "xfs_trace.h"
239880ef 31#include "xfs_trans_priv.h"
a4fbe6ab 32#include "xfs_dinode.h"
1da177e4
LT
33
34
35kmem_zone_t *xfs_ili_zone; /* inode log item zone */
36
7bfa31d8
CH
37static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip)
38{
39 return container_of(lip, struct xfs_inode_log_item, ili_item);
40}
41
166d1368 42STATIC void
ce9641d6
CH
43xfs_inode_item_data_fork_size(
44 struct xfs_inode_log_item *iip,
166d1368
DC
45 int *nvecs,
46 int *nbytes)
1da177e4 47{
7bfa31d8 48 struct xfs_inode *ip = iip->ili_inode;
166d1368 49
1da177e4
LT
50 switch (ip->i_d.di_format) {
51 case XFS_DINODE_FMT_EXTENTS:
f5d8d5c4 52 if ((iip->ili_fields & XFS_ILOG_DEXT) &&
339a5f5d 53 ip->i_d.di_nextents > 0 &&
166d1368
DC
54 ip->i_df.if_bytes > 0) {
55 /* worst case, doesn't subtract delalloc extents */
56 *nbytes += XFS_IFORK_DSIZE(ip);
57 *nvecs += 1;
58 }
1da177e4 59 break;
1da177e4 60 case XFS_DINODE_FMT_BTREE:
f5d8d5c4 61 if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
166d1368
DC
62 ip->i_df.if_broot_bytes > 0) {
63 *nbytes += ip->i_df.if_broot_bytes;
64 *nvecs += 1;
65 }
1da177e4 66 break;
1da177e4 67 case XFS_DINODE_FMT_LOCAL:
f5d8d5c4 68 if ((iip->ili_fields & XFS_ILOG_DDATA) &&
166d1368
DC
69 ip->i_df.if_bytes > 0) {
70 *nbytes += roundup(ip->i_df.if_bytes, 4);
71 *nvecs += 1;
72 }
1da177e4
LT
73 break;
74
75 case XFS_DINODE_FMT_DEV:
1da177e4 76 case XFS_DINODE_FMT_UUID:
1da177e4 77 break;
1da177e4
LT
78 default:
79 ASSERT(0);
80 break;
81 }
ce9641d6 82}
1da177e4 83
ce9641d6
CH
84STATIC void
85xfs_inode_item_attr_fork_size(
86 struct xfs_inode_log_item *iip,
87 int *nvecs,
88 int *nbytes)
89{
90 struct xfs_inode *ip = iip->ili_inode;
1da177e4 91
1da177e4
LT
92 switch (ip->i_d.di_aformat) {
93 case XFS_DINODE_FMT_EXTENTS:
f5d8d5c4 94 if ((iip->ili_fields & XFS_ILOG_AEXT) &&
339a5f5d 95 ip->i_d.di_anextents > 0 &&
166d1368
DC
96 ip->i_afp->if_bytes > 0) {
97 /* worst case, doesn't subtract unused space */
98 *nbytes += XFS_IFORK_ASIZE(ip);
99 *nvecs += 1;
100 }
1da177e4 101 break;
1da177e4 102 case XFS_DINODE_FMT_BTREE:
f5d8d5c4 103 if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
166d1368
DC
104 ip->i_afp->if_broot_bytes > 0) {
105 *nbytes += ip->i_afp->if_broot_bytes;
106 *nvecs += 1;
107 }
1da177e4 108 break;
1da177e4 109 case XFS_DINODE_FMT_LOCAL:
f5d8d5c4 110 if ((iip->ili_fields & XFS_ILOG_ADATA) &&
166d1368
DC
111 ip->i_afp->if_bytes > 0) {
112 *nbytes += roundup(ip->i_afp->if_bytes, 4);
113 *nvecs += 1;
114 }
1da177e4 115 break;
1da177e4
LT
116 default:
117 ASSERT(0);
118 break;
119 }
1da177e4
LT
120}
121
ce9641d6
CH
122/*
123 * This returns the number of iovecs needed to log the given inode item.
124 *
125 * We need one iovec for the inode log format structure, one for the
126 * inode core, and possibly one for the inode data/extents/b-tree root
127 * and one for the inode attribute data/extents/b-tree root.
128 */
129STATIC void
130xfs_inode_item_size(
131 struct xfs_log_item *lip,
132 int *nvecs,
133 int *nbytes)
134{
135 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
136 struct xfs_inode *ip = iip->ili_inode;
137
138 *nvecs += 2;
139 *nbytes += sizeof(struct xfs_inode_log_format) +
140 xfs_icdinode_size(ip->i_d.di_version);
141
142 xfs_inode_item_data_fork_size(iip, nvecs, nbytes);
143 if (XFS_IFORK_Q(ip))
144 xfs_inode_item_attr_fork_size(iip, nvecs, nbytes);
145}
146
e828776a
DC
147/*
148 * xfs_inode_item_format_extents - convert in-core extents to on-disk form
149 *
150 * For either the data or attr fork in extent format, we need to endian convert
151 * the in-core extent as we place them into the on-disk inode. In this case, we
152 * need to do this conversion before we write the extents into the log. Because
153 * we don't have the disk inode to write into here, we allocate a buffer and
154 * format the extents into it via xfs_iextents_copy(). We free the buffer in
155 * the unlock routine after the copy for the log has been made.
156 *
157 * In the case of the data fork, the in-core and on-disk fork sizes can be
158 * different due to delayed allocation extents. We only log on-disk extents
159 * here, so always use the physical fork size to determine the size of the
160 * buffer we need to allocate.
161 */
162STATIC void
163xfs_inode_item_format_extents(
164 struct xfs_inode *ip,
165 struct xfs_log_iovec *vecp,
166 int whichfork,
167 int type)
168{
169 xfs_bmbt_rec_t *ext_buffer;
170
171 ext_buffer = kmem_alloc(XFS_IFORK_SIZE(ip, whichfork), KM_SLEEP);
172 if (whichfork == XFS_DATA_FORK)
173 ip->i_itemp->ili_extents_buf = ext_buffer;
174 else
175 ip->i_itemp->ili_aextents_buf = ext_buffer;
176
177 vecp->i_addr = ext_buffer;
178 vecp->i_len = xfs_iextents_copy(ip, ext_buffer, whichfork);
179 vecp->i_type = type;
180}
181
1da177e4
LT
182/*
183 * This is called to fill in the vector of log iovecs for the
184 * given inode log item. It fills the first item with an inode
185 * log format structure, the second with the on-disk inode structure,
186 * and a possible third and/or fourth with the inode data/extents/b-tree
187 * root and inode attributes data/extents/b-tree root.
188 */
189STATIC void
190xfs_inode_item_format(
7bfa31d8
CH
191 struct xfs_log_item *lip,
192 struct xfs_log_iovec *vecp)
1da177e4 193{
7bfa31d8
CH
194 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
195 struct xfs_inode *ip = iip->ili_inode;
1da177e4 196 uint nvecs;
1da177e4 197 size_t data_bytes;
1da177e4
LT
198 xfs_mount_t *mp;
199
4e0d5f92 200 vecp->i_addr = &iip->ili_format;
1da177e4 201 vecp->i_len = sizeof(xfs_inode_log_format_t);
4139b3b3 202 vecp->i_type = XLOG_REG_TYPE_IFORMAT;
1da177e4
LT
203 vecp++;
204 nvecs = 1;
205
4e0d5f92 206 vecp->i_addr = &ip->i_d;
93848a99 207 vecp->i_len = xfs_icdinode_size(ip->i_d.di_version);
4139b3b3 208 vecp->i_type = XLOG_REG_TYPE_ICORE;
1da177e4
LT
209 vecp++;
210 nvecs++;
1da177e4
LT
211
212 /*
213 * If this is really an old format inode, then we need to
214 * log it as such. This means that we have to copy the link
215 * count from the new field to the old. We don't have to worry
216 * about the new fields, because nothing trusts them as long as
217 * the old inode version number is there. If the superblock already
218 * has a new version number, then we don't bother converting back.
219 */
220 mp = ip->i_mount;
51ce16d5
CH
221 ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb));
222 if (ip->i_d.di_version == 1) {
62118709 223 if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
1da177e4
LT
224 /*
225 * Convert it back.
226 */
227 ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1);
228 ip->i_d.di_onlink = ip->i_d.di_nlink;
229 } else {
230 /*
231 * The superblock version has already been bumped,
232 * so just make the conversion to the new inode
233 * format permanent.
234 */
51ce16d5 235 ip->i_d.di_version = 2;
1da177e4
LT
236 ip->i_d.di_onlink = 0;
237 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
238 }
239 }
240
241 switch (ip->i_d.di_format) {
242 case XFS_DINODE_FMT_EXTENTS:
f5d8d5c4 243 iip->ili_fields &=
339a5f5d
CH
244 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
245 XFS_ILOG_DEV | XFS_ILOG_UUID);
246
f5d8d5c4 247 if ((iip->ili_fields & XFS_ILOG_DEXT) &&
339a5f5d
CH
248 ip->i_d.di_nextents > 0 &&
249 ip->i_df.if_bytes > 0) {
1da177e4 250 ASSERT(ip->i_df.if_u1.if_extents != NULL);
339a5f5d 251 ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0);
1da177e4 252 ASSERT(iip->ili_extents_buf == NULL);
339a5f5d 253
f016bad6 254#ifdef XFS_NATIVE_HOST
696123fc
DC
255 if (ip->i_d.di_nextents == ip->i_df.if_bytes /
256 (uint)sizeof(xfs_bmbt_rec_t)) {
1da177e4
LT
257 /*
258 * There are no delayed allocation
259 * extents, so just point to the
260 * real extents array.
261 */
4e0d5f92 262 vecp->i_addr = ip->i_df.if_u1.if_extents;
1da177e4 263 vecp->i_len = ip->i_df.if_bytes;
4139b3b3 264 vecp->i_type = XLOG_REG_TYPE_IEXT;
1da177e4
LT
265 } else
266#endif
267 {
e828776a
DC
268 xfs_inode_item_format_extents(ip, vecp,
269 XFS_DATA_FORK, XLOG_REG_TYPE_IEXT);
1da177e4
LT
270 }
271 ASSERT(vecp->i_len <= ip->i_df.if_bytes);
272 iip->ili_format.ilf_dsize = vecp->i_len;
273 vecp++;
274 nvecs++;
339a5f5d 275 } else {
f5d8d5c4 276 iip->ili_fields &= ~XFS_ILOG_DEXT;
1da177e4
LT
277 }
278 break;
279
280 case XFS_DINODE_FMT_BTREE:
f5d8d5c4 281 iip->ili_fields &=
339a5f5d
CH
282 ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT |
283 XFS_ILOG_DEV | XFS_ILOG_UUID);
284
f5d8d5c4 285 if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
339a5f5d 286 ip->i_df.if_broot_bytes > 0) {
1da177e4 287 ASSERT(ip->i_df.if_broot != NULL);
4e0d5f92 288 vecp->i_addr = ip->i_df.if_broot;
1da177e4 289 vecp->i_len = ip->i_df.if_broot_bytes;
4139b3b3 290 vecp->i_type = XLOG_REG_TYPE_IBROOT;
1da177e4
LT
291 vecp++;
292 nvecs++;
293 iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes;
339a5f5d 294 } else {
f5d8d5c4 295 ASSERT(!(iip->ili_fields &
339a5f5d 296 XFS_ILOG_DBROOT));
f5d8d5c4 297 iip->ili_fields &= ~XFS_ILOG_DBROOT;
1da177e4
LT
298 }
299 break;
300
301 case XFS_DINODE_FMT_LOCAL:
f5d8d5c4 302 iip->ili_fields &=
339a5f5d
CH
303 ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT |
304 XFS_ILOG_DEV | XFS_ILOG_UUID);
f5d8d5c4 305 if ((iip->ili_fields & XFS_ILOG_DDATA) &&
339a5f5d 306 ip->i_df.if_bytes > 0) {
1da177e4
LT
307 ASSERT(ip->i_df.if_u1.if_data != NULL);
308 ASSERT(ip->i_d.di_size > 0);
309
4e0d5f92 310 vecp->i_addr = ip->i_df.if_u1.if_data;
1da177e4
LT
311 /*
312 * Round i_bytes up to a word boundary.
313 * The underlying memory is guaranteed to
314 * to be there by xfs_idata_realloc().
315 */
316 data_bytes = roundup(ip->i_df.if_bytes, 4);
317 ASSERT((ip->i_df.if_real_bytes == 0) ||
318 (ip->i_df.if_real_bytes == data_bytes));
319 vecp->i_len = (int)data_bytes;
4139b3b3 320 vecp->i_type = XLOG_REG_TYPE_ILOCAL;
1da177e4
LT
321 vecp++;
322 nvecs++;
323 iip->ili_format.ilf_dsize = (unsigned)data_bytes;
339a5f5d 324 } else {
f5d8d5c4 325 iip->ili_fields &= ~XFS_ILOG_DDATA;
1da177e4
LT
326 }
327 break;
328
329 case XFS_DINODE_FMT_DEV:
f5d8d5c4 330 iip->ili_fields &=
339a5f5d
CH
331 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
332 XFS_ILOG_DEXT | XFS_ILOG_UUID);
f5d8d5c4 333 if (iip->ili_fields & XFS_ILOG_DEV) {
1da177e4
LT
334 iip->ili_format.ilf_u.ilfu_rdev =
335 ip->i_df.if_u2.if_rdev;
336 }
337 break;
338
339 case XFS_DINODE_FMT_UUID:
f5d8d5c4 340 iip->ili_fields &=
339a5f5d
CH
341 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
342 XFS_ILOG_DEXT | XFS_ILOG_DEV);
f5d8d5c4 343 if (iip->ili_fields & XFS_ILOG_UUID) {
1da177e4
LT
344 iip->ili_format.ilf_u.ilfu_uuid =
345 ip->i_df.if_u2.if_uuid;
346 }
347 break;
348
349 default:
350 ASSERT(0);
351 break;
352 }
353
354 /*
339a5f5d 355 * If there are no attributes associated with the file, then we're done.
1da177e4
LT
356 */
357 if (!XFS_IFORK_Q(ip)) {
f5d8d5c4 358 iip->ili_fields &=
339a5f5d 359 ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
f5d8d5c4 360 goto out;
1da177e4
LT
361 }
362
363 switch (ip->i_d.di_aformat) {
364 case XFS_DINODE_FMT_EXTENTS:
f5d8d5c4 365 iip->ili_fields &=
339a5f5d
CH
366 ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT);
367
f5d8d5c4 368 if ((iip->ili_fields & XFS_ILOG_AEXT) &&
339a5f5d
CH
369 ip->i_d.di_anextents > 0 &&
370 ip->i_afp->if_bytes > 0) {
371 ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) ==
372 ip->i_d.di_anextents);
73523a2e 373 ASSERT(ip->i_afp->if_u1.if_extents != NULL);
f016bad6 374#ifdef XFS_NATIVE_HOST
1da177e4
LT
375 /*
376 * There are not delayed allocation extents
377 * for attributes, so just point at the array.
378 */
4e0d5f92 379 vecp->i_addr = ip->i_afp->if_u1.if_extents;
1da177e4 380 vecp->i_len = ip->i_afp->if_bytes;
e828776a 381 vecp->i_type = XLOG_REG_TYPE_IATTR_EXT;
1da177e4
LT
382#else
383 ASSERT(iip->ili_aextents_buf == NULL);
e828776a
DC
384 xfs_inode_item_format_extents(ip, vecp,
385 XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT);
1da177e4
LT
386#endif
387 iip->ili_format.ilf_asize = vecp->i_len;
388 vecp++;
389 nvecs++;
339a5f5d 390 } else {
f5d8d5c4 391 iip->ili_fields &= ~XFS_ILOG_AEXT;
1da177e4
LT
392 }
393 break;
394
395 case XFS_DINODE_FMT_BTREE:
f5d8d5c4 396 iip->ili_fields &=
339a5f5d
CH
397 ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT);
398
f5d8d5c4 399 if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
339a5f5d 400 ip->i_afp->if_broot_bytes > 0) {
1da177e4 401 ASSERT(ip->i_afp->if_broot != NULL);
339a5f5d 402
4e0d5f92 403 vecp->i_addr = ip->i_afp->if_broot;
1da177e4 404 vecp->i_len = ip->i_afp->if_broot_bytes;
4139b3b3 405 vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT;
1da177e4
LT
406 vecp++;
407 nvecs++;
408 iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes;
339a5f5d 409 } else {
f5d8d5c4 410 iip->ili_fields &= ~XFS_ILOG_ABROOT;
1da177e4
LT
411 }
412 break;
413
414 case XFS_DINODE_FMT_LOCAL:
f5d8d5c4 415 iip->ili_fields &=
339a5f5d
CH
416 ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);
417
f5d8d5c4 418 if ((iip->ili_fields & XFS_ILOG_ADATA) &&
339a5f5d 419 ip->i_afp->if_bytes > 0) {
1da177e4
LT
420 ASSERT(ip->i_afp->if_u1.if_data != NULL);
421
4e0d5f92 422 vecp->i_addr = ip->i_afp->if_u1.if_data;
1da177e4
LT
423 /*
424 * Round i_bytes up to a word boundary.
425 * The underlying memory is guaranteed to
426 * to be there by xfs_idata_realloc().
427 */
428 data_bytes = roundup(ip->i_afp->if_bytes, 4);
429 ASSERT((ip->i_afp->if_real_bytes == 0) ||
430 (ip->i_afp->if_real_bytes == data_bytes));
431 vecp->i_len = (int)data_bytes;
4139b3b3 432 vecp->i_type = XLOG_REG_TYPE_IATTR_LOCAL;
1da177e4
LT
433 vecp++;
434 nvecs++;
435 iip->ili_format.ilf_asize = (unsigned)data_bytes;
339a5f5d 436 } else {
f5d8d5c4 437 iip->ili_fields &= ~XFS_ILOG_ADATA;
1da177e4
LT
438 }
439 break;
440
441 default:
442 ASSERT(0);
443 break;
444 }
445
f5d8d5c4
CH
446out:
447 /*
448 * Now update the log format that goes out to disk from the in-core
449 * values. We always write the inode core to make the arithmetic
450 * games in recovery easier, which isn't a big deal as just about any
451 * transaction would dirty it anyway.
452 */
8f639dde
CH
453 iip->ili_format.ilf_fields = XFS_ILOG_CORE |
454 (iip->ili_fields & ~XFS_ILOG_TIMESTAMP);
1da177e4
LT
455 iip->ili_format.ilf_size = nvecs;
456}
457
458
459/*
460 * This is called to pin the inode associated with the inode log
a14a5ab5 461 * item in memory so it cannot be written out.
1da177e4
LT
462 */
463STATIC void
464xfs_inode_item_pin(
7bfa31d8 465 struct xfs_log_item *lip)
1da177e4 466{
7bfa31d8 467 struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode;
a14a5ab5 468
7bfa31d8
CH
469 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
470
471 trace_xfs_inode_pin(ip, _RET_IP_);
472 atomic_inc(&ip->i_pincount);
1da177e4
LT
473}
474
475
476/*
477 * This is called to unpin the inode associated with the inode log
478 * item which was previously pinned with a call to xfs_inode_item_pin().
a14a5ab5
CH
479 *
480 * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0.
1da177e4 481 */
1da177e4
LT
482STATIC void
483xfs_inode_item_unpin(
7bfa31d8 484 struct xfs_log_item *lip,
9412e318 485 int remove)
1da177e4 486{
7bfa31d8 487 struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode;
a14a5ab5 488
4aaf15d1 489 trace_xfs_inode_unpin(ip, _RET_IP_);
a14a5ab5
CH
490 ASSERT(atomic_read(&ip->i_pincount) > 0);
491 if (atomic_dec_and_test(&ip->i_pincount))
f392e631 492 wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
1da177e4
LT
493}
494
1da177e4 495STATIC uint
43ff2122
CH
496xfs_inode_item_push(
497 struct xfs_log_item *lip,
498 struct list_head *buffer_list)
1da177e4 499{
7bfa31d8
CH
500 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
501 struct xfs_inode *ip = iip->ili_inode;
43ff2122
CH
502 struct xfs_buf *bp = NULL;
503 uint rval = XFS_ITEM_SUCCESS;
504 int error;
1da177e4 505
7bfa31d8 506 if (xfs_ipincount(ip) > 0)
1da177e4 507 return XFS_ITEM_PINNED;
1da177e4 508
7bfa31d8 509 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
1da177e4 510 return XFS_ITEM_LOCKED;
1da177e4 511
4c46819a
CH
512 /*
513 * Re-check the pincount now that we stabilized the value by
514 * taking the ilock.
515 */
516 if (xfs_ipincount(ip) > 0) {
43ff2122
CH
517 rval = XFS_ITEM_PINNED;
518 goto out_unlock;
4c46819a
CH
519 }
520
9a3a5dab
BF
521 /*
522 * Stale inode items should force out the iclog.
523 */
524 if (ip->i_flags & XFS_ISTALE) {
525 rval = XFS_ITEM_PINNED;
526 goto out_unlock;
527 }
528
43ff2122
CH
529 /*
530 * Someone else is already flushing the inode. Nothing we can do
531 * here but wait for the flush to finish and remove the item from
532 * the AIL.
533 */
1da177e4 534 if (!xfs_iflock_nowait(ip)) {
43ff2122
CH
535 rval = XFS_ITEM_FLUSHING;
536 goto out_unlock;
1da177e4
LT
537 }
538
43ff2122
CH
539 ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
540 ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
541
542 spin_unlock(&lip->li_ailp->xa_lock);
543
544 error = xfs_iflush(ip, &bp);
545 if (!error) {
546 if (!xfs_buf_delwri_queue(bp, buffer_list))
547 rval = XFS_ITEM_FLUSHING;
548 xfs_buf_relse(bp);
1da177e4 549 }
43ff2122
CH
550
551 spin_lock(&lip->li_ailp->xa_lock);
552out_unlock:
553 xfs_iunlock(ip, XFS_ILOCK_SHARED);
554 return rval;
1da177e4
LT
555}
556
557/*
558 * Unlock the inode associated with the inode log item.
559 * Clear the fields of the inode and inode log item that
560 * are specific to the current transaction. If the
561 * hold flags is set, do not unlock the inode.
562 */
563STATIC void
564xfs_inode_item_unlock(
7bfa31d8 565 struct xfs_log_item *lip)
1da177e4 566{
7bfa31d8
CH
567 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
568 struct xfs_inode *ip = iip->ili_inode;
898621d5 569 unsigned short lock_flags;
1da177e4 570
f3ca8738
CH
571 ASSERT(ip->i_itemp != NULL);
572 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1da177e4
LT
573
574 /*
575 * If the inode needed a separate buffer with which to log
576 * its extents, then free it now.
577 */
578 if (iip->ili_extents_buf != NULL) {
579 ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS);
580 ASSERT(ip->i_d.di_nextents > 0);
f5d8d5c4 581 ASSERT(iip->ili_fields & XFS_ILOG_DEXT);
1da177e4 582 ASSERT(ip->i_df.if_bytes > 0);
f0e2d93c 583 kmem_free(iip->ili_extents_buf);
1da177e4
LT
584 iip->ili_extents_buf = NULL;
585 }
586 if (iip->ili_aextents_buf != NULL) {
587 ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS);
588 ASSERT(ip->i_d.di_anextents > 0);
f5d8d5c4 589 ASSERT(iip->ili_fields & XFS_ILOG_AEXT);
1da177e4 590 ASSERT(ip->i_afp->if_bytes > 0);
f0e2d93c 591 kmem_free(iip->ili_aextents_buf);
1da177e4
LT
592 iip->ili_aextents_buf = NULL;
593 }
594
898621d5
CH
595 lock_flags = iip->ili_lock_flags;
596 iip->ili_lock_flags = 0;
ddc3415a 597 if (lock_flags)
f3ca8738 598 xfs_iunlock(ip, lock_flags);
1da177e4
LT
599}
600
601/*
de25c181
DC
602 * This is called to find out where the oldest active copy of the inode log
603 * item in the on disk log resides now that the last log write of it completed
604 * at the given lsn. Since we always re-log all dirty data in an inode, the
605 * latest copy in the on disk log is the only one that matters. Therefore,
606 * simply return the given lsn.
607 *
608 * If the inode has been marked stale because the cluster is being freed, we
609 * don't want to (re-)insert this inode into the AIL. There is a race condition
610 * where the cluster buffer may be unpinned before the inode is inserted into
611 * the AIL during transaction committed processing. If the buffer is unpinned
612 * before the inode item has been committed and inserted, then it is possible
1316d4da 613 * for the buffer to be written and IO completes before the inode is inserted
de25c181
DC
614 * into the AIL. In that case, we'd be inserting a clean, stale inode into the
615 * AIL which will never get removed. It will, however, get reclaimed which
616 * triggers an assert in xfs_inode_free() complaining about freein an inode
617 * still in the AIL.
618 *
1316d4da
DC
619 * To avoid this, just unpin the inode directly and return a LSN of -1 so the
620 * transaction committed code knows that it does not need to do any further
621 * processing on the item.
1da177e4 622 */
1da177e4
LT
623STATIC xfs_lsn_t
624xfs_inode_item_committed(
7bfa31d8 625 struct xfs_log_item *lip,
1da177e4
LT
626 xfs_lsn_t lsn)
627{
de25c181
DC
628 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
629 struct xfs_inode *ip = iip->ili_inode;
630
1316d4da
DC
631 if (xfs_iflags_test(ip, XFS_ISTALE)) {
632 xfs_inode_item_unpin(lip, 0);
633 return -1;
634 }
7bfa31d8 635 return lsn;
1da177e4
LT
636}
637
1da177e4
LT
638/*
639 * XXX rcc - this one really has to do something. Probably needs
640 * to stamp in a new field in the incore inode.
641 */
1da177e4
LT
642STATIC void
643xfs_inode_item_committing(
7bfa31d8 644 struct xfs_log_item *lip,
1da177e4
LT
645 xfs_lsn_t lsn)
646{
7bfa31d8 647 INODE_ITEM(lip)->ili_last_lsn = lsn;
1da177e4
LT
648}
649
650/*
651 * This is the ops vector shared by all buf log items.
652 */
272e42b2 653static const struct xfs_item_ops xfs_inode_item_ops = {
7bfa31d8
CH
654 .iop_size = xfs_inode_item_size,
655 .iop_format = xfs_inode_item_format,
656 .iop_pin = xfs_inode_item_pin,
657 .iop_unpin = xfs_inode_item_unpin,
7bfa31d8
CH
658 .iop_unlock = xfs_inode_item_unlock,
659 .iop_committed = xfs_inode_item_committed,
660 .iop_push = xfs_inode_item_push,
7bfa31d8 661 .iop_committing = xfs_inode_item_committing
1da177e4
LT
662};
663
664
665/*
666 * Initialize the inode log item for a newly allocated (in-core) inode.
667 */
668void
669xfs_inode_item_init(
7bfa31d8
CH
670 struct xfs_inode *ip,
671 struct xfs_mount *mp)
1da177e4 672{
7bfa31d8 673 struct xfs_inode_log_item *iip;
1da177e4
LT
674
675 ASSERT(ip->i_itemp == NULL);
676 iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP);
677
1da177e4 678 iip->ili_inode = ip;
43f5efc5
DC
679 xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE,
680 &xfs_inode_item_ops);
1da177e4
LT
681 iip->ili_format.ilf_type = XFS_LI_INODE;
682 iip->ili_format.ilf_ino = ip->i_ino;
92bfc6e7
CH
683 iip->ili_format.ilf_blkno = ip->i_imap.im_blkno;
684 iip->ili_format.ilf_len = ip->i_imap.im_len;
685 iip->ili_format.ilf_boffset = ip->i_imap.im_boffset;
1da177e4
LT
686}
687
688/*
689 * Free the inode log item and any memory hanging off of it.
690 */
691void
692xfs_inode_item_destroy(
693 xfs_inode_t *ip)
694{
1da177e4
LT
695 kmem_zone_free(xfs_ili_zone, ip->i_itemp);
696}
697
698
699/*
700 * This is the inode flushing I/O completion routine. It is called
701 * from interrupt level when the buffer containing the inode is
702 * flushed to disk. It is responsible for removing the inode item
703 * from the AIL if it has not been re-logged, and unlocking the inode's
704 * flush lock.
30136832
DC
705 *
706 * To reduce AIL lock traffic as much as possible, we scan the buffer log item
707 * list for other inodes that will run this function. We remove them from the
708 * buffer list so we can process all the inode IO completions in one AIL lock
709 * traversal.
1da177e4 710 */
1da177e4
LT
711void
712xfs_iflush_done(
ca30b2a7
CH
713 struct xfs_buf *bp,
714 struct xfs_log_item *lip)
1da177e4 715{
30136832
DC
716 struct xfs_inode_log_item *iip;
717 struct xfs_log_item *blip;
718 struct xfs_log_item *next;
719 struct xfs_log_item *prev;
ca30b2a7 720 struct xfs_ail *ailp = lip->li_ailp;
30136832
DC
721 int need_ail = 0;
722
723 /*
724 * Scan the buffer IO completions for other inodes being completed and
725 * attach them to the current inode log item.
726 */
adadbeef 727 blip = bp->b_fspriv;
30136832
DC
728 prev = NULL;
729 while (blip != NULL) {
730 if (lip->li_cb != xfs_iflush_done) {
731 prev = blip;
732 blip = blip->li_bio_list;
733 continue;
734 }
735
736 /* remove from list */
737 next = blip->li_bio_list;
738 if (!prev) {
adadbeef 739 bp->b_fspriv = next;
30136832
DC
740 } else {
741 prev->li_bio_list = next;
742 }
743
744 /* add to current list */
745 blip->li_bio_list = lip->li_bio_list;
746 lip->li_bio_list = blip;
747
748 /*
749 * while we have the item, do the unlocked check for needing
750 * the AIL lock.
751 */
752 iip = INODE_ITEM(blip);
753 if (iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn)
754 need_ail++;
755
756 blip = next;
757 }
758
759 /* make sure we capture the state of the initial inode. */
760 iip = INODE_ITEM(lip);
761 if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn)
762 need_ail++;
1da177e4
LT
763
764 /*
765 * We only want to pull the item from the AIL if it is
766 * actually there and its location in the log has not
767 * changed since we started the flush. Thus, we only bother
768 * if the ili_logged flag is set and the inode's lsn has not
769 * changed. First we check the lsn outside
770 * the lock since it's cheaper, and then we recheck while
771 * holding the lock before removing the inode from the AIL.
772 */
30136832
DC
773 if (need_ail) {
774 struct xfs_log_item *log_items[need_ail];
775 int i = 0;
783a2f65 776 spin_lock(&ailp->xa_lock);
30136832
DC
777 for (blip = lip; blip; blip = blip->li_bio_list) {
778 iip = INODE_ITEM(blip);
779 if (iip->ili_logged &&
780 blip->li_lsn == iip->ili_flush_lsn) {
781 log_items[i++] = blip;
782 }
783 ASSERT(i <= need_ail);
1da177e4 784 }
30136832 785 /* xfs_trans_ail_delete_bulk() drops the AIL lock. */
04913fdd
DC
786 xfs_trans_ail_delete_bulk(ailp, log_items, i,
787 SHUTDOWN_CORRUPT_INCORE);
1da177e4
LT
788 }
789
1da177e4
LT
790
791 /*
30136832
DC
792 * clean up and unlock the flush lock now we are done. We can clear the
793 * ili_last_fields bits now that we know that the data corresponding to
794 * them is safely on disk.
1da177e4 795 */
30136832
DC
796 for (blip = lip; blip; blip = next) {
797 next = blip->li_bio_list;
798 blip->li_bio_list = NULL;
799
800 iip = INODE_ITEM(blip);
801 iip->ili_logged = 0;
802 iip->ili_last_fields = 0;
803 xfs_ifunlock(iip->ili_inode);
804 }
1da177e4
LT
805}
806
807/*
04913fdd
DC
808 * This is the inode flushing abort routine. It is called from xfs_iflush when
809 * the filesystem is shutting down to clean up the inode state. It is
810 * responsible for removing the inode item from the AIL if it has not been
811 * re-logged, and unlocking the inode's flush lock.
1da177e4
LT
812 */
813void
814xfs_iflush_abort(
04913fdd
DC
815 xfs_inode_t *ip,
816 bool stale)
1da177e4 817{
783a2f65 818 xfs_inode_log_item_t *iip = ip->i_itemp;
1da177e4 819
1da177e4 820 if (iip) {
783a2f65 821 struct xfs_ail *ailp = iip->ili_item.li_ailp;
1da177e4 822 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
783a2f65 823 spin_lock(&ailp->xa_lock);
1da177e4 824 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
783a2f65 825 /* xfs_trans_ail_delete() drops the AIL lock. */
04913fdd
DC
826 xfs_trans_ail_delete(ailp, &iip->ili_item,
827 stale ?
828 SHUTDOWN_LOG_IO_ERROR :
829 SHUTDOWN_CORRUPT_INCORE);
1da177e4 830 } else
783a2f65 831 spin_unlock(&ailp->xa_lock);
1da177e4
LT
832 }
833 iip->ili_logged = 0;
834 /*
835 * Clear the ili_last_fields bits now that we know that the
836 * data corresponding to them is safely on disk.
837 */
838 iip->ili_last_fields = 0;
839 /*
840 * Clear the inode logging fields so no more flushes are
841 * attempted.
842 */
f5d8d5c4 843 iip->ili_fields = 0;
1da177e4
LT
844 }
845 /*
846 * Release the inode's flush lock since we're done with it.
847 */
848 xfs_ifunlock(ip);
849}
850
851void
852xfs_istale_done(
ca30b2a7
CH
853 struct xfs_buf *bp,
854 struct xfs_log_item *lip)
1da177e4 855{
04913fdd 856 xfs_iflush_abort(INODE_ITEM(lip)->ili_inode, true);
1da177e4 857}
6d192a9b
TS
858
859/*
860 * convert an xfs_inode_log_format struct from either 32 or 64 bit versions
861 * (which can have different field alignments) to the native version
862 */
863int
864xfs_inode_item_format_convert(
865 xfs_log_iovec_t *buf,
866 xfs_inode_log_format_t *in_f)
867{
868 if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) {
4e0d5f92 869 xfs_inode_log_format_32_t *in_f32 = buf->i_addr;
6d192a9b 870
6d192a9b
TS
871 in_f->ilf_type = in_f32->ilf_type;
872 in_f->ilf_size = in_f32->ilf_size;
873 in_f->ilf_fields = in_f32->ilf_fields;
874 in_f->ilf_asize = in_f32->ilf_asize;
875 in_f->ilf_dsize = in_f32->ilf_dsize;
876 in_f->ilf_ino = in_f32->ilf_ino;
877 /* copy biggest field of ilf_u */
878 memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
879 in_f32->ilf_u.ilfu_uuid.__u_bits,
880 sizeof(uuid_t));
881 in_f->ilf_blkno = in_f32->ilf_blkno;
882 in_f->ilf_len = in_f32->ilf_len;
883 in_f->ilf_boffset = in_f32->ilf_boffset;
884 return 0;
885 } else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){
4e0d5f92 886 xfs_inode_log_format_64_t *in_f64 = buf->i_addr;
6d192a9b 887
6d192a9b
TS
888 in_f->ilf_type = in_f64->ilf_type;
889 in_f->ilf_size = in_f64->ilf_size;
890 in_f->ilf_fields = in_f64->ilf_fields;
891 in_f->ilf_asize = in_f64->ilf_asize;
892 in_f->ilf_dsize = in_f64->ilf_dsize;
893 in_f->ilf_ino = in_f64->ilf_ino;
894 /* copy biggest field of ilf_u */
895 memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
896 in_f64->ilf_u.ilfu_uuid.__u_bits,
897 sizeof(uuid_t));
898 in_f->ilf_blkno = in_f64->ilf_blkno;
899 in_f->ilf_len = in_f64->ilf_len;
900 in_f->ilf_boffset = in_f64->ilf_boffset;
901 return 0;
902 }
903 return EFSCORRUPTED;
904}