]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - fs/ubifs/replay.c
UBIFS: simplify replay
[mirror_ubuntu-artful-kernel.git] / fs / ubifs / replay.c
CommitLineData
1e51764a
AB
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/*
24 * This file contains journal replay code. It runs when the file-system is being
25 * mounted and requires no locking.
26 *
27 * The larger is the journal, the longer it takes to scan it, so the longer it
28 * takes to mount UBIFS. This is why the journal has limited size which may be
29 * changed depending on the system requirements. But a larger journal gives
30 * faster I/O speed because it writes the index less frequently. So this is a
31 * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the
32 * larger is the journal, the more memory its index may consume.
33 */
34
35#include "ubifs.h"
36
1e51764a
AB
37/**
38 * struct replay_entry - replay tree entry.
39 * @lnum: logical eraseblock number of the node
40 * @offs: node offset
41 * @len: node length
074bcb9b 42 * @deletion: non-zero if this entry corresponds to a node deletion
1e51764a 43 * @sqnum: node sequence number
1e51764a
AB
44 * @rb: links the replay tree
45 * @key: node key
46 * @nm: directory entry name
47 * @old_size: truncation old size
48 * @new_size: truncation new size
1e51764a
AB
49 *
50 * UBIFS journal replay must compare node sequence numbers, which means it must
51 * build a tree of node information to insert into the TNC.
52 */
53struct replay_entry {
54 int lnum;
55 int offs;
56 int len;
074bcb9b 57 unsigned int deletion:1;
1e51764a 58 unsigned long long sqnum;
1e51764a
AB
59 struct rb_node rb;
60 union ubifs_key key;
61 union {
62 struct qstr nm;
63 struct {
64 loff_t old_size;
65 loff_t new_size;
66 };
1e51764a
AB
67 };
68};
69
70/**
71 * struct bud_entry - entry in the list of buds to replay.
72 * @list: next bud in the list
73 * @bud: bud description object
1e51764a 74 * @sqnum: reference node sequence number
af1dd412
AB
75 * @free: free bytes in the bud
76 * @dirty: dirty bytes in the bud
1e51764a
AB
77 */
78struct bud_entry {
79 struct list_head list;
80 struct ubifs_bud *bud;
1e51764a 81 unsigned long long sqnum;
af1dd412
AB
82 int free;
83 int dirty;
1e51764a
AB
84};
85
86/**
87 * set_bud_lprops - set free and dirty space used by a bud.
88 * @c: UBIFS file-system description object
074bcb9b
AB
89 * @b: bud entry which describes the bud
90 *
91 * This function makes sure the LEB properties of bud @b are set correctly
92 * after the replay. Returns zero in case of success and a negative error code
93 * in case of failure.
1e51764a 94 */
074bcb9b 95static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b)
1e51764a
AB
96{
97 const struct ubifs_lprops *lp;
98 int err = 0, dirty;
99
100 ubifs_get_lprops(c);
101
074bcb9b 102 lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum);
1e51764a
AB
103 if (IS_ERR(lp)) {
104 err = PTR_ERR(lp);
105 goto out;
106 }
107
108 dirty = lp->dirty;
074bcb9b 109 if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
1e51764a
AB
110 /*
111 * The LEB was added to the journal with a starting offset of
112 * zero which means the LEB must have been empty. The LEB
074bcb9b
AB
113 * property values should be @lp->free == @c->leb_size and
114 * @lp->dirty == 0, but that is not the case. The reason is that
7a9c3e39
AB
115 * the LEB had been garbage collected before it became the bud,
116 * and there was not commit inbetween. The garbage collector
117 * resets the free and dirty space without recording it
118 * anywhere except lprops, so if there was no commit then
119 * lprops does not have that information.
1e51764a
AB
120 *
121 * We do not need to adjust free space because the scan has told
122 * us the exact value which is recorded in the replay entry as
074bcb9b 123 * @b->free.
1e51764a
AB
124 *
125 * However we do need to subtract from the dirty space the
126 * amount of space that the garbage collector reclaimed, which
127 * is the whole LEB minus the amount of space that was free.
128 */
074bcb9b 129 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
1e51764a 130 lp->free, lp->dirty);
074bcb9b 131 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
1e51764a
AB
132 lp->free, lp->dirty);
133 dirty -= c->leb_size - lp->free;
134 /*
135 * If the replay order was perfect the dirty space would now be
7d4e9ccb 136 * zero. The order is not perfect because the journal heads
6edbfafd 137 * race with each other. This is not a problem but is does mean
1e51764a
AB
138 * that the dirty space may temporarily exceed c->leb_size
139 * during the replay.
140 */
141 if (dirty != 0)
142 dbg_msg("LEB %d lp: %d free %d dirty "
074bcb9b
AB
143 "replay: %d free %d dirty", b->bud->lnum,
144 lp->free, lp->dirty, b->free, b->dirty);
1e51764a 145 }
074bcb9b 146 lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty,
1e51764a
AB
147 lp->flags | LPROPS_TAKEN, 0);
148 if (IS_ERR(lp)) {
149 err = PTR_ERR(lp);
150 goto out;
151 }
52c6e6f9
AB
152
153 /* Make sure the journal head points to the latest bud */
074bcb9b
AB
154 err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf,
155 b->bud->lnum, c->leb_size - b->free,
156 UBI_SHORTTERM);
52c6e6f9 157
1e51764a
AB
158out:
159 ubifs_release_lprops(c);
160 return err;
161}
162
074bcb9b
AB
163/**
164 * set_buds_lprops - set free and dirty space for all replayed buds.
165 * @c: UBIFS file-system description object
166 *
167 * This function sets LEB properties for all replayed buds. Returns zero in
168 * case of success and a negative error code in case of failure.
169 */
170static int set_buds_lprops(struct ubifs_info *c)
171{
172 struct bud_entry *b;
173 int err;
174
175 list_for_each_entry(b, &c->replay_buds, list) {
176 err = set_bud_lprops(c, b);
177 if (err)
178 return err;
179 }
180
181 return 0;
182}
183
1e51764a
AB
184/**
185 * trun_remove_range - apply a replay entry for a truncation to the TNC.
186 * @c: UBIFS file-system description object
187 * @r: replay entry of truncation
188 */
189static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
190{
191 unsigned min_blk, max_blk;
192 union ubifs_key min_key, max_key;
193 ino_t ino;
194
195 min_blk = r->new_size / UBIFS_BLOCK_SIZE;
196 if (r->new_size & (UBIFS_BLOCK_SIZE - 1))
197 min_blk += 1;
198
199 max_blk = r->old_size / UBIFS_BLOCK_SIZE;
200 if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0)
201 max_blk -= 1;
202
203 ino = key_inum(c, &r->key);
204
205 data_key_init(c, &min_key, ino, min_blk);
206 data_key_init(c, &max_key, ino, max_blk);
207
208 return ubifs_tnc_remove_range(c, &min_key, &max_key);
209}
210
211/**
212 * apply_replay_entry - apply a replay entry to the TNC.
213 * @c: UBIFS file-system description object
214 * @r: replay entry to apply
215 *
216 * Apply a replay entry to the TNC.
217 */
218static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
219{
074bcb9b 220 int err;
1e51764a 221
074bcb9b
AB
222 dbg_mnt("LEB %d:%d len %d deletion %d sqnum %llu %s", r->lnum,
223 r->offs, r->len, r->deletion, r->sqnum, DBGKEY(&r->key));
1e51764a
AB
224
225 /* Set c->replay_sqnum to help deal with dangling branches. */
226 c->replay_sqnum = r->sqnum;
227
074bcb9b
AB
228 if (is_hash_key(c, &r->key)) {
229 if (r->deletion)
1e51764a
AB
230 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
231 else
232 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
233 r->len, &r->nm);
234 } else {
074bcb9b 235 if (r->deletion)
1e51764a
AB
236 switch (key_type(c, &r->key)) {
237 case UBIFS_INO_KEY:
238 {
239 ino_t inum = key_inum(c, &r->key);
240
241 err = ubifs_tnc_remove_ino(c, inum);
242 break;
243 }
244 case UBIFS_TRUN_KEY:
245 err = trun_remove_range(c, r);
246 break;
247 default:
248 err = ubifs_tnc_remove(c, &r->key);
249 break;
250 }
251 else
252 err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs,
253 r->len);
254 if (err)
255 return err;
256
257 if (c->need_recovery)
074bcb9b 258 err = ubifs_recover_size_accum(c, &r->key, r->deletion,
1e51764a
AB
259 r->new_size);
260 }
261
262 return err;
263}
264
265/**
266 * destroy_replay_tree - destroy the replay.
267 * @c: UBIFS file-system description object
268 *
269 * Destroy the replay tree.
270 */
271static void destroy_replay_tree(struct ubifs_info *c)
272{
273 struct rb_node *this = c->replay_tree.rb_node;
274 struct replay_entry *r;
275
276 while (this) {
277 if (this->rb_left) {
278 this = this->rb_left;
279 continue;
280 } else if (this->rb_right) {
281 this = this->rb_right;
282 continue;
283 }
284 r = rb_entry(this, struct replay_entry, rb);
285 this = rb_parent(this);
286 if (this) {
287 if (this->rb_left == &r->rb)
288 this->rb_left = NULL;
289 else
290 this->rb_right = NULL;
291 }
292 if (is_hash_key(c, &r->key))
293 kfree(r->nm.name);
294 kfree(r);
295 }
296 c->replay_tree = RB_ROOT;
297}
298
299/**
300 * apply_replay_tree - apply the replay tree to the TNC.
301 * @c: UBIFS file-system description object
302 *
303 * Apply the replay tree.
304 * Returns zero in case of success and a negative error code in case of
305 * failure.
306 */
307static int apply_replay_tree(struct ubifs_info *c)
308{
309 struct rb_node *this = rb_first(&c->replay_tree);
310
311 while (this) {
312 struct replay_entry *r;
313 int err;
314
315 cond_resched();
316
317 r = rb_entry(this, struct replay_entry, rb);
318 err = apply_replay_entry(c, r);
319 if (err)
320 return err;
321 this = rb_next(this);
322 }
323 return 0;
324}
325
326/**
327 * insert_node - insert a node to the replay tree.
328 * @c: UBIFS file-system description object
329 * @lnum: node logical eraseblock number
330 * @offs: node offset
331 * @len: node length
332 * @key: node key
333 * @sqnum: sequence number
334 * @deletion: non-zero if this is a deletion
335 * @used: number of bytes in use in a LEB
336 * @old_size: truncation old size
337 * @new_size: truncation new size
338 *
339 * This function inserts a scanned non-direntry node to the replay tree. The
340 * replay tree is an RB-tree containing @struct replay_entry elements which are
341 * indexed by the sequence number. The replay tree is applied at the very end
342 * of the replay process. Since the tree is sorted in sequence number order,
343 * the older modifications are applied first. This function returns zero in
344 * case of success and a negative error code in case of failure.
345 */
346static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
347 union ubifs_key *key, unsigned long long sqnum,
348 int deletion, int *used, loff_t old_size,
349 loff_t new_size)
350{
351 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
352 struct replay_entry *r;
353
354 if (key_inum(c, key) >= c->highest_inum)
355 c->highest_inum = key_inum(c, key);
356
357 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
358 while (*p) {
359 parent = *p;
360 r = rb_entry(parent, struct replay_entry, rb);
361 if (sqnum < r->sqnum) {
362 p = &(*p)->rb_left;
363 continue;
364 } else if (sqnum > r->sqnum) {
365 p = &(*p)->rb_right;
366 continue;
367 }
368 ubifs_err("duplicate sqnum in replay");
369 return -EINVAL;
370 }
371
372 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
373 if (!r)
374 return -ENOMEM;
375
376 if (!deletion)
377 *used += ALIGN(len, 8);
378 r->lnum = lnum;
379 r->offs = offs;
380 r->len = len;
074bcb9b 381 r->deletion = !!deletion;
1e51764a 382 r->sqnum = sqnum;
074bcb9b 383 key_copy(c, key, &r->key);
1e51764a
AB
384 r->old_size = old_size;
385 r->new_size = new_size;
1e51764a
AB
386
387 rb_link_node(&r->rb, parent, p);
388 rb_insert_color(&r->rb, &c->replay_tree);
389 return 0;
390}
391
392/**
393 * insert_dent - insert a directory entry node into the replay tree.
394 * @c: UBIFS file-system description object
395 * @lnum: node logical eraseblock number
396 * @offs: node offset
397 * @len: node length
398 * @key: node key
399 * @name: directory entry name
400 * @nlen: directory entry name length
401 * @sqnum: sequence number
402 * @deletion: non-zero if this is a deletion
403 * @used: number of bytes in use in a LEB
404 *
405 * This function inserts a scanned directory entry node to the replay tree.
406 * Returns zero in case of success and a negative error code in case of
407 * failure.
408 *
409 * This function is also used for extended attribute entries because they are
410 * implemented as directory entry nodes.
411 */
412static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
413 union ubifs_key *key, const char *name, int nlen,
414 unsigned long long sqnum, int deletion, int *used)
415{
416 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
417 struct replay_entry *r;
418 char *nbuf;
419
420 if (key_inum(c, key) >= c->highest_inum)
421 c->highest_inum = key_inum(c, key);
422
423 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
424 while (*p) {
425 parent = *p;
426 r = rb_entry(parent, struct replay_entry, rb);
427 if (sqnum < r->sqnum) {
428 p = &(*p)->rb_left;
429 continue;
430 }
431 if (sqnum > r->sqnum) {
432 p = &(*p)->rb_right;
433 continue;
434 }
435 ubifs_err("duplicate sqnum in replay");
436 return -EINVAL;
437 }
438
439 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
440 if (!r)
441 return -ENOMEM;
442 nbuf = kmalloc(nlen + 1, GFP_KERNEL);
443 if (!nbuf) {
444 kfree(r);
445 return -ENOMEM;
446 }
447
448 if (!deletion)
449 *used += ALIGN(len, 8);
450 r->lnum = lnum;
451 r->offs = offs;
452 r->len = len;
074bcb9b 453 r->deletion = !!deletion;
1e51764a 454 r->sqnum = sqnum;
074bcb9b 455 key_copy(c, key, &r->key);
1e51764a
AB
456 r->nm.len = nlen;
457 memcpy(nbuf, name, nlen);
458 nbuf[nlen] = '\0';
459 r->nm.name = nbuf;
1e51764a
AB
460
461 ubifs_assert(!*p);
462 rb_link_node(&r->rb, parent, p);
463 rb_insert_color(&r->rb, &c->replay_tree);
464 return 0;
465}
466
467/**
468 * ubifs_validate_entry - validate directory or extended attribute entry node.
469 * @c: UBIFS file-system description object
470 * @dent: the node to validate
471 *
472 * This function validates directory or extended attribute entry node @dent.
473 * Returns zero if the node is all right and a %-EINVAL if not.
474 */
475int ubifs_validate_entry(struct ubifs_info *c,
476 const struct ubifs_dent_node *dent)
477{
478 int key_type = key_type_flash(c, dent->key);
479 int nlen = le16_to_cpu(dent->nlen);
480
481 if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 ||
482 dent->type >= UBIFS_ITYPES_CNT ||
483 nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 ||
484 strnlen(dent->name, nlen) != nlen ||
485 le64_to_cpu(dent->inum) > MAX_INUM) {
486 ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ?
487 "directory entry" : "extended attribute entry");
488 return -EINVAL;
489 }
490
491 if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) {
492 ubifs_err("bad key type %d", key_type);
493 return -EINVAL;
494 }
495
496 return 0;
497}
498
499/**
500 * replay_bud - replay a bud logical eraseblock.
501 * @c: UBIFS file-system description object
502 * @lnum: bud logical eraseblock number to replay
503 * @offs: bud start offset
504 * @jhead: journal head to which this bud belongs
505 * @free: amount of free space in the bud is returned here
506 * @dirty: amount of dirty space from padding and deletion nodes is returned
507 * here
508 *
509 * This function returns zero in case of success and a negative error code in
510 * case of failure.
511 */
512static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
513 int *free, int *dirty)
514{
515 int err = 0, used = 0;
516 struct ubifs_scan_leb *sleb;
517 struct ubifs_scan_node *snod;
518 struct ubifs_bud *bud;
519
c839e297 520 dbg_mnt("replay bud LEB %d, head %d, offs %d", lnum, jhead, offs);
1e51764a
AB
521 if (c->need_recovery)
522 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD);
523 else
348709ba 524 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
1e51764a
AB
525 if (IS_ERR(sleb))
526 return PTR_ERR(sleb);
527
528 /*
529 * The bud does not have to start from offset zero - the beginning of
530 * the 'lnum' LEB may contain previously committed data. One of the
531 * things we have to do in replay is to correctly update lprops with
532 * newer information about this LEB.
533 *
534 * At this point lprops thinks that this LEB has 'c->leb_size - offs'
535 * bytes of free space because it only contain information about
536 * committed data.
537 *
538 * But we know that real amount of free space is 'c->leb_size -
539 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and
540 * 'sleb->endpt' is used by bud data. We have to correctly calculate
541 * how much of these data are dirty and update lprops with this
542 * information.
543 *
544 * The dirt in that LEB region is comprised of padding nodes, deletion
545 * nodes, truncation nodes and nodes which are obsoleted by subsequent
546 * nodes in this LEB. So instead of calculating clean space, we
547 * calculate used space ('used' variable).
548 */
549
550 list_for_each_entry(snod, &sleb->nodes, list) {
551 int deletion = 0;
552
553 cond_resched();
554
555 if (snod->sqnum >= SQNUM_WATERMARK) {
556 ubifs_err("file system's life ended");
557 goto out_dump;
558 }
559
560 if (snod->sqnum > c->max_sqnum)
561 c->max_sqnum = snod->sqnum;
562
563 switch (snod->type) {
564 case UBIFS_INO_NODE:
565 {
566 struct ubifs_ino_node *ino = snod->node;
567 loff_t new_size = le64_to_cpu(ino->size);
568
569 if (le32_to_cpu(ino->nlink) == 0)
570 deletion = 1;
571 err = insert_node(c, lnum, snod->offs, snod->len,
572 &snod->key, snod->sqnum, deletion,
573 &used, 0, new_size);
574 break;
575 }
576 case UBIFS_DATA_NODE:
577 {
578 struct ubifs_data_node *dn = snod->node;
579 loff_t new_size = le32_to_cpu(dn->size) +
580 key_block(c, &snod->key) *
581 UBIFS_BLOCK_SIZE;
582
583 err = insert_node(c, lnum, snod->offs, snod->len,
584 &snod->key, snod->sqnum, deletion,
585 &used, 0, new_size);
586 break;
587 }
588 case UBIFS_DENT_NODE:
589 case UBIFS_XENT_NODE:
590 {
591 struct ubifs_dent_node *dent = snod->node;
592
593 err = ubifs_validate_entry(c, dent);
594 if (err)
595 goto out_dump;
596
597 err = insert_dent(c, lnum, snod->offs, snod->len,
598 &snod->key, dent->name,
599 le16_to_cpu(dent->nlen), snod->sqnum,
600 !le64_to_cpu(dent->inum), &used);
601 break;
602 }
603 case UBIFS_TRUN_NODE:
604 {
605 struct ubifs_trun_node *trun = snod->node;
606 loff_t old_size = le64_to_cpu(trun->old_size);
607 loff_t new_size = le64_to_cpu(trun->new_size);
608 union ubifs_key key;
609
610 /* Validate truncation node */
611 if (old_size < 0 || old_size > c->max_inode_sz ||
612 new_size < 0 || new_size > c->max_inode_sz ||
613 old_size <= new_size) {
614 ubifs_err("bad truncation node");
615 goto out_dump;
616 }
617
618 /*
619 * Create a fake truncation key just to use the same
620 * functions which expect nodes to have keys.
621 */
622 trun_key_init(c, &key, le32_to_cpu(trun->inum));
623 err = insert_node(c, lnum, snod->offs, snod->len,
624 &key, snod->sqnum, 1, &used,
625 old_size, new_size);
626 break;
627 }
628 default:
629 ubifs_err("unexpected node type %d in bud LEB %d:%d",
630 snod->type, lnum, snod->offs);
631 err = -EINVAL;
632 goto out_dump;
633 }
634 if (err)
635 goto out;
636 }
637
638 bud = ubifs_search_bud(c, lnum);
639 if (!bud)
640 BUG();
641
642 ubifs_assert(sleb->endpt - offs >= used);
643 ubifs_assert(sleb->endpt % c->min_io_size == 0);
644
1e51764a
AB
645 *dirty = sleb->endpt - offs - used;
646 *free = c->leb_size - sleb->endpt;
c839e297 647 dbg_mnt("bud LEB %d replied: dirty %d, free %d", lnum, *dirty, *free);
1e51764a
AB
648
649out:
650 ubifs_scan_destroy(sleb);
651 return err;
652
653out_dump:
654 ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs);
655 dbg_dump_node(c, snod->node);
656 ubifs_scan_destroy(sleb);
657 return -EINVAL;
658}
659
1e51764a
AB
660/**
661 * replay_buds - replay all buds.
662 * @c: UBIFS file-system description object
663 *
664 * This function returns zero in case of success and a negative error code in
665 * case of failure.
666 */
667static int replay_buds(struct ubifs_info *c)
668{
669 struct bud_entry *b;
074bcb9b 670 int err;
7703f09d 671 unsigned long long prev_sqnum = 0;
1e51764a
AB
672
673 list_for_each_entry(b, &c->replay_buds, list) {
674 err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead,
074bcb9b 675 &b->free, &b->dirty);
1e51764a
AB
676 if (err)
677 return err;
7703f09d
AB
678
679 ubifs_assert(b->sqnum > prev_sqnum);
680 prev_sqnum = b->sqnum;
1e51764a
AB
681 }
682
683 return 0;
684}
685
686/**
687 * destroy_bud_list - destroy the list of buds to replay.
688 * @c: UBIFS file-system description object
689 */
690static void destroy_bud_list(struct ubifs_info *c)
691{
692 struct bud_entry *b;
693
694 while (!list_empty(&c->replay_buds)) {
695 b = list_entry(c->replay_buds.next, struct bud_entry, list);
696 list_del(&b->list);
697 kfree(b);
698 }
699}
700
701/**
702 * add_replay_bud - add a bud to the list of buds to replay.
703 * @c: UBIFS file-system description object
704 * @lnum: bud logical eraseblock number to replay
705 * @offs: bud start offset
706 * @jhead: journal head to which this bud belongs
707 * @sqnum: reference node sequence number
708 *
709 * This function returns zero in case of success and a negative error code in
710 * case of failure.
711 */
712static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
713 unsigned long long sqnum)
714{
715 struct ubifs_bud *bud;
716 struct bud_entry *b;
717
718 dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead);
719
720 bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL);
721 if (!bud)
722 return -ENOMEM;
723
724 b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL);
725 if (!b) {
726 kfree(bud);
727 return -ENOMEM;
728 }
729
730 bud->lnum = lnum;
731 bud->start = offs;
732 bud->jhead = jhead;
733 ubifs_add_bud(c, bud);
734
735 b->bud = bud;
736 b->sqnum = sqnum;
737 list_add_tail(&b->list, &c->replay_buds);
738
739 return 0;
740}
741
742/**
743 * validate_ref - validate a reference node.
744 * @c: UBIFS file-system description object
745 * @ref: the reference node to validate
746 * @ref_lnum: LEB number of the reference node
747 * @ref_offs: reference node offset
748 *
749 * This function returns %1 if a bud reference already exists for the LEB. %0 is
750 * returned if the reference node is new, otherwise %-EINVAL is returned if
751 * validation failed.
752 */
753static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref)
754{
755 struct ubifs_bud *bud;
756 int lnum = le32_to_cpu(ref->lnum);
757 unsigned int offs = le32_to_cpu(ref->offs);
758 unsigned int jhead = le32_to_cpu(ref->jhead);
759
760 /*
761 * ref->offs may point to the end of LEB when the journal head points
762 * to the end of LEB and we write reference node for it during commit.
763 * So this is why we require 'offs > c->leb_size'.
764 */
765 if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt ||
766 lnum < c->main_first || offs > c->leb_size ||
767 offs & (c->min_io_size - 1))
768 return -EINVAL;
769
770 /* Make sure we have not already looked at this bud */
771 bud = ubifs_search_bud(c, lnum);
772 if (bud) {
773 if (bud->jhead == jhead && bud->start <= offs)
774 return 1;
775 ubifs_err("bud at LEB %d:%d was already referred", lnum, offs);
776 return -EINVAL;
777 }
778
779 return 0;
780}
781
782/**
783 * replay_log_leb - replay a log logical eraseblock.
784 * @c: UBIFS file-system description object
785 * @lnum: log logical eraseblock to replay
786 * @offs: offset to start replaying from
787 * @sbuf: scan buffer
788 *
789 * This function replays a log LEB and returns zero in case of success, %1 if
790 * this is the last LEB in the log, and a negative error code in case of
791 * failure.
792 */
793static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
794{
795 int err;
796 struct ubifs_scan_leb *sleb;
797 struct ubifs_scan_node *snod;
798 const struct ubifs_cs_node *node;
799
800 dbg_mnt("replay log LEB %d:%d", lnum, offs);
348709ba
AB
801 sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery);
802 if (IS_ERR(sleb)) {
ed43f2f0
AB
803 if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery)
804 return PTR_ERR(sleb);
7d08ae3c
AB
805 /*
806 * Note, the below function will recover this log LEB only if
807 * it is the last, because unclean reboots can possibly corrupt
808 * only the tail of the log.
809 */
ed43f2f0 810 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
1e51764a
AB
811 if (IS_ERR(sleb))
812 return PTR_ERR(sleb);
813 }
814
815 if (sleb->nodes_cnt == 0) {
816 err = 1;
817 goto out;
818 }
819
820 node = sleb->buf;
1e51764a
AB
821 snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
822 if (c->cs_sqnum == 0) {
823 /*
824 * This is the first log LEB we are looking at, make sure that
825 * the first node is a commit start node. Also record its
826 * sequence number so that UBIFS can determine where the log
827 * ends, because all nodes which were have higher sequence
828 * numbers.
829 */
830 if (snod->type != UBIFS_CS_NODE) {
831 dbg_err("first log node at LEB %d:%d is not CS node",
832 lnum, offs);
833 goto out_dump;
834 }
835 if (le64_to_cpu(node->cmt_no) != c->cmt_no) {
836 dbg_err("first CS node at LEB %d:%d has wrong "
837 "commit number %llu expected %llu",
838 lnum, offs,
839 (unsigned long long)le64_to_cpu(node->cmt_no),
840 c->cmt_no);
841 goto out_dump;
842 }
843
844 c->cs_sqnum = le64_to_cpu(node->ch.sqnum);
845 dbg_mnt("commit start sqnum %llu", c->cs_sqnum);
846 }
847
848 if (snod->sqnum < c->cs_sqnum) {
849 /*
850 * This means that we reached end of log and now
851 * look to the older log data, which was already
852 * committed but the eraseblock was not erased (UBIFS
6edbfafd 853 * only un-maps it). So this basically means we have to
1e51764a
AB
854 * exit with "end of log" code.
855 */
856 err = 1;
857 goto out;
858 }
859
860 /* Make sure the first node sits at offset zero of the LEB */
861 if (snod->offs != 0) {
862 dbg_err("first node is not at zero offset");
863 goto out_dump;
864 }
865
866 list_for_each_entry(snod, &sleb->nodes, list) {
1e51764a
AB
867 cond_resched();
868
869 if (snod->sqnum >= SQNUM_WATERMARK) {
870 ubifs_err("file system's life ended");
871 goto out_dump;
872 }
873
874 if (snod->sqnum < c->cs_sqnum) {
875 dbg_err("bad sqnum %llu, commit sqnum %llu",
876 snod->sqnum, c->cs_sqnum);
877 goto out_dump;
878 }
879
880 if (snod->sqnum > c->max_sqnum)
881 c->max_sqnum = snod->sqnum;
882
883 switch (snod->type) {
884 case UBIFS_REF_NODE: {
885 const struct ubifs_ref_node *ref = snod->node;
886
887 err = validate_ref(c, ref);
888 if (err == 1)
889 break; /* Already have this bud */
890 if (err)
891 goto out_dump;
892
893 err = add_replay_bud(c, le32_to_cpu(ref->lnum),
894 le32_to_cpu(ref->offs),
895 le32_to_cpu(ref->jhead),
896 snod->sqnum);
897 if (err)
898 goto out;
899
900 break;
901 }
902 case UBIFS_CS_NODE:
903 /* Make sure it sits at the beginning of LEB */
904 if (snod->offs != 0) {
905 ubifs_err("unexpected node in log");
906 goto out_dump;
907 }
908 break;
909 default:
910 ubifs_err("unexpected node in log");
911 goto out_dump;
912 }
913 }
914
915 if (sleb->endpt || c->lhead_offs >= c->leb_size) {
916 c->lhead_lnum = lnum;
917 c->lhead_offs = sleb->endpt;
918 }
919
920 err = !sleb->endpt;
921out:
922 ubifs_scan_destroy(sleb);
923 return err;
924
925out_dump:
681947d2 926 ubifs_err("log error detected while replaying the log at LEB %d:%d",
1e51764a
AB
927 lnum, offs + snod->offs);
928 dbg_dump_node(c, snod->node);
929 ubifs_scan_destroy(sleb);
930 return -EINVAL;
931}
932
933/**
934 * take_ihead - update the status of the index head in lprops to 'taken'.
935 * @c: UBIFS file-system description object
936 *
937 * This function returns the amount of free space in the index head LEB or a
938 * negative error code.
939 */
940static int take_ihead(struct ubifs_info *c)
941{
942 const struct ubifs_lprops *lp;
943 int err, free;
944
945 ubifs_get_lprops(c);
946
947 lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum);
948 if (IS_ERR(lp)) {
949 err = PTR_ERR(lp);
950 goto out;
951 }
952
953 free = lp->free;
954
955 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
956 lp->flags | LPROPS_TAKEN, 0);
957 if (IS_ERR(lp)) {
958 err = PTR_ERR(lp);
959 goto out;
960 }
961
962 err = free;
963out:
964 ubifs_release_lprops(c);
965 return err;
966}
967
968/**
969 * ubifs_replay_journal - replay journal.
970 * @c: UBIFS file-system description object
971 *
972 * This function scans the journal, replays and cleans it up. It makes sure all
973 * memory data structures related to uncommitted journal are built (dirty TNC
974 * tree, tree of buds, modified lprops, etc).
975 */
976int ubifs_replay_journal(struct ubifs_info *c)
977{
978 int err, i, lnum, offs, free;
1e51764a
AB
979
980 BUILD_BUG_ON(UBIFS_TRUN_KEY > 5);
981
982 /* Update the status of the index head in lprops to 'taken' */
983 free = take_ihead(c);
984 if (free < 0)
985 return free; /* Error code */
986
987 if (c->ihead_offs != c->leb_size - free) {
988 ubifs_err("bad index head LEB %d:%d", c->ihead_lnum,
989 c->ihead_offs);
990 return -EINVAL;
991 }
992
1e51764a 993 dbg_mnt("start replaying the journal");
1e51764a 994 c->replaying = 1;
1e51764a
AB
995 lnum = c->ltail_lnum = c->lhead_lnum;
996 offs = c->lhead_offs;
997
998 for (i = 0; i < c->log_lebs; i++, lnum++) {
999 if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) {
1000 /*
1001 * The log is logically circular, we reached the last
1002 * LEB, switch to the first one.
1003 */
1004 lnum = UBIFS_LOG_LNUM;
1005 offs = 0;
1006 }
6599fcbd 1007 err = replay_log_leb(c, lnum, offs, c->sbuf);
1e51764a
AB
1008 if (err == 1)
1009 /* We hit the end of the log */
1010 break;
1011 if (err)
1012 goto out;
1013 offs = 0;
1014 }
1015
1016 err = replay_buds(c);
1017 if (err)
1018 goto out;
1019
1020 err = apply_replay_tree(c);
1021 if (err)
1022 goto out;
1023
074bcb9b
AB
1024 err = set_buds_lprops(c);
1025 if (err)
1026 goto out;
1027
6edbfafd 1028 /*
b137545c
AB
1029 * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable
1030 * to roughly estimate index growth. Things like @c->bi.min_idx_lebs
6edbfafd
AB
1031 * depend on it. This means we have to initialize it to make sure
1032 * budgeting works properly.
1033 */
b137545c
AB
1034 c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
1035 c->bi.uncommitted_idx *= c->max_idx_node_sz;
6edbfafd 1036
1e51764a
AB
1037 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
1038 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
1039 "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
e84461ad 1040 (unsigned long)c->highest_inum);
1e51764a
AB
1041out:
1042 destroy_replay_tree(c);
1043 destroy_bud_list(c);
1e51764a
AB
1044 c->replaying = 0;
1045 return err;
1046}