1/*
2 * Copyright (c) International Business Machines Corp., 2006
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 * Authors: Artem Bityutskiy (Битюцкий Артём), Thomas Gleixner
19 */
20
21/*
22 * UBI wear-leveling unit.
23 *
24 * This unit is responsible for wear-leveling. It works in terms of physical
25 * eraseblocks and erase counters and knows nothing about logical eraseblocks,
26 * volumes, etc. From this unit's perspective all physical eraseblocks are of
27 * two types - used and free. Used physical eraseblocks are those that were
28 * "get" by the 'ubi_wl_get_peb()' function, and free physical eraseblocks are
29 * those that were put by the 'ubi_wl_put_peb()' function.
30 *
31 * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter
32 * header. The rest of the physical eraseblock contains only 0xFF bytes.
33 *
34 * When physical eraseblocks are returned to the WL unit by means of the
35 * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is
36 * done asynchronously in context of the per-UBI device background thread,
37 * which is also managed by the WL unit.
38 *
39 * The wear-leveling is ensured by means of moving the contents of used
40 * physical eraseblocks with low erase counter to free physical eraseblocks
41 * with high erase counter.
42 *
43 * The 'ubi_wl_get_peb()' function accepts data type hints which help to pick
44 * an "optimal" physical eraseblock. For example, when it is known that the
45 * physical eraseblock will be "put" soon because it contains short-term data,
46 * the WL unit may pick a free physical eraseblock with low erase counter, and
47 * so forth.
48 *
49 * If the WL unit fails to erase a physical eraseblock, it marks it as bad.
50 *
51 * This unit is also responsible for scrubbing. If a bit-flip is detected in a
52 * physical eraseblock, it has to be moved. Technically this is the same as
53 * moving it for wear-leveling reasons.
54 *
55 * As it was said, for the UBI unit all physical eraseblocks are either "free"
56 * or "used". Free eraseblock are kept in the @wl->free RB-tree, while used
57 * eraseblocks are kept in a set of different RB-trees: @wl->used,
58 * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub.
59 *
60 * Note, in this implementation, we keep a small in-RAM object for each physical
61 * eraseblock. This is surely not a scalable solution. But it appears to be good
62 * enough for moderately large flashes and it is simple. In future, one may
63 * re-work this unit and make it more scalable.
64 *
65 * At the moment this unit does not utilize the sequence number, which was
66 * introduced relatively recently. But it would be wise to do this because the
67 * sequence number of a logical eraseblock characterizes how old it is. For
68 * example, when we move a PEB with low erase counter, and we need to pick the
69 * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we
70 * pick a target PEB with an average EC if our PEB is not very "old". This
71 * leaves room for future re-works of the WL unit.
72 *
73 * FIXME: looks too complex, should be simplified (later).
74 */
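/*
 * Illustrative usage sketch (an addition, not part of the original file):
 * roughly how a caller such as the EBA unit might obtain a PEB with a data
 * type hint and later return it to the WL unit. Error handling is reduced to
 * a minimum and the function name is made up for the example.
 */
#if 0
static int example_use_of_wl(struct ubi_device *ubi)
{
	int pnum, err;

	/* Ask the WL unit for a PEB suitable for short-term data */
	pnum = ubi_wl_get_peb(ubi, UBI_SHORTTERM);
	if (pnum < 0)
		return pnum;

	/* ... the caller writes its data to @pnum here ... */

	/* Return the PEB; it will be scheduled for erasure asynchronously */
	err = ubi_wl_put_peb(ubi, pnum, 0);
	return err;
}
#endif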
75
76#include <linux/slab.h>
77#include <linux/crc32.h>
78#include <linux/freezer.h>
79#include <linux/kthread.h>
80#include "ubi.h"
81
82/* Number of physical eraseblocks reserved for wear-leveling purposes */
83#define WL_RESERVED_PEBS 1
84
85/*
86 * For how many erase cycles short-term, unknown, and long-term physical
87 * eraseblocks are protected.
88 */
89#define ST_PROTECTION 16
90#define U_PROTECTION 10
91#define LT_PROTECTION 4
92
93/*
94 * Maximum difference between two erase counters. If this threshold is
95 * exceeded, the WL unit starts moving data from used physical eraseblocks with
96 * low erase counter to free physical eraseblocks with high erase counter.
97 */
98#define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD
99
100/*
101 * When a physical eraseblock is moved, the WL unit has to pick the target
102 * physical eraseblock to move to. The simplest way would be just to pick the
103 * one with the highest erase counter. But in certain workloads this could lead
104 * to unlimited wear of one or a few physical eraseblocks. Indeed, imagine a
105 * situation when the picked physical eraseblock is constantly erased after the
106 * data is written to it. So, we have a constant which limits the highest erase
107 * counter of the free physical eraseblock to pick. Namely, the WL unit does
108 * not pick eraseblocks with an erase counter greater than the lowest erase
109 * counter plus %WL_FREE_MAX_DIFF.
110 */
111#define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD)
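/*
 * Worked example (an addition; assumes the default Kconfig value
 * CONFIG_MTD_UBI_WL_THRESHOLD = 4096): WL_FREE_MAX_DIFF = 2 * 4096 = 8192.
 * If the lowest erase counter among the free physical eraseblocks is 1000,
 * 'find_wl_entry()' below never picks a free PEB whose erase counter is
 * 9192 or higher.
 */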
112
113/*
114 * Maximum number of consecutive background thread failures which is enough to
115 * switch to read-only mode.
116 */
117#define WL_MAX_FAILURES 32
118
119/**
120 * struct ubi_wl_prot_entry - PEB protection entry.
121 * @rb_pnum: link in the @wl->prot.pnum RB-tree
122 * @rb_aec: link in the @wl->prot.aec RB-tree
123 * @abs_ec: the absolute erase counter value when the protection ends
124 * @e: the wear-leveling entry of the physical eraseblock under protection
125 *
126 * When the WL unit returns a physical eraseblock, the physical eraseblock is
127 * protected from being moved for some "time". For this reason, the physical
128 * eraseblock is not directly moved from the @wl->free tree to the @wl->used
129 * tree. There is one more tree in between where this physical eraseblock is
130 * temporarily stored (@wl->prot).
131 *
132 * All this protection stuff is needed because:
133 * o we don't want to move physical eraseblocks just after we have given them
134 * to the user; instead, we first want to let users fill them up with data;
135 *
136 * o there is a chance that the user will put the physical eraseblock very
137 * soon, so it makes sense not to move it for some time, but wait; this is
138 * especially important in case of "short term" physical eraseblocks.
139 *
140 * Physical eraseblocks stay protected only for a limited time. The "time" is
141 * measured in erase cycles in this case. This is implemented with the help of
142 * the absolute erase counter (@wl->abs_ec). When it reaches a certain value, the
143 * physical eraseblocks are moved from the protection trees (@wl->prot.*) to
144 * the @wl->used tree.
145 *
146 * Protected physical eraseblocks are searched by physical eraseblock number
147 * (when they are put) and by the absolute erase counter (to check if it is
148 * time to move them to the @wl->used tree). So there are actually 2 RB-trees
149 * storing the protected physical eraseblocks: @wl->prot.pnum and
150 * @wl->prot.aec. They are referred to as the "protection" trees. The
151 * first one is indexed by the physical eraseblock number. The second one is
152 * indexed by the absolute erase counter. Both trees store
153 * &struct ubi_wl_prot_entry objects.
154 *
155 * Each physical eraseblock has 2 main states: free and used. The former state
156 * corresponds to the @wl->free tree. The latter state is split up into several
157 * sub-states:
158 * o the WL movement is allowed (@wl->used tree);
159 * o the WL movement is temporarily prohibited (@wl->prot.pnum and
160 * @wl->prot.aec trees);
161 * o scrubbing is needed (@wl->scrub tree).
162 *
163 * Depending on the sub-state, wear-leveling entries of the used physical
164 * eraseblocks may be kept in one of those trees.
165 */
166struct ubi_wl_prot_entry {
167 struct rb_node rb_pnum;
168 struct rb_node rb_aec;
169 unsigned long long abs_ec;
170 struct ubi_wl_entry *e;
171};
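/*
 * Illustrative example (an addition, not part of the original file): suppose
 * 'ubi_wl_get_peb()' hands out a PEB for short-term data while @ubi->abs_ec
 * is 100. With %ST_PROTECTION = 16, 'prot_tree_add()' records abs_ec = 116 in
 * the protection entry, and 'check_protection_over()' moves the PEB to the
 * @wl->used tree once the absolute erase counter reaches 116.
 */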
172
173/**
174 * struct ubi_work - UBI work description data structure.
175 * @list: a link in the list of pending works
176 * @func: worker function
177 * @priv: private data of the worker function
178 *
179 * @e: physical eraseblock to erase
180 * @torture: if the physical eraseblock has to be tortured
181 *
182 * The @func pointer points to the worker function. If the @cancel argument is
183 * not zero, the worker has to free the resources and exit immediately. The
184 * worker has to return zero in case of success and a negative error code in
185 * case of failure.
186 */
187struct ubi_work {
188 struct list_head list;
189 int (*func)(struct ubi_device *ubi, struct ubi_work *wrk, int cancel);
190 /* The below fields are only relevant to erasure works */
191 struct ubi_wl_entry *e;
192 int torture;
193};
194
195#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
196static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec);
197static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
198 struct rb_root *root);
199#else
200#define paranoid_check_ec(ubi, pnum, ec) 0
201#define paranoid_check_in_wl_tree(e, root)
202#endif
203
204/**
205 * wl_tree_add - add a wear-leveling entry to a WL RB-tree.
206 * @e: the wear-leveling entry to add
207 * @root: the root of the tree
208 *
209 * Note, we use (erase counter, physical eraseblock number) pairs as keys in
210 * the @ubi->used and @ubi->free RB-trees.
211 */
212static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root)
213{
214 struct rb_node **p, *parent = NULL;
215
216 p = &root->rb_node;
217 while (*p) {
218 struct ubi_wl_entry *e1;
219
220 parent = *p;
221 e1 = rb_entry(parent, struct ubi_wl_entry, rb);
222
223 if (e->ec < e1->ec)
224 p = &(*p)->rb_left;
225 else if (e->ec > e1->ec)
226 p = &(*p)->rb_right;
227 else {
228 ubi_assert(e->pnum != e1->pnum);
229 if (e->pnum < e1->pnum)
230 p = &(*p)->rb_left;
231 else
232 p = &(*p)->rb_right;
233 }
234 }
235
236 rb_link_node(&e->rb, parent, p);
237 rb_insert_color(&e->rb, root);
238}
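/*
 * Illustrative ordering example (an addition, not part of the original file):
 * given entries {pnum=7, ec=10}, {pnum=3, ec=10} and {pnum=5, ec=12}, an
 * in-order walk of the tree visits (ec=10, pnum=3), (ec=10, pnum=7),
 * (ec=12, pnum=5) - the erase counter is the primary key and the physical
 * eraseblock number breaks ties.
 */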
239
240/**
241 * do_work - do one pending work.
242 * @ubi: UBI device description object
243 *
244 * This function returns zero in case of success and a negative error code in
245 * case of failure.
246 */
247static int do_work(struct ubi_device *ubi)
248{
249 int err;
250 struct ubi_work *wrk;
251
252 cond_resched();
253
254 spin_lock(&ubi->wl_lock);
255
256 if (list_empty(&ubi->works)) {
257 spin_unlock(&ubi->wl_lock);
258 return 0;
259 }
260
261 wrk = list_entry(ubi->works.next, struct ubi_work, list);
262 list_del(&wrk->list);
263 spin_unlock(&ubi->wl_lock);
264
265 /*
266 * Call the worker function. Do not touch the work structure
267 * after this call as it will have been freed or reused by that
268 * time by the worker function.
269 */
270 err = wrk->func(ubi, wrk, 0);
271 if (err)
272 ubi_err("work failed with error code %d", err);
273
274 spin_lock(&ubi->wl_lock);
275 ubi->works_count -= 1;
276 ubi_assert(ubi->works_count >= 0);
277 spin_unlock(&ubi->wl_lock);
278 return err;
279}
280
281/**
282 * produce_free_peb - produce a free physical eraseblock.
283 * @ubi: UBI device description object
284 *
285 * This function tries to make a free PEB by means of synchronous execution of
286 * pending works. This may be needed if, for example, the background thread is
287 * disabled. Returns zero in case of success and a negative error code in case
288 * of failure.
289 */
290static int produce_free_peb(struct ubi_device *ubi)
291{
292 int err;
293
294 spin_lock(&ubi->wl_lock);
295 while (!ubi->free.rb_node) {
296 spin_unlock(&ubi->wl_lock);
297
298 dbg_wl("do one work synchronously");
299 err = do_work(ubi);
300 if (err)
301 return err;
302
303 spin_lock(&ubi->wl_lock);
304 }
305 spin_unlock(&ubi->wl_lock);
306
307 return 0;
308}
309
310/**
311 * in_wl_tree - check if wear-leveling entry is present in a WL RB-tree.
312 * @e: the wear-leveling entry to check
313 * @root: the root of the tree
314 *
315 * This function returns non-zero if @e is in the @root RB-tree and zero if it
316 * is not.
317 */
318static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root)
319{
320 struct rb_node *p;
321
322 p = root->rb_node;
323 while (p) {
324 struct ubi_wl_entry *e1;
325
326 e1 = rb_entry(p, struct ubi_wl_entry, rb);
327
328 if (e->pnum == e1->pnum) {
329 ubi_assert(e == e1);
330 return 1;
331 }
332
333 if (e->ec < e1->ec)
334 p = p->rb_left;
335 else if (e->ec > e1->ec)
336 p = p->rb_right;
337 else {
338 ubi_assert(e->pnum != e1->pnum);
339 if (e->pnum < e1->pnum)
340 p = p->rb_left;
341 else
342 p = p->rb_right;
343 }
344 }
345
346 return 0;
347}
348
349/**
350 * prot_tree_add - add physical eraseblock to protection trees.
351 * @ubi: UBI device description object
352 * @e: the physical eraseblock to add
353 * @pe: protection entry object to use
354 * @abs_ec: absolute erase counter value when this physical eraseblock has
355 * to be removed from the protection trees.
356 *
357 * @wl->lock has to be locked.
358 */
359static void prot_tree_add(struct ubi_device *ubi, struct ubi_wl_entry *e,
360 struct ubi_wl_prot_entry *pe, int abs_ec)
361{
362 struct rb_node **p, *parent = NULL;
363 struct ubi_wl_prot_entry *pe1;
364
365 pe->e = e;
366 pe->abs_ec = ubi->abs_ec + abs_ec;
367
368 p = &ubi->prot.pnum.rb_node;
369 while (*p) {
370 parent = *p;
371 pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_pnum);
372
373 if (e->pnum < pe1->e->pnum)
374 p = &(*p)->rb_left;
375 else
376 p = &(*p)->rb_right;
377 }
378 rb_link_node(&pe->rb_pnum, parent, p);
379 rb_insert_color(&pe->rb_pnum, &ubi->prot.pnum);
380
381 p = &ubi->prot.aec.rb_node;
382 parent = NULL;
383 while (*p) {
384 parent = *p;
385 pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_aec);
386
387 if (pe->abs_ec < pe1->abs_ec)
388 p = &(*p)->rb_left;
389 else
390 p = &(*p)->rb_right;
391 }
392 rb_link_node(&pe->rb_aec, parent, p);
393 rb_insert_color(&pe->rb_aec, &ubi->prot.aec);
394}
395
396/**
397 * find_wl_entry - find wear-leveling entry closest to certain erase counter.
398 * @root: the RB-tree where to look for
399 * @max: highest possible erase counter
400 *
401 * This function looks for a wear-leveling entry with an erase counter closest to
402 * @max and less than @max.
403 */
404static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int max)
405{
406 struct rb_node *p;
407 struct ubi_wl_entry *e;
408
409 e = rb_entry(rb_first(root), struct ubi_wl_entry, rb);
410 max += e->ec;
411
412 p = root->rb_node;
413 while (p) {
414 struct ubi_wl_entry *e1;
415
416 e1 = rb_entry(p, struct ubi_wl_entry, rb);
417 if (e1->ec >= max)
418 p = p->rb_left;
419 else {
420 p = p->rb_right;
421 e = e1;
422 }
423 }
424
425 return e;
426}
427
428/**
429 * ubi_wl_get_peb - get a physical eraseblock.
430 * @ubi: UBI device description object
431 * @dtype: type of data which will be stored in this physical eraseblock
432 *
433 * This function returns a physical eraseblock in case of success and a
434 * negative error code in case of failure. Might sleep.
435 */
436int ubi_wl_get_peb(struct ubi_device *ubi, int dtype)
437{
438 int err, protect, medium_ec;
439 struct ubi_wl_entry *e, *first, *last;
440 struct ubi_wl_prot_entry *pe;
441
442 ubi_assert(dtype == UBI_LONGTERM || dtype == UBI_SHORTTERM ||
443 dtype == UBI_UNKNOWN);
444
445 pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS);
446 if (!pe)
447 return -ENOMEM;
448
449retry:
450 spin_lock(&ubi->wl_lock);
451 if (!ubi->free.rb_node) {
452 if (ubi->works_count == 0) {
453 ubi_assert(list_empty(&ubi->works));
454 ubi_err("no free eraseblocks");
455 spin_unlock(&ubi->wl_lock);
456 kfree(pe);
457 return -ENOSPC;
458 }
459 spin_unlock(&ubi->wl_lock);
460
461 err = produce_free_peb(ubi);
462 if (err < 0) {
463 kfree(pe);
464 return err;
465 }
466 goto retry;
467 }
468
469 switch (dtype) {
470 case UBI_LONGTERM:
471 /*
472 * For long term data we pick a physical eraseblock
473 * with high erase counter. But the highest erase
474 * counter we can pick is bounded by the lowest
475 * erase counter plus %WL_FREE_MAX_DIFF.
476 */
477 e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
478 protect = LT_PROTECTION;
479 break;
480 case UBI_UNKNOWN:
481 /*
482 * For unknown data we pick a physical eraseblock with
483 * medium erase counter. But by no means can we pick a
484 * physical eraseblock with an erase counter greater than
485 * or equal to the lowest erase counter plus
486 * %WL_FREE_MAX_DIFF.
487 */
488 first = rb_entry(rb_first(&ubi->free),
489 struct ubi_wl_entry, rb);
490 last = rb_entry(rb_last(&ubi->free),
491 struct ubi_wl_entry, rb);
492
493 if (last->ec - first->ec < WL_FREE_MAX_DIFF)
494 e = rb_entry(ubi->free.rb_node,
495 struct ubi_wl_entry, rb);
496 else {
497 medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2;
498 e = find_wl_entry(&ubi->free, medium_ec);
499 }
500 protect = U_PROTECTION;
501 break;
502 case UBI_SHORTTERM:
503 /*
504 * For short term data we pick a physical eraseblock
505 * with the lowest erase counter as we expect it will
506 * be erased soon.
507 */
508 e = rb_entry(rb_first(&ubi->free),
509 struct ubi_wl_entry, rb);
510 protect = ST_PROTECTION;
511 break;
512 default:
513 protect = 0;
514 e = NULL;
515 BUG();
516 }
517
518 /*
519 * Move the physical eraseblock to the protection trees where it will
520 * be protected from being moved for some time.
521 */
522 paranoid_check_in_wl_tree(e, &ubi->free);
523 rb_erase(&e->rb, &ubi->free);
524 prot_tree_add(ubi, e, pe, protect);
525
526 dbg_wl("PEB %d EC %d, protection %d", e->pnum, e->ec, protect);
527 spin_unlock(&ubi->wl_lock);
528
529 return e->pnum;
530}
531
532/**
533 * prot_tree_del - remove a physical eraseblock from the protection trees
534 * @ubi: UBI device description object
535 * @pnum: the physical eraseblock to remove
536 *
537 * This function removes PEB @pnum from the protection trees and returns zero
538 * in case of success and %-ENODEV if the PEB was not found in the protection
539 * trees.
540 */
541static int prot_tree_del(struct ubi_device *ubi, int pnum)
542{
543 struct rb_node *p;
544 struct ubi_wl_prot_entry *pe = NULL;
545
546 p = ubi->prot.pnum.rb_node;
547 while (p) {
548
549 pe = rb_entry(p, struct ubi_wl_prot_entry, rb_pnum);
550
551 if (pnum == pe->e->pnum)
552 goto found;
553
554 if (pnum < pe->e->pnum)
555 p = p->rb_left;
556 else
557 p = p->rb_right;
558 }
559
560 return -ENODEV;
561
562found:
563 ubi_assert(pe->e->pnum == pnum);
564 rb_erase(&pe->rb_aec, &ubi->prot.aec);
565 rb_erase(&pe->rb_pnum, &ubi->prot.pnum);
566 kfree(pe);
567 return 0;
568}
569
570/**
571 * sync_erase - synchronously erase a physical eraseblock.
572 * @ubi: UBI device description object
573 * @e: the physical eraseblock to erase
574 * @torture: if the physical eraseblock has to be tortured
575 *
576 * This function returns zero in case of success and a negative error code in
577 * case of failure.
578 */
579static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int torture)
580{
581 int err;
582 struct ubi_ec_hdr *ec_hdr;
583 unsigned long long ec = e->ec;
584
585 dbg_wl("erase PEB %d, old EC %llu", e->pnum, ec);
586
587 err = paranoid_check_ec(ubi, e->pnum, e->ec);
588 if (err > 0)
589 return -EINVAL;
590
591 ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
592 if (!ec_hdr)
593 return -ENOMEM;
594
595 err = ubi_io_sync_erase(ubi, e->pnum, torture);
596 if (err < 0)
597 goto out_free;
598
599 ec += err;
600 if (ec > UBI_MAX_ERASECOUNTER) {
601 /*
602 * Erase counter overflow. Upgrade UBI and use 64-bit
603 * erase counters internally.
604 */
605 ubi_err("erase counter overflow at PEB %d, EC %llu",
606 e->pnum, ec);
607 err = -EINVAL;
608 goto out_free;
609 }
610
611 dbg_wl("erased PEB %d, new EC %llu", e->pnum, ec);
612
613 ec_hdr->ec = cpu_to_be64(ec);
614
615 err = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr);
616 if (err)
617 goto out_free;
618
619 e->ec = ec;
620 spin_lock(&ubi->wl_lock);
621 if (e->ec > ubi->max_ec)
622 ubi->max_ec = e->ec;
623 spin_unlock(&ubi->wl_lock);
624
625out_free:
626 kfree(ec_hdr);
627 return err;
628}
629
630/**
631 * check_protection_over - check if it is time to stop protecting some
632 * physical eraseblocks.
633 * @ubi: UBI device description object
634 *
635 * This function is called after each erase operation, when the absolute erase
636 * counter is incremented, to check if some physical eraseblocks do not have to
637 * be protected any longer. These physical eraseblocks are moved from the
638 * protection trees to the used tree.
639 */
640static void check_protection_over(struct ubi_device *ubi)
641{
642 struct ubi_wl_prot_entry *pe;
643
644 /*
645 * There may be several protected physical eraseblocks to remove,
646 * process them all.
647 */
648 while (1) {
649 spin_lock(&ubi->wl_lock);
650 if (!ubi->prot.aec.rb_node) {
651 spin_unlock(&ubi->wl_lock);
652 break;
653 }
654
655 pe = rb_entry(rb_first(&ubi->prot.aec),
656 struct ubi_wl_prot_entry, rb_aec);
657
658 if (pe->abs_ec > ubi->abs_ec) {
659 spin_unlock(&ubi->wl_lock);
660 break;
661 }
662
663 dbg_wl("PEB %d protection over, abs_ec %llu, PEB abs_ec %llu",
664 pe->e->pnum, ubi->abs_ec, pe->abs_ec);
665 rb_erase(&pe->rb_aec, &ubi->prot.aec);
666 rb_erase(&pe->rb_pnum, &ubi->prot.pnum);
667 wl_tree_add(pe->e, &ubi->used);
668 spin_unlock(&ubi->wl_lock);
669
670 kfree(pe);
671 cond_resched();
672 }
673}
674
675/**
676 * schedule_ubi_work - schedule a work.
677 * @ubi: UBI device description object
678 * @wrk: the work to schedule
679 *
680 * This function enqueues a work defined by @wrk to the tail of the pending
681 * works list.
682 */
683static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk)
684{
685 spin_lock(&ubi->wl_lock);
686 list_add_tail(&wrk->list, &ubi->works);
687 ubi_assert(ubi->works_count >= 0);
688 ubi->works_count += 1;
689 if (ubi->thread_enabled)
690 wake_up_process(ubi->bgt_thread);
691 spin_unlock(&ubi->wl_lock);
692}
693
694static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
695 int cancel);
696
697/**
698 * schedule_erase - schedule an erase work.
699 * @ubi: UBI device description object
700 * @e: the WL entry of the physical eraseblock to erase
701 * @torture: if the physical eraseblock has to be tortured
702 *
703 * This function returns zero in case of success and a %-ENOMEM in case of
704 * failure.
705 */
706static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
707 int torture)
708{
709 struct ubi_work *wl_wrk;
710
711 dbg_wl("schedule erasure of PEB %d, EC %d, torture %d",
712 e->pnum, e->ec, torture);
713
714 wl_wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS);
715 if (!wl_wrk)
716 return -ENOMEM;
717
718 wl_wrk->func = &erase_worker;
719 wl_wrk->e = e;
720 wl_wrk->torture = torture;
721
722 schedule_ubi_work(ubi, wl_wrk);
723 return 0;
724}
725
726/**
727 * wear_leveling_worker - wear-leveling worker function.
728 * @ubi: UBI device description object
729 * @wrk: the work object
730 * @cancel: non-zero if the worker has to free memory and exit
731 *
732 * This function copies a more worn out physical eraseblock to a less worn out
733 * one. Returns zero in case of success and a negative error code in case of
734 * failure.
735 */
736static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
737 int cancel)
738{
739 int err, put = 0, scrubbing = 0, protect = 0;
740 struct ubi_wl_prot_entry *pe;
741 struct ubi_wl_entry *e1, *e2;
742 struct ubi_vid_hdr *vid_hdr;
743
744 kfree(wrk);
745
746 if (cancel)
747 return 0;
748
749 vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
750 if (!vid_hdr)
751 return -ENOMEM;
752
753 mutex_lock(&ubi->move_mutex);
754 spin_lock(&ubi->wl_lock);
755 ubi_assert(!ubi->move_from && !ubi->move_to);
756 ubi_assert(!ubi->move_to_put);
757
758 if (!ubi->free.rb_node ||
759 (!ubi->used.rb_node && !ubi->scrub.rb_node)) {
760 /*
761 * No free physical eraseblocks? Well, they must be waiting in
762 * the queue to be erased. Cancel movement - it will be
763 * triggered again when a free physical eraseblock appears.
764 *
765 * No used physical eraseblocks? They must be temporarily
766 * protected from being moved. They will be moved to the
767 * @ubi->used tree later and the wear-leveling will be
768 * triggered again.
769 */
770 dbg_wl("cancel WL, a list is empty: free %d, used %d",
771 !ubi->free.rb_node, !ubi->used.rb_node);
772 goto out_cancel;
773 }
774
775 if (!ubi->scrub.rb_node) {
776 /*
777 * Now pick the least worn-out used physical eraseblock and a
778 * highly worn-out free physical eraseblock. If the erase
779 * counters differ enough, start wear-leveling.
780 */
781 e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb);
782 e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
783
784 if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) {
785 dbg_wl("no WL needed: min used EC %d, max free EC %d",
786 e1->ec, e2->ec);
787 goto out_cancel;
788 }
789 paranoid_check_in_wl_tree(e1, &ubi->used);
790 rb_erase(&e1->rb, &ubi->used);
791 dbg_wl("move PEB %d EC %d to PEB %d EC %d",
792 e1->pnum, e1->ec, e2->pnum, e2->ec);
793 } else {
794 /* Perform scrubbing */
795 scrubbing = 1;
796 e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, rb);
797 e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
798 paranoid_check_in_wl_tree(e1, &ubi->scrub);
799 rb_erase(&e1->rb, &ubi->scrub);
800 dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum);
801 }
802
803 paranoid_check_in_wl_tree(e2, &ubi->free);
804 rb_erase(&e2->rb, &ubi->free);
805 ubi->move_from = e1;
806 ubi->move_to = e2;
807 spin_unlock(&ubi->wl_lock);
808
809 /*
810 * Now we are going to copy physical eraseblock @e1->pnum to @e2->pnum.
811 * We so far do not know which logical eraseblock our physical
812 * eraseblock (@e1) belongs to. We have to read the volume identifier
813 * header first.
814 *
815 * Note, we are protected from this PEB being unmapped and erased. The
816 * 'ubi_wl_put_peb()' would wait for moving to be finished if the PEB
817 * which is being moved was unmapped.
818 */
819
820 err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0);
821 if (err && err != UBI_IO_BITFLIPS) {
822 if (err == UBI_IO_PEB_FREE) {
823 /*
824 * We are trying to move a PEB without a VID header. UBI
825 * always writes VID headers shortly after the PEB was
826 * given, so here it did not yet have a chance to write
827 * one because it was preempted.
828 * Just re-schedule the work, so that next time it will
829 * likely have the VID header in place.
830 */
831 dbg_wl("PEB %d has no VID header", e1->pnum);
832 goto out_not_moved;
833 }
834
835 ubi_err("error %d while reading VID header from PEB %d",
836 err, e1->pnum);
837 if (err > 0)
838 err = -EIO;
839 goto out_error;
840 }
841
842 err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr);
843 if (err) {
844
845 if (err < 0)
846 goto out_error;
847 if (err == 1)
848 goto out_not_moved;
849
850 /*
851 * For some reason the LEB was not moved - it might be because
852 * the volume is being deleted. We should prevent this PEB from
853 * being selected for wear-leveling movement for some "time",
854 * so put it into the protection tree.
855 */
856
857 dbg_wl("cancelled moving PEB %d", e1->pnum);
858 pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS);
859 if (!pe) {
860 err = -ENOMEM;
861 goto out_error;
862 }
863
864 protect = 1;
865 }
866
867 ubi_free_vid_hdr(ubi, vid_hdr);
868 spin_lock(&ubi->wl_lock);
869 if (protect)
870 prot_tree_add(ubi, e1, pe, protect);
871 if (!ubi->move_to_put)
872 wl_tree_add(e2, &ubi->used);
873 else
874 put = 1;
875 ubi->move_from = ubi->move_to = NULL;
876 ubi->move_to_put = ubi->wl_scheduled = 0;
877 spin_unlock(&ubi->wl_lock);
878
879 if (put) {
880 /*
881 * Well, the target PEB was put meanwhile, schedule it for
882 * erasure.
883 */
884 dbg_wl("PEB %d was put meanwhile, erase", e2->pnum);
885 err = schedule_erase(ubi, e2, 0);
886 if (err)
887 goto out_error;
888 }
889
890 if (!protect) {
891 err = schedule_erase(ubi, e1, 0);
892 if (err)
893 goto out_error;
894 }
895
896
897 dbg_wl("done");
898 mutex_unlock(&ubi->move_mutex);
899 return 0;
900
901 /*
902 * For some reason the LEB was not moved - it might be an error, might be
903 * something else. @e1 was not changed, so return it back. @e2 might
904 * be changed, schedule it for erasure.
905 */
906out_not_moved:
907 ubi_free_vid_hdr(ubi, vid_hdr);
908 spin_lock(&ubi->wl_lock);
909 if (scrubbing)
910 wl_tree_add(e1, &ubi->scrub);
911 else
912 wl_tree_add(e1, &ubi->used);
913 ubi->move_from = ubi->move_to = NULL;
914 ubi->move_to_put = ubi->wl_scheduled = 0;
915 spin_unlock(&ubi->wl_lock);
916
917 err = schedule_erase(ubi, e2, 0);
918 if (err)
919 goto out_error;
920
921 mutex_unlock(&ubi->move_mutex);
922 return 0;
923
924out_error:
925 ubi_err("error %d while moving PEB %d to PEB %d",
926 err, e1->pnum, e2->pnum);
927
928 ubi_free_vid_hdr(ubi, vid_hdr);
929 spin_lock(&ubi->wl_lock);
930 ubi->move_from = ubi->move_to = NULL;
931 ubi->move_to_put = ubi->wl_scheduled = 0;
932 spin_unlock(&ubi->wl_lock);
933
934 kmem_cache_free(ubi_wl_entry_slab, e1);
935 kmem_cache_free(ubi_wl_entry_slab, e2);
936 ubi_ro_mode(ubi);
937
938 mutex_unlock(&ubi->move_mutex);
939 return err;
940
941out_cancel:
942 ubi->wl_scheduled = 0;
943 spin_unlock(&ubi->wl_lock);
944 mutex_unlock(&ubi->move_mutex);
945 ubi_free_vid_hdr(ubi, vid_hdr);
946 return 0;
947}
948
949/**
950 * ensure_wear_leveling - schedule wear-leveling if it is needed.
951 * @ubi: UBI device description object
952 *
953 * This function checks if it is time to start wear-leveling and schedules it
954 * if yes. This function returns zero in case of success and a negative error
955 * code in case of failure.
956 */
957static int ensure_wear_leveling(struct ubi_device *ubi)
958{
959 int err = 0;
960 struct ubi_wl_entry *e1;
961 struct ubi_wl_entry *e2;
962 struct ubi_work *wrk;
963
964 spin_lock(&ubi->wl_lock);
965 if (ubi->wl_scheduled)
966 /* Wear-leveling is already in the work queue */
967 goto out_unlock;
968
969 /*
970 * If the ubi->scrub tree is not empty, scrubbing is needed, and the
971 * WL worker has to be scheduled anyway.
972 */
973 if (!ubi->scrub.rb_node) {
974 if (!ubi->used.rb_node || !ubi->free.rb_node)
975 /* No physical eraseblocks - no deal */
976 goto out_unlock;
977
978 /*
979 * We schedule wear-leveling only if the difference between the
980 * lowest erase counter of used physical eraseblocks and a high
981 * erase counter of free physical eraseblocks is greater than
982 * %UBI_WL_THRESHOLD.
983 */
984 e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb);
985 e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
986
987 if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD))
988 goto out_unlock;
989 dbg_wl("schedule wear-leveling");
990 } else
991 dbg_wl("schedule scrubbing");
992
993 ubi->wl_scheduled = 1;
994 spin_unlock(&ubi->wl_lock);
995
996 wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS);
997 if (!wrk) {
998 err = -ENOMEM;
999 goto out_cancel;
1000 }
1001
1002 wrk->func = &wear_leveling_worker;
1003 schedule_ubi_work(ubi, wrk);
1004 return err;
1005
1006out_cancel:
1007 spin_lock(&ubi->wl_lock);
1008 ubi->wl_scheduled = 0;
1009out_unlock:
1010 spin_unlock(&ubi->wl_lock);
1011 return err;
1012}
1013
1014/**
1015 * erase_worker - physical eraseblock erase worker function.
1016 * @ubi: UBI device description object
1017 * @wl_wrk: the work object
1018 * @cancel: non-zero if the worker has to free memory and exit
1019 *
1020 * This function erases a physical eraseblock and performs torture testing if
1021 * needed. It also takes care of marking the physical eraseblock bad if
1022 * needed. Returns zero in case of success and a negative error code in case of
1023 * failure.
1024 */
1025static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
1026 int cancel)
1027{
1028 struct ubi_wl_entry *e = wl_wrk->e;
1029 int pnum = e->pnum, err, need;
1030
1031 if (cancel) {
1032 dbg_wl("cancel erasure of PEB %d EC %d", pnum, e->ec);
1033 kfree(wl_wrk);
1034 kmem_cache_free(ubi_wl_entry_slab, e);
1035 return 0;
1036 }
1037
1038 dbg_wl("erase PEB %d EC %d", pnum, e->ec);
1039
1040 err = sync_erase(ubi, e, wl_wrk->torture);
1041 if (!err) {
1042 /* Fine, we've erased it successfully */
1043 kfree(wl_wrk);
1044
1045 spin_lock(&ubi->wl_lock);
1046 ubi->abs_ec += 1;
1047 wl_tree_add(e, &ubi->free);
1048 spin_unlock(&ubi->wl_lock);
1049
1050 /*
1051 * One more erase operation has happened, take care of protected
1052 * physical eraseblocks.
1053 */
1054 check_protection_over(ubi);
1055
1056 /* And take care about wear-leveling */
1057 err = ensure_wear_leveling(ubi);
1058 return err;
1059 }
1060
1061 ubi_err("failed to erase PEB %d, error %d", pnum, err);
1062 kfree(wl_wrk);
1063 kmem_cache_free(ubi_wl_entry_slab, e);
1064
1065 if (err == -EINTR || err == -ENOMEM || err == -EAGAIN ||
1066 err == -EBUSY) {
1067 int err1;
1068
1069 /* Re-schedule the LEB for erasure */
1070 err1 = schedule_erase(ubi, e, 0);
1071 if (err1) {
1072 err = err1;
1073 goto out_ro;
1074 }
1075 return err;
1076 } else if (err != -EIO) {
1077 /*
1078 * If this is not %-EIO, we have no idea what to do. Scheduling
1079 * this physical eraseblock for erasure again would cause
1080 * errors again and again. Well, let's switch to RO mode.
1081 */
1082 goto out_ro;
1083 }
1084
1085 /* It is %-EIO, the PEB went bad */
1086
1087 if (!ubi->bad_allowed) {
1088 ubi_err("bad physical eraseblock %d detected", pnum);
1089 goto out_ro;
1090 }
1091
1092 spin_lock(&ubi->volumes_lock);
1093 need = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs + 1;
1094 if (need > 0) {
1095 need = ubi->avail_pebs >= need ? need : ubi->avail_pebs;
1096 ubi->avail_pebs -= need;
1097 ubi->rsvd_pebs += need;
1098 ubi->beb_rsvd_pebs += need;
1099 if (need > 0)
1100 ubi_msg("reserve more %d PEBs", need);
1101 }
1102
1103 if (ubi->beb_rsvd_pebs == 0) {
1104 spin_unlock(&ubi->volumes_lock);
1105 ubi_err("no reserved physical eraseblocks");
1106 goto out_ro;
1107 }
1108
1109 spin_unlock(&ubi->volumes_lock);
1110 ubi_msg("mark PEB %d as bad", pnum);
1111
1112 err = ubi_io_mark_bad(ubi, pnum);
1113 if (err)
1114 goto out_ro;
1115
1116 spin_lock(&ubi->volumes_lock);
1117 ubi->beb_rsvd_pebs -= 1;
1118 ubi->bad_peb_count += 1;
1119 ubi->good_peb_count -= 1;
1120 ubi_calculate_reserved(ubi);
1121 if (ubi->beb_rsvd_pebs == 0)
1122 ubi_warn("last PEB from the reserved pool was used");
1123 spin_unlock(&ubi->volumes_lock);
1124
1125 return err;
1126
1127out_ro:
1128 ubi_ro_mode(ubi);
1129 return err;
1130}
1131
1132/**
1133 * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling unit.
1134 * @ubi: UBI device description object
1135 * @pnum: physical eraseblock to return
1136 * @torture: if this physical eraseblock has to be tortured
1137 *
1138 * This function is called to return physical eraseblock @pnum to the pool of
1139 * free physical eraseblocks. The @torture flag has to be set if an I/O error
1140 * occurred to this @pnum and it has to be tested. This function returns zero
1141 * in case of success, and a negative error code in case of failure.
1142 */
1143int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture)
1144{
1145 int err;
1146 struct ubi_wl_entry *e;
1147
1148 dbg_wl("PEB %d", pnum);
1149 ubi_assert(pnum >= 0);
1150 ubi_assert(pnum < ubi->peb_count);
1151
1152retry:
1153 spin_lock(&ubi->wl_lock);
1154 e = ubi->lookuptbl[pnum];
1155 if (e == ubi->move_from) {
1156 /*
1157 * User is putting the physical eraseblock which was selected to
1158 * be moved. It will be scheduled for erasure in the
1159 * wear-leveling worker.
1160 */
1161 dbg_wl("PEB %d is being moved, wait", pnum);
1162 spin_unlock(&ubi->wl_lock);
1163
1164 /* Wait for the WL worker by taking the @ubi->move_mutex */
1165 mutex_lock(&ubi->move_mutex);
1166 mutex_unlock(&ubi->move_mutex);
1167 goto retry;
1168 } else if (e == ubi->move_to) {
1169 /*
1170 * User is putting the physical eraseblock which was selected
1171 * as the target the data is moved to. It may happen if the EBA
1172 * unit already re-mapped the LEB in 'ubi_eba_copy_leb()' but
1173 * the WL unit has not put the PEB to the "used" tree yet, but
1174 * it is about to do this. So we just set a flag which will
1175 * tell the WL worker that the PEB is not needed anymore and
1176 * should be scheduled for erasure.
1177 */
1178 dbg_wl("PEB %d is the target of data moving", pnum);
1179 ubi_assert(!ubi->move_to_put);
1180 ubi->move_to_put = 1;
1181 spin_unlock(&ubi->wl_lock);
1182 return 0;
1183 } else {
1184 if (in_wl_tree(e, &ubi->used)) {
1185 paranoid_check_in_wl_tree(e, &ubi->used);
1186 rb_erase(&e->rb, &ubi->used);
1187 } else if (in_wl_tree(e, &ubi->scrub)) {
1188 paranoid_check_in_wl_tree(e, &ubi->scrub);
1189 rb_erase(&e->rb, &ubi->scrub);
1190 } else {
1191 err = prot_tree_del(ubi, e->pnum);
1192 if (err) {
1193 ubi_err("PEB %d not found", pnum);
1194 ubi_ro_mode(ubi);
1195 spin_unlock(&ubi->wl_lock);
1196 return err;
1197 }
1198 }
1199 }
1200 spin_unlock(&ubi->wl_lock);
1201
1202 err = schedule_erase(ubi, e, torture);
1203 if (err) {
1204 spin_lock(&ubi->wl_lock);
1205 wl_tree_add(e, &ubi->used);
1206 spin_unlock(&ubi->wl_lock);
1207 }
1208
1209 return err;
1210}
1211
1212/**
1213 * ubi_wl_scrub_peb - schedule a physical eraseblock for scrubbing.
1214 * @ubi: UBI device description object
1215 * @pnum: the physical eraseblock to schedule
1216 *
1217 * If a bit-flip in a physical eraseblock is detected, this physical eraseblock
1218 * needs scrubbing. This function schedules a physical eraseblock for
1219 * scrubbing which is done in background. This function returns zero in case of
1220 * success and a negative error code in case of failure.
1221 */
1222int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum)
1223{
1224 struct ubi_wl_entry *e;
1225
1226 ubi_msg("schedule PEB %d for scrubbing", pnum);
1227
1228retry:
1229 spin_lock(&ubi->wl_lock);
1230 e = ubi->lookuptbl[pnum];
1231 if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub)) {
1232 spin_unlock(&ubi->wl_lock);
1233 return 0;
1234 }
1235
1236 if (e == ubi->move_to) {
1237 /*
1238 * This physical eraseblock was used to move data to. The data
1239 * was moved but the PEB was not yet inserted to the proper
1240 * tree. We should just wait a little and let the WL worker
1241 * proceed.
1242 */
1243 spin_unlock(&ubi->wl_lock);
1244 dbg_wl("the PEB %d is not in proper tree, retry", pnum);
1245 yield();
1246 goto retry;
1247 }
1248
1249 if (in_wl_tree(e, &ubi->used)) {
1250 paranoid_check_in_wl_tree(e, &ubi->used);
1251 rb_erase(&e->rb, &ubi->used);
1252 } else {
1253 int err;
1254
1255 err = prot_tree_del(ubi, e->pnum);
1256 if (err) {
1257 ubi_err("PEB %d not found", pnum);
1258 ubi_ro_mode(ubi);
1259 spin_unlock(&ubi->wl_lock);
1260 return err;
1261 }
1262 }
1263
1264 wl_tree_add(e, &ubi->scrub);
1265 spin_unlock(&ubi->wl_lock);
1266
1267 /*
1268 * Technically scrubbing is the same as wear-leveling, so it is done
1269 * by the WL worker.
1270 */
1271 return ensure_wear_leveling(ubi);
1272}
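/*
 * Illustrative sketch (an addition, not part of the original file): a caller
 * that detects correctable bit-flips while reading, e.g. gets
 * %UBI_IO_BITFLIPS back from the I/O unit, would typically hand the PEB over
 * for scrubbing like this:
 */
#if 0
	err = ubi_io_read_vid_hdr(ubi, pnum, vid_hdr, 0);
	if (err == UBI_IO_BITFLIPS)
		err = ubi_wl_scrub_peb(ubi, pnum);
#endif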
1273
1274/**
1275 * ubi_wl_flush - flush all pending works.
1276 * @ubi: UBI device description object
1277 *
1278 * This function returns zero in case of success and a negative error code in
1279 * case of failure.
1280 */
1281int ubi_wl_flush(struct ubi_device *ubi)
1282{
1283 int err, pending_count;
1284
1285 pending_count = ubi->works_count;
1286
1287 dbg_wl("flush (%d pending works)", pending_count);
1288
1289 /*
1290 * Erase while the pending works queue is not empty, but not more than
1291 * the number of currently pending works.
1292 */
1293 while (pending_count-- > 0) {
1294 err = do_work(ubi);
1295 if (err)
1296 return err;
1297 }
1298
1299 return 0;
1300}
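/*
 * Illustrative usage sketch (an addition, not part of the original file):
 * a caller that has just returned a PEB and needs its erasure to actually
 * complete before proceeding could flush the pending works:
 */
#if 0
	err = ubi_wl_put_peb(ubi, pnum, 0);
	if (!err)
		err = ubi_wl_flush(ubi);
#endif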
1301
1302/**
1303 * tree_destroy - destroy an RB-tree.
1304 * @root: the root of the tree to destroy
1305 */
1306static void tree_destroy(struct rb_root *root)
1307{
1308 struct rb_node *rb;
1309 struct ubi_wl_entry *e;
1310
1311 rb = root->rb_node;
1312 while (rb) {
1313 if (rb->rb_left)
1314 rb = rb->rb_left;
1315 else if (rb->rb_right)
1316 rb = rb->rb_right;
1317 else {
1318 e = rb_entry(rb, struct ubi_wl_entry, rb);
1319
1320 rb = rb_parent(rb);
1321 if (rb) {
1322 if (rb->rb_left == &e->rb)
1323 rb->rb_left = NULL;
1324 else
1325 rb->rb_right = NULL;
1326 }
1327
1328 kmem_cache_free(ubi_wl_entry_slab, e);
1329 }
1330 }
1331}
1332
1333/**
1334 * ubi_thread - UBI background thread.
1335 * @u: the UBI device description object pointer
1336 */
1337static int ubi_thread(void *u)
1338{
1339 int failures = 0;
1340 struct ubi_device *ubi = u;
1341
1342 ubi_msg("background thread \"%s\" started, PID %d",
1343 ubi->bgt_name, task_pid_nr(current));
1344
1345 set_freezable();
1346 for (;;) {
1347 int err;
1348
1349 if (kthread_should_stop())
1350 goto out;
1351
1352 if (try_to_freeze())
1353 continue;
1354
1355 spin_lock(&ubi->wl_lock);
1356 if (list_empty(&ubi->works) || ubi->ro_mode ||
1357 !ubi->thread_enabled) {
1358 set_current_state(TASK_INTERRUPTIBLE);
1359 spin_unlock(&ubi->wl_lock);
1360 schedule();
1361 continue;
1362 }
1363 spin_unlock(&ubi->wl_lock);
1364
1365 err = do_work(ubi);
1366 if (err) {
1367 ubi_err("%s: work failed with error code %d",
1368 ubi->bgt_name, err);
1369 if (failures++ > WL_MAX_FAILURES) {
1370 /*
1371 * Too many failures, disable the thread and
1372 * switch to read-only mode.
1373 */
1374 ubi_msg("%s: %d consecutive failures",
1375 ubi->bgt_name, WL_MAX_FAILURES);
1376 ubi_ro_mode(ubi);
1377 break;
1378 }
1379 } else
1380 failures = 0;
1381
1382 cond_resched();
1383 }
1384
1385out:
1386 dbg_wl("background thread \"%s\" is killed", ubi->bgt_name);
1387 return 0;
1388}
1389
1390/**
1391 * cancel_pending - cancel all pending works.
1392 * @ubi: UBI device description object
1393 */
1394static void cancel_pending(struct ubi_device *ubi)
1395{
1396 while (!list_empty(&ubi->works)) {
1397 struct ubi_work *wrk;
1398
1399 wrk = list_entry(ubi->works.next, struct ubi_work, list);
1400 list_del(&wrk->list);
1401 wrk->func(ubi, wrk, 1);
1402 ubi->works_count -= 1;
1403 ubi_assert(ubi->works_count >= 0);
1404 }
1405}
1406
1407/**
1408 * ubi_wl_init_scan - initialize the wear-leveling unit using scanning
1409 * information.
1410 * @ubi: UBI device description object
1411 * @si: scanning information
1412 *
1413 * This function returns zero in case of success, and a negative error code in
1414 * case of failure.
1415 */
1416int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
1417{
1418 int err;
1419 struct rb_node *rb1, *rb2;
1420 struct ubi_scan_volume *sv;
1421 struct ubi_scan_leb *seb, *tmp;
1422 struct ubi_wl_entry *e;
1423
1424
1425 ubi->used = ubi->free = ubi->scrub = RB_ROOT;
1426 ubi->prot.pnum = ubi->prot.aec = RB_ROOT;
1427 spin_lock_init(&ubi->wl_lock);
1428 mutex_init(&ubi->move_mutex);
1429 ubi->max_ec = si->max_ec;
1430 INIT_LIST_HEAD(&ubi->works);
1431
1432 sprintf(ubi->bgt_name, UBI_BGT_NAME_PATTERN, ubi->ubi_num);
1433
1434 ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name);
1435 if (IS_ERR(ubi->bgt_thread)) {
1436 err = PTR_ERR(ubi->bgt_thread);
1437 ubi_err("cannot spawn \"%s\", error %d", ubi->bgt_name,
1438 err);
1439 return err;
1440 }
1441
1442 err = -ENOMEM;
1443 ubi->lookuptbl = kzalloc(ubi->peb_count * sizeof(void *), GFP_KERNEL);
1444 if (!ubi->lookuptbl)
1445 goto out_free;
1446
1447 list_for_each_entry_safe(seb, tmp, &si->erase, u.list) {
1448 cond_resched();
1449
1450 e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
1451 if (!e)
1452 goto out_free;
1453
1454 e->pnum = seb->pnum;
1455 e->ec = seb->ec;
1456 ubi->lookuptbl[e->pnum] = e;
1457 if (schedule_erase(ubi, e, 0)) {
1458 kmem_cache_free(ubi_wl_entry_slab, e);
1459 goto out_free;
1460 }
1461 }
1462
1463 list_for_each_entry(seb, &si->free, u.list) {
1464 cond_resched();
1465
1466 e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
1467 if (!e)
1468 goto out_free;
1469
1470 e->pnum = seb->pnum;
1471 e->ec = seb->ec;
1472 ubi_assert(e->ec >= 0);
1473 wl_tree_add(e, &ubi->free);
1474 ubi->lookuptbl[e->pnum] = e;
1475 }
1476
1477 list_for_each_entry(seb, &si->corr, u.list) {
1478 cond_resched();
1479
1480 e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
1481 if (!e)
1482 goto out_free;
1483
1484 e->pnum = seb->pnum;
1485 e->ec = seb->ec;
1486 ubi->lookuptbl[e->pnum] = e;
1487 if (schedule_erase(ubi, e, 0)) {
1488 kmem_cache_free(ubi_wl_entry_slab, e);
1489 goto out_free;
1490 }
1491 }
1492
1493 ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) {
1494 ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) {
1495 cond_resched();
1496
1497 e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
1498 if (!e)
1499 goto out_free;
1500
1501 e->pnum = seb->pnum;
1502 e->ec = seb->ec;
1503 ubi->lookuptbl[e->pnum] = e;
1504 if (!seb->scrub) {
1505 dbg_wl("add PEB %d EC %d to the used tree",
1506 e->pnum, e->ec);
1507 wl_tree_add(e, &ubi->used);
1508 } else {
1509 dbg_wl("add PEB %d EC %d to the scrub tree",
1510 e->pnum, e->ec);
1511 wl_tree_add(e, &ubi->scrub);
1512 }
1513 }
1514 }
1515
1516 if (ubi->avail_pebs < WL_RESERVED_PEBS) {
1517 ubi_err("not enough physical eraseblocks (%d, need %d)",
1518 ubi->avail_pebs, WL_RESERVED_PEBS);
1519 goto out_free;
1520 }
1521 ubi->avail_pebs -= WL_RESERVED_PEBS;
1522 ubi->rsvd_pebs += WL_RESERVED_PEBS;
1523
1524 /* Schedule wear-leveling if needed */
1525 err = ensure_wear_leveling(ubi);
1526 if (err)
1527 goto out_free;
1528
1529 return 0;
1530
1531out_free:
1532 cancel_pending(ubi);
1533 tree_destroy(&ubi->used);
1534 tree_destroy(&ubi->free);
1535 tree_destroy(&ubi->scrub);
1536 kfree(ubi->lookuptbl);
1537 return err;
1538}
1539
1540/**
1541 * protection_trees_destroy - destroy the protection RB-trees.
1542 * @ubi: UBI device description object
1543 */
1544static void protection_trees_destroy(struct ubi_device *ubi)
1545{
1546 struct rb_node *rb;
1547 struct ubi_wl_prot_entry *pe;
1548
1549 rb = ubi->prot.aec.rb_node;
1550 while (rb) {
1551 if (rb->rb_left)
1552 rb = rb->rb_left;
1553 else if (rb->rb_right)
1554 rb = rb->rb_right;
1555 else {
1556 pe = rb_entry(rb, struct ubi_wl_prot_entry, rb_aec);
1557
1558 rb = rb_parent(rb);
1559 if (rb) {
1560 if (rb->rb_left == &pe->rb_aec)
1561 rb->rb_left = NULL;
1562 else
1563 rb->rb_right = NULL;
1564 }
1565
1566 kmem_cache_free(ubi_wl_entry_slab, pe->e);
1567 kfree(pe);
1568 }
1569 }
1570}
1571
1572/**
1573 * ubi_wl_close - close the wear-leveling unit.
1574 * @ubi: UBI device description object
1575 */
1576void ubi_wl_close(struct ubi_device *ubi)
1577{
1578 dbg_wl("disable \"%s\"", ubi->bgt_name);
1579 if (ubi->bgt_thread)
1580 kthread_stop(ubi->bgt_thread);
1581
1582 dbg_wl("close the UBI wear-leveling unit");
1583
1584 cancel_pending(ubi);
1585 protection_trees_destroy(ubi);
1586 tree_destroy(&ubi->used);
1587 tree_destroy(&ubi->free);
1588 tree_destroy(&ubi->scrub);
1589 kfree(ubi->lookuptbl);
1590}
1591
1592#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
1593
1594/**
1595 * paranoid_check_ec - make sure that the erase counter of a physical eraseblock
1596 * is correct.
1597 * @ubi: UBI device description object
1598 * @pnum: the physical eraseblock number to check
1599 * @ec: the erase counter to check
1600 *
1601 * This function returns zero if the erase counter of physical eraseblock @pnum
1602 * is equivalent to @ec, %1 if not, and a negative error code if an error
1603 * occurred.
1604 */
1605static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec)
1606{
1607 int err;
1608 long long read_ec;
1609 struct ubi_ec_hdr *ec_hdr;
1610
1611 ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
1612 if (!ec_hdr)
1613 return -ENOMEM;
1614
1615 err = ubi_io_read_ec_hdr(ubi, pnum, ec_hdr, 0);
1616 if (err && err != UBI_IO_BITFLIPS) {
1617 /* The header does not have to exist */
1618 err = 0;
1619 goto out_free;
1620 }
1621
1622 read_ec = be64_to_cpu(ec_hdr->ec);
1623 if (ec != read_ec) {
1624 ubi_err("paranoid check failed for PEB %d", pnum);
1625 ubi_err("read EC is %lld, should be %d", read_ec, ec);
1626 ubi_dbg_dump_stack();
1627 err = 1;
1628 } else
1629 err = 0;
1630
1631out_free:
1632 kfree(ec_hdr);
1633 return err;
1634}
1635
1636/**
1637 * paranoid_check_in_wl_tree - make sure that a wear-leveling entry is present
1638 * in a WL RB-tree.
1639 * @e: the wear-leveling entry to check
1640 * @root: the root of the tree
1641 *
1642 * This function returns zero if @e is in the @root RB-tree and %1 if it
1643 * is not.
1644 */
1645static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
1646 struct rb_root *root)
1647{
1648 if (in_wl_tree(e, root))
1649 return 0;
1650
1651 ubi_err("paranoid check failed for PEB %d, EC %d, RB-tree %p ",
1652 e->pnum, e->ec, root);
1653 ubi_dbg_dump_stack();
1654 return 1;
1655}
1656
1657#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */