fs/btrfs/tree-checker.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (C) Qu Wenruo 2017.  All rights reserved.
   4  */
   5
   6 /*
   7  * The module is used to catch unexpected/corrupted tree block data.
   8  * Such behavior can be caused either by a fuzzed image or bugs.
   9  *
  10  * The objective is to do leaf/node validation checks when tree block is read
  11  * from disk, and check *every* possible member, so other code won't
  12  * need to check them again.
  13  *
  14  * Due to the potential and unwanted damage, every checker needs to be
  15  * carefully reviewed, otherwise it may prevent mount of valid images.
  16  */
  17
  18 #include <linux/types.h>
  19 #include <linux/stddef.h>
  20 #include <linux/error-injection.h>
  21 #include "ctree.h"
  22 #include "tree-checker.h"
  23 #include "disk-io.h"
  24 #include "compression.h"
  25 #include "volumes.h"
  26
  27 /*
  28  * Error message should follow the following format:
  29  * corrupt <type>: <identifier>, <reason>[, <bad_value>]
  30  *
  31  * @type:       leaf or node
  32  * @identifier: the necessary info to locate the leaf/node.
  33  *              It's recommended to decode key.objectid/offset if it's
  34  *              meaningful.
  35  * @reason:     describe the error
  36  * @bad_value:  optional, it's recommended to output bad value and its
  37  *              expected value (range).
  38  *
  39  * Since comma is used to separate the components, only space is allowed
  40  * inside each component.
  41  */
  42
  43 /*
  44  * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt.
  45  * Allows callers to customize the output.
  46  */
  47 __printf(3, 4)
  48 __cold
  49 static void generic_err(const struct extent_buffer *eb, int slot,
  50                         const char *fmt, ...)
  51 {
  52         const struct btrfs_fs_info *fs_info = eb->fs_info;
  53         struct va_format vaf;
  54         va_list args;
  55
  56         va_start(args, fmt);
  57
  58         vaf.fmt = fmt;
  59         vaf.va = &args;
  60
  61         btrfs_crit(fs_info,
  62                 "corrupt %s: root=%llu block=%llu slot=%d, %pV",
  63                 btrfs_header_level(eb) == 0 ? "leaf" : "node",
  64                 btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, &vaf);
  65         va_end(args);
  66 }
  67
  68 /*
  69  * Customized reporter for extent data item, since its key objectid and
  70  * offset has its own meaning.
  71  */
  72 __printf(3, 4)
  73 __cold
  74 static void file_extent_err(const struct extent_buffer *eb, int slot,
  75                             const char *fmt, ...)
  76 {
  77         const struct btrfs_fs_info *fs_info = eb->fs_info;
  78         struct btrfs_key key;
  79         struct va_format vaf;
  80         va_list args;
  81
  82         btrfs_item_key_to_cpu(eb, &key, slot);
  83         va_start(args, fmt);
  84
  85         vaf.fmt = fmt;
  86         vaf.va = &args;
  87
  88         btrfs_crit(fs_info,
  89         "corrupt %s: root=%llu block=%llu slot=%d ino=%llu file_offset=%llu, %pV",
  90                 btrfs_header_level(eb) == 0 ? "leaf" : "node",
  91                 btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
  92                 key.objectid, key.offset, &vaf);
  93         va_end(args);
  94 }
  95
  /*
   * Check the alignment of btrfs_file_extent_##name of @fi.
   *
   * The member is read once; when it is not aligned to @alignment the problem
   * is reported through file_extent_err().
   *
   * Evaluates to 0 if the member is aligned to @alignment, 1 otherwise.
   */
  #define CHECK_FE_ALIGNED(leaf, slot, fi, name, alignment)                     \
  ({                                                                            \
          const u64 __fe_val = btrfs_file_extent_##name((leaf), (fi));          \
          const int __fe_unaligned = !IS_ALIGNED(__fe_val, (alignment));        \
                                                                                \
          if (__fe_unaligned)                                                   \
                  file_extent_err((leaf), (slot),                               \
          "invalid %s for file extent, have %llu, should be aligned to %u",     \
                          (#name), __fe_val, (alignment));                      \
          __fe_unaligned;                                                       \
  })
 109
 110 static u64 file_extent_end(struct extent_buffer *leaf,
 111                            struct btrfs_key *key,
 112                            struct btrfs_file_extent_item *extent)
 113 {
 114         u64 end;
 115         u64 len;
 116
 117         if (btrfs_file_extent_type(leaf, extent) == BTRFS_FILE_EXTENT_INLINE) {
 118                 len = btrfs_file_extent_ram_bytes(leaf, extent);
 119                 end = ALIGN(key->offset + len, leaf->fs_info->sectorsize);
 120         } else {
 121                 len = btrfs_file_extent_num_bytes(leaf, extent);
 122                 end = key->offset + len;
 123         }
 124         return end;
 125 }
 126
 127 static int check_extent_data_item(struct extent_buffer *leaf,
 128                                   struct btrfs_key *key, int slot,
 129                                   struct btrfs_key *prev_key)
 130 {
 131         struct btrfs_fs_info *fs_info = leaf->fs_info;
 132         struct btrfs_file_extent_item *fi;
 133         u32 sectorsize = fs_info->sectorsize;
 134         u32 item_size = btrfs_item_size_nr(leaf, slot);
 135
 136         if (!IS_ALIGNED(key->offset, sectorsize)) {
 137                 file_extent_err(leaf, slot,
 138 "unaligned file_offset for file extent, have %llu should be aligned to %u",
 139                         key->offset, sectorsize);
 140                 return -EUCLEAN;
 141         }
 142
 143         fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
 144
 145         if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) {
 146                 file_extent_err(leaf, slot,
 147                 "invalid type for file extent, have %u expect range [0, %u]",
 148                         btrfs_file_extent_type(leaf, fi),
 149                         BTRFS_FILE_EXTENT_TYPES);
 150                 return -EUCLEAN;
 151         }
 152
 153         /*
 154          * Support for new compression/encryption must introduce incompat flag,
 155          * and must be caught in open_ctree().
 156          */
 157         if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) {
 158                 file_extent_err(leaf, slot,
 159         "invalid compression for file extent, have %u expect range [0, %u]",
 160                         btrfs_file_extent_compression(leaf, fi),
 161                         BTRFS_COMPRESS_TYPES);
 162                 return -EUCLEAN;
 163         }
 164         if (btrfs_file_extent_encryption(leaf, fi)) {
 165                 file_extent_err(leaf, slot,
 166                         "invalid encryption for file extent, have %u expect 0",
 167                         btrfs_file_extent_encryption(leaf, fi));
 168                 return -EUCLEAN;
 169         }
 170         if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
 171                 /* Inline extent must have 0 as key offset */
 172                 if (key->offset) {
 173                         file_extent_err(leaf, slot,
 174                 "invalid file_offset for inline file extent, have %llu expect 0",
 175                                 key->offset);
 176                         return -EUCLEAN;
 177                 }
 178
 179                 /* Compressed inline extent has no on-disk size, skip it */
 180                 if (btrfs_file_extent_compression(leaf, fi) !=
 181                     BTRFS_COMPRESS_NONE)
 182                         return 0;
 183
 184                 /* Uncompressed inline extent size must match item size */
 185                 if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
 186                     btrfs_file_extent_ram_bytes(leaf, fi)) {
 187                         file_extent_err(leaf, slot,
 188         "invalid ram_bytes for uncompressed inline extent, have %u expect %llu",
 189                                 item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START +
 190                                 btrfs_file_extent_ram_bytes(leaf, fi));
 191                         return -EUCLEAN;
 192                 }
 193                 return 0;
 194         }
 195
 196         /* Regular or preallocated extent has fixed item size */
 197         if (item_size != sizeof(*fi)) {
 198                 file_extent_err(leaf, slot,
 199         "invalid item size for reg/prealloc file extent, have %u expect %zu",
 200                         item_size, sizeof(*fi));
 201                 return -EUCLEAN;
 202         }
 203         if (CHECK_FE_ALIGNED(leaf, slot, fi, ram_bytes, sectorsize) ||
 204             CHECK_FE_ALIGNED(leaf, slot, fi, disk_bytenr, sectorsize) ||
 205             CHECK_FE_ALIGNED(leaf, slot, fi, disk_num_bytes, sectorsize) ||
 206             CHECK_FE_ALIGNED(leaf, slot, fi, offset, sectorsize) ||
 207             CHECK_FE_ALIGNED(leaf, slot, fi, num_bytes, sectorsize))
 208                 return -EUCLEAN;
 209
 210         /*
 211          * Check that no two consecutive file extent items, in the same leaf,
 212          * present ranges that overlap each other.
 213          */
 214         if (slot > 0 &&
 215             prev_key->objectid == key->objectid &&
 216             prev_key->type == BTRFS_EXTENT_DATA_KEY) {
 217                 struct btrfs_file_extent_item *prev_fi;
 218                 u64 prev_end;
 219
 220                 prev_fi = btrfs_item_ptr(leaf, slot - 1,
 221                                          struct btrfs_file_extent_item);
 222                 prev_end = file_extent_end(leaf, prev_key, prev_fi);
 223                 if (prev_end > key->offset) {
 224                         file_extent_err(leaf, slot - 1,
 225 "file extent end range (%llu) goes beyond start offset (%llu) of the next file extent",
 226                                         prev_end, key->offset);
 227                         return -EUCLEAN;
 228                 }
 229         }
 230
 231         return 0;
 232 }
 233
 234 static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key,
 235                            int slot)
 236 {
 237         struct btrfs_fs_info *fs_info = leaf->fs_info;
 238         u32 sectorsize = fs_info->sectorsize;
 239         u32 csumsize = btrfs_super_csum_size(fs_info->super_copy);
 240
 241         if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) {
 242                 generic_err(leaf, slot,
 243                 "invalid key objectid for csum item, have %llu expect %llu",
 244                         key->objectid, BTRFS_EXTENT_CSUM_OBJECTID);
 245                 return -EUCLEAN;
 246         }
 247         if (!IS_ALIGNED(key->offset, sectorsize)) {
 248                 generic_err(leaf, slot,
 249         "unaligned key offset for csum item, have %llu should be aligned to %u",
 250                         key->offset, sectorsize);
 251                 return -EUCLEAN;
 252         }
 253         if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) {
 254                 generic_err(leaf, slot,
 255         "unaligned item size for csum item, have %u should be aligned to %u",
 256                         btrfs_item_size_nr(leaf, slot), csumsize);
 257                 return -EUCLEAN;
 258         }
 259         return 0;
 260 }
 261
 262 /*
 263  * Customized reported for dir_item, only important new info is key->objectid,
 264  * which represents inode number
 265  */
 266 __printf(3, 4)
 267 __cold
 268 static void dir_item_err(const struct extent_buffer *eb, int slot,
 269                          const char *fmt, ...)
 270 {
 271         const struct btrfs_fs_info *fs_info = eb->fs_info;
 272         struct btrfs_key key;
 273         struct va_format vaf;
 274         va_list args;
 275
 276         btrfs_item_key_to_cpu(eb, &key, slot);
 277         va_start(args, fmt);
 278
 279         vaf.fmt = fmt;
 280         vaf.va = &args;
 281
 282         btrfs_crit(fs_info,
 283         "corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
 284                 btrfs_header_level(eb) == 0 ? "leaf" : "node",
 285                 btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
 286                 key.objectid, &vaf);
 287         va_end(args);
 288 }
 289
 290 static int check_dir_item(struct extent_buffer *leaf,
 291                           struct btrfs_key *key, int slot)
 292 {
 293         struct btrfs_fs_info *fs_info = leaf->fs_info;
 294         struct btrfs_dir_item *di;
 295         u32 item_size = btrfs_item_size_nr(leaf, slot);
 296         u32 cur = 0;
 297
 298         di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
 299         while (cur < item_size) {
 300                 u32 name_len;
 301                 u32 data_len;
 302                 u32 max_name_len;
 303                 u32 total_size;
 304                 u32 name_hash;
 305                 u8 dir_type;
 306
 307                 /* header itself should not cross item boundary */
 308                 if (cur + sizeof(*di) > item_size) {
 309                         dir_item_err(leaf, slot,
 310                 "dir item header crosses item boundary, have %zu boundary %u",
 311                                 cur + sizeof(*di), item_size);
 312                         return -EUCLEAN;
 313                 }
 314
 315                 /* dir type check */
 316                 dir_type = btrfs_dir_type(leaf, di);
 317                 if (dir_type >= BTRFS_FT_MAX) {
 318                         dir_item_err(leaf, slot,
 319                         "invalid dir item type, have %u expect [0, %u)",
 320                                 dir_type, BTRFS_FT_MAX);
 321                         return -EUCLEAN;
 322                 }
 323
 324                 if (key->type == BTRFS_XATTR_ITEM_KEY &&
 325                     dir_type != BTRFS_FT_XATTR) {
 326                         dir_item_err(leaf, slot,
 327                 "invalid dir item type for XATTR key, have %u expect %u",
 328                                 dir_type, BTRFS_FT_XATTR);
 329                         return -EUCLEAN;
 330                 }
 331                 if (dir_type == BTRFS_FT_XATTR &&
 332                     key->type != BTRFS_XATTR_ITEM_KEY) {
 333                         dir_item_err(leaf, slot,
 334                         "xattr dir type found for non-XATTR key");
 335                         return -EUCLEAN;
 336                 }
 337                 if (dir_type == BTRFS_FT_XATTR)
 338                         max_name_len = XATTR_NAME_MAX;
 339                 else
 340                         max_name_len = BTRFS_NAME_LEN;
 341
 342                 /* Name/data length check */
 343                 name_len = btrfs_dir_name_len(leaf, di);
 344                 data_len = btrfs_dir_data_len(leaf, di);
 345                 if (name_len > max_name_len) {
 346                         dir_item_err(leaf, slot,
 347                         "dir item name len too long, have %u max %u",
 348                                 name_len, max_name_len);
 349                         return -EUCLEAN;
 350                 }
 351                 if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(fs_info)) {
 352                         dir_item_err(leaf, slot,
 353                         "dir item name and data len too long, have %u max %u",
 354                                 name_len + data_len,
 355                                 BTRFS_MAX_XATTR_SIZE(fs_info));
 356                         return -EUCLEAN;
 357                 }
 358
 359                 if (data_len && dir_type != BTRFS_FT_XATTR) {
 360                         dir_item_err(leaf, slot,
 361                         "dir item with invalid data len, have %u expect 0",
 362                                 data_len);
 363                         return -EUCLEAN;
 364                 }
 365
 366                 total_size = sizeof(*di) + name_len + data_len;
 367
 368                 /* header and name/data should not cross item boundary */
 369                 if (cur + total_size > item_size) {
 370                         dir_item_err(leaf, slot,
 371                 "dir item data crosses item boundary, have %u boundary %u",
 372                                 cur + total_size, item_size);
 373                         return -EUCLEAN;
 374                 }
 375
 376                 /*
 377                  * Special check for XATTR/DIR_ITEM, as key->offset is name
 378                  * hash, should match its name
 379                  */
 380                 if (key->type == BTRFS_DIR_ITEM_KEY ||
 381                     key->type == BTRFS_XATTR_ITEM_KEY) {
 382                         char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)];
 383
 384                         read_extent_buffer(leaf, namebuf,
 385                                         (unsigned long)(di + 1), name_len);
 386                         name_hash = btrfs_name_hash(namebuf, name_len);
 387                         if (key->offset != name_hash) {
 388                                 dir_item_err(leaf, slot,
 389                 "name hash mismatch with key, have 0x%016x expect 0x%016llx",
 390                                         name_hash, key->offset);
 391                                 return -EUCLEAN;
 392                         }
 393                 }
 394                 cur += total_size;
 395                 di = (struct btrfs_dir_item *)((void *)di + total_size);
 396         }
 397         return 0;
 398 }
 399
 400 __printf(3, 4)
 401 __cold
 402 static void block_group_err(const struct extent_buffer *eb, int slot,
 403                             const char *fmt, ...)
 404 {
 405         const struct btrfs_fs_info *fs_info = eb->fs_info;
 406         struct btrfs_key key;
 407         struct va_format vaf;
 408         va_list args;
 409
 410         btrfs_item_key_to_cpu(eb, &key, slot);
 411         va_start(args, fmt);
 412
 413         vaf.fmt = fmt;
 414         vaf.va = &args;
 415
 416         btrfs_crit(fs_info,
 417         "corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV",
 418                 btrfs_header_level(eb) == 0 ? "leaf" : "node",
 419                 btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
 420                 key.objectid, key.offset, &vaf);
 421         va_end(args);
 422 }
 423
 424 static int check_block_group_item(struct extent_buffer *leaf,
 425                                   struct btrfs_key *key, int slot)
 426 {
 427         struct btrfs_block_group_item bgi;
 428         u32 item_size = btrfs_item_size_nr(leaf, slot);
 429         u64 flags;
 430         u64 type;
 431
 432         /*
 433          * Here we don't really care about alignment since extent allocator can
 434          * handle it.  We care more about the size.
 435          */
 436         if (key->offset == 0) {
 437                 block_group_err(leaf, slot,
 438                                 "invalid block group size 0");
 439                 return -EUCLEAN;
 440         }
 441
 442         if (item_size != sizeof(bgi)) {
 443                 block_group_err(leaf, slot,
 444                         "invalid item size, have %u expect %zu",
 445                                 item_size, sizeof(bgi));
 446                 return -EUCLEAN;
 447         }
 448
 449         read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
 450                            sizeof(bgi));
 451         if (btrfs_block_group_chunk_objectid(&bgi) !=
 452             BTRFS_FIRST_CHUNK_TREE_OBJECTID) {
 453                 block_group_err(leaf, slot,
 454                 "invalid block group chunk objectid, have %llu expect %llu",
 455                                 btrfs_block_group_chunk_objectid(&bgi),
 456                                 BTRFS_FIRST_CHUNK_TREE_OBJECTID);
 457                 return -EUCLEAN;
 458         }
 459
 460         if (btrfs_block_group_used(&bgi) > key->offset) {
 461                 block_group_err(leaf, slot,
 462                         "invalid block group used, have %llu expect [0, %llu)",
 463                                 btrfs_block_group_used(&bgi), key->offset);
 464                 return -EUCLEAN;
 465         }
 466
 467         flags = btrfs_block_group_flags(&bgi);
 468         if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) {
 469                 block_group_err(leaf, slot,
 470 "invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
 471                         flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
 472                         hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
 473                 return -EUCLEAN;
 474         }
 475
 476         type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
 477         if (type != BTRFS_BLOCK_GROUP_DATA &&
 478             type != BTRFS_BLOCK_GROUP_METADATA &&
 479             type != BTRFS_BLOCK_GROUP_SYSTEM &&
 480             type != (BTRFS_BLOCK_GROUP_METADATA |
 481                            BTRFS_BLOCK_GROUP_DATA)) {
 482                 block_group_err(leaf, slot,
 483 "invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
 484                         type, hweight64(type),
 485                         BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
 486                         BTRFS_BLOCK_GROUP_SYSTEM,
 487                         BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA);
 488                 return -EUCLEAN;
 489         }
 490         return 0;
 491 }
 492
 493 __printf(4, 5)
 494 __cold
 495 static void chunk_err(const struct extent_buffer *leaf,
 496                       const struct btrfs_chunk *chunk, u64 logical,
 497                       const char *fmt, ...)
 498 {
 499         const struct btrfs_fs_info *fs_info = leaf->fs_info;
 500         bool is_sb;
 501         struct va_format vaf;
 502         va_list args;
 503         int i;
 504         int slot = -1;
 505
 506         /* Only superblock eb is able to have such small offset */
 507         is_sb = (leaf->start == BTRFS_SUPER_INFO_OFFSET);
 508
 509         if (!is_sb) {
 510                 /*
 511                  * Get the slot number by iterating through all slots, this
 512                  * would provide better readability.
 513                  */
 514                 for (i = 0; i < btrfs_header_nritems(leaf); i++) {
 515                         if (btrfs_item_ptr_offset(leaf, i) ==
 516                                         (unsigned long)chunk) {
 517                                 slot = i;
 518                                 break;
 519                         }
 520                 }
 521         }
 522         va_start(args, fmt);
 523         vaf.fmt = fmt;
 524         vaf.va = &args;
 525
 526         if (is_sb)
 527                 btrfs_crit(fs_info,
 528                 "corrupt superblock syschunk array: chunk_start=%llu, %pV",
 529                            logical, &vaf);
 530         else
 531                 btrfs_crit(fs_info,
 532         "corrupt leaf: root=%llu block=%llu slot=%d chunk_start=%llu, %pV",
 533                            BTRFS_CHUNK_TREE_OBJECTID, leaf->start, slot,
 534                            logical, &vaf);
 535         va_end(args);
 536 }
 537
 538 /*
 539  * The common chunk check which could also work on super block sys chunk array.
 540  *
 541  * Return -EUCLEAN if anything is corrupted.
 542  * Return 0 if everything is OK.
 543  */
 544 int btrfs_check_chunk_valid(struct extent_buffer *leaf,
 545                             struct btrfs_chunk *chunk, u64 logical)
 546 {
 547         struct btrfs_fs_info *fs_info = leaf->fs_info;
 548         u64 length;
 549         u64 stripe_len;
 550         u16 num_stripes;
 551         u16 sub_stripes;
 552         u64 type;
 553         u64 features;
 554         bool mixed = false;
 555
 556         length = btrfs_chunk_length(leaf, chunk);
 557         stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
 558         num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
 559         sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
 560         type = btrfs_chunk_type(leaf, chunk);
 561
 562         if (!num_stripes) {
 563                 chunk_err(leaf, chunk, logical,
 564                           "invalid chunk num_stripes, have %u", num_stripes);
 565                 return -EUCLEAN;
 566         }
 567         if (!IS_ALIGNED(logical, fs_info->sectorsize)) {
 568                 chunk_err(leaf, chunk, logical,
 569                 "invalid chunk logical, have %llu should aligned to %u",
 570                           logical, fs_info->sectorsize);
 571                 return -EUCLEAN;
 572         }
 573         if (btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize) {
 574                 chunk_err(leaf, chunk, logical,
 575                           "invalid chunk sectorsize, have %u expect %u",
 576                           btrfs_chunk_sector_size(leaf, chunk),
 577                           fs_info->sectorsize);
 578                 return -EUCLEAN;
 579         }
 580         if (!length || !IS_ALIGNED(length, fs_info->sectorsize)) {
 581                 chunk_err(leaf, chunk, logical,
 582                           "invalid chunk length, have %llu", length);
 583                 return -EUCLEAN;
 584         }
 585         if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) {
 586                 chunk_err(leaf, chunk, logical,
 587                           "invalid chunk stripe length: %llu",
 588                           stripe_len);
 589                 return -EUCLEAN;
 590         }
 591         if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
 592             type) {
 593                 chunk_err(leaf, chunk, logical,
 594                           "unrecognized chunk type: 0x%llx",
 595                           ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
 596                             BTRFS_BLOCK_GROUP_PROFILE_MASK) &
 597                           btrfs_chunk_type(leaf, chunk));
 598                 return -EUCLEAN;
 599         }
 600
 601         if (!is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
 602             (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0) {
 603                 chunk_err(leaf, chunk, logical,
 604                 "invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set",
 605                           type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
 606                 return -EUCLEAN;
 607         }
 608         if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
 609                 chunk_err(leaf, chunk, logical,
 610         "missing chunk type flag, have 0x%llx one bit must be set in 0x%llx",
 611                           type, BTRFS_BLOCK_GROUP_TYPE_MASK);
 612                 return -EUCLEAN;
 613         }
 614
 615         if ((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
 616             (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) {
 617                 chunk_err(leaf, chunk, logical,
 618                           "system chunk with data or metadata type: 0x%llx",
 619                           type);
 620                 return -EUCLEAN;
 621         }
 622
 623         features = btrfs_super_incompat_flags(fs_info->super_copy);
 624         if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
 625                 mixed = true;
 626
 627         if (!mixed) {
 628                 if ((type & BTRFS_BLOCK_GROUP_METADATA) &&
 629                     (type & BTRFS_BLOCK_GROUP_DATA)) {
 630                         chunk_err(leaf, chunk, logical,
 631                         "mixed chunk type in non-mixed mode: 0x%llx", type);
 632                         return -EUCLEAN;
 633                 }
 634         }
 635
 636         if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
 637             (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes != 2) ||
 638             (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
 639             (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
 640             (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) ||
 641             ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && num_stripes != 1)) {
 642                 chunk_err(leaf, chunk, logical,
 643                         "invalid num_stripes:sub_stripes %u:%u for profile %llu",
 644                         num_stripes, sub_stripes,
 645                         type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
 646                 return -EUCLEAN;
 647         }
 648
 649         return 0;
 650 }
 651
 652 __printf(3, 4)
 653 __cold
 654 static void dev_item_err(const struct extent_buffer *eb, int slot,
 655                          const char *fmt, ...)
 656 {
 657         struct btrfs_key key;
 658         struct va_format vaf;
 659         va_list args;
 660
 661         btrfs_item_key_to_cpu(eb, &key, slot);
 662         va_start(args, fmt);
 663
 664         vaf.fmt = fmt;
 665         vaf.va = &args;
 666
 667         btrfs_crit(eb->fs_info,
 668         "corrupt %s: root=%llu block=%llu slot=%d devid=%llu %pV",
 669                 btrfs_header_level(eb) == 0 ? "leaf" : "node",
 670                 btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
 671                 key.objectid, &vaf);
 672         va_end(args);
 673 }
 674
 675 static int check_dev_item(struct extent_buffer *leaf,
 676                           struct btrfs_key *key, int slot)
 677 {
 678         struct btrfs_fs_info *fs_info = leaf->fs_info;
 679         struct btrfs_dev_item *ditem;
 680         u64 max_devid = max(BTRFS_MAX_DEVS(fs_info), BTRFS_MAX_DEVS_SYS_CHUNK);
 681
 682         if (key->objectid != BTRFS_DEV_ITEMS_OBJECTID) {
 683                 dev_item_err(leaf, slot,
 684                              "invalid objectid: has=%llu expect=%llu",
 685                              key->objectid, BTRFS_DEV_ITEMS_OBJECTID);
 686                 return -EUCLEAN;
 687         }
 688         if (key->offset > max_devid) {
 689                 dev_item_err(leaf, slot,
 690                              "invalid devid: has=%llu expect=[0, %llu]",
 691                              key->offset, max_devid);
 692                 return -EUCLEAN;
 693         }
 694         ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item);
 695         if (btrfs_device_id(leaf, ditem) != key->offset) {
 696                 dev_item_err(leaf, slot,
 697                              "devid mismatch: key has=%llu item has=%llu",
 698                              key->offset, btrfs_device_id(leaf, ditem));
 699                 return -EUCLEAN;
 700         }
 701
 702         /*
 703          * For device total_bytes, we don't have reliable way to check it, as
 704          * it can be 0 for device removal. Device size check can only be done
 705          * by dev extents check.
 706          */
 707         if (btrfs_device_bytes_used(leaf, ditem) >
 708             btrfs_device_total_bytes(leaf, ditem)) {
 709                 dev_item_err(leaf, slot,
 710                              "invalid bytes used: have %llu expect [0, %llu]",
 711                              btrfs_device_bytes_used(leaf, ditem),
 712                              btrfs_device_total_bytes(leaf, ditem));
 713                 return -EUCLEAN;
 714         }
 715         /*
 716          * Remaining members like io_align/type/gen/dev_group aren't really
 717          * utilized.  Skip them to make later usage of them easier.
 718          */
 719         return 0;
 720 }
 721
 /*
  * Inode item error output has the same format as dir_item_err().
  *
  * @fs_info is accepted but not used, everything else is passed through.
  * ##__VA_ARGS__ drops the trailing comma when no format arguments are given,
  * so a plain message without arguments can be reported as well.
  */
 #define inode_item_err(fs_info, eb, slot, fmt, ...)                     \
         dir_item_err(eb, slot, fmt, ##__VA_ARGS__)
 725
 726 static int check_inode_item(struct extent_buffer *leaf,
 727                             struct btrfs_key *key, int slot)
 728 {
 729         struct btrfs_fs_info *fs_info = leaf->fs_info;
 730         struct btrfs_inode_item *iitem;
 731         u64 super_gen = btrfs_super_generation(fs_info->super_copy);
 732         u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777);
 733         u32 mode;
 734
 735         if ((key->objectid < BTRFS_FIRST_FREE_OBJECTID ||
 736              key->objectid > BTRFS_LAST_FREE_OBJECTID) &&
 737             key->objectid != BTRFS_ROOT_TREE_DIR_OBJECTID &&
 738             key->objectid != BTRFS_FREE_INO_OBJECTID) {
 739                 generic_err(leaf, slot,
 740         "invalid key objectid: has %llu expect %llu or [%llu, %llu] or %llu",
 741                             key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID,
 742                             BTRFS_FIRST_FREE_OBJECTID,
 743                             BTRFS_LAST_FREE_OBJECTID,
 744                             BTRFS_FREE_INO_OBJECTID);
 745                 return -EUCLEAN;
 746         }
 747         if (key->offset != 0) {
 748                 inode_item_err(fs_info, leaf, slot,
 749                         "invalid key offset: has %llu expect 0",
 750                         key->offset);
 751                 return -EUCLEAN;
 752         }
 753         iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item);
 754
 755         /* Here we use super block generation + 1 to handle log tree */
 756         if (btrfs_inode_generation(leaf, iitem) > super_gen + 1) {
 757                 inode_item_err(fs_info, leaf, slot,
 758                         "invalid inode generation: has %llu expect (0, %llu]",
 759                                btrfs_inode_generation(leaf, iitem),
 760                                super_gen + 1);
 761                 return -EUCLEAN;
 762         }
 763         /* Note for ROOT_TREE_DIR_ITEM, mkfs could set its transid 0 */
 764         if (btrfs_inode_transid(leaf, iitem) > super_gen + 1) {
 765                 inode_item_err(fs_info, leaf, slot,
 766                         "invalid inode generation: has %llu expect [0, %llu]",
 767                                btrfs_inode_transid(leaf, iitem), super_gen + 1);
 768                 return -EUCLEAN;
 769         }
 770
 771         /*
 772          * For size and nbytes it's better not to be too strict, as for dir
 773          * item its size/nbytes can easily get wrong, but doesn't affect
 774          * anything in the fs. So here we skip the check.
 775          */
 776         mode = btrfs_inode_mode(leaf, iitem);
 777         if (mode & ~valid_mask) {
 778                 inode_item_err(fs_info, leaf, slot,
 779                                "unknown mode bit detected: 0x%x",
 780                                mode & ~valid_mask);
 781                 return -EUCLEAN;
 782         }
 783
 784         /*
 785          * S_IFMT is not bit mapped so we can't completely rely on is_power_of_2,
 786          * but is_power_of_2() can save us from checking FIFO/CHR/DIR/REG.
 787          * Only needs to check BLK, LNK and SOCKS
 788          */
 789         if (!is_power_of_2(mode & S_IFMT)) {
 790                 if (!S_ISLNK(mode) && !S_ISBLK(mode) && !S_ISSOCK(mode)) {
 791                         inode_item_err(fs_info, leaf, slot,
 792                         "invalid mode: has 0%o expect valid S_IF* bit(s)",
 793                                        mode & S_IFMT);
 794                         return -EUCLEAN;
 795                 }
 796         }
 797         if (S_ISDIR(mode) && btrfs_inode_nlink(leaf, iitem) > 1) {
 798                 inode_item_err(fs_info, leaf, slot,
 799                        "invalid nlink: has %u expect no more than 1 for dir",
 800                         btrfs_inode_nlink(leaf, iitem));
 801                 return -EUCLEAN;
 802         }
 803         if (btrfs_inode_flags(leaf, iitem) & ~BTRFS_INODE_FLAG_MASK) {
 804                 inode_item_err(fs_info, leaf, slot,
 805                                "unknown flags detected: 0x%llx",
 806                                btrfs_inode_flags(leaf, iitem) &
 807                                ~BTRFS_INODE_FLAG_MASK);
 808                 return -EUCLEAN;
 809         }
 810         return 0;
 811 }
 812
 813 /*
 814  * Common point to switch the item-specific validation.
 815  */
 816 static int check_leaf_item(struct extent_buffer *leaf,
 817                            struct btrfs_key *key, int slot,
 818                            struct btrfs_key *prev_key)
 819 {
 820         int ret = 0;
 821         struct btrfs_chunk *chunk;
 822
 823         switch (key->type) {
 824         case BTRFS_EXTENT_DATA_KEY:
 825                 ret = check_extent_data_item(leaf, key, slot, prev_key);
 826                 break;
 827         case BTRFS_EXTENT_CSUM_KEY:
 828                 ret = check_csum_item(leaf, key, slot);
 829                 break;
 830         case BTRFS_DIR_ITEM_KEY:
 831         case BTRFS_DIR_INDEX_KEY:
 832         case BTRFS_XATTR_ITEM_KEY:
 833                 ret = check_dir_item(leaf, key, slot);
 834                 break;
 835         case BTRFS_BLOCK_GROUP_ITEM_KEY:
 836                 ret = check_block_group_item(leaf, key, slot);
 837                 break;
 838         case BTRFS_CHUNK_ITEM_KEY:
 839                 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
 840                 ret = btrfs_check_chunk_valid(leaf, chunk, key->offset);
 841                 break;
 842         case BTRFS_DEV_ITEM_KEY:
 843                 ret = check_dev_item(leaf, key, slot);
 844                 break;
 845         case BTRFS_INODE_ITEM_KEY:
 846                 ret = check_inode_item(leaf, key, slot);
 847                 break;
 848         }
 849         return ret;
 850 }
 851
 852 static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
 853 {
 854         struct btrfs_fs_info *fs_info = leaf->fs_info;
 855         /* No valid key type is 0, so all key should be larger than this key */
 856         struct btrfs_key prev_key = {0, 0, 0};
 857         struct btrfs_key key;
 858         u32 nritems = btrfs_header_nritems(leaf);
 859         int slot;
 860
 861         if (btrfs_header_level(leaf) != 0) {
 862                 generic_err(leaf, 0,
 863                         "invalid level for leaf, have %d expect 0",
 864                         btrfs_header_level(leaf));
 865                 return -EUCLEAN;
 866         }
 867
 868         /*
 869          * Extent buffers from a relocation tree have a owner field that
 870          * corresponds to the subvolume tree they are based on. So just from an
 871          * extent buffer alone we can not find out what is the id of the
 872          * corresponding subvolume tree, so we can not figure out if the extent
 873          * buffer corresponds to the root of the relocation tree or not. So
 874          * skip this check for relocation trees.
 875          */
 876         if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
 877                 u64 owner = btrfs_header_owner(leaf);
 878
 879                 /* These trees must never be empty */
 880                 if (owner == BTRFS_ROOT_TREE_OBJECTID ||
 881                     owner == BTRFS_CHUNK_TREE_OBJECTID ||
 882                     owner == BTRFS_EXTENT_TREE_OBJECTID ||
 883                     owner == BTRFS_DEV_TREE_OBJECTID ||
 884                     owner == BTRFS_FS_TREE_OBJECTID ||
 885                     owner == BTRFS_DATA_RELOC_TREE_OBJECTID) {
 886                         generic_err(leaf, 0,
 887                         "invalid root, root %llu must never be empty",
 888                                     owner);
 889                         return -EUCLEAN;
 890                 }
 891                 return 0;
 892         }
 893
 894         if (nritems == 0)
 895                 return 0;
 896
 897         /*
 898          * Check the following things to make sure this is a good leaf, and
 899          * leaf users won't need to bother with similar sanity checks:
 900          *
 901          * 1) key ordering
 902          * 2) item offset and size
 903          *    No overlap, no hole, all inside the leaf.
 904          * 3) item content
 905          *    If possible, do comprehensive sanity check.
 906          *    NOTE: All checks must only rely on the item data itself.
 907          */
 908         for (slot = 0; slot < nritems; slot++) {
 909                 u32 item_end_expected;
 910                 int ret;
 911
 912                 btrfs_item_key_to_cpu(leaf, &key, slot);
 913
 914                 /* Make sure the keys are in the right order */
 915                 if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) {
 916                         generic_err(leaf, slot,
 917         "bad key order, prev (%llu %u %llu) current (%llu %u %llu)",
 918                                 prev_key.objectid, prev_key.type,
 919                                 prev_key.offset, key.objectid, key.type,
 920                                 key.offset);
 921                         return -EUCLEAN;
 922                 }
 923
 924                 /*
 925                  * Make sure the offset and ends are right, remember that the
 926                  * item data starts at the end of the leaf and grows towards the
 927                  * front.
 928                  */
 929                 if (slot == 0)
 930                         item_end_expected = BTRFS_LEAF_DATA_SIZE(fs_info);
 931                 else
 932                         item_end_expected = btrfs_item_offset_nr(leaf,
 933                                                                  slot - 1);
 934                 if (btrfs_item_end_nr(leaf, slot) != item_end_expected) {
 935                         generic_err(leaf, slot,
 936                                 "unexpected item end, have %u expect %u",
 937                                 btrfs_item_end_nr(leaf, slot),
 938                                 item_end_expected);
 939                         return -EUCLEAN;
 940                 }
 941
 942                 /*
 943                  * Check to make sure that we don't point outside of the leaf,
 944                  * just in case all the items are consistent to each other, but
 945                  * all point outside of the leaf.
 946                  */
 947                 if (btrfs_item_end_nr(leaf, slot) >
 948                     BTRFS_LEAF_DATA_SIZE(fs_info)) {
 949                         generic_err(leaf, slot,
 950                         "slot end outside of leaf, have %u expect range [0, %u]",
 951                                 btrfs_item_end_nr(leaf, slot),
 952                                 BTRFS_LEAF_DATA_SIZE(fs_info));
 953                         return -EUCLEAN;
 954                 }
 955
 956                 /* Also check if the item pointer overlaps with btrfs item. */
 957                 if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) >
 958                     btrfs_item_ptr_offset(leaf, slot)) {
 959                         generic_err(leaf, slot,
 960                 "slot overlaps with its data, item end %lu data start %lu",
 961                                 btrfs_item_nr_offset(slot) +
 962                                 sizeof(struct btrfs_item),
 963                                 btrfs_item_ptr_offset(leaf, slot));
 964                         return -EUCLEAN;
 965                 }
 966
 967                 if (check_item_data) {
 968                         /*
 969                          * Check if the item size and content meet other
 970                          * criteria
 971                          */
 972                         ret = check_leaf_item(leaf, &key, slot, &prev_key);
 973                         if (ret < 0)
 974                                 return ret;
 975                 }
 976
 977                 prev_key.objectid = key.objectid;
 978                 prev_key.type = key.type;
 979                 prev_key.offset = key.offset;
 980         }
 981
 982         return 0;
 983 }
 984
 985 int btrfs_check_leaf_full(struct extent_buffer *leaf)
 986 {
 987         return check_leaf(leaf, true);
 988 }
 989 ALLOW_ERROR_INJECTION(btrfs_check_leaf_full, ERRNO);
 990
 991 int btrfs_check_leaf_relaxed(struct extent_buffer *leaf)
 992 {
 993         return check_leaf(leaf, false);
 994 }
 995
 996 int btrfs_check_node(struct extent_buffer *node)
 997 {
 998         struct btrfs_fs_info *fs_info = node->fs_info;
 999         unsigned long nr = btrfs_header_nritems(node);
1000         struct btrfs_key key, next_key;
1001         int slot;
1002         int level = btrfs_header_level(node);
1003         u64 bytenr;
1004         int ret = 0;
1005
1006         if (level <= 0 || level >= BTRFS_MAX_LEVEL) {
1007                 generic_err(node, 0,
1008                         "invalid level for node, have %d expect [1, %d]",
1009                         level, BTRFS_MAX_LEVEL - 1);
1010                 return -EUCLEAN;
1011         }
1012         if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(fs_info)) {
1013                 btrfs_crit(fs_info,
1014 "corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]",
1015                            btrfs_header_owner(node), node->start,
1016                            nr == 0 ? "small" : "large", nr,
1017                            BTRFS_NODEPTRS_PER_BLOCK(fs_info));
1018                 return -EUCLEAN;
1019         }
1020
1021         for (slot = 0; slot < nr - 1; slot++) {
1022                 bytenr = btrfs_node_blockptr(node, slot);
1023                 btrfs_node_key_to_cpu(node, &key, slot);
1024                 btrfs_node_key_to_cpu(node, &next_key, slot + 1);
1025
1026                 if (!bytenr) {
1027                         generic_err(node, slot,
1028                                 "invalid NULL node pointer");
1029                         ret = -EUCLEAN;
1030                         goto out;
1031                 }
1032                 if (!IS_ALIGNED(bytenr, fs_info->sectorsize)) {
1033                         generic_err(node, slot,
1034                         "unaligned pointer, have %llu should be aligned to %u",
1035                                 bytenr, fs_info->sectorsize);
1036                         ret = -EUCLEAN;
1037                         goto out;
1038                 }
1039
1040                 if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
1041                         generic_err(node, slot,
1042         "bad key order, current (%llu %u %llu) next (%llu %u %llu)",
1043                                 key.objectid, key.type, key.offset,
1044                                 next_key.objectid, next_key.type,
1045                                 next_key.offset);
1046                         ret = -EUCLEAN;
1047                         goto out;
1048                 }
1049         }
1050 out:
1051         return ret;
1052 }
1053 ALLOW_ERROR_INJECTION(btrfs_check_node, ERRNO);