diff -Nru btrfs-progs-4.7/btrfs-convert.c btrfs-progs-4.7.1/btrfs-convert.c --- btrfs-progs-4.7/btrfs-convert.c 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/btrfs-convert.c 2016-08-25 17:33:48.000000000 +0000 @@ -37,12 +37,13 @@ #include "crc32c.h" #include "utils.h" #include "task-utils.h" + +#if BTRFSCONVERT_EXT2 #include #include #include #define INO_OFFSET (BTRFS_FIRST_FREE_OBJECTID - EXT2_ROOT_INO) -#define CONV_IMAGE_SUBVOL_OBJECTID BTRFS_FIRST_FREE_OBJECTID /* * Compatibility code for e2fsprogs 1.41 which doesn't support RO compat flag @@ -56,6 +57,10 @@ #define EXT2FS_B2C(fs, blk) (blk) #endif +#endif + +#define CONV_IMAGE_SUBVOL_OBJECTID BTRFS_FIRST_FREE_OBJECTID + struct task_ctx { uint32_t max_copy_inodes; uint32_t cur_copy_inodes; @@ -127,153 +132,6 @@ cctx->convert_ops->close_fs(cctx); } -/* - * Open Ext2fs in readonly mode, read block allocation bitmap and - * inode bitmap into memory. - */ -static int ext2_open_fs(struct btrfs_convert_context *cctx, const char *name) -{ - errcode_t ret; - ext2_filsys ext2_fs; - ext2_ino_t ino; - u32 ro_feature; - - ret = ext2fs_open(name, 0, 0, 0, unix_io_manager, &ext2_fs); - if (ret) { - fprintf(stderr, "ext2fs_open: %s\n", error_message(ret)); - return -1; - } - /* - * We need to know exactly the used space, some RO compat flags like - * BIGALLOC will affect how used space is present. 
- * So we need manuall check any unsupported RO compat flags - */ - ro_feature = ext2_fs->super->s_feature_ro_compat; - if (ro_feature & ~EXT2_LIB_FEATURE_RO_COMPAT_SUPP) { - error( -"unsupported RO features detected: %x, abort convert to avoid possible corruption", - ro_feature & ~EXT2_LIB_FEATURE_COMPAT_SUPP); - goto fail; - } - ret = ext2fs_read_inode_bitmap(ext2_fs); - if (ret) { - fprintf(stderr, "ext2fs_read_inode_bitmap: %s\n", - error_message(ret)); - goto fail; - } - ret = ext2fs_read_block_bitmap(ext2_fs); - if (ret) { - fprintf(stderr, "ext2fs_read_block_bitmap: %s\n", - error_message(ret)); - goto fail; - } - /* - * search each block group for a free inode. this set up - * uninit block/inode bitmaps appropriately. - */ - ino = 1; - while (ino <= ext2_fs->super->s_inodes_count) { - ext2_ino_t foo; - ext2fs_new_inode(ext2_fs, ino, 0, NULL, &foo); - ino += EXT2_INODES_PER_GROUP(ext2_fs->super); - } - - if (!(ext2_fs->super->s_feature_incompat & - EXT2_FEATURE_INCOMPAT_FILETYPE)) { - fprintf(stderr, "filetype feature is missing\n"); - goto fail; - } - - cctx->fs_data = ext2_fs; - cctx->blocksize = ext2_fs->blocksize; - cctx->block_count = ext2_fs->super->s_blocks_count; - cctx->total_bytes = ext2_fs->blocksize * ext2_fs->super->s_blocks_count; - cctx->volume_name = strndup(ext2_fs->super->s_volume_name, 16); - cctx->first_data_block = ext2_fs->super->s_first_data_block; - cctx->inodes_count = ext2_fs->super->s_inodes_count; - cctx->free_inodes_count = ext2_fs->super->s_free_inodes_count; - return 0; -fail: - ext2fs_close(ext2_fs); - return -1; -} - -static int __ext2_add_one_block(ext2_filsys fs, char *bitmap, - unsigned long group_nr, struct cache_tree *used) -{ - unsigned long offset; - unsigned i; - int ret = 0; - - offset = fs->super->s_first_data_block; - offset /= EXT2FS_CLUSTER_RATIO(fs); - offset += group_nr * EXT2_CLUSTERS_PER_GROUP(fs->super); - for (i = 0; i < EXT2_CLUSTERS_PER_GROUP(fs->super); i++) { - if (ext2fs_test_bit(i, bitmap)) { - u64 
start; - - start = (i + offset) * EXT2FS_CLUSTER_RATIO(fs); - start *= fs->blocksize; - ret = add_merge_cache_extent(used, start, - fs->blocksize); - if (ret < 0) - break; - } - } - return ret; -} - -/* - * Read all used ext2 space into cctx->used cache tree - */ -static int ext2_read_used_space(struct btrfs_convert_context *cctx) -{ - ext2_filsys fs = (ext2_filsys)cctx->fs_data; - blk64_t blk_itr = EXT2FS_B2C(fs, fs->super->s_first_data_block); - struct cache_tree *used_tree = &cctx->used; - char *block_bitmap = NULL; - unsigned long i; - int block_nbytes; - int ret = 0; - - block_nbytes = EXT2_CLUSTERS_PER_GROUP(fs->super) / 8; - /* Shouldn't happen */ - BUG_ON(!fs->block_map); - - block_bitmap = malloc(block_nbytes); - if (!block_bitmap) - return -ENOMEM; - - for (i = 0; i < fs->group_desc_count; i++) { - ret = ext2fs_get_block_bitmap_range(fs->block_map, blk_itr, - block_nbytes * 8, block_bitmap); - if (ret) { - error("fail to get bitmap from ext2, %s", - strerror(-ret)); - break; - } - ret = __ext2_add_one_block(fs, block_bitmap, i, used_tree); - if (ret < 0) { - error("fail to build used space tree, %s", - strerror(-ret)); - break; - } - blk_itr += EXT2_CLUSTERS_PER_GROUP(fs->super); - } - - free(block_bitmap); - return ret; -} - -static void ext2_close_fs(struct btrfs_convert_context *cctx) -{ - if (cctx->volume_name) { - free(cctx->volume_name); - cctx->volume_name = NULL; - } - ext2fs_close(cctx->fs_data); -} - static int intersect_with_sb(u64 bytenr, u64 num_bytes) { int i; @@ -319,100 +177,6 @@ return 0; } -struct dir_iterate_data { - struct btrfs_trans_handle *trans; - struct btrfs_root *root; - struct btrfs_inode_item *inode; - u64 objectid; - u64 index_cnt; - u64 parent; - int errcode; -}; - -static u8 filetype_conversion_table[EXT2_FT_MAX] = { - [EXT2_FT_UNKNOWN] = BTRFS_FT_UNKNOWN, - [EXT2_FT_REG_FILE] = BTRFS_FT_REG_FILE, - [EXT2_FT_DIR] = BTRFS_FT_DIR, - [EXT2_FT_CHRDEV] = BTRFS_FT_CHRDEV, - [EXT2_FT_BLKDEV] = BTRFS_FT_BLKDEV, - [EXT2_FT_FIFO] = 
BTRFS_FT_FIFO, - [EXT2_FT_SOCK] = BTRFS_FT_SOCK, - [EXT2_FT_SYMLINK] = BTRFS_FT_SYMLINK, -}; - -static int dir_iterate_proc(ext2_ino_t dir, int entry, - struct ext2_dir_entry *dirent, - int offset, int blocksize, - char *buf,void *priv_data) -{ - int ret; - int file_type; - u64 objectid; - char dotdot[] = ".."; - struct dir_iterate_data *idata = (struct dir_iterate_data *)priv_data; - int name_len; - - name_len = dirent->name_len & 0xFF; - - objectid = dirent->inode + INO_OFFSET; - if (!strncmp(dirent->name, dotdot, name_len)) { - if (name_len == 2) { - BUG_ON(idata->parent != 0); - idata->parent = objectid; - } - return 0; - } - if (dirent->inode < EXT2_GOOD_OLD_FIRST_INO) - return 0; - - file_type = dirent->name_len >> 8; - BUG_ON(file_type > EXT2_FT_SYMLINK); - - ret = convert_insert_dirent(idata->trans, idata->root, dirent->name, - name_len, idata->objectid, objectid, - filetype_conversion_table[file_type], - idata->index_cnt, idata->inode); - if (ret < 0) { - idata->errcode = ret; - return BLOCK_ABORT; - } - - idata->index_cnt++; - return 0; -} - -static int create_dir_entries(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 objectid, - struct btrfs_inode_item *btrfs_inode, - ext2_filsys ext2_fs, ext2_ino_t ext2_ino) -{ - int ret; - errcode_t err; - struct dir_iterate_data data = { - .trans = trans, - .root = root, - .inode = btrfs_inode, - .objectid = objectid, - .index_cnt = 2, - .parent = 0, - .errcode = 0, - }; - - err = ext2fs_dir_iterate2(ext2_fs, ext2_ino, 0, NULL, - dir_iterate_proc, &data); - if (err) - goto error; - ret = data.errcode; - if (ret == 0 && data.parent == objectid) { - ret = btrfs_insert_inode_ref(trans, root, "..", 2, - objectid, objectid, 0); - } - return ret; -error: - fprintf(stderr, "ext2fs_dir_iterate2: %s\n", error_message(err)); - return -1; -} - static int read_disk_extent(struct btrfs_root *root, u64 bytenr, u32 num_bytes, char *buffer) { @@ -646,915 +410,589 @@ return ret; } -static int 
__block_iterate_proc(ext2_filsys fs, blk_t *blocknr, - e2_blkcnt_t blockcnt, blk_t ref_block, - int ref_offset, void *priv_data) +static int create_image_file_range(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct cache_tree *used, + struct btrfs_inode_item *inode, + u64 ino, u64 bytenr, u64 *ret_len, + int datacsum) { + struct cache_extent *cache; + struct btrfs_block_group_cache *bg_cache; + u64 len = *ret_len; + u64 disk_bytenr; + int i; int ret; - struct blk_iterate_data *idata; - idata = (struct blk_iterate_data *)priv_data; - ret = block_iterate_proc(*blocknr, blockcnt, idata); - if (ret) { - idata->errcode = ret; - return BLOCK_ABORT; - } - return 0; -} -/* - * traverse file's data blocks, record these data blocks as file extents. - */ -static int create_file_extents(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 objectid, - struct btrfs_inode_item *btrfs_inode, - ext2_filsys ext2_fs, ext2_ino_t ext2_ino, - int datacsum, int packing) -{ - int ret; - char *buffer = NULL; - errcode_t err; - u32 last_block; - u32 sectorsize = root->sectorsize; - u64 inode_size = btrfs_stack_inode_size(btrfs_inode); - struct blk_iterate_data data; + BUG_ON(bytenr != round_down(bytenr, root->sectorsize)); + BUG_ON(len != round_down(len, root->sectorsize)); + len = min_t(u64, len, BTRFS_MAX_EXTENT_SIZE); - init_blk_iterate_data(&data, trans, root, btrfs_inode, objectid, - datacsum); + /* + * Skip sb ranges first + * [0, 1M), [sb_offset(1), +64K), [sb_offset(2), +64K]. + * + * Or we will insert a hole into current image file, and later + * migrate block will fail as there is already a file extent. 
+ */ + if (bytenr < 1024 * 1024) { + *ret_len = 1024 * 1024 - bytenr; + return 0; + } + for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) { + u64 cur = btrfs_sb_offset(i); - err = ext2fs_block_iterate2(ext2_fs, ext2_ino, BLOCK_FLAG_DATA_ONLY, - NULL, __block_iterate_proc, &data); - if (err) - goto error; - ret = data.errcode; - if (ret) - goto fail; - if (packing && data.first_block == 0 && data.num_blocks > 0 && - inode_size <= BTRFS_MAX_INLINE_DATA_SIZE(root)) { - u64 num_bytes = data.num_blocks * sectorsize; - u64 disk_bytenr = data.disk_block * sectorsize; - u64 nbytes; + if (bytenr >= cur && bytenr < cur + BTRFS_STRIPE_LEN) { + *ret_len = cur + BTRFS_STRIPE_LEN - bytenr; + return 0; + } + } + for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) { + u64 cur = btrfs_sb_offset(i); - buffer = malloc(num_bytes); - if (!buffer) - return -ENOMEM; - ret = read_disk_extent(root, disk_bytenr, num_bytes, buffer); - if (ret) - goto fail; - if (num_bytes > inode_size) - num_bytes = inode_size; - ret = btrfs_insert_inline_extent(trans, root, objectid, - 0, buffer, num_bytes); - if (ret) - goto fail; - nbytes = btrfs_stack_inode_nbytes(btrfs_inode) + num_bytes; - btrfs_set_stack_inode_nbytes(btrfs_inode, nbytes); - } else if (data.num_blocks > 0) { - ret = record_file_blocks(&data, data.first_block, - data.disk_block, data.num_blocks); - if (ret) - goto fail; + /* + * |--reserved--| + * |----range-------| + * May still need to go through file extent inserts + */ + if (bytenr < cur && bytenr + len >= cur) { + len = min_t(u64, len, cur - bytenr); + break; + } + /* + * |--reserved--| + * |---range---| + * Drop out, no need to insert anything + */ + if (bytenr >= cur && bytenr < cur + BTRFS_STRIPE_LEN) { + *ret_len = cur + BTRFS_STRIPE_LEN - bytenr; + return 0; + } } - data.first_block += data.num_blocks; - last_block = (inode_size + sectorsize - 1) / sectorsize; - if (last_block > data.first_block) { - ret = record_file_blocks(&data, data.first_block, 0, - last_block - data.first_block); + 
+ cache = search_cache_extent(used, bytenr); + if (cache) { + if (cache->start <= bytenr) { + /* + * |///////Used///////| + * |<--insert--->| + * bytenr + */ + len = min_t(u64, len, cache->start + cache->size - + bytenr); + disk_bytenr = bytenr; + } else { + /* + * |//Used//| + * |<-insert-->| + * bytenr + */ + len = min(len, cache->start - bytenr); + disk_bytenr = 0; + datacsum = 0; + } + } else { + /* + * |//Used//| |EOF + * |<-insert-->| + * bytenr + */ + disk_bytenr = 0; + datacsum = 0; } -fail: - free(buffer); - return ret; -error: - fprintf(stderr, "ext2fs_block_iterate2: %s\n", error_message(err)); - return -1; -} -static int create_symbol_link(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 objectid, - struct btrfs_inode_item *btrfs_inode, - ext2_filsys ext2_fs, ext2_ino_t ext2_ino, - struct ext2_inode *ext2_inode) -{ - int ret; - char *pathname; - u64 inode_size = btrfs_stack_inode_size(btrfs_inode); - if (ext2fs_inode_data_blocks(ext2_fs, ext2_inode)) { - btrfs_set_stack_inode_size(btrfs_inode, inode_size + 1); - ret = create_file_extents(trans, root, objectid, btrfs_inode, - ext2_fs, ext2_ino, 1, 1); - btrfs_set_stack_inode_size(btrfs_inode, inode_size); - return ret; + if (disk_bytenr) { + /* Check if the range is in a data block group */ + bg_cache = btrfs_lookup_block_group(root->fs_info, bytenr); + if (!bg_cache) + return -ENOENT; + if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA)) + return -EINVAL; + + /* The extent should never cross block group boundary */ + len = min_t(u64, len, bg_cache->key.objectid + + bg_cache->key.offset - bytenr); } - pathname = (char *)&(ext2_inode->i_block[0]); - BUG_ON(pathname[inode_size] != 0); - ret = btrfs_insert_inline_extent(trans, root, objectid, 0, - pathname, inode_size + 1); - btrfs_set_stack_inode_nbytes(btrfs_inode, inode_size + 1); + BUG_ON(len != round_down(len, root->sectorsize)); + ret = btrfs_record_file_extent(trans, root, ino, inode, bytenr, + disk_bytenr, len); + if (ret < 0) + return 
ret; + + if (datacsum) + ret = csum_disk_extent(trans, root, bytenr, len); + *ret_len = len; return ret; } /* - * Following xattr/acl related codes are based on codes in - * fs/ext3/xattr.c and fs/ext3/acl.c + * Relocate old fs data in one reserved ranges + * + * Since all old fs data in reserved range is not covered by any chunk nor + * data extent, we don't need to handle any reference but add new + * extent/reference, which makes codes more clear */ -#define EXT2_XATTR_BHDR(ptr) ((struct ext2_ext_attr_header *)(ptr)) -#define EXT2_XATTR_BFIRST(ptr) \ - ((struct ext2_ext_attr_entry *)(EXT2_XATTR_BHDR(ptr) + 1)) -#define EXT2_XATTR_IHDR(inode) \ - ((struct ext2_ext_attr_header *) ((void *)(inode) + \ - EXT2_GOOD_OLD_INODE_SIZE + (inode)->i_extra_isize)) -#define EXT2_XATTR_IFIRST(inode) \ - ((struct ext2_ext_attr_entry *) ((void *)EXT2_XATTR_IHDR(inode) + \ - sizeof(EXT2_XATTR_IHDR(inode)->h_magic))) - -static int ext2_xattr_check_names(struct ext2_ext_attr_entry *entry, - const void *end) +static int migrate_one_reserved_range(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct cache_tree *used, + struct btrfs_inode_item *inode, int fd, + u64 ino, u64 start, u64 len, int datacsum) { - struct ext2_ext_attr_entry *next; + u64 cur_off = start; + u64 cur_len = len; + u64 hole_start = start; + u64 hole_len; + struct cache_extent *cache; + struct btrfs_key key; + struct extent_buffer *eb; + int ret = 0; - while (!EXT2_EXT_IS_LAST_ENTRY(entry)) { - next = EXT2_EXT_ATTR_NEXT(entry); - if ((void *)next >= end) - return -EIO; - entry = next; - } - return 0; -} + while (cur_off < start + len) { + cache = lookup_cache_extent(used, cur_off, cur_len); + if (!cache) + break; + cur_off = max(cache->start, cur_off); + cur_len = min(cache->start + cache->size, start + len) - + cur_off; + BUG_ON(cur_len < root->sectorsize); -static int ext2_xattr_check_block(const char *buf, size_t size) -{ - int error; - struct ext2_ext_attr_header *header = EXT2_XATTR_BHDR(buf); 
+ /* reserve extent for the data */ + ret = btrfs_reserve_extent(trans, root, cur_len, 0, 0, (u64)-1, + &key, 1); + if (ret < 0) + break; - if (header->h_magic != EXT2_EXT_ATTR_MAGIC || - header->h_blocks != 1) - return -EIO; - error = ext2_xattr_check_names(EXT2_XATTR_BFIRST(buf), buf + size); - return error; -} + eb = malloc(sizeof(*eb) + cur_len); + if (!eb) { + ret = -ENOMEM; + break; + } -static int ext2_xattr_check_entry(struct ext2_ext_attr_entry *entry, - size_t size) -{ - size_t value_size = entry->e_value_size; + ret = pread(fd, eb->data, cur_len, cur_off); + if (ret < cur_len) { + ret = (ret < 0 ? ret : -EIO); + free(eb); + break; + } + eb->start = key.objectid; + eb->len = key.offset; - if (entry->e_value_block != 0 || value_size > size || - entry->e_value_offs + value_size > size) - return -EIO; - return 0; -} - -#define EXT2_ACL_VERSION 0x0001 - -/* 23.2.5 acl_tag_t values */ - -#define ACL_UNDEFINED_TAG (0x00) -#define ACL_USER_OBJ (0x01) -#define ACL_USER (0x02) -#define ACL_GROUP_OBJ (0x04) -#define ACL_GROUP (0x08) -#define ACL_MASK (0x10) -#define ACL_OTHER (0x20) - -/* 23.2.7 ACL qualifier constants */ - -#define ACL_UNDEFINED_ID ((id_t)-1) - -typedef struct { - __le16 e_tag; - __le16 e_perm; - __le32 e_id; -} ext2_acl_entry; + /* Write the data */ + ret = write_and_map_eb(trans, root, eb); + free(eb); + if (ret < 0) + break; -typedef struct { - __le16 e_tag; - __le16 e_perm; -} ext2_acl_entry_short; + /* Now handle extent item and file extent things */ + ret = btrfs_record_file_extent(trans, root, ino, inode, cur_off, + key.objectid, key.offset); + if (ret < 0) + break; + /* Finally, insert csum items */ + if (datacsum) + ret = csum_disk_extent(trans, root, key.objectid, + key.offset); -typedef struct { - __le32 a_version; -} ext2_acl_header; + /* Don't forget to insert hole */ + hole_len = cur_off - hole_start; + if (hole_len) { + ret = btrfs_record_file_extent(trans, root, ino, inode, + hole_start, 0, hole_len); + if (ret < 0) + break; + } 
-static inline int ext2_acl_count(size_t size) -{ - ssize_t s; - size -= sizeof(ext2_acl_header); - s = size - 4 * sizeof(ext2_acl_entry_short); - if (s < 0) { - if (size % sizeof(ext2_acl_entry_short)) - return -1; - return size / sizeof(ext2_acl_entry_short); - } else { - if (s % sizeof(ext2_acl_entry)) - return -1; - return s / sizeof(ext2_acl_entry) + 4; + cur_off += key.offset; + hole_start = cur_off; + cur_len = start + len - cur_off; } + /* Last hole */ + if (start + len - hole_start > 0) + ret = btrfs_record_file_extent(trans, root, ino, inode, + hole_start, 0, start + len - hole_start); + return ret; } -#define ACL_EA_VERSION 0x0002 +/* + * Relocate the used ext2 data in reserved ranges + * [0,1M) + * [btrfs_sb_offset(1), +BTRFS_STRIPE_LEN) + * [btrfs_sb_offset(2), +BTRFS_STRIPE_LEN) + */ +static int migrate_reserved_ranges(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct cache_tree *used, + struct btrfs_inode_item *inode, int fd, + u64 ino, u64 total_bytes, int datacsum) +{ + u64 cur_off; + u64 cur_len; + int ret = 0; -typedef struct { - __le16 e_tag; - __le16 e_perm; - __le32 e_id; -} acl_ea_entry; + /* 0 ~ 1M */ + cur_off = 0; + cur_len = 1024 * 1024; + ret = migrate_one_reserved_range(trans, root, used, inode, fd, ino, + cur_off, cur_len, datacsum); + if (ret < 0) + return ret; -typedef struct { - __le32 a_version; - acl_ea_entry a_entries[0]; -} acl_ea_header; + /* second sb(fisrt sb is included in 0~1M) */ + cur_off = btrfs_sb_offset(1); + cur_len = min(total_bytes, cur_off + BTRFS_STRIPE_LEN) - cur_off; + if (cur_off > total_bytes) + return ret; + ret = migrate_one_reserved_range(trans, root, used, inode, fd, ino, + cur_off, cur_len, datacsum); + if (ret < 0) + return ret; -static inline size_t acl_ea_size(int count) -{ - return sizeof(acl_ea_header) + count * sizeof(acl_ea_entry); + /* Last sb */ + cur_off = btrfs_sb_offset(2); + cur_len = min(total_bytes, cur_off + BTRFS_STRIPE_LEN) - cur_off; + if (cur_off > total_bytes) + 
return ret; + ret = migrate_one_reserved_range(trans, root, used, inode, fd, ino, + cur_off, cur_len, datacsum); + return ret; } -static int ext2_acl_to_xattr(void *dst, const void *src, - size_t dst_size, size_t src_size) +/* + * Helper for expand and merge extent_cache for wipe_one_reserved_range() to + * handle wiping a range that exists in cache. + */ +static int _expand_extent_cache(struct cache_tree *tree, + struct cache_extent *entry, + u64 min_stripe_size, int backward) { - int i, count; - const void *end = src + src_size; - acl_ea_header *ext_acl = (acl_ea_header *)dst; - acl_ea_entry *dst_entry = ext_acl->a_entries; - ext2_acl_entry *src_entry; + struct cache_extent *ce; + int diff; - if (src_size < sizeof(ext2_acl_header)) - goto fail; - if (((ext2_acl_header *)src)->a_version != - cpu_to_le32(EXT2_ACL_VERSION)) - goto fail; - src += sizeof(ext2_acl_header); - count = ext2_acl_count(src_size); - if (count <= 0) - goto fail; + if (entry->size >= min_stripe_size) + return 0; + diff = min_stripe_size - entry->size; - BUG_ON(dst_size < acl_ea_size(count)); - ext_acl->a_version = cpu_to_le32(ACL_EA_VERSION); - for (i = 0; i < count; i++, dst_entry++) { - src_entry = (ext2_acl_entry *)src; - if (src + sizeof(ext2_acl_entry_short) > end) - goto fail; - dst_entry->e_tag = src_entry->e_tag; - dst_entry->e_perm = src_entry->e_perm; - switch (le16_to_cpu(src_entry->e_tag)) { - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - src += sizeof(ext2_acl_entry_short); - dst_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID); - break; - case ACL_USER: - case ACL_GROUP: - src += sizeof(ext2_acl_entry); - if (src > end) - goto fail; - dst_entry->e_id = src_entry->e_id; - break; - default: - goto fail; + if (backward) { + ce = prev_cache_extent(entry); + if (!ce) + goto expand_back; + if (ce->start + ce->size >= entry->start - diff) { + /* Directly merge with previous extent */ + ce->size = entry->start + entry->size - ce->start; + 
remove_cache_extent(tree, entry); + free(entry); + return 0; } +expand_back: + /* No overlap, normal extent */ + if (entry->start < diff) { + error("cannot find space for data chunk layout"); + return -ENOSPC; + } + entry->start -= diff; + entry->size += diff; + return 0; } - if (src != end) - goto fail; + ce = next_cache_extent(entry); + if (!ce) + goto expand_after; + if (entry->start + entry->size + diff >= ce->start) { + /* Directly merge with next extent */ + entry->size = ce->start + ce->size - entry->start; + remove_cache_extent(tree, ce); + free(ce); + return 0; + } +expand_after: + entry->size += diff; return 0; -fail: - return -EINVAL; } -static char *xattr_prefix_table[] = { - [1] = "user.", - [2] = "system.posix_acl_access", - [3] = "system.posix_acl_default", - [4] = "trusted.", - [6] = "security.", -}; - -static int copy_single_xattr(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 objectid, - struct ext2_ext_attr_entry *entry, - const void *data, u32 datalen) +/* + * Remove one reserve range from given cache tree + * if min_stripe_size is non-zero, it will ensure for split case, + * all its split cache extent is no smaller than @min_strip_size / 2. 
+ */ +static int wipe_one_reserved_range(struct cache_tree *tree, + u64 start, u64 len, u64 min_stripe_size, + int ensure_size) { - int ret = 0; - int name_len; - int name_index; - void *databuf = NULL; - char namebuf[XATTR_NAME_MAX + 1]; - - name_index = entry->e_name_index; - if (name_index >= ARRAY_SIZE(xattr_prefix_table) || - xattr_prefix_table[name_index] == NULL) - return -EOPNOTSUPP; - name_len = strlen(xattr_prefix_table[name_index]) + - entry->e_name_len; - if (name_len >= sizeof(namebuf)) - return -ERANGE; + struct cache_extent *cache; + int ret; - if (name_index == 2 || name_index == 3) { - size_t bufsize = acl_ea_size(ext2_acl_count(datalen)); - databuf = malloc(bufsize); - if (!databuf) - return -ENOMEM; - ret = ext2_acl_to_xattr(databuf, data, bufsize, datalen); - if (ret) - goto out; - data = databuf; - datalen = bufsize; - } - strncpy(namebuf, xattr_prefix_table[name_index], XATTR_NAME_MAX); - strncat(namebuf, EXT2_EXT_ATTR_NAME(entry), entry->e_name_len); - if (name_len + datalen > BTRFS_LEAF_DATA_SIZE(root) - - sizeof(struct btrfs_item) - sizeof(struct btrfs_dir_item)) { - fprintf(stderr, "skip large xattr on inode %Lu name %.*s\n", - objectid - INO_OFFSET, name_len, namebuf); - goto out; - } - ret = btrfs_insert_xattr_item(trans, root, namebuf, name_len, - data, datalen, objectid); -out: - free(databuf); - return ret; -} + BUG_ON(ensure_size && min_stripe_size == 0); + /* + * The logical here is simplified to handle special cases only + * So we don't need to consider merge case for ensure_size + */ + BUG_ON(min_stripe_size && (min_stripe_size < len * 2 || + min_stripe_size / 2 < BTRFS_STRIPE_LEN)); -static int copy_extended_attrs(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 objectid, - struct btrfs_inode_item *btrfs_inode, - ext2_filsys ext2_fs, ext2_ino_t ext2_ino) -{ - int ret = 0; - int inline_ea = 0; - errcode_t err; - u32 datalen; - u32 block_size = ext2_fs->blocksize; - u32 inode_size = EXT2_INODE_SIZE(ext2_fs->super); 
- struct ext2_inode_large *ext2_inode; - struct ext2_ext_attr_entry *entry; - void *data; - char *buffer = NULL; - char inode_buf[EXT2_GOOD_OLD_INODE_SIZE]; + /* Also, wipe range should already be aligned */ + BUG_ON(start != round_down(start, BTRFS_STRIPE_LEN) || + start + len != round_up(start + len, BTRFS_STRIPE_LEN)); - if (inode_size <= EXT2_GOOD_OLD_INODE_SIZE) { - ext2_inode = (struct ext2_inode_large *)inode_buf; - } else { - ext2_inode = (struct ext2_inode_large *)malloc(inode_size); - if (!ext2_inode) - return -ENOMEM; - } - err = ext2fs_read_inode_full(ext2_fs, ext2_ino, (void *)ext2_inode, - inode_size); - if (err) { - fprintf(stderr, "ext2fs_read_inode_full: %s\n", - error_message(err)); - ret = -1; - goto out; - } + min_stripe_size /= 2; - if (ext2_ino > ext2_fs->super->s_first_ino && - inode_size > EXT2_GOOD_OLD_INODE_SIZE) { - if (EXT2_GOOD_OLD_INODE_SIZE + - ext2_inode->i_extra_isize > inode_size) { - ret = -EIO; - goto out; - } - if (ext2_inode->i_extra_isize != 0 && - EXT2_XATTR_IHDR(ext2_inode)->h_magic == - EXT2_EXT_ATTR_MAGIC) { - inline_ea = 1; - } - } - if (inline_ea) { - int total; - void *end = (void *)ext2_inode + inode_size; - entry = EXT2_XATTR_IFIRST(ext2_inode); - total = end - (void *)entry; - ret = ext2_xattr_check_names(entry, end); - if (ret) - goto out; - while (!EXT2_EXT_IS_LAST_ENTRY(entry)) { - ret = ext2_xattr_check_entry(entry, total); - if (ret) - goto out; - data = (void *)EXT2_XATTR_IFIRST(ext2_inode) + - entry->e_value_offs; - datalen = entry->e_value_size; - ret = copy_single_xattr(trans, root, objectid, - entry, data, datalen); - if (ret) - goto out; - entry = EXT2_EXT_ATTR_NEXT(entry); - } - } + cache = lookup_cache_extent(tree, start, len); + if (!cache) + return 0; - if (ext2_inode->i_file_acl == 0) - goto out; + if (start <= cache->start) { + /* + * |--------cache---------| + * |-wipe-| + */ + BUG_ON(start + len <= cache->start); - buffer = malloc(block_size); - if (!buffer) { - ret = -ENOMEM; - goto out; - } - err 
= ext2fs_read_ext_attr(ext2_fs, ext2_inode->i_file_acl, buffer); - if (err) { - fprintf(stderr, "ext2fs_read_ext_attr: %s\n", - error_message(err)); - ret = -1; - goto out; - } - ret = ext2_xattr_check_block(buffer, block_size); - if (ret) - goto out; + /* + * The wipe size is smaller than min_stripe_size / 2, + * so the result length should still meet min_stripe_size + * And no need to do alignment + */ + cache->size -= (start + len - cache->start); + if (cache->size == 0) { + remove_cache_extent(tree, cache); + free(cache); + return 0; + } - entry = EXT2_XATTR_BFIRST(buffer); - while (!EXT2_EXT_IS_LAST_ENTRY(entry)) { - ret = ext2_xattr_check_entry(entry, block_size); - if (ret) - goto out; - data = buffer + entry->e_value_offs; - datalen = entry->e_value_size; - ret = copy_single_xattr(trans, root, objectid, - entry, data, datalen); - if (ret) - goto out; - entry = EXT2_EXT_ATTR_NEXT(entry); - } -out: - free(buffer); - if ((void *)ext2_inode != inode_buf) - free(ext2_inode); - return ret; -} -#define MINORBITS 20 -#define MKDEV(ma, mi) (((ma) << MINORBITS) | (mi)) + BUG_ON(ensure_size && cache->size < min_stripe_size); -static inline dev_t old_decode_dev(u16 val) -{ - return MKDEV((val >> 8) & 255, val & 255); -} + cache->start = start + len; + return 0; + } else if (start > cache->start && start + len < cache->start + + cache->size) { + /* + * |-------cache-----| + * |-wipe-| + */ + u64 old_start = cache->start; + u64 old_len = cache->size; + u64 insert_start = start + len; + u64 insert_len; -static inline dev_t new_decode_dev(u32 dev) -{ - unsigned major = (dev & 0xfff00) >> 8; - unsigned minor = (dev & 0xff) | ((dev >> 12) & 0xfff00); - return MKDEV(major, minor); -} + cache->size = start - cache->start; + /* Expand the leading half part if needed */ + if (ensure_size && cache->size < min_stripe_size) { + ret = _expand_extent_cache(tree, cache, + min_stripe_size, 1); + if (ret < 0) + return ret; + } -static int copy_inode_item(struct btrfs_inode_item *dst, - 
struct ext2_inode *src, u32 blocksize) -{ - btrfs_set_stack_inode_generation(dst, 1); - btrfs_set_stack_inode_sequence(dst, 0); - btrfs_set_stack_inode_transid(dst, 1); - btrfs_set_stack_inode_size(dst, src->i_size); - btrfs_set_stack_inode_nbytes(dst, 0); - btrfs_set_stack_inode_block_group(dst, 0); - btrfs_set_stack_inode_nlink(dst, src->i_links_count); - btrfs_set_stack_inode_uid(dst, src->i_uid | (src->i_uid_high << 16)); - btrfs_set_stack_inode_gid(dst, src->i_gid | (src->i_gid_high << 16)); - btrfs_set_stack_inode_mode(dst, src->i_mode); - btrfs_set_stack_inode_rdev(dst, 0); - btrfs_set_stack_inode_flags(dst, 0); - btrfs_set_stack_timespec_sec(&dst->atime, src->i_atime); - btrfs_set_stack_timespec_nsec(&dst->atime, 0); - btrfs_set_stack_timespec_sec(&dst->ctime, src->i_ctime); - btrfs_set_stack_timespec_nsec(&dst->ctime, 0); - btrfs_set_stack_timespec_sec(&dst->mtime, src->i_mtime); - btrfs_set_stack_timespec_nsec(&dst->mtime, 0); - btrfs_set_stack_timespec_sec(&dst->otime, 0); - btrfs_set_stack_timespec_nsec(&dst->otime, 0); + /* And insert the new one */ + insert_len = old_start + old_len - start - len; + ret = add_merge_cache_extent(tree, insert_start, insert_len); + if (ret < 0) + return ret; - if (S_ISDIR(src->i_mode)) { - btrfs_set_stack_inode_size(dst, 0); - btrfs_set_stack_inode_nlink(dst, 1); - } - if (S_ISREG(src->i_mode)) { - btrfs_set_stack_inode_size(dst, (u64)src->i_size_high << 32 | - (u64)src->i_size); - } - if (!S_ISREG(src->i_mode) && !S_ISDIR(src->i_mode) && - !S_ISLNK(src->i_mode)) { - if (src->i_block[0]) { - btrfs_set_stack_inode_rdev(dst, - old_decode_dev(src->i_block[0])); - } else { - btrfs_set_stack_inode_rdev(dst, - new_decode_dev(src->i_block[1])); + /* Expand the last half part if needed */ + if (ensure_size && insert_len < min_stripe_size) { + cache = lookup_cache_extent(tree, insert_start, + insert_len); + if (!cache || cache->start != insert_start || + cache->size != insert_len) + return -ENOENT; + ret = 
_expand_extent_cache(tree, cache, + min_stripe_size, 0); } - } - memset(&dst->reserved, 0, sizeof(dst->reserved)); + return ret; + } + /* + * |----cache-----| + * |--wipe-| + * Wipe len should be small enough and no need to expand the + * remaining extent + */ + cache->size = start - cache->start; + BUG_ON(ensure_size && cache->size < min_stripe_size); return 0; } /* - * copy a single inode. do all the required works, such as cloning - * inode item, creating file extents and creating directory entries. - */ -static int copy_single_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 objectid, - ext2_filsys ext2_fs, ext2_ino_t ext2_ino, - struct ext2_inode *ext2_inode, - int datacsum, int packing, int noxattr) -{ + * Remove reserved ranges from given cache_tree + * + * It will remove the following ranges + * 1) 0~1M + * 2) 2nd superblock, +64K (make sure chunks are 64K aligned) + * 3) 3rd superblock, +64K + * + * @min_stripe must be given for safety check + * and if @ensure_size is given, it will ensure affected cache_extent will be + * larger than min_stripe_size + */ +static int wipe_reserved_ranges(struct cache_tree *tree, u64 min_stripe_size, + int ensure_size) +{ int ret; - struct btrfs_inode_item btrfs_inode; - if (ext2_inode->i_links_count == 0) - return 0; + ret = wipe_one_reserved_range(tree, 0, 1024 * 1024, min_stripe_size, + ensure_size); + if (ret < 0) + return ret; + ret = wipe_one_reserved_range(tree, btrfs_sb_offset(1), + BTRFS_STRIPE_LEN, min_stripe_size, ensure_size); + if (ret < 0) + return ret; + ret = wipe_one_reserved_range(tree, btrfs_sb_offset(2), + BTRFS_STRIPE_LEN, min_stripe_size, ensure_size); + return ret; +} - copy_inode_item(&btrfs_inode, ext2_inode, ext2_fs->blocksize); - if (!datacsum && S_ISREG(ext2_inode->i_mode)) { - u32 flags = btrfs_stack_inode_flags(&btrfs_inode) | - BTRFS_INODE_NODATASUM; - btrfs_set_stack_inode_flags(&btrfs_inode, flags); +static int calculate_available_space(struct btrfs_convert_context 
*cctx) +{ + struct cache_tree *used = &cctx->used; + struct cache_tree *data_chunks = &cctx->data_chunks; + struct cache_tree *free = &cctx->free; + struct cache_extent *cache; + u64 cur_off = 0; + /* + * Twice the minimal chunk size, to allow later wipe_reserved_ranges() + * works without need to consider overlap + */ + u64 min_stripe_size = 2 * 16 * 1024 * 1024; + int ret; + + /* Calculate data_chunks */ + for (cache = first_cache_extent(used); cache; + cache = next_cache_extent(cache)) { + u64 cur_len; + + if (cache->start + cache->size < cur_off) + continue; + if (cache->start > cur_off + min_stripe_size) + cur_off = cache->start; + cur_len = max(cache->start + cache->size - cur_off, + min_stripe_size); + ret = add_merge_cache_extent(data_chunks, cur_off, cur_len); + if (ret < 0) + goto out; + cur_off += cur_len; } + /* + * remove reserved ranges, so we won't ever bother relocating an old + * filesystem extent to other place. + */ + ret = wipe_reserved_ranges(data_chunks, min_stripe_size, 1); + if (ret < 0) + goto out; - switch (ext2_inode->i_mode & S_IFMT) { - case S_IFREG: - ret = create_file_extents(trans, root, objectid, &btrfs_inode, - ext2_fs, ext2_ino, datacsum, packing); - break; - case S_IFDIR: - ret = create_dir_entries(trans, root, objectid, &btrfs_inode, - ext2_fs, ext2_ino); - break; - case S_IFLNK: - ret = create_symbol_link(trans, root, objectid, &btrfs_inode, - ext2_fs, ext2_ino, ext2_inode); - break; - default: - ret = 0; - break; + cur_off = 0; + /* + * Calculate free space + * Always round up the start bytenr, to avoid metadata extent corss + * stripe boundary, as later mkfs_convert() won't have all the extent + * allocation check + */ + for (cache = first_cache_extent(data_chunks); cache; + cache = next_cache_extent(cache)) { + if (cache->start < cur_off) + continue; + if (cache->start > cur_off) { + u64 insert_start; + u64 len; + + len = cache->start - round_up(cur_off, + BTRFS_STRIPE_LEN); + insert_start = round_up(cur_off, 
BTRFS_STRIPE_LEN); + + ret = add_merge_cache_extent(free, insert_start, len); + if (ret < 0) + goto out; + } + cur_off = cache->start + cache->size; } - if (ret) - return ret; + /* Don't forget the last range */ + if (cctx->total_bytes > cur_off) { + u64 len = cctx->total_bytes - cur_off; + u64 insert_start; - if (!noxattr) { - ret = copy_extended_attrs(trans, root, objectid, &btrfs_inode, - ext2_fs, ext2_ino); - if (ret) - return ret; + insert_start = round_up(cur_off, BTRFS_STRIPE_LEN); + + ret = add_merge_cache_extent(free, insert_start, len); + if (ret < 0) + goto out; } - return btrfs_insert_inode(trans, root, objectid, &btrfs_inode); + + /* Remove reserved bytes */ + ret = wipe_reserved_ranges(free, min_stripe_size, 0); +out: + return ret; } /* - * scan ext2's inode bitmap and copy all used inodes. + * Read used space, and since we have the used space, + * calcuate data_chunks and free for later mkfs */ -static int ext2_copy_inodes(struct btrfs_convert_context *cctx, - struct btrfs_root *root, - int datacsum, int packing, int noxattr, struct task_ctx *p) +static int convert_read_used_space(struct btrfs_convert_context *cctx) { - ext2_filsys ext2_fs = cctx->fs_data; int ret; - errcode_t err; - ext2_inode_scan ext2_scan; - struct ext2_inode ext2_inode; - ext2_ino_t ext2_ino; - u64 objectid; - struct btrfs_trans_handle *trans; - trans = btrfs_start_transaction(root, 1); - if (!trans) - return -ENOMEM; - err = ext2fs_open_inode_scan(ext2_fs, 0, &ext2_scan); - if (err) { - fprintf(stderr, "ext2fs_open_inode_scan: %s\n", error_message(err)); - return -1; - } - while (!(err = ext2fs_get_next_inode(ext2_scan, &ext2_ino, - &ext2_inode))) { - /* no more inodes */ - if (ext2_ino == 0) - break; - /* skip special inode in ext2fs */ - if (ext2_ino < EXT2_GOOD_OLD_FIRST_INO && - ext2_ino != EXT2_ROOT_INO) - continue; - objectid = ext2_ino + INO_OFFSET; - ret = copy_single_inode(trans, root, - objectid, ext2_fs, ext2_ino, - &ext2_inode, datacsum, packing, - noxattr); - 
p->cur_copy_inodes++; - if (ret) - return ret; - if (trans->blocks_used >= 4096) { - ret = btrfs_commit_transaction(trans, root); - BUG_ON(ret); - trans = btrfs_start_transaction(root, 1); - BUG_ON(!trans); - } - } - if (err) { - fprintf(stderr, "ext2fs_get_next_inode: %s\n", error_message(err)); - return -1; - } - ret = btrfs_commit_transaction(trans, root); - BUG_ON(ret); - ext2fs_close_inode_scan(ext2_scan); + ret = cctx->convert_ops->read_used_space(cctx); + if (ret) + return ret; + ret = calculate_available_space(cctx); return ret; } -static int create_image_file_range(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct cache_tree *used, - struct btrfs_inode_item *inode, - u64 ino, u64 bytenr, u64 *ret_len, - int datacsum) +/* + * Create the fs image file of old filesystem. + * + * This is completely fs independent as we have cctx->used, only + * need to create file extents pointing to all the positions. + */ +static int create_image(struct btrfs_root *root, + struct btrfs_mkfs_config *cfg, + struct btrfs_convert_context *cctx, int fd, + u64 size, char *name, int datacsum) { + struct btrfs_inode_item buf; + struct btrfs_trans_handle *trans; + struct btrfs_path *path = NULL; + struct btrfs_key key; struct cache_extent *cache; - struct btrfs_block_group_cache *bg_cache; - u64 len = *ret_len; - u64 disk_bytenr; - int i; + struct cache_tree used_tmp; + u64 cur; + u64 ino; + u64 flags = BTRFS_INODE_READONLY; int ret; - BUG_ON(bytenr != round_down(bytenr, root->sectorsize)); - BUG_ON(len != round_down(len, root->sectorsize)); - len = min_t(u64, len, BTRFS_MAX_EXTENT_SIZE); + if (!datacsum) + flags |= BTRFS_INODE_NODATASUM; - /* - * Skip sb ranges first - * [0, 1M), [sb_offset(1), +64K), [sb_offset(2), +64K]. - * - * Or we will insert a hole into current image file, and later - * migrate block will fail as there is already a file extent. 
- */ - if (bytenr < 1024 * 1024) { - *ret_len = 1024 * 1024 - bytenr; - return 0; - } - for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) { - u64 cur = btrfs_sb_offset(i); + trans = btrfs_start_transaction(root, 1); + if (!trans) + return -ENOMEM; - if (bytenr >= cur && bytenr < cur + BTRFS_STRIPE_LEN) { - *ret_len = cur + BTRFS_STRIPE_LEN - bytenr; - return 0; - } + cache_tree_init(&used_tmp); + + ret = btrfs_find_free_objectid(trans, root, BTRFS_FIRST_FREE_OBJECTID, + &ino); + if (ret < 0) + goto out; + ret = btrfs_new_inode(trans, root, ino, 0400 | S_IFREG); + if (ret < 0) + goto out; + ret = btrfs_change_inode_flags(trans, root, ino, flags); + if (ret < 0) + goto out; + ret = btrfs_add_link(trans, root, ino, BTRFS_FIRST_FREE_OBJECTID, name, + strlen(name), BTRFS_FT_REG_FILE, NULL, 1); + if (ret < 0) + goto out; + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; } - for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) { - u64 cur = btrfs_sb_offset(i); - - /* - * |--reserved--| - * |----range-------| - * May still need to go through file extent inserts - */ - if (bytenr < cur && bytenr + len >= cur) { - len = min_t(u64, len, cur - bytenr); - break; - } - /* - * |--reserved--| - * |---range---| - * Drop out, no need to insert anything - */ - if (bytenr >= cur && bytenr < cur + BTRFS_STRIPE_LEN) { - *ret_len = cur + BTRFS_STRIPE_LEN - bytenr; - return 0; - } - } - - cache = search_cache_extent(used, bytenr); - if (cache) { - if (cache->start <= bytenr) { - /* - * |///////Used///////| - * |<--insert--->| - * bytenr - */ - len = min_t(u64, len, cache->start + cache->size - - bytenr); - disk_bytenr = bytenr; - } else { - /* - * |//Used//| - * |<-insert-->| - * bytenr - */ - len = min(len, cache->start - bytenr); - disk_bytenr = 0; - datacsum = 0; - } - } else { - /* - * |//Used//| |EOF - * |<-insert-->| - * bytenr - */ - disk_bytenr = 0; - datacsum = 0; - } - - if (disk_bytenr) { - /* Check if the range is in a data block group */ - bg_cache = 
btrfs_lookup_block_group(root->fs_info, bytenr); - if (!bg_cache) - return -ENOENT; - if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA)) - return -EINVAL; - - /* The extent should never cross block group boundary */ - len = min_t(u64, len, bg_cache->key.objectid + - bg_cache->key.offset - bytenr); - } - - BUG_ON(len != round_down(len, root->sectorsize)); - ret = btrfs_record_file_extent(trans, root, ino, inode, bytenr, - disk_bytenr, len); - if (ret < 0) - return ret; - - if (datacsum) - ret = csum_disk_extent(trans, root, bytenr, len); - *ret_len = len; - return ret; -} - - -/* - * Relocate old fs data in one reserved ranges - * - * Since all old fs data in reserved range is not covered by any chunk nor - * data extent, we don't need to handle any reference but add new - * extent/reference, which makes codes more clear - */ -static int migrate_one_reserved_range(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct cache_tree *used, - struct btrfs_inode_item *inode, int fd, - u64 ino, u64 start, u64 len, int datacsum) -{ - u64 cur_off = start; - u64 cur_len = len; - u64 hole_start = start; - u64 hole_len; - struct cache_extent *cache; - struct btrfs_key key; - struct extent_buffer *eb; - int ret = 0; - - while (cur_off < start + len) { - cache = lookup_cache_extent(used, cur_off, cur_len); - if (!cache) - break; - cur_off = max(cache->start, cur_off); - cur_len = min(cache->start + cache->size, start + len) - - cur_off; - BUG_ON(cur_len < root->sectorsize); - - /* reserve extent for the data */ - ret = btrfs_reserve_extent(trans, root, cur_len, 0, 0, (u64)-1, - &key, 1); - if (ret < 0) - break; - - eb = malloc(sizeof(*eb) + cur_len); - if (!eb) { - ret = -ENOMEM; - break; - } - - ret = pread(fd, eb->data, cur_len, cur_off); - if (ret < cur_len) { - ret = (ret < 0 ? 
ret : -EIO); - free(eb); - break; - } - eb->start = key.objectid; - eb->len = key.offset; - - /* Write the data */ - ret = write_and_map_eb(trans, root, eb); - free(eb); - if (ret < 0) - break; - - /* Now handle extent item and file extent things */ - ret = btrfs_record_file_extent(trans, root, ino, inode, cur_off, - key.objectid, key.offset); - if (ret < 0) - break; - /* Finally, insert csum items */ - if (datacsum) - ret = csum_disk_extent(trans, root, key.objectid, - key.offset); - - /* Don't forget to insert hole */ - hole_len = cur_off - hole_start; - if (hole_len) { - ret = btrfs_record_file_extent(trans, root, ino, inode, - hole_start, 0, hole_len); - if (ret < 0) - break; - } - - cur_off += key.offset; - hole_start = cur_off; - cur_len = start + len - cur_off; - } - /* Last hole */ - if (start + len - hole_start > 0) - ret = btrfs_record_file_extent(trans, root, ino, inode, - hole_start, 0, start + len - hole_start); - return ret; -} - -/* - * Relocate the used ext2 data in reserved ranges - * [0,1M) - * [btrfs_sb_offset(1), +BTRFS_STRIPE_LEN) - * [btrfs_sb_offset(2), +BTRFS_STRIPE_LEN) - */ -static int migrate_reserved_ranges(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct cache_tree *used, - struct btrfs_inode_item *inode, int fd, - u64 ino, u64 total_bytes, int datacsum) -{ - u64 cur_off; - u64 cur_len; - int ret = 0; - - /* 0 ~ 1M */ - cur_off = 0; - cur_len = 1024 * 1024; - ret = migrate_one_reserved_range(trans, root, used, inode, fd, ino, - cur_off, cur_len, datacsum); - if (ret < 0) - return ret; - - /* second sb(fisrt sb is included in 0~1M) */ - cur_off = btrfs_sb_offset(1); - cur_len = min(total_bytes, cur_off + BTRFS_STRIPE_LEN) - cur_off; - if (cur_off > total_bytes) - return ret; - ret = migrate_one_reserved_range(trans, root, used, inode, fd, ino, - cur_off, cur_len, datacsum); - if (ret < 0) - return ret; - - /* Last sb */ - cur_off = btrfs_sb_offset(2); - cur_len = min(total_bytes, cur_off + BTRFS_STRIPE_LEN) - 
cur_off; - if (cur_off > total_bytes) - return ret; - ret = migrate_one_reserved_range(trans, root, used, inode, fd, ino, - cur_off, cur_len, datacsum); - return ret; -} - -static int wipe_reserved_ranges(struct cache_tree *tree, u64 min_stripe_size, - int ensure_size); - -/* - * Create the fs image file of old filesystem. - * - * This is completely fs independent as we have cctx->used, only - * need to create file extents pointing to all the positions. - */ -static int create_image(struct btrfs_root *root, - struct btrfs_mkfs_config *cfg, - struct btrfs_convert_context *cctx, int fd, - u64 size, char *name, int datacsum) -{ - struct btrfs_inode_item buf; - struct btrfs_trans_handle *trans; - struct btrfs_path *path = NULL; - struct btrfs_key key; - struct cache_extent *cache; - struct cache_tree used_tmp; - u64 cur; - u64 ino; - int ret; - - trans = btrfs_start_transaction(root, 1); - if (!trans) - return -ENOMEM; - - cache_tree_init(&used_tmp); - - ret = btrfs_find_free_objectid(trans, root, BTRFS_FIRST_FREE_OBJECTID, - &ino); - if (ret < 0) - goto out; - ret = btrfs_new_inode(trans, root, ino, 0600 | S_IFREG); - if (ret < 0) - goto out; - ret = btrfs_add_link(trans, root, ino, BTRFS_FIRST_FREE_OBJECTID, name, - strlen(name), BTRFS_FT_REG_FILE, NULL, 1); - if (ret < 0) - goto out; - - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out; - } - key.objectid = ino; - key.type = BTRFS_INODE_ITEM_KEY; - key.offset = 0; + key.objectid = ino; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; ret = btrfs_search_slot(trans, root, &key, path, 0, 1); if (ret) { @@ -1618,7 +1056,7 @@ return ret; } -static struct btrfs_root * link_subvol(struct btrfs_root *root, +static struct btrfs_root* link_subvol(struct btrfs_root *root, const char *base, u64 root_objectid) { struct btrfs_trans_handle *trans; @@ -1641,14 +1079,19 @@ return NULL; path = btrfs_alloc_path(); - BUG_ON(!path); - + if (!path) + return NULL; + key.objectid = dirid; key.type = 
BTRFS_DIR_INDEX_KEY; key.offset = (u64)-1; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - BUG_ON(ret <= 0); + if (ret <= 0) { + error("search for DIR_INDEX dirid %llu failed: %d", + (unsigned long long)dirid, ret); + goto fail; + } if (path->slots[0] > 0) { path->slots[0]--; @@ -1659,14 +1102,21 @@ btrfs_release_path(path); trans = btrfs_start_transaction(root, 1); - BUG_ON(!trans); + if (!trans) { + error("unable to start transaction"); + goto fail; + } key.objectid = dirid; key.offset = 0; key.type = BTRFS_INODE_ITEM_KEY; ret = btrfs_lookup_inode(trans, root, path, &key, 1); - BUG_ON(ret); + if (ret) { + error("search for INODE_ITEM %llu failed: %d", + (unsigned long long)dirid, ret); + goto fail; + } leaf = path->nodes[0]; inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); @@ -1700,19 +1150,33 @@ BTRFS_ROOT_BACKREF_KEY, root->root_key.objectid, dirid, index, buf, len); - BUG_ON(ret); + if (ret) { + error("unable to add root backref for %llu: %d", + root->root_key.objectid, ret); + goto fail; + } /* now add the forward ref */ ret = btrfs_add_root_ref(trans, tree_root, root->root_key.objectid, BTRFS_ROOT_REF_KEY, root_objectid, dirid, index, buf, len); + if (ret) { + error("unable to add root ref for %llu: %d", + root->root_key.objectid, ret); + goto fail; + } ret = btrfs_commit_transaction(trans, root); - BUG_ON(ret); + if (ret) { + error("transaction commit failed: %d", ret); + goto fail; + } new_root = btrfs_read_fs_root(fs_info, &key); - if (IS_ERR(new_root)) + if (IS_ERR(new_root)) { + error("unable to fs read root: %lu", PTR_ERR(new_root)); new_root = NULL; + } fail: btrfs_free_path(path); return new_root; @@ -1729,7 +1193,8 @@ ret = btrfs_copy_root(trans, root, root->node, &tmp, root_objectid); - BUG_ON(ret); + if (ret) + return ret; memcpy(&root_item, &root->root_item, sizeof(root_item)); btrfs_set_root_bytenr(&root_item, tmp->start); @@ -1745,12 +1210,14 @@ key.offset = (u64)-1; new_root = 
btrfs_read_fs_root(root->fs_info, &key); - BUG_ON(!new_root || IS_ERR(new_root)); + if (!new_root || IS_ERR(new_root)) { + error("unable to fs read root: %lu", PTR_ERR(new_root)); + return PTR_ERR(new_root); + } ret = btrfs_make_root_dir(trans, new_root, BTRFS_FIRST_FREE_OBJECTID); - BUG_ON(ret); - return 0; + return ret; } /* @@ -1831,7 +1298,11 @@ fs_info->avoid_sys_chunk_alloc = 1; fs_info->avoid_meta_chunk_alloc = 1; trans = btrfs_start_transaction(root, 1); - BUG_ON(!trans); + if (!trans) { + error("unable to start transaction"); + ret = -EINVAL; + goto err; + } ret = btrfs_fix_block_accounting(trans, root); if (ret) goto err; @@ -1859,12 +1330,16 @@ /* subvol for fs image file */ ret = create_subvol(trans, root, CONV_IMAGE_SUBVOL_OBJECTID); - if (ret < 0) + if (ret < 0) { + error("failed to create subvolume image root: %d", ret); goto err; + } /* subvol for data relocation tree */ ret = create_subvol(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID); - if (ret < 0) + if (ret < 0) { + error("failed to create DATA_RELOC root: %d", ret); goto err; + } ret = btrfs_commit_transaction(trans, root); fs_info->avoid_sys_chunk_alloc = 0; @@ -1884,7 +1359,6 @@ u32 len; u32 bytenr; - BUG_ON(sectorsize < sizeof(*super)); buf = malloc(sizeof(*buf) + sectorsize); if (!buf) return -ENOMEM; @@ -1958,316 +1432,891 @@ return 0; } -static const struct btrfs_convert_operations ext2_convert_ops = { - .name = "ext2", - .open_fs = ext2_open_fs, - .read_used_space = ext2_read_used_space, - .copy_inodes = ext2_copy_inodes, - .close_fs = ext2_close_fs, -}; - -static const struct btrfs_convert_operations *convert_operations[] = { - &ext2_convert_ops, -}; +#if BTRFSCONVERT_EXT2 -static int convert_open_fs(const char *devname, - struct btrfs_convert_context *cctx) +/* + * Open Ext2fs in readonly mode, read block allocation bitmap and + * inode bitmap into memory. 
+ */ +static int ext2_open_fs(struct btrfs_convert_context *cctx, const char *name) { - int i; - - memset(cctx, 0, sizeof(*cctx)); + errcode_t ret; + ext2_filsys ext2_fs; + ext2_ino_t ino; + u32 ro_feature; - for (i = 0; i < ARRAY_SIZE(convert_operations); i++) { - int ret = convert_operations[i]->open_fs(cctx, devname); + ret = ext2fs_open(name, 0, 0, 0, unix_io_manager, &ext2_fs); + if (ret) { + fprintf(stderr, "ext2fs_open: %s\n", error_message(ret)); + return -1; + } + /* + * We need to know exactly the used space, some RO compat flags like + * BIGALLOC will affect how used space is present. + * So we need manuall check any unsupported RO compat flags + */ + ro_feature = ext2_fs->super->s_feature_ro_compat; + if (ro_feature & ~EXT2_LIB_FEATURE_RO_COMPAT_SUPP) { + error( +"unsupported RO features detected: %x, abort convert to avoid possible corruption", + ro_feature & ~EXT2_LIB_FEATURE_COMPAT_SUPP); + goto fail; + } + ret = ext2fs_read_inode_bitmap(ext2_fs); + if (ret) { + fprintf(stderr, "ext2fs_read_inode_bitmap: %s\n", + error_message(ret)); + goto fail; + } + ret = ext2fs_read_block_bitmap(ext2_fs); + if (ret) { + fprintf(stderr, "ext2fs_read_block_bitmap: %s\n", + error_message(ret)); + goto fail; + } + /* + * search each block group for a free inode. this set up + * uninit block/inode bitmaps appropriately. 
+ */ + ino = 1; + while (ino <= ext2_fs->super->s_inodes_count) { + ext2_ino_t foo; + ext2fs_new_inode(ext2_fs, ino, 0, NULL, &foo); + ino += EXT2_INODES_PER_GROUP(ext2_fs->super); + } - if (ret == 0) { - cctx->convert_ops = convert_operations[i]; - return ret; - } + if (!(ext2_fs->super->s_feature_incompat & + EXT2_FEATURE_INCOMPAT_FILETYPE)) { + fprintf(stderr, "filetype feature is missing\n"); + goto fail; } - fprintf(stderr, "No file system found to convert.\n"); + cctx->fs_data = ext2_fs; + cctx->blocksize = ext2_fs->blocksize; + cctx->block_count = ext2_fs->super->s_blocks_count; + cctx->total_bytes = ext2_fs->blocksize * ext2_fs->super->s_blocks_count; + cctx->volume_name = strndup(ext2_fs->super->s_volume_name, 16); + cctx->first_data_block = ext2_fs->super->s_first_data_block; + cctx->inodes_count = ext2_fs->super->s_inodes_count; + cctx->free_inodes_count = ext2_fs->super->s_free_inodes_count; + return 0; +fail: + ext2fs_close(ext2_fs); return -1; } -/* - * Helper for expand and merge extent_cache for wipe_one_reserved_range() to - * handle wiping a range that exists in cache. 
- */ -static int _expand_extent_cache(struct cache_tree *tree, - struct cache_extent *entry, - u64 min_stripe_size, int backward) +static int __ext2_add_one_block(ext2_filsys fs, char *bitmap, + unsigned long group_nr, struct cache_tree *used) { - struct cache_extent *ce; - int diff; + unsigned long offset; + unsigned i; + int ret = 0; - if (entry->size >= min_stripe_size) - return 0; - diff = min_stripe_size - entry->size; + offset = fs->super->s_first_data_block; + offset /= EXT2FS_CLUSTER_RATIO(fs); + offset += group_nr * EXT2_CLUSTERS_PER_GROUP(fs->super); + for (i = 0; i < EXT2_CLUSTERS_PER_GROUP(fs->super); i++) { + if (ext2fs_test_bit(i, bitmap)) { + u64 start; - if (backward) { - ce = prev_cache_extent(entry); - if (!ce) - goto expand_back; - if (ce->start + ce->size >= entry->start - diff) { - /* Directly merge with previous extent */ - ce->size = entry->start + entry->size - ce->start; - remove_cache_extent(tree, entry); - free(entry); - return 0; - } -expand_back: - /* No overlap, normal extent */ - if (entry->start < diff) { - error("cannot find space for data chunk layout"); - return -ENOSPC; + start = (i + offset) * EXT2FS_CLUSTER_RATIO(fs); + start *= fs->blocksize; + ret = add_merge_cache_extent(used, start, + fs->blocksize); + if (ret < 0) + break; } - entry->start -= diff; - entry->size += diff; - return 0; - } - ce = next_cache_extent(entry); - if (!ce) - goto expand_after; - if (entry->start + entry->size + diff >= ce->start) { - /* Directly merge with next extent */ - entry->size = ce->start + ce->size - entry->start; - remove_cache_extent(tree, ce); - free(ce); - return 0; } -expand_after: - entry->size += diff; - return 0; + return ret; } /* - * Remove one reserve range from given cache tree - * if min_stripe_size is non-zero, it will ensure for split case, - * all its split cache extent is no smaller than @min_strip_size / 2. 
+ * Read all used ext2 space into cctx->used cache tree */ -static int wipe_one_reserved_range(struct cache_tree *tree, - u64 start, u64 len, u64 min_stripe_size, - int ensure_size) +static int ext2_read_used_space(struct btrfs_convert_context *cctx) { - struct cache_extent *cache; - int ret; - - BUG_ON(ensure_size && min_stripe_size == 0); - /* - * The logical here is simplified to handle special cases only - * So we don't need to consider merge case for ensure_size - */ - BUG_ON(min_stripe_size && (min_stripe_size < len * 2 || - min_stripe_size / 2 < BTRFS_STRIPE_LEN)); + ext2_filsys fs = (ext2_filsys)cctx->fs_data; + blk64_t blk_itr = EXT2FS_B2C(fs, fs->super->s_first_data_block); + struct cache_tree *used_tree = &cctx->used; + char *block_bitmap = NULL; + unsigned long i; + int block_nbytes; + int ret = 0; - /* Also, wipe range should already be aligned */ - BUG_ON(start != round_down(start, BTRFS_STRIPE_LEN) || - start + len != round_up(start + len, BTRFS_STRIPE_LEN)); + block_nbytes = EXT2_CLUSTERS_PER_GROUP(fs->super) / 8; + /* Shouldn't happen */ + BUG_ON(!fs->block_map); - min_stripe_size /= 2; + block_bitmap = malloc(block_nbytes); + if (!block_bitmap) + return -ENOMEM; - cache = lookup_cache_extent(tree, start, len); - if (!cache) - return 0; + for (i = 0; i < fs->group_desc_count; i++) { + ret = ext2fs_get_block_bitmap_range(fs->block_map, blk_itr, + block_nbytes * 8, block_bitmap); + if (ret) { + error("fail to get bitmap from ext2, %s", + strerror(-ret)); + break; + } + ret = __ext2_add_one_block(fs, block_bitmap, i, used_tree); + if (ret < 0) { + error("fail to build used space tree, %s", + strerror(-ret)); + break; + } + blk_itr += EXT2_CLUSTERS_PER_GROUP(fs->super); + } - if (start <= cache->start) { - /* - * |--------cache---------| - * |-wipe-| - */ - BUG_ON(start + len <= cache->start); + free(block_bitmap); + return ret; +} - /* - * The wipe size is smaller than min_stripe_size / 2, - * so the result length should still meet min_stripe_size - * 
And no need to do alignment - */ - cache->size -= (start + len - cache->start); - if (cache->size == 0) { - remove_cache_extent(tree, cache); - free(cache); - return 0; +static void ext2_close_fs(struct btrfs_convert_context *cctx) +{ + if (cctx->volume_name) { + free(cctx->volume_name); + cctx->volume_name = NULL; + } + ext2fs_close(cctx->fs_data); +} + +struct dir_iterate_data { + struct btrfs_trans_handle *trans; + struct btrfs_root *root; + struct btrfs_inode_item *inode; + u64 objectid; + u64 index_cnt; + u64 parent; + int errcode; +}; + +static u8 ext2_filetype_conversion_table[EXT2_FT_MAX] = { + [EXT2_FT_UNKNOWN] = BTRFS_FT_UNKNOWN, + [EXT2_FT_REG_FILE] = BTRFS_FT_REG_FILE, + [EXT2_FT_DIR] = BTRFS_FT_DIR, + [EXT2_FT_CHRDEV] = BTRFS_FT_CHRDEV, + [EXT2_FT_BLKDEV] = BTRFS_FT_BLKDEV, + [EXT2_FT_FIFO] = BTRFS_FT_FIFO, + [EXT2_FT_SOCK] = BTRFS_FT_SOCK, + [EXT2_FT_SYMLINK] = BTRFS_FT_SYMLINK, +}; + +static int ext2_dir_iterate_proc(ext2_ino_t dir, int entry, + struct ext2_dir_entry *dirent, + int offset, int blocksize, + char *buf,void *priv_data) +{ + int ret; + int file_type; + u64 objectid; + char dotdot[] = ".."; + struct dir_iterate_data *idata = (struct dir_iterate_data *)priv_data; + int name_len; + + name_len = dirent->name_len & 0xFF; + + objectid = dirent->inode + INO_OFFSET; + if (!strncmp(dirent->name, dotdot, name_len)) { + if (name_len == 2) { + BUG_ON(idata->parent != 0); + idata->parent = objectid; + } + return 0; + } + if (dirent->inode < EXT2_GOOD_OLD_FIRST_INO) + return 0; + + file_type = dirent->name_len >> 8; + BUG_ON(file_type > EXT2_FT_SYMLINK); + + ret = convert_insert_dirent(idata->trans, idata->root, dirent->name, + name_len, idata->objectid, objectid, + ext2_filetype_conversion_table[file_type], + idata->index_cnt, idata->inode); + if (ret < 0) { + idata->errcode = ret; + return BLOCK_ABORT; + } + + idata->index_cnt++; + return 0; +} + +static int ext2_create_dir_entries(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 
objectid, + struct btrfs_inode_item *btrfs_inode, + ext2_filsys ext2_fs, ext2_ino_t ext2_ino) +{ + int ret; + errcode_t err; + struct dir_iterate_data data = { + .trans = trans, + .root = root, + .inode = btrfs_inode, + .objectid = objectid, + .index_cnt = 2, + .parent = 0, + .errcode = 0, + }; + + err = ext2fs_dir_iterate2(ext2_fs, ext2_ino, 0, NULL, + ext2_dir_iterate_proc, &data); + if (err) + goto error; + ret = data.errcode; + if (ret == 0 && data.parent == objectid) { + ret = btrfs_insert_inode_ref(trans, root, "..", 2, + objectid, objectid, 0); + } + return ret; +error: + fprintf(stderr, "ext2fs_dir_iterate2: %s\n", error_message(err)); + return -1; +} + +static int ext2_block_iterate_proc(ext2_filsys fs, blk_t *blocknr, + e2_blkcnt_t blockcnt, blk_t ref_block, + int ref_offset, void *priv_data) +{ + int ret; + struct blk_iterate_data *idata; + idata = (struct blk_iterate_data *)priv_data; + ret = block_iterate_proc(*blocknr, blockcnt, idata); + if (ret) { + idata->errcode = ret; + return BLOCK_ABORT; + } + return 0; +} + +/* + * traverse file's data blocks, record these data blocks as file extents. 
+ */ +static int ext2_create_file_extents(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid, + struct btrfs_inode_item *btrfs_inode, + ext2_filsys ext2_fs, ext2_ino_t ext2_ino, + int datacsum, int packing) +{ + int ret; + char *buffer = NULL; + errcode_t err; + u32 last_block; + u32 sectorsize = root->sectorsize; + u64 inode_size = btrfs_stack_inode_size(btrfs_inode); + struct blk_iterate_data data; + + init_blk_iterate_data(&data, trans, root, btrfs_inode, objectid, + datacsum); + + err = ext2fs_block_iterate2(ext2_fs, ext2_ino, BLOCK_FLAG_DATA_ONLY, + NULL, ext2_block_iterate_proc, &data); + if (err) + goto error; + ret = data.errcode; + if (ret) + goto fail; + if (packing && data.first_block == 0 && data.num_blocks > 0 && + inode_size <= BTRFS_MAX_INLINE_DATA_SIZE(root)) { + u64 num_bytes = data.num_blocks * sectorsize; + u64 disk_bytenr = data.disk_block * sectorsize; + u64 nbytes; + + buffer = malloc(num_bytes); + if (!buffer) + return -ENOMEM; + ret = read_disk_extent(root, disk_bytenr, num_bytes, buffer); + if (ret) + goto fail; + if (num_bytes > inode_size) + num_bytes = inode_size; + ret = btrfs_insert_inline_extent(trans, root, objectid, + 0, buffer, num_bytes); + if (ret) + goto fail; + nbytes = btrfs_stack_inode_nbytes(btrfs_inode) + num_bytes; + btrfs_set_stack_inode_nbytes(btrfs_inode, nbytes); + } else if (data.num_blocks > 0) { + ret = record_file_blocks(&data, data.first_block, + data.disk_block, data.num_blocks); + if (ret) + goto fail; + } + data.first_block += data.num_blocks; + last_block = (inode_size + sectorsize - 1) / sectorsize; + if (last_block > data.first_block) { + ret = record_file_blocks(&data, data.first_block, 0, + last_block - data.first_block); + } +fail: + free(buffer); + return ret; +error: + fprintf(stderr, "ext2fs_block_iterate2: %s\n", error_message(err)); + return -1; +} + +static int ext2_create_symbol_link(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid, + struct 
btrfs_inode_item *btrfs_inode, + ext2_filsys ext2_fs, ext2_ino_t ext2_ino, + struct ext2_inode *ext2_inode) +{ + int ret; + char *pathname; + u64 inode_size = btrfs_stack_inode_size(btrfs_inode); + if (ext2fs_inode_data_blocks(ext2_fs, ext2_inode)) { + btrfs_set_stack_inode_size(btrfs_inode, inode_size + 1); + ret = ext2_create_file_extents(trans, root, objectid, + btrfs_inode, ext2_fs, ext2_ino, 1, 1); + btrfs_set_stack_inode_size(btrfs_inode, inode_size); + return ret; + } + + pathname = (char *)&(ext2_inode->i_block[0]); + BUG_ON(pathname[inode_size] != 0); + ret = btrfs_insert_inline_extent(trans, root, objectid, 0, + pathname, inode_size + 1); + btrfs_set_stack_inode_nbytes(btrfs_inode, inode_size + 1); + return ret; +} + +/* + * Following xattr/acl related codes are based on codes in + * fs/ext3/xattr.c and fs/ext3/acl.c + */ +#define EXT2_XATTR_BHDR(ptr) ((struct ext2_ext_attr_header *)(ptr)) +#define EXT2_XATTR_BFIRST(ptr) \ + ((struct ext2_ext_attr_entry *)(EXT2_XATTR_BHDR(ptr) + 1)) +#define EXT2_XATTR_IHDR(inode) \ + ((struct ext2_ext_attr_header *) ((void *)(inode) + \ + EXT2_GOOD_OLD_INODE_SIZE + (inode)->i_extra_isize)) +#define EXT2_XATTR_IFIRST(inode) \ + ((struct ext2_ext_attr_entry *) ((void *)EXT2_XATTR_IHDR(inode) + \ + sizeof(EXT2_XATTR_IHDR(inode)->h_magic))) + +static int ext2_xattr_check_names(struct ext2_ext_attr_entry *entry, + const void *end) +{ + struct ext2_ext_attr_entry *next; + + while (!EXT2_EXT_IS_LAST_ENTRY(entry)) { + next = EXT2_EXT_ATTR_NEXT(entry); + if ((void *)next >= end) + return -EIO; + entry = next; + } + return 0; +} + +static int ext2_xattr_check_block(const char *buf, size_t size) +{ + int error; + struct ext2_ext_attr_header *header = EXT2_XATTR_BHDR(buf); + + if (header->h_magic != EXT2_EXT_ATTR_MAGIC || + header->h_blocks != 1) + return -EIO; + error = ext2_xattr_check_names(EXT2_XATTR_BFIRST(buf), buf + size); + return error; +} + +static int ext2_xattr_check_entry(struct ext2_ext_attr_entry *entry, + size_t 
size) +{ + size_t value_size = entry->e_value_size; + + if (entry->e_value_block != 0 || value_size > size || + entry->e_value_offs + value_size > size) + return -EIO; + return 0; +} + +#define EXT2_ACL_VERSION 0x0001 + +/* 23.2.5 acl_tag_t values */ + +#define ACL_UNDEFINED_TAG (0x00) +#define ACL_USER_OBJ (0x01) +#define ACL_USER (0x02) +#define ACL_GROUP_OBJ (0x04) +#define ACL_GROUP (0x08) +#define ACL_MASK (0x10) +#define ACL_OTHER (0x20) + +/* 23.2.7 ACL qualifier constants */ + +#define ACL_UNDEFINED_ID ((id_t)-1) + +typedef struct { + __le16 e_tag; + __le16 e_perm; + __le32 e_id; +} ext2_acl_entry; + +typedef struct { + __le16 e_tag; + __le16 e_perm; +} ext2_acl_entry_short; + +typedef struct { + __le32 a_version; +} ext2_acl_header; + +static inline int ext2_acl_count(size_t size) +{ + ssize_t s; + size -= sizeof(ext2_acl_header); + s = size - 4 * sizeof(ext2_acl_entry_short); + if (s < 0) { + if (size % sizeof(ext2_acl_entry_short)) + return -1; + return size / sizeof(ext2_acl_entry_short); + } else { + if (s % sizeof(ext2_acl_entry)) + return -1; + return s / sizeof(ext2_acl_entry) + 4; + } +} + +#define ACL_EA_VERSION 0x0002 + +typedef struct { + __le16 e_tag; + __le16 e_perm; + __le32 e_id; +} acl_ea_entry; + +typedef struct { + __le32 a_version; + acl_ea_entry a_entries[0]; +} acl_ea_header; + +static inline size_t acl_ea_size(int count) +{ + return sizeof(acl_ea_header) + count * sizeof(acl_ea_entry); +} + +static int ext2_acl_to_xattr(void *dst, const void *src, + size_t dst_size, size_t src_size) +{ + int i, count; + const void *end = src + src_size; + acl_ea_header *ext_acl = (acl_ea_header *)dst; + acl_ea_entry *dst_entry = ext_acl->a_entries; + ext2_acl_entry *src_entry; + + if (src_size < sizeof(ext2_acl_header)) + goto fail; + if (((ext2_acl_header *)src)->a_version != + cpu_to_le32(EXT2_ACL_VERSION)) + goto fail; + src += sizeof(ext2_acl_header); + count = ext2_acl_count(src_size); + if (count <= 0) + goto fail; + + BUG_ON(dst_size < 
acl_ea_size(count)); + ext_acl->a_version = cpu_to_le32(ACL_EA_VERSION); + for (i = 0; i < count; i++, dst_entry++) { + src_entry = (ext2_acl_entry *)src; + if (src + sizeof(ext2_acl_entry_short) > end) + goto fail; + dst_entry->e_tag = src_entry->e_tag; + dst_entry->e_perm = src_entry->e_perm; + switch (le16_to_cpu(src_entry->e_tag)) { + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + src += sizeof(ext2_acl_entry_short); + dst_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID); + break; + case ACL_USER: + case ACL_GROUP: + src += sizeof(ext2_acl_entry); + if (src > end) + goto fail; + dst_entry->e_id = src_entry->e_id; + break; + default: + goto fail; } + } + if (src != end) + goto fail; + return 0; +fail: + return -EINVAL; +} - BUG_ON(ensure_size && cache->size < min_stripe_size); +static char *xattr_prefix_table[] = { + [1] = "user.", + [2] = "system.posix_acl_access", + [3] = "system.posix_acl_default", + [4] = "trusted.", + [6] = "security.", +}; - cache->start = start + len; - return 0; - } else if (start > cache->start && start + len < cache->start + - cache->size) { - /* - * |-------cache-----| - * |-wipe-| - */ - u64 old_start = cache->start; - u64 old_len = cache->size; - u64 insert_start = start + len; - u64 insert_len; +static int ext2_copy_single_xattr(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid, + struct ext2_ext_attr_entry *entry, + const void *data, u32 datalen) +{ + int ret = 0; + int name_len; + int name_index; + void *databuf = NULL; + char namebuf[XATTR_NAME_MAX + 1]; - cache->size = start - cache->start; - /* Expand the leading half part if needed */ - if (ensure_size && cache->size < min_stripe_size) { - ret = _expand_extent_cache(tree, cache, - min_stripe_size, 1); - if (ret < 0) - return ret; - } + name_index = entry->e_name_index; + if (name_index >= ARRAY_SIZE(xattr_prefix_table) || + xattr_prefix_table[name_index] == NULL) + return -EOPNOTSUPP; + name_len = 
strlen(xattr_prefix_table[name_index]) + + entry->e_name_len; + if (name_len >= sizeof(namebuf)) + return -ERANGE; - /* And insert the new one */ - insert_len = old_start + old_len - start - len; - ret = add_merge_cache_extent(tree, insert_start, insert_len); - if (ret < 0) - return ret; + if (name_index == 2 || name_index == 3) { + size_t bufsize = acl_ea_size(ext2_acl_count(datalen)); + databuf = malloc(bufsize); + if (!databuf) + return -ENOMEM; + ret = ext2_acl_to_xattr(databuf, data, bufsize, datalen); + if (ret) + goto out; + data = databuf; + datalen = bufsize; + } + strncpy(namebuf, xattr_prefix_table[name_index], XATTR_NAME_MAX); + strncat(namebuf, EXT2_EXT_ATTR_NAME(entry), entry->e_name_len); + if (name_len + datalen > BTRFS_LEAF_DATA_SIZE(root) - + sizeof(struct btrfs_item) - sizeof(struct btrfs_dir_item)) { + fprintf(stderr, "skip large xattr on inode %Lu name %.*s\n", + objectid - INO_OFFSET, name_len, namebuf); + goto out; + } + ret = btrfs_insert_xattr_item(trans, root, namebuf, name_len, + data, datalen, objectid); +out: + free(databuf); + return ret; +} - /* Expand the last half part if needed */ - if (ensure_size && insert_len < min_stripe_size) { - cache = lookup_cache_extent(tree, insert_start, - insert_len); - if (!cache || cache->start != insert_start || - cache->size != insert_len) - return -ENOENT; - ret = _expand_extent_cache(tree, cache, - min_stripe_size, 0); +static int ext2_copy_extended_attrs(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid, + struct btrfs_inode_item *btrfs_inode, + ext2_filsys ext2_fs, ext2_ino_t ext2_ino) +{ + int ret = 0; + int inline_ea = 0; + errcode_t err; + u32 datalen; + u32 block_size = ext2_fs->blocksize; + u32 inode_size = EXT2_INODE_SIZE(ext2_fs->super); + struct ext2_inode_large *ext2_inode; + struct ext2_ext_attr_entry *entry; + void *data; + char *buffer = NULL; + char inode_buf[EXT2_GOOD_OLD_INODE_SIZE]; + + if (inode_size <= EXT2_GOOD_OLD_INODE_SIZE) { + ext2_inode = (struct 
ext2_inode_large *)inode_buf; + } else { + ext2_inode = (struct ext2_inode_large *)malloc(inode_size); + if (!ext2_inode) + return -ENOMEM; + } + err = ext2fs_read_inode_full(ext2_fs, ext2_ino, (void *)ext2_inode, + inode_size); + if (err) { + fprintf(stderr, "ext2fs_read_inode_full: %s\n", + error_message(err)); + ret = -1; + goto out; + } + + if (ext2_ino > ext2_fs->super->s_first_ino && + inode_size > EXT2_GOOD_OLD_INODE_SIZE) { + if (EXT2_GOOD_OLD_INODE_SIZE + + ext2_inode->i_extra_isize > inode_size) { + ret = -EIO; + goto out; + } + if (ext2_inode->i_extra_isize != 0 && + EXT2_XATTR_IHDR(ext2_inode)->h_magic == + EXT2_EXT_ATTR_MAGIC) { + inline_ea = 1; + } + } + if (inline_ea) { + int total; + void *end = (void *)ext2_inode + inode_size; + entry = EXT2_XATTR_IFIRST(ext2_inode); + total = end - (void *)entry; + ret = ext2_xattr_check_names(entry, end); + if (ret) + goto out; + while (!EXT2_EXT_IS_LAST_ENTRY(entry)) { + ret = ext2_xattr_check_entry(entry, total); + if (ret) + goto out; + data = (void *)EXT2_XATTR_IFIRST(ext2_inode) + + entry->e_value_offs; + datalen = entry->e_value_size; + ret = ext2_copy_single_xattr(trans, root, objectid, + entry, data, datalen); + if (ret) + goto out; + entry = EXT2_EXT_ATTR_NEXT(entry); } + } + + if (ext2_inode->i_file_acl == 0) + goto out; + + buffer = malloc(block_size); + if (!buffer) { + ret = -ENOMEM; + goto out; + } + err = ext2fs_read_ext_attr(ext2_fs, ext2_inode->i_file_acl, buffer); + if (err) { + fprintf(stderr, "ext2fs_read_ext_attr: %s\n", + error_message(err)); + ret = -1; + goto out; + } + ret = ext2_xattr_check_block(buffer, block_size); + if (ret) + goto out; + + entry = EXT2_XATTR_BFIRST(buffer); + while (!EXT2_EXT_IS_LAST_ENTRY(entry)) { + ret = ext2_xattr_check_entry(entry, block_size); + if (ret) + goto out; + data = buffer + entry->e_value_offs; + datalen = entry->e_value_size; + ret = ext2_copy_single_xattr(trans, root, objectid, + entry, data, datalen); + if (ret) + goto out; + entry = 
EXT2_EXT_ATTR_NEXT(entry); + } +out: + free(buffer); + if ((void *)ext2_inode != inode_buf) + free(ext2_inode); + return ret; +} +#define MINORBITS 20 +#define MKDEV(ma, mi) (((ma) << MINORBITS) | (mi)) + +static inline dev_t old_decode_dev(u16 val) +{ + return MKDEV((val >> 8) & 255, val & 255); +} + +static inline dev_t new_decode_dev(u32 dev) +{ + unsigned major = (dev & 0xfff00) >> 8; + unsigned minor = (dev & 0xff) | ((dev >> 12) & 0xfff00); + return MKDEV(major, minor); +} + +static void ext2_copy_inode_item(struct btrfs_inode_item *dst, + struct ext2_inode *src, u32 blocksize) +{ + btrfs_set_stack_inode_generation(dst, 1); + btrfs_set_stack_inode_sequence(dst, 0); + btrfs_set_stack_inode_transid(dst, 1); + btrfs_set_stack_inode_size(dst, src->i_size); + btrfs_set_stack_inode_nbytes(dst, 0); + btrfs_set_stack_inode_block_group(dst, 0); + btrfs_set_stack_inode_nlink(dst, src->i_links_count); + btrfs_set_stack_inode_uid(dst, src->i_uid | (src->i_uid_high << 16)); + btrfs_set_stack_inode_gid(dst, src->i_gid | (src->i_gid_high << 16)); + btrfs_set_stack_inode_mode(dst, src->i_mode); + btrfs_set_stack_inode_rdev(dst, 0); + btrfs_set_stack_inode_flags(dst, 0); + btrfs_set_stack_timespec_sec(&dst->atime, src->i_atime); + btrfs_set_stack_timespec_nsec(&dst->atime, 0); + btrfs_set_stack_timespec_sec(&dst->ctime, src->i_ctime); + btrfs_set_stack_timespec_nsec(&dst->ctime, 0); + btrfs_set_stack_timespec_sec(&dst->mtime, src->i_mtime); + btrfs_set_stack_timespec_nsec(&dst->mtime, 0); + btrfs_set_stack_timespec_sec(&dst->otime, 0); + btrfs_set_stack_timespec_nsec(&dst->otime, 0); - return ret; + if (S_ISDIR(src->i_mode)) { + btrfs_set_stack_inode_size(dst, 0); + btrfs_set_stack_inode_nlink(dst, 1); } - /* - * |----cache-----| - * |--wipe-| - * Wipe len should be small enough and no need to expand the - * remaining extent - */ - cache->size = start - cache->start; - BUG_ON(ensure_size && cache->size < min_stripe_size); - return 0; + if (S_ISREG(src->i_mode)) { + 
btrfs_set_stack_inode_size(dst, (u64)src->i_size_high << 32 | + (u64)src->i_size); + } + if (!S_ISREG(src->i_mode) && !S_ISDIR(src->i_mode) && + !S_ISLNK(src->i_mode)) { + if (src->i_block[0]) { + btrfs_set_stack_inode_rdev(dst, + old_decode_dev(src->i_block[0])); + } else { + btrfs_set_stack_inode_rdev(dst, + new_decode_dev(src->i_block[1])); + } + } + memset(&dst->reserved, 0, sizeof(dst->reserved)); } /* - * Remove reserved ranges from given cache_tree - * - * It will remove the following ranges - * 1) 0~1M - * 2) 2nd superblock, +64K (make sure chunks are 64K aligned) - * 3) 3rd superblock, +64K - * - * @min_stripe must be given for safety check - * and if @ensure_size is given, it will ensure affected cache_extent will be - * larger than min_stripe_size + * copy a single inode. do all the required works, such as cloning + * inode item, creating file extents and creating directory entries. */ -static int wipe_reserved_ranges(struct cache_tree *tree, u64 min_stripe_size, - int ensure_size) +static int ext2_copy_single_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid, + ext2_filsys ext2_fs, ext2_ino_t ext2_ino, + struct ext2_inode *ext2_inode, + int datacsum, int packing, int noxattr) { int ret; + struct btrfs_inode_item btrfs_inode; - ret = wipe_one_reserved_range(tree, 0, 1024 * 1024, min_stripe_size, - ensure_size); - if (ret < 0) - return ret; - ret = wipe_one_reserved_range(tree, btrfs_sb_offset(1), - BTRFS_STRIPE_LEN, min_stripe_size, ensure_size); - if (ret < 0) + if (ext2_inode->i_links_count == 0) + return 0; + + ext2_copy_inode_item(&btrfs_inode, ext2_inode, ext2_fs->blocksize); + if (!datacsum && S_ISREG(ext2_inode->i_mode)) { + u32 flags = btrfs_stack_inode_flags(&btrfs_inode) | + BTRFS_INODE_NODATASUM; + btrfs_set_stack_inode_flags(&btrfs_inode, flags); + } + + switch (ext2_inode->i_mode & S_IFMT) { + case S_IFREG: + ret = ext2_create_file_extents(trans, root, objectid, + &btrfs_inode, ext2_fs, ext2_ino, datacsum, 
packing); + break; + case S_IFDIR: + ret = ext2_create_dir_entries(trans, root, objectid, + &btrfs_inode, ext2_fs, ext2_ino); + break; + case S_IFLNK: + ret = ext2_create_symbol_link(trans, root, objectid, + &btrfs_inode, ext2_fs, ext2_ino, ext2_inode); + break; + default: + ret = 0; + break; + } + if (ret) return ret; - ret = wipe_one_reserved_range(tree, btrfs_sb_offset(2), - BTRFS_STRIPE_LEN, min_stripe_size, ensure_size); - return ret; + + if (!noxattr) { + ret = ext2_copy_extended_attrs(trans, root, objectid, + &btrfs_inode, ext2_fs, ext2_ino); + if (ret) + return ret; + } + return btrfs_insert_inode(trans, root, objectid, &btrfs_inode); } -static int calculate_available_space(struct btrfs_convert_context *cctx) +/* + * scan ext2's inode bitmap and copy all used inodes. + */ +static int ext2_copy_inodes(struct btrfs_convert_context *cctx, + struct btrfs_root *root, + int datacsum, int packing, int noxattr, struct task_ctx *p) { - struct cache_tree *used = &cctx->used; - struct cache_tree *data_chunks = &cctx->data_chunks; - struct cache_tree *free = &cctx->free; - struct cache_extent *cache; - u64 cur_off = 0; - /* - * Twice the minimal chunk size, to allow later wipe_reserved_ranges() - * works without need to consider overlap - */ - u64 min_stripe_size = 2 * 16 * 1024 * 1024; + ext2_filsys ext2_fs = cctx->fs_data; int ret; + errcode_t err; + ext2_inode_scan ext2_scan; + struct ext2_inode ext2_inode; + ext2_ino_t ext2_ino; + u64 objectid; + struct btrfs_trans_handle *trans; - /* Calculate data_chunks */ - for (cache = first_cache_extent(used); cache; - cache = next_cache_extent(cache)) { - u64 cur_len; - - if (cache->start + cache->size < cur_off) - continue; - if (cache->start > cur_off + min_stripe_size) - cur_off = cache->start; - cur_len = max(cache->start + cache->size - cur_off, - min_stripe_size); - ret = add_merge_cache_extent(data_chunks, cur_off, cur_len); - if (ret < 0) - goto out; - cur_off += cur_len; + trans = btrfs_start_transaction(root, 1); + 
if (!trans) + return -ENOMEM; + err = ext2fs_open_inode_scan(ext2_fs, 0, &ext2_scan); + if (err) { + fprintf(stderr, "ext2fs_open_inode_scan: %s\n", error_message(err)); + return -1; } - /* - * remove reserved ranges, so we won't ever bother relocating an old - * filesystem extent to other place. - */ - ret = wipe_reserved_ranges(data_chunks, min_stripe_size, 1); - if (ret < 0) - goto out; - - cur_off = 0; - /* - * Calculate free space - * Always round up the start bytenr, to avoid metadata extent corss - * stripe boundary, as later mkfs_convert() won't have all the extent - * allocation check - */ - for (cache = first_cache_extent(data_chunks); cache; - cache = next_cache_extent(cache)) { - if (cache->start < cur_off) + while (!(err = ext2fs_get_next_inode(ext2_scan, &ext2_ino, + &ext2_inode))) { + /* no more inodes */ + if (ext2_ino == 0) + break; + /* skip special inode in ext2fs */ + if (ext2_ino < EXT2_GOOD_OLD_FIRST_INO && + ext2_ino != EXT2_ROOT_INO) continue; - if (cache->start > cur_off) { - u64 insert_start; - u64 len; - - len = cache->start - round_up(cur_off, - BTRFS_STRIPE_LEN); - insert_start = round_up(cur_off, BTRFS_STRIPE_LEN); - - ret = add_merge_cache_extent(free, insert_start, len); - if (ret < 0) - goto out; + objectid = ext2_ino + INO_OFFSET; + ret = ext2_copy_single_inode(trans, root, + objectid, ext2_fs, ext2_ino, + &ext2_inode, datacsum, packing, + noxattr); + p->cur_copy_inodes++; + if (ret) + return ret; + if (trans->blocks_used >= 4096) { + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); + trans = btrfs_start_transaction(root, 1); + BUG_ON(!trans); } - cur_off = cache->start + cache->size; } - /* Don't forget the last range */ - if (cctx->total_bytes > cur_off) { - u64 len = cctx->total_bytes - cur_off; - u64 insert_start; - - insert_start = round_up(cur_off, BTRFS_STRIPE_LEN); - - ret = add_merge_cache_extent(free, insert_start, len); - if (ret < 0) - goto out; + if (err) { + fprintf(stderr, "ext2fs_get_next_inode: %s\n", 
error_message(err)); + return -1; } + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); + ext2fs_close_inode_scan(ext2_scan); - /* Remove reserved bytes */ - ret = wipe_reserved_ranges(free, min_stripe_size, 0); -out: return ret; } -/* - * Read used space, and since we have the used space, - * calcuate data_chunks and free for later mkfs - */ -static int convert_read_used_space(struct btrfs_convert_context *cctx) + +static const struct btrfs_convert_operations ext2_convert_ops = { + .name = "ext2", + .open_fs = ext2_open_fs, + .read_used_space = ext2_read_used_space, + .copy_inodes = ext2_copy_inodes, + .close_fs = ext2_close_fs, +}; + +#endif + +static const struct btrfs_convert_operations *convert_operations[] = { +#if BTRFSCONVERT_EXT2 + &ext2_convert_ops, +#endif +}; + +static int convert_open_fs(const char *devname, + struct btrfs_convert_context *cctx) { - int ret; + int i; - ret = cctx->convert_ops->read_used_space(cctx); - if (ret) - return ret; + memset(cctx, 0, sizeof(*cctx)); - ret = calculate_available_space(cctx); - return ret; + for (i = 0; i < ARRAY_SIZE(convert_operations); i++) { + int ret = convert_operations[i]->open_fs(cctx, devname); + + if (ret == 0) { + cctx->convert_ops = convert_operations[i]; + return ret; + } + } + + fprintf(stderr, "No file system found to convert.\n"); + return -1; } static int do_convert(const char *devname, int datacsum, int packing, @@ -2276,7 +2325,6 @@ { int ret; int fd = -1; - int is_btrfs = 0; u32 blocksize; u64 total_bytes; struct btrfs_root *root; @@ -2299,14 +2347,14 @@ blocksize = cctx.blocksize; total_bytes = (u64)blocksize * (u64)cctx.block_count; if (blocksize < 4096) { - fprintf(stderr, "block size is too small\n"); + error("block size is too small: %u < 4096", blocksize); goto fail; } if (btrfs_check_nodesize(nodesize, blocksize, features)) goto fail; fd = open(devname, O_RDWR); if (fd < 0) { - fprintf(stderr, "unable to open %s\n", devname); + error("unable to open %s: %s", devname, 
strerror(errno)); goto fail; } btrfs_parse_features_to_string(features_buf, features); @@ -2332,27 +2380,26 @@ ret = make_btrfs(fd, &mkfs_cfg, &cctx); if (ret) { - fprintf(stderr, "unable to create initial ctree: %s\n", - strerror(-ret)); + error("unable to create initial ctree: %s", strerror(-ret)); goto fail; } root = open_ctree_fd(fd, devname, mkfs_cfg.super_bytenr, - OPEN_CTREE_WRITES); + OPEN_CTREE_WRITES | OPEN_CTREE_FS_PARTIAL); if (!root) { - fprintf(stderr, "unable to open ctree\n"); + error("unable to open ctree"); goto fail; } ret = init_btrfs(&mkfs_cfg, root, &cctx, datacsum, packing, noxattr); if (ret) { - fprintf(stderr, "unable to setup the root tree\n"); + error("unable to setup the root tree: %d", ret); goto fail; } - printf("creating %s image file.\n", cctx.convert_ops->name); + printf("creating %s image file\n", cctx.convert_ops->name); ret = asprintf(&subvol_name, "%s_saved", cctx.convert_ops->name); if (ret < 0) { - fprintf(stderr, "error allocating subvolume name: %s_saved\n", + error("memory allocation failure for subvolume name: %s_saved", cctx.convert_ops->name); goto fail; } @@ -2361,17 +2408,17 @@ key.type = BTRFS_ROOT_ITEM_KEY; image_root = btrfs_read_fs_root(root->fs_info, &key); if (!image_root) { - fprintf(stderr, "unable to create subvol\n"); + error("unable to create image subvolume"); goto fail; } ret = create_image(image_root, &mkfs_cfg, &cctx, fd, mkfs_cfg.num_bytes, "image", datacsum); if (ret) { - fprintf(stderr, "error during create_image %d\n", ret); + error("failed to create %s/image: %d", subvol_name, ret); goto fail; } - printf("creating btrfs metadata.\n"); + printf("creating btrfs metadata"); ctx.max_copy_inodes = (cctx.inodes_count - cctx.free_inodes_count); ctx.cur_copy_inodes = 0; @@ -2382,7 +2429,7 @@ } ret = copy_inodes(&cctx, root, datacsum, packing, noxattr, &ctx); if (ret) { - fprintf(stderr, "error during copy_inodes %d\n", ret); + error("error during copy_inodes %d", ret); goto fail; } if (progress) { @@ 
-2391,6 +2438,10 @@ } image_root = link_subvol(root, subvol_name, CONV_IMAGE_SUBVOL_OBJECTID); + if (!image_root) { + error("unable to link subvolume %s", subvol_name); + goto fail; + } free(subvol_name); @@ -2398,16 +2449,15 @@ if (copylabel == 1) { __strncpy_null(root->fs_info->super_copy->label, cctx.volume_name, BTRFS_LABEL_SIZE - 1); - fprintf(stderr, "copy label '%s'\n", - root->fs_info->super_copy->label); + printf("copy label '%s'\n", root->fs_info->super_copy->label); } else if (copylabel == -1) { strcpy(root->fs_info->super_copy->label, fslabel); - fprintf(stderr, "set label to '%s'\n", fslabel); + printf("set label to '%s'\n", fslabel); } ret = close_ctree(root); if (ret) { - fprintf(stderr, "error during close_ctree %d\n", ret); + error("close_ctree failed: %d", ret); goto fail; } convert_close_fs(&cctx); @@ -2419,29 +2469,28 @@ */ ret = migrate_super_block(fd, mkfs_cfg.super_bytenr, blocksize); if (ret) { - fprintf(stderr, "unable to migrate super block\n"); + error("unable to migrate super block: %d", ret); goto fail; } - is_btrfs = 1; - root = open_ctree_fd(fd, devname, 0, OPEN_CTREE_WRITES); + root = open_ctree_fd(fd, devname, 0, + OPEN_CTREE_WRITES | OPEN_CTREE_FS_PARTIAL); if (!root) { - fprintf(stderr, "unable to open ctree\n"); + error("unable to open ctree for finalization"); goto fail; } + root->fs_info->finalize_on_close = 1; + close_ctree(root); close(fd); - printf("conversion complete.\n"); + printf("conversion complete"); return 0; fail: clean_convert_context(&cctx); if (fd != -1) close(fd); - if (is_btrfs) - fprintf(stderr, - "WARNING: an error occurred during chunk mapping fixup, filesystem mountable but not finalized\n"); - else - fprintf(stderr, "conversion aborted\n"); + warning( +"an error occurred during conversion, filesystem is partially created but not finalized and not mountable"); return -1; } @@ -2606,24 +2655,24 @@ fd = open(devname, O_RDWR); if (fd < 0) { - fprintf(stderr, "unable to open %s\n", devname); + error("unable to 
open %s: %s", devname, strerror(errno)); goto fail; } root = open_ctree_fd(fd, devname, 0, OPEN_CTREE_WRITES); if (!root) { - fprintf(stderr, "unable to open ctree\n"); + error("unable to open ctree"); goto fail; } ret = may_rollback(root); if (ret < 0) { - fprintf(stderr, "unable to do rollback\n"); + error("unable to do rollback: %d", ret); goto fail; } sectorsize = root->sectorsize; buf = malloc(sectorsize); if (!buf) { - fprintf(stderr, "unable to allocate memory\n"); + error("unable to allocate memory"); goto fail; } @@ -2636,12 +2685,10 @@ 0); btrfs_release_path(&path); if (ret > 0) { - fprintf(stderr, - "ERROR: unable to convert ext2 image subvolume, is it deleted?\n"); + error("unable to convert ext2 image subvolume, is it deleted?"); goto fail; } else if (ret < 0) { - fprintf(stderr, - "ERROR: unable to open ext2_saved, id=%llu: %s\n", + error("unable to open ext2_saved, id %llu: %s", (unsigned long long)key.objectid, strerror(-ret)); goto fail; } @@ -2651,8 +2698,8 @@ key.offset = (u64)-1; image_root = btrfs_read_fs_root(root->fs_info, &key); if (!image_root || IS_ERR(image_root)) { - fprintf(stderr, "unable to open subvol %llu\n", - (unsigned long long)key.objectid); + error("unable to open subvolume %llu: %ld", + (unsigned long long)key.objectid, PTR_ERR(image_root)); goto fail; } @@ -2661,7 +2708,7 @@ dir = btrfs_lookup_dir_item(NULL, image_root, &path, root_dir, name, strlen(name), 0); if (!dir || IS_ERR(dir)) { - fprintf(stderr, "unable to find file %s\n", name); + error("unable to find file %s: %ld", name, PTR_ERR(dir)); goto fail; } leaf = path.nodes[0]; @@ -2672,7 +2719,7 @@ ret = btrfs_lookup_inode(NULL, image_root, &path, &key, 0); if (ret) { - fprintf(stderr, "unable to find inode item\n"); + error("unable to find inode item: %d", ret); goto fail; } leaf = path.nodes[0]; @@ -2685,7 +2732,7 @@ btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); ret = btrfs_search_slot(NULL, image_root, &key, &path, 0, 0); if (ret != 0) { - fprintf(stderr, "unable 
to find first file extent\n"); + error("unable to find first file extent: %d", ret); btrfs_release_path(&path); goto fail; } @@ -2748,8 +2795,10 @@ btrfs_release_path(&path); if (offset < total_bytes) { - fprintf(stderr, "unable to build extent mapping\n"); - fprintf(stderr, "converted filesystem after balance is unable to rollback\n"); + error("unable to build extent mapping (offset %llu, total_bytes %llu)", + (unsigned long long)offset, + (unsigned long long)total_bytes); + error("converted filesystem after balance is unable to rollback"); goto fail; } @@ -2757,7 +2806,7 @@ first_free &= ~((u64)sectorsize - 1); /* backup for extent #0 should exist */ if(!test_range_bit(&io_tree, 0, first_free - 1, EXTENT_LOCKED, 1)) { - fprintf(stderr, "no backup for the first extent\n"); + error("no backup for the first extent"); goto fail; } /* force no allocation from system block group */ @@ -2798,13 +2847,16 @@ } /* only extent #0 left in system block group? */ if (num_bytes > first_free) { - fprintf(stderr, "unable to empty system block group\n"); + error( + "unable to empty system block group (num_bytes %llu, first_free %llu", + (unsigned long long)num_bytes, + (unsigned long long)first_free); goto fail; } /* create a system chunk that maps the whole device */ ret = prepare_system_chunk_sb(root->fs_info->super_copy); if (ret) { - fprintf(stderr, "unable to update system chunk\n"); + error("unable to update system chunk: %d", ret); goto fail; } @@ -2813,7 +2865,7 @@ ret = close_ctree(root); if (ret) { - fprintf(stderr, "error during close_ctree %d\n", ret); + error("close_ctree failed: %d", ret); goto fail; } @@ -2825,9 +2877,8 @@ break; ret = pwrite(fd, buf, sectorsize, bytenr); if (ret != sectorsize) { - fprintf(stderr, - "error during zeroing superblock %d: %d\n", - i, ret); + error("zeroing superblock mirror %d failed: %d", + i, ret); goto fail; } } @@ -2853,13 +2904,15 @@ } ret = pread(fd, buf, sectorsize, bytenr); if (ret < 0) { - fprintf(stderr, "error during pread 
%d\n", ret); + error("reading superblock at %llu failed: %d", + (unsigned long long)bytenr, ret); goto fail; } BUG_ON(ret != sectorsize); ret = pwrite(fd, buf, sectorsize, start); if (ret < 0) { - fprintf(stderr, "error during pwrite %d\n", ret); + error("writing superblock at %llu failed: %d", + (unsigned long long)start, ret); goto fail; } BUG_ON(ret != sectorsize); @@ -2870,8 +2923,8 @@ } ret = fsync(fd); - if (ret) { - fprintf(stderr, "error during fsync %d\n", ret); + if (ret < 0) { + error("fsync failed: %s", strerror(errno)); goto fail; } /* @@ -2879,33 +2932,35 @@ */ ret = pread(fd, buf, sectorsize, sb_bytenr); if (ret < 0) { - fprintf(stderr, "error during pread %d\n", ret); + error("reading primary superblock failed: %s", + strerror(errno)); goto fail; } BUG_ON(ret != sectorsize); ret = pwrite(fd, buf, sectorsize, BTRFS_SUPER_INFO_OFFSET); if (ret < 0) { - fprintf(stderr, "error during pwrite %d\n", ret); + error("writing primary superblock failed: %s", + strerror(errno)); goto fail; } BUG_ON(ret != sectorsize); ret = fsync(fd); - if (ret) { - fprintf(stderr, "error during fsync %d\n", ret); + if (ret < 0) { + error("fsync failed: %s", strerror(errno)); goto fail; } close(fd); free(buf); extent_io_tree_cleanup(&io_tree); - printf("rollback complete.\n"); + printf("rollback complete\n"); return 0; fail: if (fd != -1) close(fd); free(buf); - fprintf(stderr, "rollback aborted.\n"); + error("rollback aborted"); return -1; } @@ -2923,6 +2978,9 @@ printf("\t-p|--progress show converting progress (default)\n"); printf("\t-O|--features LIST comma separated list of filesystem features\n"); printf("\t--no-progress show only overview, not the detailed progress\n"); + printf("\n"); + printf("Suported filesystems:\n"); + printf("\text2/3/4: %s\n", BTRFSCONVERT_EXT2 ? 
"yes" : "no"); } int main(int argc, char *argv[]) diff -Nru btrfs-progs-4.7/btrfstune.c btrfs-progs-4.7.1/btrfstune.c --- btrfs-progs-4.7/btrfstune.c 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/btrfstune.c 2016-08-25 17:33:48.000000000 +0000 @@ -389,7 +389,7 @@ int main(int argc, char *argv[]) { struct btrfs_root *root; - enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_WRITES; + unsigned ctree_flags = OPEN_CTREE_WRITES; int success = 0; int total = 0; int seeding_flag = 0; diff -Nru btrfs-progs-4.7/CHANGES btrfs-progs-4.7.1/CHANGES --- btrfs-progs-4.7/CHANGES 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/CHANGES 2016-08-25 17:33:48.000000000 +0000 @@ -1,3 +1,19 @@ +btrfs-progs-4.7.1 (2016-08-25) + * check: + * new optional mode: optimized for low memory usage (memory/io tradeoff) + * --mode=lowmem, not default, still considered experimental + * does not work with --repair yet + * convert: regression fix, ext2_subvol/image rw permissions + * mkfs/convert: + * two-staged creation, partially created filesystem will not be recognized + * improved error handling (fewer BUG_ONs) + * convert: preparation for more filesystems to convert from + * documentation updates: quota, qgroup + * other + * message updates + * more tests + * more build options, enhanced debugging + btrfs-progs-4.7 (2016-07-29) * convert: fix creating discontig extents * check: speed up traversing heavily reflinked extents within a file diff -Nru btrfs-progs-4.7/chunk-recover.c btrfs-progs-4.7.1/chunk-recover.c --- btrfs-progs-4.7/chunk-recover.c 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/chunk-recover.c 2016-08-25 17:33:48.000000000 +0000 @@ -1470,7 +1470,8 @@ disk_super = fs_info->super_copy; ret = btrfs_read_dev_super(fs_info->fs_devices->latest_bdev, - disk_super, fs_info->super_bytenr, 1); + disk_super, fs_info->super_bytenr, + SBREAD_RECOVER); if (ret) { fprintf(stderr, "No valid btrfs found\n"); goto out_devices; @@ -1531,7 +1532,8 @@ } sb = (struct 
btrfs_super_block*)buf; - ret = btrfs_read_dev_super(fd, sb, BTRFS_SUPER_INFO_OFFSET, 1); + ret = btrfs_read_dev_super(fd, sb, BTRFS_SUPER_INFO_OFFSET, + SBREAD_RECOVER); if (ret) { fprintf(stderr, "read super block error\n"); goto out_close_fd; @@ -1550,7 +1552,7 @@ goto out_close_fd; } - ret = btrfs_scan_fs_devices(fd, path, &fs_devices, 0, 1, 0); + ret = btrfs_scan_fs_devices(fd, path, &fs_devices, 0, SBREAD_RECOVER, 0); if (ret) goto out_close_fd; diff -Nru btrfs-progs-4.7/cmds-check.c btrfs-progs-4.7.1/cmds-check.c --- btrfs-progs-4.7/cmds-check.c 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/cmds-check.c 2016-08-25 17:33:48.000000000 +0000 @@ -74,6 +74,15 @@ static struct task_ctx ctx = { 0 }; static struct cache_tree *roots_info_cache = NULL; +enum btrfs_check_mode { + CHECK_MODE_ORIGINAL, + CHECK_MODE_LOWMEM, + CHECK_MODE_UNKNOWN, + CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL +}; + +static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT; + struct extent_backref { struct rb_node node; unsigned int is_data:1; @@ -434,6 +443,23 @@ struct cache_extent cache_extent; }; +/* + * Error bit for low memory mode check. + * + * Currently no caller cares about it yet. Just internal use for error + * classification. 
+ */ +#define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */ +#define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */ +#define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */ +#define REFERENCER_MISSING (1 << 3) /* Referencer not found */ +#define REFERENCER_MISMATCH (1 << 4) /* Referencer found but does not match */ +#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround; own bit so it cannot alias REFERENCER_MISMATCH */ +#define ITEM_SIZE_MISMATCH (1 << 5) /* Bad item size */ +#define UNKNOWN_TYPE (1 << 6) /* Unknown type */ +#define ACCOUNTING_MISMATCH (1 << 7) /* Used space accounting error */ +#define CHUNK_TYPE_MISMATCH (1 << 8) /* Extent type does not match its block group */ + static void *print_status_check(void *p) { struct task_ctx *priv = p; @@ -468,6 +494,18 @@ return 0; } +static enum btrfs_check_mode parse_check_mode(const char *str) +{ + if (strcmp(str, "lowmem") == 0) + return CHECK_MODE_LOWMEM; + if (strcmp(str, "orig") == 0) + return CHECK_MODE_ORIGINAL; + if (strcmp(str, "original") == 0) + return CHECK_MODE_ORIGINAL; + + return CHECK_MODE_UNKNOWN; +} + /* Compatible function to allow reuse of old codes */ static u64 first_extent_gap(struct rb_root *holes) { @@ -1953,8 +1991,14 @@ return ret; } +struct node_refs { + u64 bytenr[BTRFS_MAX_LEVEL]; + u64 refs[BTRFS_MAX_LEVEL]; +}; + static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, - struct walk_control *wc, int *level) + struct walk_control *wc, int *level, + struct node_refs *nrefs) { enum btrfs_tree_block_status status; u64 bytenr; @@ -1967,12 +2011,20 @@ WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = btrfs_lookup_extent_info(NULL, root, + + if (path->nodes[*level]->start == nrefs->bytenr[*level]) { + refs = nrefs->refs[*level]; + ret = 0; + } else { + ret = btrfs_lookup_extent_info(NULL, root, path->nodes[*level]->start, *level, 1, &refs, NULL); - if (ret < 0) { - err = ret; - goto out; + if (ret < 0) { + err = ret; + goto out; + } + nrefs->bytenr[*level] = path->nodes[*level]->start; + 
nrefs->refs[*level] = refs; } if (refs > 1) { @@ -2003,10 +2055,19 @@ bytenr = btrfs_node_blockptr(cur, path->slots[*level]); ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); blocksize = root->nodesize; - ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1, - 1, &refs, NULL); - if (ret < 0) - refs = 0; + + if (bytenr == nrefs->bytenr[*level - 1]) { + refs = nrefs->refs[*level - 1]; + } else { + ret = btrfs_lookup_extent_info(NULL, root, bytenr, + *level - 1, 1, &refs, NULL); + if (ret < 0) { + refs = 0; + } else { + nrefs->bytenr[*level - 1] = bytenr; + nrefs->refs[*level - 1] = refs; + } + } if (refs > 1) { ret = enter_shared_node(root, bytenr, refs, @@ -3619,6 +3680,7 @@ struct orphan_data_extent *orphan; struct orphan_data_extent *tmp; enum btrfs_tree_block_status status; + struct node_refs nrefs; /* * Reuse the corrupt_block cache tree to record corrupted tree block @@ -3640,6 +3702,7 @@ memset(&root_node, 0, sizeof(root_node)); cache_tree_init(&root_node.root_cache); cache_tree_init(&root_node.inode_cache); + memset(&nrefs, 0, sizeof(nrefs)); /* Move the orphan extent record to corresponding inode_record */ list_for_each_entry_safe(orphan, tmp, @@ -3689,7 +3752,7 @@ } while (1) { - wret = walk_down_tree(root, &path, wc, &level); + wret = walk_down_tree(root, &path, wc, &level, &nrefs); if (wret < 0) ret = wret; if (wret != 0) @@ -8514,166 +8577,1619 @@ goto again; } -static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans, - struct btrfs_root *root, int overwrite) +/* + * Check backrefs of a tree block given by @bytenr or @eb. 
+ * + * @root: the root containing the @bytenr or @eb + * @eb: tree block extent buffer, can be NULL + * @bytenr: bytenr of the tree block to search + * @level: tree level of the tree block + * @owner: owner of the tree block + * + * Return >0 for any error found and output error message + * Return 0 for no error found + */ +static int check_tree_block_ref(struct btrfs_root *root, + struct extent_buffer *eb, u64 bytenr, + int level, u64 owner) { - struct extent_buffer *c; - struct extent_buffer *old = root->node; - int level; + struct btrfs_key key; + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct btrfs_path path; + struct btrfs_extent_item *ei; + struct btrfs_extent_inline_ref *iref; + struct extent_buffer *leaf; + unsigned long end; + unsigned long ptr; + int slot; + int skinny_level; + int type; + u32 nodesize = root->nodesize; + u32 item_size; + u64 offset; + int found_ref = 0; + int err = 0; int ret; - struct btrfs_disk_key disk_key = {0,0,0}; - level = 0; + btrfs_init_path(&path); + key.objectid = bytenr; + if (btrfs_fs_incompat(root->fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)) + key.type = BTRFS_METADATA_ITEM_KEY; + else + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = (u64)-1; - if (overwrite) { - c = old; - extent_buffer_get(c); - goto init; + /* Search for the backref in extent tree */ + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); + if (ret < 0) { + err |= BACKREF_MISSING; + goto out; } - c = btrfs_alloc_free_block(trans, root, - root->nodesize, - root->root_key.objectid, - &disk_key, level, 0, 0); - if (IS_ERR(c)) { - c = old; - extent_buffer_get(c); - overwrite = 1; + ret = btrfs_previous_extent_item(extent_root, &path, bytenr); + if (ret) { + err |= BACKREF_MISSING; + goto out; } -init: - memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); - btrfs_set_header_level(c, level); - btrfs_set_header_bytenr(c, c->start); - btrfs_set_header_generation(c, trans->transid); - 
btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); - btrfs_set_header_owner(c, root->root_key.objectid); - write_extent_buffer(c, root->fs_info->fsid, - btrfs_header_fsid(), BTRFS_FSID_SIZE); + leaf = path.nodes[0]; + slot = path.slots[0]; + btrfs_item_key_to_cpu(leaf, &key, slot); - write_extent_buffer(c, root->fs_info->chunk_tree_uuid, - btrfs_header_chunk_tree_uuid(c), - BTRFS_UUID_SIZE); + ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); + + if (key.type == BTRFS_METADATA_ITEM_KEY) { + skinny_level = (int)key.offset; + iref = (struct btrfs_extent_inline_ref *)(ei + 1); + } else { + struct btrfs_tree_block_info *info; + + info = (struct btrfs_tree_block_info *)(ei + 1); + skinny_level = btrfs_tree_block_level(leaf, info); + iref = (struct btrfs_extent_inline_ref *)(info + 1); + } + + if (eb) { + u64 header_gen; + u64 extent_gen; + + if (!(btrfs_extent_flags(leaf, ei) & + BTRFS_EXTENT_FLAG_TREE_BLOCK)) { + error( + "extent[%llu %u] backref type mismatch, missing bit: %llx", + key.objectid, nodesize, + BTRFS_EXTENT_FLAG_TREE_BLOCK); + err = BACKREF_MISMATCH; + } + header_gen = btrfs_header_generation(eb); + extent_gen = btrfs_extent_generation(leaf, ei); + if (header_gen != extent_gen) { + error( + "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu", + key.objectid, nodesize, header_gen, + extent_gen); + err = BACKREF_MISMATCH; + } + if (level != skinny_level) { + error( + "extent[%llu %u] level mismatch, wanted: %u, have: %u", + key.objectid, nodesize, level, skinny_level); + err = BACKREF_MISMATCH; + } + if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) { + error( + "extent[%llu %u] is referred by other roots than %llu", + key.objectid, nodesize, root->objectid); + err = BACKREF_MISMATCH; + } + } - btrfs_mark_buffer_dirty(c); /* - * this case can happen in the following case: - * - * 1.overwrite previous root. 
- * - * 2.reinit reloc data root, this is because we skip pin - * down reloc data tree before which means we can allocate - * same block bytenr here. + * Iterate the extent/metadata item to find the exact backref */ - if (old->start == c->start) { - btrfs_set_root_generation(&root->root_item, - trans->transid); - root->root_item.level = btrfs_header_level(root->node); - ret = btrfs_update_root(trans, root->fs_info->tree_root, - &root->root_key, &root->root_item); - if (ret) { - free_extent_buffer(c); - return ret; + item_size = btrfs_item_size_nr(leaf, slot); + ptr = (unsigned long)iref; + end = (unsigned long)ei + item_size; + while (ptr < end) { + iref = (struct btrfs_extent_inline_ref *)ptr; + type = btrfs_extent_inline_ref_type(leaf, iref); + offset = btrfs_extent_inline_ref_offset(leaf, iref); + + if (type == BTRFS_TREE_BLOCK_REF_KEY && + (offset == root->objectid || offset == owner)) { + found_ref = 1; + } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) { + /* Check if the backref points to valid referencer */ + found_ref = !check_tree_block_ref(root, NULL, offset, + level + 1, owner); } + + if (found_ref) + break; + ptr += btrfs_extent_inline_ref_size(type); } - free_extent_buffer(old); - root->node = c; - add_root_to_dirty_list(root); - return 0; + + /* + * Inlined extent item doesn't have what we need, check + * TREE_BLOCK_REF_KEY + */ + if (!found_ref) { + btrfs_release_path(&path); + key.objectid = bytenr; + key.type = BTRFS_TREE_BLOCK_REF_KEY; + key.offset = root->objectid; + + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); + if (!ret) + found_ref = 1; + } + if (!found_ref) + err |= BACKREF_MISSING; +out: + btrfs_release_path(&path); + if (eb && (err & BACKREF_MISSING)) + error("extent[%llu %u] backref lost (owner: %llu, level: %u)", + bytenr, nodesize, owner, level); + return err; } -static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int tree_root) +/* + * Check EXTENT_DATA item, mainly for its 
dbackref in extent tree + * + * Return >0 any error found and output error message + * Return 0 for no error found + */ +static int check_extent_data_item(struct btrfs_root *root, + struct extent_buffer *eb, int slot) { - struct extent_buffer *tmp; - struct btrfs_root_item *ri; - struct btrfs_key key; - u64 bytenr; - u32 nodesize; - int level = btrfs_header_level(eb); - int nritems; + struct btrfs_file_extent_item *fi; + struct btrfs_path path; + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct btrfs_key fi_key; + struct btrfs_key dbref_key; + struct extent_buffer *leaf; + struct btrfs_extent_item *ei; + struct btrfs_extent_inline_ref *iref; + struct btrfs_extent_data_ref *dref; + u64 owner; + u64 file_extent_gen; + u64 disk_bytenr; + u64 disk_num_bytes; + u64 extent_num_bytes; + u64 extent_flags; + u64 extent_gen; + u32 item_size; + unsigned long end; + unsigned long ptr; + int type; + u64 ref_root; + int found_dbackref = 0; + int err = 0; int ret; - int i; - /* - * If we have pinned this block before, don't pin it again. - * This can not only avoid forever loop with broken filesystem - * but also give us some speedups. 
- */ - if (test_range_bit(&fs_info->pinned_extents, eb->start, - eb->start + eb->len - 1, EXTENT_DIRTY, 0)) + btrfs_item_key_to_cpu(eb, &fi_key, slot); + fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); + file_extent_gen = btrfs_file_extent_generation(eb, fi); + + /* Nothing to check for hole and inline data extents */ + if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE || + btrfs_file_extent_disk_bytenr(eb, fi) == 0) return 0; - btrfs_pin_extent(fs_info, eb->start, eb->len); + disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi); + disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); + extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi); + + /* Check unaligned disk_num_bytes and num_bytes */ + if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) { + error( +"file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u", + fi_key.objectid, fi_key.offset, disk_num_bytes, + root->sectorsize); + err |= BYTES_UNALIGNED; + } else { + data_bytes_allocated += disk_num_bytes; + } + if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) { + error( +"file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u", + fi_key.objectid, fi_key.offset, extent_num_bytes, + root->sectorsize); + err |= BYTES_UNALIGNED; + } else { + data_bytes_referenced += extent_num_bytes; + } + owner = btrfs_header_owner(eb); - nodesize = btrfs_super_nodesize(fs_info->super_copy); - nritems = btrfs_header_nritems(eb); - for (i = 0; i < nritems; i++) { - if (level == 0) { - btrfs_item_key_to_cpu(eb, &key, i); - if (key.type != BTRFS_ROOT_ITEM_KEY) - continue; - /* Skip the extent root and reloc roots */ - if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID || - key.objectid == BTRFS_TREE_RELOC_OBJECTID || - key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) - continue; - ri = btrfs_item_ptr(eb, i, struct btrfs_root_item); - bytenr = btrfs_disk_root_bytenr(eb, ri); + /* Check the extent item of the file extent in extent tree */ + 
btrfs_init_path(&path); + dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi); + dbref_key.type = BTRFS_EXTENT_ITEM_KEY; + dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi); - /* - * If at any point we start needing the real root we - * will have to build a stump root for the root we are - * in, but for now this doesn't actually use the root so - * just pass in extent_root. - */ - tmp = read_tree_block(fs_info->extent_root, bytenr, - nodesize, 0); - if (!extent_buffer_uptodate(tmp)) { - fprintf(stderr, "Error reading root block\n"); - return -EIO; - } - ret = pin_down_tree_blocks(fs_info, tmp, 0); - free_extent_buffer(tmp); - if (ret) - return ret; - } else { - bytenr = btrfs_node_blockptr(eb, i); + ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0); + if (ret) { + err |= BACKREF_MISSING; + goto error; + } - /* If we aren't the tree root don't read the block */ - if (level == 1 && !tree_root) { - btrfs_pin_extent(fs_info, bytenr, nodesize); - continue; - } + leaf = path.nodes[0]; + slot = path.slots[0]; + ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); - tmp = read_tree_block(fs_info->extent_root, bytenr, - nodesize, 0); - if (!extent_buffer_uptodate(tmp)) { - fprintf(stderr, "Error reading tree block\n"); - return -EIO; - } - ret = pin_down_tree_blocks(fs_info, tmp, tree_root); - free_extent_buffer(tmp); - if (ret) - return ret; + extent_flags = btrfs_extent_flags(leaf, ei); + extent_gen = btrfs_extent_generation(leaf, ei); + + if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) { + error( + "extent[%llu %llu] backref type mismatch, wanted bit: %llx", + disk_bytenr, disk_num_bytes, + BTRFS_EXTENT_FLAG_DATA); + err |= BACKREF_MISMATCH; + } + + if (file_extent_gen < extent_gen) { + error( +"extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu", + disk_bytenr, disk_num_bytes, file_extent_gen, + extent_gen); + err |= BACKREF_MISMATCH; + } + + /* Check data backref inside that extent item */ + item_size = 
btrfs_item_size_nr(leaf, path.slots[0]); + iref = (struct btrfs_extent_inline_ref *)(ei + 1); + ptr = (unsigned long)iref; + end = (unsigned long)ei + item_size; + while (ptr < end) { + iref = (struct btrfs_extent_inline_ref *)ptr; + type = btrfs_extent_inline_ref_type(leaf, iref); + dref = (struct btrfs_extent_data_ref *)(&iref->offset); + + if (type == BTRFS_EXTENT_DATA_REF_KEY) { + ref_root = btrfs_extent_data_ref_root(leaf, dref); + if (ref_root == owner || ref_root == root->objectid) + found_dbackref = 1; + } else if (type == BTRFS_SHARED_DATA_REF_KEY) { + found_dbackref = !check_tree_block_ref(root, NULL, + btrfs_extent_inline_ref_offset(leaf, iref), + 0, owner); } + + if (found_dbackref) + break; + ptr += btrfs_extent_inline_ref_size(type); } - return 0; -} + /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */ + if (!found_dbackref) { + btrfs_release_path(&path); -static int pin_metadata_blocks(struct btrfs_fs_info *fs_info) -{ - int ret; + btrfs_init_path(&path); + dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi); + dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY; + dbref_key.offset = hash_extent_data_ref(root->objectid, + fi_key.objectid, fi_key.offset); - ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0); - if (ret) - return ret; + ret = btrfs_search_slot(NULL, root->fs_info->extent_root, + &dbref_key, &path, 0, 0); + if (!ret) + found_dbackref = 1; + } - return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1); + if (!found_dbackref) + err |= BACKREF_MISSING; +error: + btrfs_release_path(&path); + if (err & BACKREF_MISSING) { + error("data extent[%llu %llu] backref lost", + disk_bytenr, disk_num_bytes); + } + return err; } -static int reset_block_groups(struct btrfs_fs_info *fs_info) -{ +/* + * Get real tree block level for the case like shared block + * Return >= 0 as tree level + * Return <0 for error + */ +static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr) +{ + struct extent_buffer 
*eb; + struct btrfs_path path; + struct btrfs_key key; + struct btrfs_extent_item *ei; + u64 flags; + u64 transid; + u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); + u8 backref_level; + u8 header_level; + int ret; + + /* Search extent tree for extent generation and level */ + key.objectid = bytenr; + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = (u64)-1; + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0); + if (ret < 0) + goto release_out; + ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr); + if (ret < 0) + goto release_out; + if (ret > 0) { + ret = -ENOENT; + goto release_out; + } + + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + ei = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_extent_item); + flags = btrfs_extent_flags(path.nodes[0], ei); + if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) { + ret = -ENOENT; + goto release_out; + } + + /* Get transid for later read_tree_block() check */ + transid = btrfs_extent_generation(path.nodes[0], ei); + + /* Get backref level as one source */ + if (key.type == BTRFS_METADATA_ITEM_KEY) { + backref_level = key.offset; + } else { + struct btrfs_tree_block_info *info; + + info = (struct btrfs_tree_block_info *)(ei + 1); + backref_level = btrfs_tree_block_level(path.nodes[0], info); + } + btrfs_release_path(&path); + + /* Get level from tree block as an alternative source */ + eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid); + if (!extent_buffer_uptodate(eb)) { + free_extent_buffer(eb); + return -EIO; + } + header_level = btrfs_header_level(eb); + free_extent_buffer(eb); + + if (header_level != backref_level) + return -EIO; + return header_level; + +release_out: + btrfs_release_path(&path); + return ret; +} + +/* + * Check if a tree block backref is valid (points to a valid tree block) + * if level == -1, level will be resolved + * Return >0 for any error found and print error message + */ +static 
int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id, + u64 bytenr, int level) +{ + struct btrfs_root *root; + struct btrfs_key key; + struct btrfs_path path; + struct extent_buffer *eb; + struct extent_buffer *node; + u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); + int err = 0; + int ret; + + /* Query level for level == -1 special case */ + if (level == -1) + level = query_tree_block_level(fs_info, bytenr); + if (level < 0) { + err |= REFERENCER_MISSING; + goto out; + } + + key.objectid = root_id; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + + root = btrfs_read_fs_root(fs_info, &key); + if (IS_ERR(root)) { + err |= REFERENCER_MISSING; + goto out; + } + + /* Read out the tree block to get item/node key */ + eb = read_tree_block(root, bytenr, root->nodesize, 0); + if (!extent_buffer_uptodate(eb)) { + err |= REFERENCER_MISSING; + free_extent_buffer(eb); + goto out; + } + + /* Empty tree, no need to check key */ + if (!btrfs_header_nritems(eb) && !level) { + free_extent_buffer(eb); + goto out; + } + + if (level) + btrfs_node_key_to_cpu(eb, &key, 0); + else + btrfs_item_key_to_cpu(eb, &key, 0); + + free_extent_buffer(eb); + + btrfs_init_path(&path); + /* Search with the first key, to ensure we can reach it */ + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret) { + err |= REFERENCER_MISSING; + goto release_out; + } + + node = path.nodes[level]; + if (btrfs_header_bytenr(node) != bytenr) { + error( + "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu", + bytenr, nodesize, bytenr, + btrfs_header_bytenr(node)); + err |= REFERENCER_MISMATCH; + } + if (btrfs_header_level(node) != level) { + error( + "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d", + bytenr, nodesize, level, + btrfs_header_level(node)); + err |= REFERENCER_MISMATCH; + } + +release_out: + btrfs_release_path(&path); +out: + if (err & REFERENCER_MISSING) { + if (level < 0) + error("extent [%llu %d] lost 
referencer (owner: %llu)", + bytenr, nodesize, root_id); + else + error( + "extent [%llu %d] lost referencer (owner: %llu, level: %u)", + bytenr, nodesize, root_id, level); + } + + return err; +} + +/* + * Check referencer for shared block backref + * If level == -1, this function will resolve the level. + */ +static int check_shared_block_backref(struct btrfs_fs_info *fs_info, + u64 parent, u64 bytenr, int level) +{ + struct extent_buffer *eb; + u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); + u32 nr; + int found_parent = 0; + int i; + + eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0); + if (!extent_buffer_uptodate(eb)) + goto out; + + if (level == -1) + level = query_tree_block_level(fs_info, bytenr); + if (level < 0) + goto out; + + if (level + 1 != btrfs_header_level(eb)) + goto out; + + nr = btrfs_header_nritems(eb); + for (i = 0; i < nr; i++) { + if (bytenr == btrfs_node_blockptr(eb, i)) { + found_parent = 1; + break; + } + } +out: + free_extent_buffer(eb); + if (!found_parent) { + error( + "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)", + bytenr, nodesize, parent, level); + return REFERENCER_MISSING; + } + return 0; +} + +/* + * Check referencer for normal (inlined) data ref + * If len == 0, it will be resolved by searching in extent tree + */ +static int check_extent_data_backref(struct btrfs_fs_info *fs_info, + u64 root_id, u64 objectid, u64 offset, + u64 bytenr, u64 len, u32 count) +{ + struct btrfs_root *root; + struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_key key; + struct btrfs_path path; + struct extent_buffer *leaf; + struct btrfs_file_extent_item *fi; + u32 found_count = 0; + int slot; + int ret = 0; + + if (!len) { + key.objectid = bytenr; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = (u64)-1; + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); + if (ret < 0) + goto out; + ret = btrfs_previous_extent_item(extent_root, &path, bytenr); + 
if (ret) + goto out; + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + if (key.objectid != bytenr || + key.type != BTRFS_EXTENT_ITEM_KEY) + goto out; + len = key.offset; + btrfs_release_path(&path); + } + key.objectid = root_id; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + key.offset = (u64)-1; + btrfs_init_path(&path); + + root = btrfs_read_fs_root(fs_info, &key); + if (IS_ERR(root)) + goto out; + + key.objectid = objectid; + key.type = BTRFS_EXTENT_DATA_KEY; + /* + * It can be nasty as data backref offset is + * file offset - file extent offset, which is smaller or + * equal to original backref offset. The only special case is + * overflow. So we need to special check and do further search. + */ + key.offset = offset & (1ULL << 63) ? 0 : offset; + + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret < 0) + goto out; + + /* + * Search afterwards to get correct one + * NOTE: As we must do a comprehensive check on the data backref to + * make sure the dref count also matches, we must iterate all file + * extents for that inode. 
+ */ + while (1) { + leaf = path.nodes[0]; + slot = path.slots[0]; + + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY) + break; + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); + /* + * Except normal disk bytenr and disk num bytes, we still + * need to do extra check on dbackref offset as + * dbackref offset = file_offset - file_extent_offset + */ + if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr && + btrfs_file_extent_disk_num_bytes(leaf, fi) == len && + (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) == + offset) + found_count++; + + ret = btrfs_next_item(root, &path); + if (ret) + break; + } +out: + btrfs_release_path(&path); + if (found_count != count) { + error( +"extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u", + bytenr, len, root_id, objectid, offset, count, found_count); + return REFERENCER_MISSING; + } + return 0; +} + +/* + * Check if the referencer of a shared data backref exists + */ +static int check_shared_data_backref(struct btrfs_fs_info *fs_info, + u64 parent, u64 bytenr) +{ + struct extent_buffer *eb; + struct btrfs_key key; + struct btrfs_file_extent_item *fi; + u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); + u32 nr; + int found_parent = 0; + int i; + + eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0); + if (!extent_buffer_uptodate(eb)) + goto out; + + nr = btrfs_header_nritems(eb); + for (i = 0; i < nr; i++) { + btrfs_item_key_to_cpu(eb, &key, i); + if (key.type != BTRFS_EXTENT_DATA_KEY) + continue; + + fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); + if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE) + continue; + + if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) { + found_parent = 1; + break; + } + } + +out: + free_extent_buffer(eb); + if (!found_parent) { + error("shared extent %llu referencer lost (parent: %llu)", + bytenr, parent); + return 
REFERENCER_MISSING; + } + return 0; +} + +/* + * This function will check a given extent item, including its backref and + * itself (like crossing stripe boundary and type) + * + * Since we don't use extent_record anymore, introduce new error bit + */ +static int check_extent_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot) +{ + struct btrfs_extent_item *ei; + struct btrfs_extent_inline_ref *iref; + struct btrfs_extent_data_ref *dref; + unsigned long end; + unsigned long ptr; + int type; + u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); + u32 item_size = btrfs_item_size_nr(eb, slot); + u64 flags; + u64 offset; + int metadata = 0; + int level; + struct btrfs_key key; + int ret; + int err = 0; + + btrfs_item_key_to_cpu(eb, &key, slot); + if (key.type == BTRFS_EXTENT_ITEM_KEY) + bytes_used += key.offset; + else + bytes_used += nodesize; + + if (item_size < sizeof(*ei)) { + /* + * COMPAT_EXTENT_TREE_V0 case, but it's already a super + * old thing when on disk format is still un-determined. 
+ * No need to care about it anymore + */ + error("unsupported COMPAT_EXTENT_TREE_V0 detected"); + return -ENOTTY; + } + + ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item); + flags = btrfs_extent_flags(eb, ei); + + if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) + metadata = 1; + if (metadata && check_crossing_stripes(key.objectid, eb->len)) { + error("bad metadata [%llu, %llu) crossing stripe boundary", + key.objectid, key.objectid + nodesize); + err |= CROSSING_STRIPE_BOUNDARY; + } + + ptr = (unsigned long)(ei + 1); + + if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) { + /* Old EXTENT_ITEM metadata */ + struct btrfs_tree_block_info *info; + + info = (struct btrfs_tree_block_info *)ptr; + level = btrfs_tree_block_level(eb, info); + ptr += sizeof(struct btrfs_tree_block_info); + } else { + /* New METADATA_ITEM */ + level = key.offset; + } + end = (unsigned long)ei + item_size; + + if (ptr >= end) { + err |= ITEM_SIZE_MISMATCH; + goto out; + } + + /* Now check every backref in this extent item */ +next: + iref = (struct btrfs_extent_inline_ref *)ptr; + type = btrfs_extent_inline_ref_type(eb, iref); + offset = btrfs_extent_inline_ref_offset(eb, iref); + switch (type) { + case BTRFS_TREE_BLOCK_REF_KEY: + ret = check_tree_block_backref(fs_info, offset, key.objectid, + level); + err |= ret; + break; + case BTRFS_SHARED_BLOCK_REF_KEY: + ret = check_shared_block_backref(fs_info, offset, key.objectid, + level); + err |= ret; + break; + case BTRFS_EXTENT_DATA_REF_KEY: + dref = (struct btrfs_extent_data_ref *)(&iref->offset); + ret = check_extent_data_backref(fs_info, + btrfs_extent_data_ref_root(eb, dref), + btrfs_extent_data_ref_objectid(eb, dref), + btrfs_extent_data_ref_offset(eb, dref), + key.objectid, key.offset, + btrfs_extent_data_ref_count(eb, dref)); + err |= ret; + break; + case BTRFS_SHARED_DATA_REF_KEY: + ret = check_shared_data_backref(fs_info, offset, key.objectid); + err |= ret; + break; + default: + error("extent[%llu %d %llu] has unknown ref type: %d", 
+ key.objectid, key.type, key.offset, type); + err |= UNKNOWN_TYPE; + goto out; + } + + ptr += btrfs_extent_inline_ref_size(type); + if (ptr < end) + goto next; + +out: + return err; +} + +/* + * Check if a dev extent item is referred correctly by its chunk + */ +static int check_dev_extent_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot) +{ + struct btrfs_root *chunk_root = fs_info->chunk_root; + struct btrfs_dev_extent *ptr; + struct btrfs_path path; + struct btrfs_key chunk_key; + struct btrfs_key devext_key; + struct btrfs_chunk *chunk; + struct extent_buffer *l; + int num_stripes; + u64 length; + int i; + int found_chunk = 0; + int ret; + + btrfs_item_key_to_cpu(eb, &devext_key, slot); + ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent); + length = btrfs_dev_extent_length(eb, ptr); + + chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr); + chunk_key.type = BTRFS_CHUNK_ITEM_KEY; + chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr); + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0); + if (ret) + goto out; + + l = path.nodes[0]; + chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk); + if (btrfs_chunk_length(l, chunk) != length) + goto out; + + num_stripes = btrfs_chunk_num_stripes(l, chunk); + for (i = 0; i < num_stripes; i++) { + u64 devid = btrfs_stripe_devid_nr(l, chunk, i); + u64 offset = btrfs_stripe_offset_nr(l, chunk, i); + + if (devid == devext_key.objectid && + offset == devext_key.offset) { + found_chunk = 1; + break; + } + } +out: + btrfs_release_path(&path); + if (!found_chunk) { + error( + "device extent[%llu, %llu, %llu] did not find the related chunk", + devext_key.objectid, devext_key.offset, length); + return REFERENCER_MISSING; + } + return 0; +} + +/* + * Check if the used space is correct with the dev item + */ +static int check_dev_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot) +{ + struct btrfs_root *dev_root = 
fs_info->dev_root; + struct btrfs_dev_item *dev_item; + struct btrfs_path path; + struct btrfs_key key; + struct btrfs_dev_extent *ptr; + u64 dev_id; + u64 used; + u64 total = 0; + int ret; + + dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item); + dev_id = btrfs_device_id(eb, dev_item); + used = btrfs_device_bytes_used(eb, dev_item); + + key.objectid = dev_id; + key.type = BTRFS_DEV_EXTENT_KEY; + key.offset = 0; + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0); + if (ret < 0) { + btrfs_item_key_to_cpu(eb, &key, slot); + error("cannot find any related dev extent for dev[%llu, %u, %llu]", + key.objectid, key.type, key.offset); + btrfs_release_path(&path); + return REFERENCER_MISSING; + } + + /* Iterate dev_extents to calculate the used space of a device */ + while (1) { + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + + if (key.objectid > dev_id) + break; + if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id) + goto next; + + ptr = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_dev_extent); + total += btrfs_dev_extent_length(path.nodes[0], ptr); +next: + ret = btrfs_next_item(dev_root, &path); + if (ret) + break; + } + btrfs_release_path(&path); + + if (used != total) { + /* Reload the dev item's own key so the message identifies it */ + btrfs_item_key_to_cpu(eb, &key, slot); + error( +"Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]", + total, used, key.objectid, + key.type, key.offset); + return ACCOUNTING_MISMATCH; + } + return 0; +} + +/* + * Check a block group item with its referencer (chunk) and its used space + * with extent/metadata item + */ +static int check_block_group_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot) +{ + struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_root *chunk_root = fs_info->chunk_root; + struct btrfs_block_group_item *bi; + struct btrfs_block_group_item bg_item; + struct btrfs_path path; + struct btrfs_key bg_key; 
+ struct btrfs_key chunk_key; + struct btrfs_key extent_key; + struct btrfs_chunk *chunk; + struct extent_buffer *leaf; + struct btrfs_extent_item *ei; + u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); + u64 flags; + u64 bg_flags; + u64 used; + u64 total = 0; + int ret; + int err = 0; + + btrfs_item_key_to_cpu(eb, &bg_key, slot); + bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item); + read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item)); + used = btrfs_block_group_used(&bg_item); + bg_flags = btrfs_block_group_flags(&bg_item); + + chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + chunk_key.type = BTRFS_CHUNK_ITEM_KEY; + chunk_key.offset = bg_key.objectid; + + btrfs_init_path(&path); + /* Search for the referencer chunk */ + ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0); + if (ret) { + error( + "block group[%llu %llu] did not find the related chunk item", + bg_key.objectid, bg_key.offset); + err |= REFERENCER_MISSING; + } else { + chunk = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_chunk); + if (btrfs_chunk_length(path.nodes[0], chunk) != + bg_key.offset) { + error( + "block group[%llu %llu] related chunk item length does not match", + bg_key.objectid, bg_key.offset); + err |= REFERENCER_MISMATCH; + } + } + btrfs_release_path(&path); + + /* Search from the block group bytenr */ + extent_key.objectid = bg_key.objectid; + extent_key.type = 0; + extent_key.offset = 0; + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0); + if (ret < 0) + goto out; + + /* Iterate extent tree to account used space */ + while (1) { + leaf = path.nodes[0]; + btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]); + if (extent_key.objectid >= bg_key.objectid + bg_key.offset) + break; + + if (extent_key.type != BTRFS_METADATA_ITEM_KEY && + extent_key.type != BTRFS_EXTENT_ITEM_KEY) + goto next; + if (extent_key.objectid < bg_key.objectid) + goto next; + + if 
(extent_key.type == BTRFS_METADATA_ITEM_KEY) + total += nodesize; + else + total += extent_key.offset; + + ei = btrfs_item_ptr(leaf, path.slots[0], + struct btrfs_extent_item); + flags = btrfs_extent_flags(leaf, ei); + if (flags & BTRFS_EXTENT_FLAG_DATA) { + if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) { + error( + "bad extent[%llu, %llu) type mismatch with chunk", + extent_key.objectid, + extent_key.objectid + extent_key.offset); + err |= CHUNK_TYPE_MISMATCH; + } + } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { + if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM | + BTRFS_BLOCK_GROUP_METADATA))) { + error( + "bad extent[%llu, %llu) type mismatch with chunk", + extent_key.objectid, + extent_key.objectid + nodesize); + err |= CHUNK_TYPE_MISMATCH; + } + } +next: + ret = btrfs_next_item(extent_root, &path); + if (ret) + break; + } + +out: + btrfs_release_path(&path); + + if (total != used) { + error( + "block group[%llu %llu] used %llu but extent items used %llu", + bg_key.objectid, bg_key.offset, used, total); + err |= ACCOUNTING_MISMATCH; + } + return err; +} + +/* + * Check a chunk item. 
+ * Including checking all referred dev_extents and block group + */ +static int check_chunk_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot) +{ + struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_root *dev_root = fs_info->dev_root; + struct btrfs_path path; + struct btrfs_key chunk_key; + struct btrfs_key bg_key; + struct btrfs_key devext_key; + struct btrfs_chunk *chunk; + struct extent_buffer *leaf; + struct btrfs_block_group_item *bi; + struct btrfs_block_group_item bg_item; + struct btrfs_dev_extent *ptr; + u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy); + u64 length; + u64 chunk_end; + u64 type; + u64 profile; + int num_stripes; + u64 offset; + u64 objectid; + int i; + int ret; + int err = 0; + + btrfs_item_key_to_cpu(eb, &chunk_key, slot); + chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk); + length = btrfs_chunk_length(eb, chunk); + chunk_end = chunk_key.offset + length; + if (!IS_ALIGNED(length, sectorsize)) { + error("chunk[%llu %llu) not aligned to %u", + chunk_key.offset, chunk_end, sectorsize); + err |= BYTES_UNALIGNED; + goto out; + } + + type = btrfs_chunk_type(eb, chunk); + profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK; + if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { + error("chunk[%llu %llu) has no chunk type", + chunk_key.offset, chunk_end); + err |= UNKNOWN_TYPE; + } + if (profile && (profile & (profile - 1))) { + error("chunk[%llu %llu) multiple profiles detected: %llx", + chunk_key.offset, chunk_end, profile); + err |= UNKNOWN_TYPE; + } + + bg_key.objectid = chunk_key.offset; + bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + bg_key.offset = length; + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0); + if (ret) { + error( + "chunk[%llu %llu) did not find the related block group item", + chunk_key.offset, chunk_end); + err |= REFERENCER_MISSING; + } else{ + leaf = path.nodes[0]; + bi = btrfs_item_ptr(leaf, path.slots[0], + struct 
btrfs_block_group_item); + read_extent_buffer(leaf, &bg_item, (unsigned long)bi, + sizeof(bg_item)); + if (btrfs_block_group_flags(&bg_item) != type) { + error( +"chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu", + chunk_key.offset, chunk_end, type, + btrfs_block_group_flags(&bg_item)); + err |= REFERENCER_MISSING; + } + } + + num_stripes = btrfs_chunk_num_stripes(eb, chunk); + for (i = 0; i < num_stripes; i++) { + btrfs_release_path(&path); + btrfs_init_path(&path); + devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i); + devext_key.type = BTRFS_DEV_EXTENT_KEY; + devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i); + + ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path, + 0, 0); + if (ret) + goto not_match_dev; + + leaf = path.nodes[0]; + ptr = btrfs_item_ptr(leaf, path.slots[0], + struct btrfs_dev_extent); + objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr); + offset = btrfs_dev_extent_chunk_offset(leaf, ptr); + if (objectid != chunk_key.objectid || + offset != chunk_key.offset || + btrfs_dev_extent_length(leaf, ptr) != length) + goto not_match_dev; + continue; +not_match_dev: + err |= BACKREF_MISSING; + error( + "chunk[%llu %llu) stripe %d did not find the related dev extent", + chunk_key.objectid, chunk_end, i); + continue; + } + btrfs_release_path(&path); +out: + return err; +} + +/* + * Main entry function to check known items and update related accounting info + */ +static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_key key; + int slot = 0; + int type; + struct btrfs_extent_data_ref *dref; + int ret; + int err = 0; + +next: + btrfs_item_key_to_cpu(eb, &key, slot); + type = btrfs_key_type(&key); + + switch (type) { + case BTRFS_EXTENT_DATA_KEY: + ret = check_extent_data_item(root, eb, slot); + err |= ret; + break; + case BTRFS_BLOCK_GROUP_ITEM_KEY: + ret = check_block_group_item(fs_info, eb, slot); + err |= 
ret; + break; + case BTRFS_DEV_ITEM_KEY: + ret = check_dev_item(fs_info, eb, slot); + err |= ret; + break; + case BTRFS_CHUNK_ITEM_KEY: + ret = check_chunk_item(fs_info, eb, slot); + err |= ret; + break; + case BTRFS_DEV_EXTENT_KEY: + ret = check_dev_extent_item(fs_info, eb, slot); + err |= ret; + break; + case BTRFS_EXTENT_ITEM_KEY: + case BTRFS_METADATA_ITEM_KEY: + ret = check_extent_item(fs_info, eb, slot); + err |= ret; + break; + case BTRFS_EXTENT_CSUM_KEY: + total_csum_bytes += btrfs_item_size_nr(eb, slot); + break; + case BTRFS_TREE_BLOCK_REF_KEY: + ret = check_tree_block_backref(fs_info, key.offset, + key.objectid, -1); + err |= ret; + break; + case BTRFS_EXTENT_DATA_REF_KEY: + dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref); + ret = check_extent_data_backref(fs_info, + btrfs_extent_data_ref_root(eb, dref), + btrfs_extent_data_ref_objectid(eb, dref), + btrfs_extent_data_ref_offset(eb, dref), + key.objectid, 0, + btrfs_extent_data_ref_count(eb, dref)); + err |= ret; + break; + case BTRFS_SHARED_BLOCK_REF_KEY: + ret = check_shared_block_backref(fs_info, key.offset, + key.objectid, -1); + err |= ret; + break; + case BTRFS_SHARED_DATA_REF_KEY: + ret = check_shared_data_backref(fs_info, key.offset, + key.objectid); + err |= ret; + break; + default: + break; + } + + if (++slot < btrfs_header_nritems(eb)) + goto next; + + return err; +} + +/* + * Helper function for later fs/subvol tree check. To determine if a tree + * block should be checked. + * This function will ensure only the direct referencer with lowest rootid to + * check a fs/subvolume tree block. + * + * Backref check at extent tree would detect errors like missing subvolume + * tree, so we can do aggressive check to reduce duplicated checks. 
+ */ +static int should_check(struct btrfs_root *root, struct extent_buffer *eb) +{ + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct btrfs_key key; + struct btrfs_path path; + struct extent_buffer *leaf; + int slot; + struct btrfs_extent_item *ei; + unsigned long ptr; + unsigned long end; + int type; + u32 item_size; + u64 offset; + struct btrfs_extent_inline_ref *iref; + int ret; + + btrfs_init_path(&path); + key.objectid = btrfs_header_bytenr(eb); + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = (u64)-1; + + /* + * Any failure in backref resolving means we can't determine + * whom the tree block belongs to. + * So in that case, we need to check that tree block + */ + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); + if (ret < 0) + goto need_check; + + ret = btrfs_previous_extent_item(extent_root, &path, + btrfs_header_bytenr(eb)); + if (ret) + goto need_check; + + leaf = path.nodes[0]; + slot = path.slots[0]; + btrfs_item_key_to_cpu(leaf, &key, slot); + ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); + + if (key.type == BTRFS_METADATA_ITEM_KEY) { + iref = (struct btrfs_extent_inline_ref *)(ei + 1); + } else { + struct btrfs_tree_block_info *info; + + info = (struct btrfs_tree_block_info *)(ei + 1); + iref = (struct btrfs_extent_inline_ref *)(info + 1); + } + + item_size = btrfs_item_size_nr(leaf, slot); + ptr = (unsigned long)iref; + end = (unsigned long)ei + item_size; + while (ptr < end) { + iref = (struct btrfs_extent_inline_ref *)ptr; + type = btrfs_extent_inline_ref_type(leaf, iref); + offset = btrfs_extent_inline_ref_offset(leaf, iref); + + /* + * We only check the tree block if current root is + * the lowest referencer of it. + */ + if (type == BTRFS_TREE_BLOCK_REF_KEY && + offset < root->objectid) { + btrfs_release_path(&path); + return 0; + } + + ptr += btrfs_extent_inline_ref_size(type); + } + /* + * Normally we should also check keyed tree block ref, but that may be + * very time consuming. 
Inlined ref should already make us skip a lot + * of refs now. So skip search keyed tree block ref. + */ + +need_check: + btrfs_release_path(&path); + return 1; +} + +/* + * Traversal function for tree block. We will do: + * 1) Skip shared fs/subvolume tree blocks + * 2) Update related bytes accounting + * 3) Pre-order traversal + */ +static int traverse_tree_block(struct btrfs_root *root, + struct extent_buffer *node) +{ + struct extent_buffer *eb; + int level; + u64 nr; + int i; + int err = 0; + int ret; + + /* + * Skip shared fs/subvolume tree block, in that case they will + * be checked by referencer with lowest rootid + */ + if (is_fstree(root->objectid) && !should_check(root, node)) + return 0; + + /* Update bytes accounting */ + total_btree_bytes += node->len; + if (fs_root_objectid(btrfs_header_owner(node))) + total_fs_tree_bytes += node->len; + if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID) + total_extent_tree_bytes += node->len; + if (!found_old_backref && + btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID && + btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV && + !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC)) + found_old_backref = 1; + + /* pre-order tranversal, check itself first */ + level = btrfs_header_level(node); + ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node), + btrfs_header_level(node), + btrfs_header_owner(node)); + err |= ret; + if (err) + error( + "check %s failed root %llu bytenr %llu level %d, force continue check", + level ? 
"node":"leaf", root->objectid, + btrfs_header_bytenr(node), btrfs_header_level(node)); + + if (!level) { + btree_space_waste += btrfs_leaf_free_space(root, node); + ret = check_leaf_items(root, node); + err |= ret; + return err; + } + + nr = btrfs_header_nritems(node); + btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) * + sizeof(struct btrfs_key_ptr); + + /* Then check all its children */ + for (i = 0; i < nr; i++) { + u64 blocknr = btrfs_node_blockptr(node, i); + + /* + * As a btrfs tree has most 8 levels (0..7), so it's quite safe + * to call the function itself. + */ + eb = read_tree_block(root, blocknr, root->nodesize, 0); + if (extent_buffer_uptodate(eb)) { + ret = traverse_tree_block(root, eb); + err |= ret; + } + free_extent_buffer(eb); + } + + return err; +} + +/* + * Low memory usage version check_chunks_and_extents. + */ +static int check_chunks_and_extents_v2(struct btrfs_root *root) +{ + struct btrfs_path path; + struct btrfs_key key; + struct btrfs_root *root1; + struct btrfs_root *cur_root; + int err = 0; + int ret; + + root1 = root->fs_info->chunk_root; + ret = traverse_tree_block(root1, root1->node); + err |= ret; + + root1 = root->fs_info->tree_root; + ret = traverse_tree_block(root1, root1->node); + err |= ret; + + btrfs_init_path(&path); + key.objectid = BTRFS_EXTENT_TREE_OBJECTID; + key.offset = 0; + key.type = BTRFS_ROOT_ITEM_KEY; + + ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0); + if (ret) { + error("cannot find extent treet in tree_root"); + goto out; + } + + while (1) { + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + if (key.type != BTRFS_ROOT_ITEM_KEY) + goto next; + key.offset = (u64)-1; + + cur_root = btrfs_read_fs_root(root->fs_info, &key); + if (IS_ERR(cur_root) || !cur_root) { + error("failed to read tree: %lld", key.objectid); + goto next; + } + + ret = traverse_tree_block(cur_root, cur_root->node); + err |= ret; + +next: + ret = btrfs_next_item(root1, &path); + if (ret) + goto out; + } + +out: + 
btrfs_release_path(&path); + return err; +} + +static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root, int overwrite) +{ + struct extent_buffer *c; + struct extent_buffer *old = root->node; + int level; + int ret; + struct btrfs_disk_key disk_key = {0,0,0}; + + level = 0; + + if (overwrite) { + c = old; + extent_buffer_get(c); + goto init; + } + c = btrfs_alloc_free_block(trans, root, + root->nodesize, + root->root_key.objectid, + &disk_key, level, 0, 0); + if (IS_ERR(c)) { + c = old; + extent_buffer_get(c); + overwrite = 1; + } +init: + memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); + btrfs_set_header_level(c, level); + btrfs_set_header_bytenr(c, c->start); + btrfs_set_header_generation(c, trans->transid); + btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); + btrfs_set_header_owner(c, root->root_key.objectid); + + write_extent_buffer(c, root->fs_info->fsid, + btrfs_header_fsid(), BTRFS_FSID_SIZE); + + write_extent_buffer(c, root->fs_info->chunk_tree_uuid, + btrfs_header_chunk_tree_uuid(c), + BTRFS_UUID_SIZE); + + btrfs_mark_buffer_dirty(c); + /* + * this case can happen in the following case: + * + * 1.overwrite previous root. + * + * 2.reinit reloc data root, this is because we skip pin + * down reloc data tree before which means we can allocate + * same block bytenr here. 
+ */ + if (old->start == c->start) { + btrfs_set_root_generation(&root->root_item, + trans->transid); + root->root_item.level = btrfs_header_level(root->node); + ret = btrfs_update_root(trans, root->fs_info->tree_root, + &root->root_key, &root->root_item); + if (ret) { + free_extent_buffer(c); + return ret; + } + } + free_extent_buffer(old); + root->node = c; + add_root_to_dirty_list(root); + return 0; +} + +static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int tree_root) +{ + struct extent_buffer *tmp; + struct btrfs_root_item *ri; + struct btrfs_key key; + u64 bytenr; + u32 nodesize; + int level = btrfs_header_level(eb); + int nritems; + int ret; + int i; + + /* + * If we have pinned this block before, don't pin it again. + * This can not only avoid forever loop with broken filesystem + * but also give us some speedups. + */ + if (test_range_bit(&fs_info->pinned_extents, eb->start, + eb->start + eb->len - 1, EXTENT_DIRTY, 0)) + return 0; + + btrfs_pin_extent(fs_info, eb->start, eb->len); + + nodesize = btrfs_super_nodesize(fs_info->super_copy); + nritems = btrfs_header_nritems(eb); + for (i = 0; i < nritems; i++) { + if (level == 0) { + btrfs_item_key_to_cpu(eb, &key, i); + if (key.type != BTRFS_ROOT_ITEM_KEY) + continue; + /* Skip the extent root and reloc roots */ + if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID || + key.objectid == BTRFS_TREE_RELOC_OBJECTID || + key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) + continue; + ri = btrfs_item_ptr(eb, i, struct btrfs_root_item); + bytenr = btrfs_disk_root_bytenr(eb, ri); + + /* + * If at any point we start needing the real root we + * will have to build a stump root for the root we are + * in, but for now this doesn't actually use the root so + * just pass in extent_root. 
+ */ + tmp = read_tree_block(fs_info->extent_root, bytenr, + nodesize, 0); + if (!extent_buffer_uptodate(tmp)) { + fprintf(stderr, "Error reading root block\n"); + return -EIO; + } + ret = pin_down_tree_blocks(fs_info, tmp, 0); + free_extent_buffer(tmp); + if (ret) + return ret; + } else { + bytenr = btrfs_node_blockptr(eb, i); + + /* If we aren't the tree root don't read the block */ + if (level == 1 && !tree_root) { + btrfs_pin_extent(fs_info, bytenr, nodesize); + continue; + } + + tmp = read_tree_block(fs_info->extent_root, bytenr, + nodesize, 0); + if (!extent_buffer_uptodate(tmp)) { + fprintf(stderr, "Error reading tree block\n"); + return -EIO; + } + ret = pin_down_tree_blocks(fs_info, tmp, tree_root); + free_extent_buffer(tmp); + if (ret) + return ret; + } + } + + return 0; +} + +static int pin_metadata_blocks(struct btrfs_fs_info *fs_info) +{ + int ret; + + ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0); + if (ret) + return ret; + + return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1); +} + +static int reset_block_groups(struct btrfs_fs_info *fs_info) +{ struct btrfs_block_group_cache *cache; struct btrfs_path *path; struct extent_buffer *leaf; @@ -9621,7 +11137,7 @@ "Check structural integrity of a filesystem (unmounted).", "Check structural integrity of an unmounted filesystem. Verify internal", "trees' consistency and item connectivity. In the repair mode try to", - "fix the problems found.", + "fix the problems found. 
", "WARNING: the repair mode is considered dangerous", "", "-s|--super use this superblock copy", @@ -9630,6 +11146,12 @@ "--readonly run in read-only mode (default)", "--init-csum-tree create a new CRC tree", "--init-extent-tree create a new extent tree", + "--mode select mode, allows to make some memory/IO", + " trade-offs, where MODE is one of:", + " original - read inodes and extents to memory (requires", + " more memory, does less IO)", + " lowmem - try to use less memory but read blocks again", + " when needed", "--check-data-csum verify checksums of data blocks", "-Q|--qgroup-report print a report on qgroup consistency", "-E|--subvol-extents ", @@ -9656,13 +11178,14 @@ int readonly = 0; int qgroup_report = 0; int qgroups_repaired = 0; - enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE; + unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE; while(1) { int c; enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM, GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM, - GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE }; + GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE, + GETOPT_VAL_MODE }; static const struct option long_options[] = { { "super", required_argument, NULL, 's' }, { "repair", no_argument, NULL, GETOPT_VAL_REPAIR }, @@ -9680,6 +11203,8 @@ { "chunk-root", required_argument, NULL, GETOPT_VAL_CHUNK_TREE }, { "progress", no_argument, NULL, 'p' }, + { "mode", required_argument, NULL, + GETOPT_VAL_MODE }, { NULL, 0, NULL, 0} }; @@ -9744,6 +11269,13 @@ case GETOPT_VAL_CHECK_CSUM: check_data_csum = 1; break; + case GETOPT_VAL_MODE: + check_mode = parse_check_mode(optarg); + if (check_mode == CHECK_MODE_UNKNOWN) { + error("unknown mode: %s", optarg); + exit(1); + } + break; } } @@ -9761,6 +11293,14 @@ exit(1); } + /* + * Not supported yet + */ + if (repair && check_mode == CHECK_MODE_LOWMEM) { + error("Low memory mode doesn't support repair yet"); + exit(1); + } + radix_tree_init(); cache_tree_init(&root_cache); @@ -9884,7 +11424,10 @@ if (!ctx.progress_enabled) 
fprintf(stderr, "checking extents\n"); - ret = check_chunks_and_extents(root); + if (check_mode == CHECK_MODE_LOWMEM) + ret = check_chunks_and_extents_v2(root); + else + ret = check_chunks_and_extents(root); if (ret) fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n"); diff -Nru btrfs-progs-4.7/cmds-filesystem.c btrfs-progs-4.7.1/cmds-filesystem.c --- btrfs-progs-4.7/cmds-filesystem.c 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/cmds-filesystem.c 2016-08-25 17:33:48.000000000 +0000 @@ -524,7 +524,7 @@ disk_super = (struct btrfs_super_block *)buf; ret = btrfs_read_dev_super(fd, disk_super, - BTRFS_SUPER_INFO_OFFSET, 0); + BTRFS_SUPER_INFO_OFFSET, SBREAD_DEFAULT); if (ret) goto out; diff -Nru btrfs-progs-4.7/cmds-subvolume.c btrfs-progs-4.7.1/cmds-subvolume.c --- btrfs-progs-4.7/cmds-subvolume.c 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/cmds-subvolume.c 2016-08-25 17:33:48.000000000 +0000 @@ -937,11 +937,13 @@ goto out; } if (ret) { - ret < 0 ? + if (ret < 0) { error("Failed to get subvol info %s: %s\n", - fullpath, strerror(-ret)): + fullpath, strerror(-ret)); + } else { error("Failed to get subvol info %s: %d\n", - fullpath, ret); + fullpath, ret); + } return ret; } diff -Nru btrfs-progs-4.7/configure btrfs-progs-4.7.1/configure --- btrfs-progs-4.7/configure 2016-07-29 13:07:32.000000000 +0000 +++ btrfs-progs-4.7.1/configure 2016-08-25 17:35:58.000000000 +0000 @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for btrfs-progs v4.7. +# Generated by GNU Autoconf 2.69 for btrfs-progs v4.7.1. # # Report bugs to . # @@ -580,8 +580,8 @@ # Identity of this package. 
PACKAGE_NAME='btrfs-progs' PACKAGE_TARNAME='btrfs-progs' -PACKAGE_VERSION='v4.7' -PACKAGE_STRING='btrfs-progs v4.7' +PACKAGE_VERSION='v4.7.1' +PACKAGE_STRING='btrfs-progs v4.7.1' PACKAGE_BUGREPORT='linux-btrfs@vger.kernel.org' PACKAGE_URL='http://btrfs.wiki.kernel.org' @@ -641,6 +641,7 @@ BLKID_LIBS_STATIC BLKID_LIBS BLKID_CFLAGS +BTRFSCONVERT_EXT2 COM_ERR_LIBS COM_ERR_CFLAGS EXT2FS_LIBS @@ -725,6 +726,7 @@ enable_backtrace enable_documentation enable_convert +with_convert ' ac_precious_vars='build_alias host_alias @@ -1288,7 +1290,7 @@ # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures btrfs-progs v4.7 to adapt to many kinds of systems. +\`configure' configures btrfs-progs v4.7.1 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1353,7 +1355,7 @@ if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of btrfs-progs v4.7:";; + short | recursive ) echo "Configuration of btrfs-progs v4.7.1:";; esac cat <<\_ACEOF @@ -1366,6 +1368,13 @@ --disable-documentation do not build domumentation --disable-convert do not build btrfs-convert +Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-convert[=auto] + built-in filesystems for convert (default: auto) + supported (comma separated list): ext2 + Some influential environment variables: CC C compiler command CFLAGS C compiler flags @@ -1462,7 +1471,7 @@ test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -btrfs-progs configure v4.7 +btrfs-progs configure v4.7.1 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -1831,7 +1840,7 @@ This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. 
-It was created by btrfs-progs $as_me v4.7, which was +It was created by btrfs-progs $as_me v4.7.1, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -5360,7 +5369,28 @@ fi + +# Check whether --with-convert was given. +if test "${with_convert+set}" = set; then : + withval=$with_convert; +else + with_convert=auto + +fi + + +if test "$with_convert" = "yes"; then + with_convert=auto +fi + +if test "$with_convert" = "no"; then + with_convert= +fi + +convertfs= +BTRFSCONVERT_EXT2=0 if test "x$enable_convert" = xyes; then + if test "x$with_convert" = "xauto" || echo "$with_convert" | grep -q "ext2"; then pkg_failed=no { $as_echo "$as_me:${as_lineno-$LINENO}: checking for EXT2FS" >&5 @@ -5709,6 +5739,20 @@ $as_echo "yes" >&6; } fi + convertfs="${convertfs:+$convertfs,}ext2" + BTRFSCONVERT_EXT2=1 + fi +fi + + +# catch typos +tmp=$(echo "$with_convert" | sed -e 's/auto//' | sed -e 's/ext2//' | sed -e 's/,\+/,/') +if ! test "x$tmp" = "x"; then + as_fn_error $? "unknown tokens for --with-convert: $tmp" "$LINENO" 5 +fi + +if test "$DISABLE_BTRFSCONVERT" = 0 && test "x$convertfs" = "x"; then + as_fn_error $? "no filesystems for convert, use --disable-convert instead" "$LINENO" 5 fi @@ -6634,7 +6678,7 @@ # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by btrfs-progs $as_me v4.7, which was +This file was extended by btrfs-progs $as_me v4.7.1, which was generated by GNU Autoconf 2.69. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -6697,7 +6741,7 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -btrfs-progs config.status v4.7 +btrfs-progs config.status v4.7.1 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" @@ -7426,7 +7470,7 @@ documentation: ${enable_documentation} backtrace support: ${enable_backtrace} - btrfs-convert: ${enable_convert} + btrfs-convert: ${enable_convert} ${convertfs:+($convertfs)} Type 'make' to compile. " >&5 @@ -7446,7 +7490,7 @@ documentation: ${enable_documentation} backtrace support: ${enable_backtrace} - btrfs-convert: ${enable_convert} + btrfs-convert: ${enable_convert} ${convertfs:+($convertfs)} Type 'make' to compile. " >&6; } diff -Nru btrfs-progs-4.7/configure.ac btrfs-progs-4.7.1/configure.ac --- btrfs-progs-4.7/configure.ac 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/configure.ac 2016-08-25 17:33:48.000000000 +0000 @@ -104,13 +104,44 @@ AS_IF([test "x$enable_convert" = xyes], [DISABLE_BTRFSCONVERT=0], [DISABLE_BTRFSCONVERT=1]) AC_SUBST([DISABLE_BTRFSCONVERT]) +AC_ARG_WITH([convert], + AS_HELP_STRING([[[]--with-convert[[=auto]]]], [built-in filesystems for convert (default: auto) +supported (comma separated list): ext2]), + [], [with_convert=auto] +) + +if test "$with_convert" = "yes"; then + with_convert=auto +fi + +if test "$with_convert" = "no"; then + with_convert= +fi + +convertfs= +BTRFSCONVERT_EXT2=0 if test "x$enable_convert" = xyes; then - PKG_CHECK_MODULES(EXT2FS, [ext2fs >= 1.42],, - [PKG_CHECK_MODULES(EXT2FS, [ext2fs], - [AC_DEFINE([HAVE_OLD_E2FSPROGS], [1], - [E2fsprogs does not support BIGALLOC])] - )]) - PKG_CHECK_MODULES(COM_ERR, [com_err]) + if test "x$with_convert" = "xauto" || echo "$with_convert" | grep -q "ext2"; then + PKG_CHECK_MODULES(EXT2FS, [ext2fs >= 1.42],, + [PKG_CHECK_MODULES(EXT2FS, [ext2fs], + 
[AC_DEFINE([HAVE_OLD_E2FSPROGS], [1], + [E2fsprogs does not support BIGALLOC])] + )]) + PKG_CHECK_MODULES(COM_ERR, [com_err]) + convertfs="${convertfs:+$convertfs,}ext2" + BTRFSCONVERT_EXT2=1 + fi +fi +AC_SUBST([BTRFSCONVERT_EXT2]) + +# catch typos +tmp=$(echo "$with_convert" | sed -e 's/auto//' | sed -e 's/ext2//' | sed -e 's/,\+/,/') +if ! test "x$tmp" = "x"; then + AC_MSG_ERROR([unknown tokens for --with-convert: $tmp]) +fi + +if test "$DISABLE_BTRFSCONVERT" = 0 && test "x$convertfs" = "x"; then + AC_MSG_ERROR([no filesystems for convert, use --disable-convert instead]) fi AX_CHECK_DEFINE([linux/fiemap.h], [FIEMAP_EXTENT_SHARED], [], @@ -176,7 +207,7 @@ documentation: ${enable_documentation} backtrace support: ${enable_backtrace} - btrfs-convert: ${enable_convert} + btrfs-convert: ${enable_convert} ${convertfs:+($convertfs)} Type 'make' to compile. ]) diff -Nru btrfs-progs-4.7/ctree.h btrfs-progs-4.7.1/ctree.h --- btrfs-progs-4.7/ctree.h 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/ctree.h 2016-08-25 17:33:48.000000000 +0000 @@ -40,6 +40,12 @@ struct btrfs_free_space_ctl; #define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */ +/* + * Fake signature for an unfinalized filesystem, structures might be partially + * created or missing. 
+ */ +#define BTRFS_MAGIC_PARTIAL 0x4D5F536652484221ULL /* ascii !BHRfS_M, no null */ + #define BTRFS_MAX_MIRRORS 3 #define BTRFS_MAX_LEVEL 8 @@ -1026,6 +1032,7 @@ unsigned int ignore_chunk_tree_error:1; unsigned int avoid_meta_chunk_alloc:1; unsigned int avoid_sys_chunk_alloc:1; + unsigned int finalize_on_close:1; int (*free_extent_hook)(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -1740,7 +1747,10 @@ BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16); BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_dir_data_len, struct btrfs_dir_item, data_len, 16); +BTRFS_SETGET_STACK_FUNCS(stack_dir_type, struct btrfs_dir_item, type, 8); BTRFS_SETGET_STACK_FUNCS(stack_dir_name_len, struct btrfs_dir_item, name_len, 16); +BTRFS_SETGET_STACK_FUNCS(stack_dir_transid, struct btrfs_dir_item, transid, 64); static inline void btrfs_dir_item_key(struct extent_buffer *eb, struct btrfs_dir_item *item, @@ -2364,6 +2374,8 @@ struct btrfs_block_group_cache *cache); u64 add_new_free_space(struct btrfs_block_group_cache *block_group, struct btrfs_fs_info *info, u64 start, u64 end); +u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset); + /* ctree.c */ int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2); int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -2576,6 +2588,8 @@ u64 dir, u64 index); int btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 ino, u32 mode); +int btrfs_change_inode_flags(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 ino, u64 flags); int btrfs_add_link(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 ino, u64 parent_ino, char *name, int namelen, u8 type, u64 *index, int add_backref); diff -Nru btrfs-progs-4.7/debian/changelog btrfs-progs-4.7.1/debian/changelog --- btrfs-progs-4.7/debian/changelog 2016-08-11 11:52:31.000000000 +0000 +++ 
btrfs-progs-4.7.1/debian/changelog 2016-08-31 20:01:15.000000000 +0000 @@ -1,3 +1,15 @@ +btrfs-progs (4.7.1-1~ppa14.04+1) trusty; urgency=medium + + * Merge new upstream release from Debian Unstable. + + -- Nicolas Derive Wed, 31 Aug 2016 22:00:04 +0200 + +btrfs-progs (4.7.1-1) unstable; urgency=medium + + * New upstream release. + + -- Dimitri John Ledkov Fri, 26 Aug 2016 15:00:46 +0100 + btrfs-progs (4.7-1) unstable; urgency=medium * New upstream release. diff -Nru btrfs-progs-4.7/disk-io.c btrfs-progs-4.7.1/disk-io.c --- btrfs-progs-4.7/disk-io.c 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/disk-io.c 2016-08-25 17:33:48.000000000 +0000 @@ -932,7 +932,7 @@ } static int setup_root_or_create_block(struct btrfs_fs_info *fs_info, - enum btrfs_open_ctree_flags flags, + unsigned flags, struct btrfs_root *info_root, u64 objectid, char *str) { @@ -961,7 +961,7 @@ } int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info, u64 root_tree_bytenr, - enum btrfs_open_ctree_flags flags) + unsigned flags) { struct btrfs_super_block *sb = fs_info->super_copy; struct btrfs_root *root; @@ -1114,7 +1114,7 @@ int btrfs_scan_fs_devices(int fd, const char *path, struct btrfs_fs_devices **fs_devices, - u64 sb_bytenr, int super_recover, + u64 sb_bytenr, unsigned sbflags, int skip_devices) { u64 total_devs; @@ -1136,7 +1136,7 @@ } ret = btrfs_scan_one_device(fd, path, fs_devices, - &total_devs, sb_bytenr, super_recover); + &total_devs, sb_bytenr, sbflags); if (ret) { fprintf(stderr, "No valid Btrfs found on %s\n", path); return ret; @@ -1217,7 +1217,7 @@ u64 sb_bytenr, u64 root_tree_bytenr, u64 chunk_root_bytenr, - enum btrfs_open_ctree_flags flags) + unsigned flags) { struct btrfs_fs_info *fs_info; struct btrfs_super_block *disk_super; @@ -1225,6 +1225,7 @@ struct extent_buffer *eb; int ret; int oflags; + unsigned sbflags = SBREAD_DEFAULT; if (sb_bytenr == 0) sb_bytenr = BTRFS_SUPER_INFO_OFFSET; @@ -1247,9 +1248,18 @@ if (flags & OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR) 
fs_info->ignore_chunk_tree_error = 1; - ret = btrfs_scan_fs_devices(fp, path, &fs_devices, sb_bytenr, - (flags & OPEN_CTREE_RECOVER_SUPER), - (flags & OPEN_CTREE_NO_DEVICES)); + if ((flags & OPEN_CTREE_RECOVER_SUPER) + && (flags & OPEN_CTREE_FS_PARTIAL)) { + fprintf(stderr, + "cannot open a partially created filesystem for recovery"); + goto out; + } + + if (flags & OPEN_CTREE_FS_PARTIAL) + sbflags = SBREAD_PARTIAL; + + ret = btrfs_scan_fs_devices(fp, path, &fs_devices, sb_bytenr, sbflags, + (flags & OPEN_CTREE_NO_DEVICES)); if (ret) goto out; @@ -1268,10 +1278,11 @@ disk_super = fs_info->super_copy; if (flags & OPEN_CTREE_RECOVER_SUPER) - ret = btrfs_read_dev_super(fs_devices->latest_bdev, - disk_super, sb_bytenr, 1); + ret = btrfs_read_dev_super(fs_devices->latest_bdev, disk_super, + sb_bytenr, SBREAD_RECOVER); else - ret = btrfs_read_dev_super(fp, disk_super, sb_bytenr, 0); + ret = btrfs_read_dev_super(fp, disk_super, sb_bytenr, + sbflags); if (ret) { printk("No valid btrfs found\n"); goto out_devices; @@ -1323,7 +1334,7 @@ struct btrfs_fs_info *open_ctree_fs_info(const char *filename, u64 sb_bytenr, u64 root_tree_bytenr, u64 chunk_root_bytenr, - enum btrfs_open_ctree_flags flags) + unsigned flags) { int fp; int ret; @@ -1356,7 +1367,7 @@ } struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr, - enum btrfs_open_ctree_flags flags) + unsigned flags) { struct btrfs_fs_info *info; @@ -1371,7 +1382,7 @@ } struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr, - enum btrfs_open_ctree_flags flags) + unsigned flags) { struct btrfs_fs_info *info; @@ -1392,7 +1403,7 @@ * - number of devices - something sane * - sys array size - maximum */ -static int check_super(struct btrfs_super_block *sb) +static int check_super(struct btrfs_super_block *sb, unsigned sbflags) { char result[BTRFS_CSUM_SIZE]; u32 crc; @@ -1400,8 +1411,12 @@ int csum_size; if (btrfs_super_magic(sb) != BTRFS_MAGIC) { - error("superblock magic doesn't match"); - return -EIO; 
+ if (btrfs_super_magic(sb) == BTRFS_MAGIC_PARTIAL) { + if (!(sbflags & SBREAD_PARTIAL)) { + error("superblock magic doesn't match"); + return -EIO; + } + } } csum_type = btrfs_super_csum_type(sb); @@ -1533,7 +1548,7 @@ } int btrfs_read_dev_super(int fd, struct btrfs_super_block *sb, u64 sb_bytenr, - int super_recover) + unsigned sbflags) { u8 fsid[BTRFS_FSID_SIZE]; int fsid_is_initialized = 0; @@ -1541,7 +1556,7 @@ struct btrfs_super_block *buf = (struct btrfs_super_block *)tmp; int i; int ret; - int max_super = super_recover ? BTRFS_SUPER_MIRROR_MAX : 1; + int max_super = sbflags & SBREAD_RECOVER ? BTRFS_SUPER_MIRROR_MAX : 1; u64 transid = 0; u64 bytenr; @@ -1553,7 +1568,7 @@ if (btrfs_super_bytenr(buf) != sb_bytenr) return -1; - if (check_super(buf)) + if (check_super(buf, sbflags)) return -1; memcpy(sb, buf, BTRFS_SUPER_INFO_SIZE); return 0; @@ -1577,7 +1592,7 @@ /* if magic is NULL, the device was removed */ if (btrfs_super_magic(buf) == 0 && i == 0) break; - if (check_super(buf)) + if (check_super(buf, sbflags)) continue; if (!fsid_is_initialized) { @@ -1746,6 +1761,15 @@ write_ctree_super(trans, root); btrfs_free_transaction(root, trans); } + + if (fs_info->finalize_on_close) { + btrfs_set_super_magic(fs_info->super_copy, BTRFS_MAGIC); + root->fs_info->finalize_on_close = 0; + ret = write_all_supers(root); + if (ret) + fprintf(stderr, + "failed to write new super block err %d\n", ret); + } btrfs_free_block_groups(fs_info); free_fs_roots_tree(&fs_info->fs_root_tree); diff -Nru btrfs-progs-4.7/disk-io.h btrfs-progs-4.7.1/disk-io.h --- btrfs-progs-4.7/disk-io.h 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/disk-io.h 2016-08-25 17:33:48.000000000 +0000 @@ -29,21 +29,29 @@ #define BTRFS_SUPER_MIRROR_SHIFT 12 enum btrfs_open_ctree_flags { - OPEN_CTREE_WRITES = (1 << 0), - OPEN_CTREE_PARTIAL = (1 << 1), - OPEN_CTREE_BACKUP_ROOT = (1 << 2), - OPEN_CTREE_RECOVER_SUPER = (1 << 3), - OPEN_CTREE_RESTORE = (1 << 4), - OPEN_CTREE_NO_BLOCK_GROUPS = (1 << 5), - 
OPEN_CTREE_EXCLUSIVE = (1 << 6), - OPEN_CTREE_NO_DEVICES = (1 << 7), + /* Open filesystem for writes */ + OPEN_CTREE_WRITES = (1U << 0), + /* Allow to open filesystem with some broken tree roots (eg log root) */ + OPEN_CTREE_PARTIAL = (1U << 1), + /* If primary root pointers are invalid, try backup copies */ + OPEN_CTREE_BACKUP_ROOT = (1U << 2), + /* Allow reading all superblock copies if the primary is damaged */ + OPEN_CTREE_RECOVER_SUPER = (1U << 3), + /* Restoring filesystem image */ + OPEN_CTREE_RESTORE = (1U << 4), + /* Do not read block groups (extent tree) */ + OPEN_CTREE_NO_BLOCK_GROUPS = (1U << 5), + /* Open all devices in O_EXCL mode */ + OPEN_CTREE_EXCLUSIVE = (1U << 6), + /* Do not scan devices */ + OPEN_CTREE_NO_DEVICES = (1U << 7), /* * Don't print error messages if bytenr or checksums do not match in * tree block headers. Turn on by OPEN_CTREE_SUPPRESS_ERROR */ - OPEN_CTREE_SUPPRESS_CHECK_BLOCK_ERRORS = (1 << 8), - /* Return chunk root */ - __OPEN_CTREE_RETURN_CHUNK_ROOT = (1 << 9), + OPEN_CTREE_SUPPRESS_CHECK_BLOCK_ERRORS = (1U << 8), + /* Return the chunk root */ + __OPEN_CTREE_RETURN_CHUNK_ROOT = (1U << 9), OPEN_CTREE_CHUNK_ROOT_ONLY = OPEN_CTREE_PARTIAL + OPEN_CTREE_SUPPRESS_CHECK_BLOCK_ERRORS + __OPEN_CTREE_RETURN_CHUNK_ROOT, @@ -53,15 +61,34 @@ * Like split PARTIAL into SKIP_CSUM/SKIP_EXTENT */ - OPEN_CTREE_IGNORE_FSID_MISMATCH = (1 << 10), + /* Ignore UUID mismatches */ + OPEN_CTREE_IGNORE_FSID_MISMATCH = (1U << 10), /* - * Allow open_ctree_fs_info() to return a incomplete fs_info with + * Allow open_ctree_fs_info() to return an incomplete fs_info with * system chunks from super block only. - * It's useful for chunk corruption case. + * It's useful when chunks are corrupted. * Makes no sense for open_ctree variants returning btrfs_root.
*/ - OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR = (1 << 11) + OPEN_CTREE_IGNORE_CHUNK_TREE_ERROR = (1U << 11), + + /* Allow to open a partially created filesystem */ + OPEN_CTREE_FS_PARTIAL = (1U << 12), +}; + +/* + * Modes of superblock access + */ +enum btrfs_read_sb_flags { + SBREAD_DEFAULT = 0, + /* Reading superblock during recovery */ + SBREAD_RECOVER = (1 << 0), + + /* + * Read superblock with the fake signature, cannot be used with + * SBREAD_RECOVER + */ + SBREAD_PARTIAL = (1 << 1), }; static inline u64 btrfs_sb_offset(int mirror) @@ -103,27 +130,28 @@ struct btrfs_fs_info *btrfs_new_fs_info(int writable, u64 sb_bytenr); int btrfs_check_fs_compatibility(struct btrfs_super_block *sb, int writable); int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info, u64 root_tree_bytenr, - enum btrfs_open_ctree_flags flags); + unsigned flags); void btrfs_release_all_roots(struct btrfs_fs_info *fs_info); void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info); int btrfs_scan_fs_devices(int fd, const char *path, struct btrfs_fs_devices **fs_devices, u64 sb_bytenr, - int super_recover, int skip_devices); + unsigned sbflags, int skip_devices); int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info, u64 chunk_root_bytenr); struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr, - enum btrfs_open_ctree_flags flags); + unsigned flags); struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr, - enum btrfs_open_ctree_flags flags); + unsigned flags); struct btrfs_fs_info *open_ctree_fs_info(const char *filename, u64 sb_bytenr, u64 root_tree_bytenr, u64 chunk_root_bytenr, - enum btrfs_open_ctree_flags flags); + unsigned flags); int close_ctree_fs_info(struct btrfs_fs_info *fs_info); static inline int close_ctree(struct btrfs_root *root) { - BUG_ON(!root); + if (!root) + return 0; return close_ctree_fs_info(root->fs_info); } @@ -131,7 +159,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root); int 
btrfs_read_dev_super(int fd, struct btrfs_super_block *sb, u64 sb_bytenr, - int super_recover); + unsigned sbflags); int btrfs_map_bh_to_logical(struct btrfs_root *root, struct extent_buffer *bh, u64 logical); struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, diff -Nru btrfs-progs-4.7/Documentation/btrfs.asciidoc btrfs-progs-4.7.1/Documentation/btrfs.asciidoc --- btrfs-progs-4.7/Documentation/btrfs.asciidoc 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/Documentation/btrfs.asciidoc 2016-08-25 17:33:48.000000000 +0000 @@ -19,6 +19,9 @@ *btrfstune* that were separate historically and/or haven't been merged to the main utility. See section 'STANDALONE TOOLS' for more details. +For other topics (mount options, etc) please refer to the separate manual +page `btrfs`(5). + COMMAND SYNTAX -------------- diff -Nru btrfs-progs-4.7/Documentation/btrfs-check.asciidoc btrfs-progs-4.7.1/Documentation/btrfs-check.asciidoc --- btrfs-progs-4.7/Documentation/btrfs-check.asciidoc 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/Documentation/btrfs-check.asciidoc 2016-08-25 17:33:48.000000000 +0000 @@ -93,6 +93,19 @@ + NOTE: Do not use unless you know what you're doing. +--mode=MODE:: +select mode of operation regarding memory and IO ++ +The 'MODE' can be one of 'original' and 'lowmem'. The original mode is mostly +unoptimized regarding memory consumption and can lead to out-of-memory +conditions on large filesystems. The possible workaround is to export the block +device over network to a machine with enough memory. The low memory mode is +supposed to address the memory consumption, at the cost of increased IO when it +needs to re-read blocks when needed. This may increase run time. + +NOTE: 'lowmem' mode does not work with '--repair' yet, and is still considered +experimental. + EXIT STATUS ----------- *btrfs check* returns a zero exit status if it succeeds.
Non zero is diff -Nru btrfs-progs-4.7/Documentation/btrfs-quota.asciidoc btrfs-progs-4.7.1/Documentation/btrfs-quota.asciidoc --- btrfs-progs-4.7/Documentation/btrfs-quota.asciidoc 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/Documentation/btrfs-quota.asciidoc 2016-08-25 17:33:48.000000000 +0000 @@ -3,7 +3,7 @@ NAME ---- -btrfs-quota - control the quota of a btrfs filesystem +btrfs-quota - control the global quota status of a btrfs filesystem SYNOPSIS -------- @@ -11,14 +11,208 @@ DESCRIPTION ----------- -*btrfs quota* is used to enable/disable or rescan subvolume quota of a btrfs -filesystem. - -For setting quota or other quota operations on a btrfs filesystem, please see -`btrfs-qgroup`(8) for details. - -WARNING: Quota and qgroup in btrfs filesystem is not stable and impacts -performance in mainline kernel yet(v3.14 so far). +The commands under *btrfs quota* are used to affect the global status of quotas +of a btrfs filesystem. The quota groups (qgroups) are managed by the subcommand +`btrfs qgroup`(8). + +NOTE: the qgroups are different than the traditional user quotas and designed +to track shared and exclusive data per-subvolume. Please refer to the section +'HIERARCHICAL QUOTA GROUP CONCEPTS' for a detailed description. + +PERFORMANCE IMPLICATIONS +~~~~~~~~~~~~~~~~~~~~~~~~ + +When the quotas are turned on, they affect all extent processing, taking a +performance hit. It is not recommended to turn on qgroups unless the user +intends to actually use them. + +STABILITY STATUS +~~~~~~~~~~~~~~~~ + +The qgroup implementation has turned out to be quite difficult as it affects +the core of the filesystem operation. The users have hit various corner cases +over time, eg. wrong accounting or system instability. The situation is +gradually improving but currently (4.7) there are still issues found and fixed. + +HIERARCHICAL QUOTA GROUP CONCEPTS +--------------------------------- + +The concept of quota has a long-standing tradition in the Unix world.
Ever +since computers allow multiple users to work simultaneously in one filesystem, +there is the need to prevent one user from using up the entire space. Every +user should get his fair share of the available resources. + +In case of files, the solution is quite straightforward. Each file has an +'owner' recorded along with it, and it has a size. Traditional quota just +restricts the total size of all files that are owned by a user. The concept is +quite flexible: if a user hits his quota limit, the administrator can raise it +on the fly. + +On the other hand, the traditional approach has only a poor solution to +restrict directories. +At installation time, the harddisk can be partitioned so that every directory +(eg. /usr, /var/, ...) that needs a limit gets its own partition. The obvious +problem is, that those limits cannot be changed without a reinstallation. The +btrfs subvolume feature builds a bridge. Subvolumes correspond in many ways to +partitions, as every subvolume looks like its own filesystem. With subvolume +quota, it is now possible to restrict each subvolume like a partition, but keep +the flexibility of quota. The space for each subvolume can be expanded or +restricted on the fly. + +As subvolumes are the basis for snapshots, interesting questions arise as to +how to account used space in the presence of snapshots. If you have a file +shared between a subvolume and a snapshot, whom to account the file to? The +creator? Both? What if the file gets modified in the snapshot, should only +these changes be accounted to it? But wait, both the snapshot and the subvolume +belong to the same user home. I just want to limit the total space used by +both! But somebody else might not want to charge the snapshots to the users. + +Btrfs subvolume quota solves these problems by introducing groups of subvolumes +and let the user put limits on them. It is even possible to have groups of +groups. In the following, we refer to them as 'qgroups'.
+ +Each qgroup primarily tracks two numbers, the amount of total referenced +space and the amount of exclusively referenced space. + +referenced:: +space is the amount of data that can be reached from any of the subvolumes +contained in the qgroup, while +exclusive:: +is the amount of data where all references to this data can be reached +from within this qgroup. + +SUBVOLUME QUOTA GROUPS +~~~~~~~~~~~~~~~~~~~~~~ + +The basic notion of the Subvolume Quota feature is the quota group, short +qgroup. Qgroups are notated as 'level/id', eg. the qgroup 3/2 is a qgroup of +level 3. For level 0, the leading '0/' can be omitted. +Qgroups of level 0 get created automatically when a subvolume/snapshot gets +created. The ID of the qgroup corresponds to the ID of the subvolume, so 0/5 +is the qgroup for the root subvolume. +For the *btrfs qgroup* command, the path to the subvolume can also be used +instead of '0/ID'. For all higher levels, the ID can be chosen freely. + +Each qgroup can contain a set of lower level qgroups, thus creating a hierarchy +of qgroups. Figure 1 shows an example qgroup tree. + +// TODO: insert Figure 1 + +At the bottom, some extents are depicted showing which qgroups reference which +extents. It is important to understand the notion of 'referenced' vs +'exclusive'. In the example, qgroup 0/2 references extents 2 and 3, while 1/2 +references extents 2-4, 2/1 references all extents. + +On the other hand, extent 1 is exclusive to 0/1, extent 2 is exclusive to 0/2, +while extent 3 is neither exclusive to 0/2 nor to 0/3. But because both +references can be reached from 1/2, extent 3 is exclusive to 1/2. All extents +are exclusive to 2/1. + +So exclusive does not mean there is no other way to reach the extent, but it +does mean that if you delete all subvolumes contained in a qgroup, the extent +will get deleted. + +Exclusive of a qgroup conveys the useful information how much space will be +freed in case all subvolumes of the qgroup get deleted.
+ +All data extents are accounted this way. Metadata that belongs to a specific +subvolume (i.e. its filesystem tree) is also accounted. Checksums and extent +allocation information are not accounted. + +In turn, the referenced count of a qgroup can be limited. All writes beyond +this limit will lead to a 'Quota Exceeded' error. + +INHERITANCE +~~~~~~~~~~~ + +Things get a bit more complicated when new subvolumes or snapshots are created. +The case of (empty) subvolumes is still quite easy. If a subvolume should be +part of a qgroup, it has to be added to the qgroup at creation time. To add it +at a later time, it would be necessary to at least rescan the full subvolume +for a proper accounting. + +Creation of a snapshot is the hard case. Obviously, the snapshot will +reference the exact amount of space as its source, and both source and +destination now have an exclusive count of 0 (the filesystem nodesize to be +precise, as the roots of the trees are not shared). But what about qgroups of +higher levels? If the qgroup contains both the source and the destination, +nothing changes. If the qgroup contains only the source, it might lose some +exclusive. + +But how much? The tempting answer is, subtract all exclusive of the source from +the qgroup, but that is wrong, or at least not enough. There could have been +an extent that is referenced from the source and another subvolume from that +qgroup. This extent would have been exclusive to the qgroup, but not to the +source subvolume. With the creation of the snapshot, the qgroup would also +lose this extent from its exclusive set. + +So how can this problem be solved? In the instant the snapshot gets created, we +already have to know the correct exclusive count. We need to have a second +qgroup that contains all the subvolumes as the first qgroup, except the +subvolume we want to snapshot. 
The moment we create the snapshot, the +exclusive count from the second qgroup needs to be copied to the first qgroup, +as it represents the correct value. The second qgroup is called a tracking +qgroup. It is only there in case a snapshot is needed. + +USE CASES +~~~~~~~~~ + +Below are some use cases that do not mean to be extensive. You can find your +own way how to integrate qgroups. + +==== SINGLE-USER MACHINE ==== + +`Replacement for partitions` + +The simplest use case is to use qgroups as simple replacement for partitions. +Btrfs takes the disk as a whole, and /, /usr, /var etc. are created as +subvolumes. As each subvolume gets its own qgroup automatically, they can +simply be restricted. No hierarchy is needed for that. + +`Track usage of snapshots` + +When a snapshot is taken, a qgroup for it will automatically be created with +the correct values. 'Referenced' will show how much is in it, possibly shared +with other subvolumes. 'Exclusive' will be the amount of space that gets freed +when the subvolume is deleted. + +==== MULTI-USER MACHINE ==== + +`Restricting homes` + +When you have several users on a machine, with home directories probably under +/home, you might want to restrict /home as a whole, while restricting every +user to an individual limit as well. This is easily accomplished by creating a +qgroup for /home , eg. 1/1, and assigning all user subvolumes to it. +Restricting this qgroup will limit /home, while every user subvolume can get +its own (lower) limit. + +`Accounting snapshots to the user` + +Let's say the user is allowed to create snapshots via some mechanism. It would +only be fair to account space used by the snapshots to the user. This does not +mean the user doubles his usage as soon as he takes a snapshot. Of course, +files that are present in his home and the snapshot should only be accounted +once. This can be accomplished by creating a qgroup for each user, say +'1/UID'. The user home and all snapshots are assigned to this qgroup.
+Limiting it will extend the limit to all snapshots, counting files only once. +To limit /home as a whole, a higher level group 2/1 replacing 1/1 from the +previous example is needed, with all user qgroups assigned to it. + +`Do not account snapshots` + +On the other hand, when the snapshots get created automatically, the user has +no chance to control them, so the space used by them should not be accounted to +him. This is already the case when creating snapshots in the example from +the previous section. + +`Snapshots for backup purposes` + +This scenario is a mixture of the previous two. The user can create snapshots, +but some snapshots for backup purposes are being created by the system. The +user's snapshots should be accounted to the user, not the system. The solution +is similar to the one from section 'Accounting snapshots to the user', but do +not assign system snapshots to user's qgroup. SUBCOMMAND ---------- diff -Nru btrfs-progs-4.7/extent-tree.c btrfs-progs-4.7.1/extent-tree.c --- btrfs-progs-4.7/extent-tree.c 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/extent-tree.c 2016-08-25 17:33:48.000000000 +0000 @@ -598,7 +598,7 @@ } #endif -static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) +u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) { u32 high_crc = ~(u32)0; u32 low_crc = ~(u32)0; diff -Nru btrfs-progs-4.7/inode.c btrfs-progs-4.7.1/inode.c --- btrfs-progs-4.7/inode.c 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/inode.c 2016-08-25 17:33:48.000000000 +0000 @@ -472,6 +472,42 @@ } /* + * Change inode flags to given value + */ +int btrfs_change_inode_flags(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 ino, u64 flags) +{ + struct btrfs_inode_item *item; + struct btrfs_path *path; + struct btrfs_key key; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = ino; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + + ret = 
btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret > 0) { + ret = -ENOENT; + goto out; + } + if (ret < 0) + goto out; + + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + btrfs_set_inode_flags(path->nodes[0], item, flags); + btrfs_mark_buffer_dirty(path->nodes[0]); +out: + btrfs_free_path(path); + return ret; +} + +/* * Make a dir under the parent inode 'parent_ino' with 'name' * and 'mode', The owner will be root/root. */ diff -Nru btrfs-progs-4.7/Makefile.in btrfs-progs-4.7.1/Makefile.in --- btrfs-progs-4.7/Makefile.in 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/Makefile.in 2016-08-25 17:33:48.000000000 +0000 @@ -12,6 +12,11 @@ # V=1 verbose, print command lines (default: quiet) # C=1 run checker before compilation (default checker: sparse) # D=1 debugging build, turn off optimizations +# D=dflags dtto, turn on additional debugging features: +# verbose - print file:line along with error/warning messages +# trace - print trace before the error/warning messages +# abort - call abort() on first error (dumps core) +# all - shortcut for all of the above # W=123 build with warnings (default: off) # DEBUG_CFLAGS additional compiler flags for debugging build # EXTRA_CFLAGS additional compiler flags @@ -35,6 +40,7 @@ INSTALL = @INSTALL@ DISABLE_DOCUMENTATION = @DISABLE_DOCUMENTATION@ DISABLE_BTRFSCONVERT = @DISABLE_BTRFSCONVERT@ +BTRFSCONVERT_EXT2 = @BTRFSCONVERT_EXT2@ EXTRA_CFLAGS := EXTRA_LDFLAGS := @@ -121,6 +127,24 @@ DEBUG_CFLAGS_INTERNAL = $(DEBUG_CFLAGS_DEFAULT) $(DEBUG_CFLAGS) endif +ifneq (,$(findstring verbose,$(D))) + DEBUG_CFLAGS_INTERNAL += -DDEBUG_VERBOSE_ERROR=1 +endif + +ifneq (,$(findstring trace,$(D))) + DEBUG_CFLAGS_INTERNAL += -DDEBUG_TRACE_ON_ERROR=1 +endif + +ifneq (,$(findstring abort,$(D))) + DEBUG_CFLAGS_INTERNAL += -DDEBUG_ABORT_ON_ERROR=1 +endif + +ifneq (,$(findstring all,$(D))) + DEBUG_CFLAGS_INTERNAL += -DDEBUG_VERBOSE_ERROR=1 + DEBUG_CFLAGS_INTERNAL += -DDEBUG_TRACE_ON_ERROR=1 + 
DEBUG_CFLAGS_INTERNAL += -DDEBUG_ABORT_ON_ERROR=1 +endif + MAKEOPTS = --no-print-directory Q=$(Q) # build all by default @@ -143,6 +167,7 @@ # external libs required by various binaries; for btrfs-foo, # specify btrfs_foo_libs = ; see $($(subst...)) rules below btrfs_convert_libs = @EXT2FS_LIBS@ @COM_ERR_LIBS@ +btrfs_convert_cflags = -DBTRFSCONVERT_EXT2=$(BTRFSCONVERT_EXT2) btrfs_fragments_libs = -lgd -lpng -ljpeg -lfreetype btrfs_debug_tree_objects = cmds-inspect-dump-tree.o btrfs_show_super_objects = cmds-inspect-dump-super.o @@ -198,15 +223,18 @@ %.o.d: %.c $(Q)$(CC) -MM -MG -MF $@ -MT $(@:.o.d=.o) -MT $(@:.o.d=.static.o) -MT $@ $(CFLAGS) $< +# +# Pick from per-file variables, btrfs_*_cflags +# .c.o: @$(check_echo) " [SP] $<" $(Q)$(check) $(CFLAGS) $(CHECKER_FLAGS) $< @echo " [CC] $@" - $(Q)$(CC) $(CFLAGS) -c $< + $(Q)$(CC) $(CFLAGS) -c $< $($(subst -,_,$(@:%.o=%)-cflags)) %.static.o: %.c @echo " [CC] $@" - $(Q)$(CC) $(STATIC_CFLAGS) -c $< -o $@ + $(Q)$(CC) $(STATIC_CFLAGS) -c $< -o $@ $($(subst -,_,$(@:%.static.o=%)-cflags)) all: $(progs) $(BUILDDIRS) $(SUBDIRS): $(BUILDDIRS) diff -Nru btrfs-progs-4.7/mkfs.c btrfs-progs-4.7.1/mkfs.c --- btrfs-progs-4.7/mkfs.c 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/mkfs.c 2016-08-25 17:33:48.000000000 +0000 @@ -79,7 +79,8 @@ BTRFS_FIRST_CHUNK_TREE_OBJECTID, 0, BTRFS_MKFS_SYSTEM_GROUP_SIZE); allocation->system += BTRFS_MKFS_SYSTEM_GROUP_SIZE; - BUG_ON(ret); + if (ret) + return ret; if (mixed) { ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root, @@ -87,37 +88,40 @@ BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA); if (ret == -ENOSPC) { - fprintf(stderr, - "no space to allocate data/metadata chunk\n"); + error("no space to allocate data/metadata chunk"); goto err; } - BUG_ON(ret); + if (ret) + return ret; ret = btrfs_make_block_group(trans, root, 0, BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA, BTRFS_FIRST_CHUNK_TREE_OBJECTID, chunk_start, chunk_size); - BUG_ON(ret); + if (ret) + return ret; 
allocation->mixed += chunk_size; } else { ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root, &chunk_start, &chunk_size, BTRFS_BLOCK_GROUP_METADATA); if (ret == -ENOSPC) { - fprintf(stderr, "no space to allocate metadata chunk\n"); + error("no space to allocate metadata chunk"); goto err; } - BUG_ON(ret); + if (ret) + return ret; ret = btrfs_make_block_group(trans, root, 0, BTRFS_BLOCK_GROUP_METADATA, BTRFS_FIRST_CHUNK_TREE_OBJECTID, chunk_start, chunk_size); allocation->metadata += chunk_size; - BUG_ON(ret); + if (ret) + return ret; } root->fs_info->system_allocs = 0; - btrfs_commit_transaction(trans, root); + ret = btrfs_commit_transaction(trans, root); err: return ret; @@ -136,16 +140,18 @@ &chunk_start, &chunk_size, BTRFS_BLOCK_GROUP_DATA); if (ret == -ENOSPC) { - fprintf(stderr, "no space to allocate data chunk\n"); + error("no space to allocate data chunk"); goto err; } - BUG_ON(ret); + if (ret) + return ret; ret = btrfs_make_block_group(trans, root, 0, BTRFS_BLOCK_GROUP_DATA, BTRFS_FIRST_CHUNK_TREE_OBJECTID, chunk_start, chunk_size); allocation->data += chunk_size; - BUG_ON(ret); + if (ret) + return ret; } err: @@ -184,32 +190,50 @@ return ret; } -static void __recow_root(struct btrfs_trans_handle *trans, +static int __recow_root(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - int ret; struct extent_buffer *tmp; + int ret; if (trans->transid != btrfs_root_generation(&root->root_item)) { extent_buffer_get(root->node); ret = __btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp, 0, 0); - BUG_ON(ret); + if (ret) + return ret; free_extent_buffer(tmp); } + + return 0; } -static void recow_roots(struct btrfs_trans_handle *trans, +static int recow_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_fs_info *info = root->fs_info; + int ret; - __recow_root(trans, info->fs_root); - __recow_root(trans, info->tree_root); - __recow_root(trans, info->extent_root); - __recow_root(trans, info->chunk_root); - 
__recow_root(trans, info->dev_root); - __recow_root(trans, info->csum_root); + ret = __recow_root(trans, info->fs_root); + if (ret) + return ret; + ret = __recow_root(trans, info->tree_root); + if (ret) + return ret; + ret = __recow_root(trans, info->extent_root); + if (ret) + return ret; + ret = __recow_root(trans, info->chunk_root); + if (ret) + return ret; + ret = __recow_root(trans, info->dev_root); + if (ret) + return ret; + ret = __recow_root(trans, info->csum_root); + if (ret) + return ret; + + return 0; } static int create_one_raid_group(struct btrfs_trans_handle *trans, @@ -224,26 +248,32 @@ ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root, &chunk_start, &chunk_size, type); if (ret == -ENOSPC) { - fprintf(stderr, "not enough free space\n"); + error("not enough free space to allocate chunk"); exit(1); } - BUG_ON(ret); + if (ret) + return ret; + ret = btrfs_make_block_group(trans, root->fs_info->extent_root, 0, type, BTRFS_FIRST_CHUNK_TREE_OBJECTID, chunk_start, chunk_size); - if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == BTRFS_BLOCK_GROUP_DATA) + + type &= BTRFS_BLOCK_GROUP_TYPE_MASK; + if (type == BTRFS_BLOCK_GROUP_DATA) { allocation->data += chunk_size; - else if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == BTRFS_BLOCK_GROUP_METADATA) + } else if (type == BTRFS_BLOCK_GROUP_METADATA) { allocation->metadata += chunk_size; - else if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == BTRFS_BLOCK_GROUP_SYSTEM) + } else if (type == BTRFS_BLOCK_GROUP_SYSTEM) { allocation->system += chunk_size; - else if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == - (BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA)) + } else if (type == + (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA)) { allocation->mixed += chunk_size; - else - BUG_ON(1); + } else { + error("unrecognized profile type: 0x%llx", + (unsigned long long)type); + ret = -EINVAL; + } - BUG_ON(ret); return ret; } @@ -260,25 +290,28 @@ ret = create_one_raid_group(trans, root, BTRFS_BLOCK_GROUP_SYSTEM | metadata_profile, 
allocation); - BUG_ON(ret); + if (ret) + return ret; if (mixed) meta_flags |= BTRFS_BLOCK_GROUP_DATA; ret = create_one_raid_group(trans, root, meta_flags | metadata_profile, allocation); - BUG_ON(ret); + if (ret) + return ret; } if (!mixed && data_profile) { ret = create_one_raid_group(trans, root, BTRFS_BLOCK_GROUP_DATA | data_profile, allocation); - BUG_ON(ret); + if (ret) + return ret; } - recow_roots(trans, root); + ret = recow_roots(trans, root); - return 0; + return ret; } static int create_data_reloc_tree(struct btrfs_trans_handle *trans, @@ -291,7 +324,8 @@ int ret; ret = btrfs_copy_root(trans, root, root->node, &tmp, objectid); - BUG_ON(ret); + if (ret) + return ret; memcpy(&root_item, &root->root_item, sizeof(root_item)); btrfs_set_root_bytenr(&root_item, tmp->start); @@ -304,40 +338,33 @@ location.offset = 0; ret = btrfs_insert_root(trans, root->fs_info->tree_root, &location, &root_item); - BUG_ON(ret); - return 0; + + return ret; } static void print_usage(int ret) { - fprintf(stderr, "usage: mkfs.btrfs [options] dev [ dev ... 
]\n"); - fprintf(stderr, "options:\n"); - fprintf(stderr, "\t-A|--alloc-start START the offset to start the FS\n"); - fprintf(stderr, "\t-b|--byte-count SIZE total number of bytes in the FS\n"); - fprintf(stderr, "\t-d|--data PROFILE data profile, raid0, raid1, raid5, raid6, raid10, dup or single\n"); - fprintf(stderr, "\t-f|--force force overwrite of existing filesystem\n"); - fprintf(stderr, "\t-l|--leafsize SIZE deprecated, alias for nodesize\n"); - fprintf(stderr, "\t-L|--label LABEL set a label\n"); - fprintf(stderr, "\t-m|--metadata PROFILE metadata profile, values like data profile\n"); - fprintf(stderr, "\t-M|--mixed mix metadata and data together\n"); - fprintf(stderr, "\t-n|--nodesize SIZE size of btree nodes\n"); - fprintf(stderr, "\t-s|--sectorsize SIZE min block allocation (may not mountable by current kernel)\n"); - fprintf(stderr, "\t-r|--rootdir DIR the source directory\n"); - fprintf(stderr, "\t-K|--nodiscard do not perform whole device TRIM\n"); - fprintf(stderr, "\t-O|--features LIST comma separated list of filesystem features, use '-O list-all' to list features\n"); - fprintf(stderr, "\t-U|--uuid UUID specify the filesystem UUID\n"); - fprintf(stderr, "\t-q|--quiet no messages except errors\n"); - fprintf(stderr, "\t-V|--version print the mkfs.btrfs version and exit\n"); + printf("usage: mkfs.btrfs [options] dev [ dev ... 
]\n"); + printf("options:\n"); + printf("\t-A|--alloc-start START the offset to start the FS\n"); + printf("\t-b|--byte-count SIZE total number of bytes in the FS\n"); + printf("\t-d|--data PROFILE data profile, raid0, raid1, raid5, raid6, raid10, dup or single\n"); + printf("\t-f|--force force overwrite of existing filesystem\n"); + printf("\t-l|--leafsize SIZE deprecated, alias for nodesize\n"); + printf("\t-L|--label LABEL set a label\n"); + printf("\t-m|--metadata PROFILE metadata profile, values like data profile\n"); + printf("\t-M|--mixed mix metadata and data together\n"); + printf("\t-n|--nodesize SIZE size of btree nodes\n"); + printf("\t-s|--sectorsize SIZE min block allocation (may not mountable by current kernel)\n"); + printf("\t-r|--rootdir DIR the source directory\n"); + printf("\t-K|--nodiscard do not perform whole device TRIM\n"); + printf("\t-O|--features LIST comma separated list of filesystem features, use '-O list-all' to list features\n"); + printf("\t-U|--uuid UUID specify the filesystem UUID\n"); + printf("\t-q|--quiet no messages except errors\n"); + printf("\t-V|--version print the mkfs.btrfs version and exit\n"); exit(ret); } -static void print_version(void) __attribute__((noreturn)); -static void print_version(void) -{ - fprintf(stderr, "mkfs.btrfs, part of %s\n", PACKAGE_STRING); - exit(0); -} - static u64 parse_profile(char *s) { if (strcasecmp(s, "raid0") == 0) { @@ -355,7 +382,7 @@ } else if (strcasecmp(s, "single") == 0) { return 0; } else { - fprintf(stderr, "Unknown profile %s\n", s); + error("unknown profile %s", s); exit(1); } /* not reached */ @@ -367,7 +394,7 @@ int len = strlen(input); if (len >= BTRFS_LABEL_SIZE) { - fprintf(stderr, "Label %s is too long (max %d)\n", input, + error("label %s is too long (max %d)", input, BTRFS_LABEL_SIZE - 1); exit(1); } @@ -551,8 +578,8 @@ if (ret < 0) { if(errno == ENOTSUP) return 0; - fprintf(stderr, "get a list of xattr failed for %s\n", - file_name); + error("getting a list of xattr 
failed for %s: %s", file_name, + strerror(errno)); return ret; } if (ret == 0) @@ -567,8 +594,8 @@ if (ret < 0) { if(errno == ENOTSUP) return 0; - fprintf(stderr, "get a xattr value failed for %s attr %s\n", - file_name, cur_name); + error("gettig a xattr value failed for %s attr %s: %s", + file_name, cur_name, strerror(errno)); return ret; } @@ -576,8 +603,8 @@ cur_name_len, cur_value, ret, objectid); if (ret) { - fprintf(stderr, "insert a xattr item failed for %s\n", - file_name); + error("inserting a xattr item failed for %s: %s", + file_name, strerror(-ret)); } cur_name = strtok(next_location, &delimiter); @@ -595,11 +622,11 @@ ret = readlink(path_name, buf, sizeof(buf)); if (ret <= 0) { - fprintf(stderr, "readlink failed for %s\n", path_name); + error("readlink failed for %s: %s", path_name, strerror(errno)); goto fail; } if (ret >= sizeof(buf)) { - fprintf(stderr, "symlink too long for %s\n", path_name); + error("symlink too long for %s", path_name); ret = -1; goto fail; } @@ -635,7 +662,7 @@ fd = open(path_name, O_RDONLY); if (fd == -1) { - fprintf(stderr, "%s open failed\n", path_name); + error("cannot open %s: %s", path_name, strerror(errno)); return ret; } @@ -653,7 +680,10 @@ ret_read = pread64(fd, buffer, st->st_size, bytes_read); if (ret_read == -1) { - fprintf(stderr, "%s read failed\n", path_name); + error("cannot read %s at offset %llu length %llu: %s", + path_name, (unsigned long long)bytes_read, + (unsigned long long)st->st_size, + strerror(errno)); free(buffer); goto end; } @@ -698,7 +728,11 @@ ret_read = pread64(fd, eb->data, sectorsize, file_pos + bytes_read); if (ret_read == -1) { - fprintf(stderr, "%s read failed\n", path_name); + error("cannot read %s at offset %llu length %llu: %s", + path_name, + (unsigned long long)file_pos + bytes_read, + (unsigned long long)sectorsize, + strerror(errno)); goto end; } @@ -718,7 +752,7 @@ ret = write_and_map_eb(trans, root, eb); if (ret) { - fprintf(stderr, "output file write failed\n"); + error("failed 
to write %s", path_name); goto end; } @@ -788,7 +822,7 @@ dir_entry->dir_name = dir_name; dir_entry->path = realpath(dir_name, real_path); if (!dir_entry->path) { - fprintf(stderr, "get directory real path error\n"); + error("realpath failed for %s: %s", dir_name, strerror(errno)); ret = -1; goto fail_no_dir; } @@ -804,7 +838,7 @@ btrfs_set_key_type(&root_dir_key, BTRFS_INODE_ITEM_KEY); ret = btrfs_lookup_inode(trans, root, &path, &root_dir_key, 1); if (ret) { - fprintf(stderr, "root dir lookup error\n"); + error("failed to lookup root dir: %d", ret); goto fail_no_dir; } @@ -827,8 +861,8 @@ parent_inum = parent_dir_entry->inum; parent_dir_name = parent_dir_entry->dir_name; if (chdir(parent_dir_entry->path)) { - fprintf(stderr, "chdir error for %s\n", - parent_dir_name); + error("chdir failed for %s: %s", + parent_dir_name, strerror(errno)); ret = -1; goto fail_no_files; } @@ -837,7 +871,7 @@ directory_select, NULL); if (count == -1) { - fprintf(stderr, "scandir for %s failed: %s\n", + error("scandir failed for %s: %s", parent_dir_name, strerror (errno)); ret = -1; goto fail; @@ -847,8 +881,8 @@ cur_file = files[i]; if (lstat(cur_file->d_name, &st) == -1) { - fprintf(stderr, "lstat failed for file %s\n", - cur_file->d_name); + error("lstat failed for %s: %s", + cur_file->d_name, strerror(errno)); ret = -1; goto fail; } @@ -859,7 +893,8 @@ cur_file->d_name, &st, &dir_index_cnt); if (ret) { - fprintf(stderr, "add_directory_items failed\n"); + error("unable to add directory items for %s: %d", + cur_file->d_name, ret); goto fail; } @@ -868,18 +903,25 @@ parent_inum, dir_index_cnt, &cur_inode); if (ret == -EEXIST) { - BUG_ON(st.st_nlink <= 1); + if (st.st_nlink <= 1) { + error( + "item %s already exists but has wrong st_nlink %ld <= 1", + cur_file->d_name, st.st_nlink); + goto fail; + } continue; } if (ret) { - fprintf(stderr, "add_inode_items failed\n"); + error("unable to add inode items for %s: %d", + cur_file->d_name, ret); goto fail; } ret = add_xattr_item(trans, 
root, cur_inum, cur_file->d_name); if (ret) { - fprintf(stderr, "add_xattr_item failed\n"); + error("unable to add xattr items for %s: %d", + cur_file->d_name, ret); if(ret != -ENOTSUP) goto fail; } @@ -900,14 +942,16 @@ cur_inum, parent_inum, &st, cur_file->d_name, out_fd); if (ret) { - fprintf(stderr, "add_file_items failed\n"); + error("unable to add file items for %s: %d", + cur_file->d_name, ret); goto fail; } } else if (S_ISLNK(st.st_mode)) { ret = add_symbolic_link(trans, root, cur_inum, cur_file->d_name); if (ret) { - fprintf(stderr, "add_symbolic_link failed\n"); + error("unable to add symlink for %s: %d", + cur_file->d_name, ret); goto fail; } } @@ -957,12 +1001,14 @@ for (i = 0; i < num_of_meta_chunks; i++) { ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root, &chunk_start, &chunk_size, meta_type); - BUG_ON(ret); + if (ret) + return ret; ret = btrfs_make_block_group(trans, root->fs_info->extent_root, 0, meta_type, BTRFS_FIRST_CHUNK_TREE_OBJECTID, chunk_start, chunk_size); allocation->metadata += chunk_size; - BUG_ON(ret); + if (ret) + return ret; set_extent_dirty(&root->fs_info->free_space_cache, chunk_start, chunk_start + chunk_size - 1, 0); } @@ -972,12 +1018,14 @@ ret = btrfs_alloc_data_chunk(trans, root->fs_info->extent_root, &chunk_start, size_of_data, data_type, 0); - BUG_ON(ret); + if (ret) + return ret; ret = btrfs_make_block_group(trans, root->fs_info->extent_root, 0, data_type, BTRFS_FIRST_CHUNK_TREE_OBJECTID, chunk_start, size_of_data); allocation->data += size_of_data; - BUG_ON(ret); + if (ret) + return ret; set_extent_dirty(&root->fs_info->free_space_cache, chunk_start, chunk_start + size_of_data - 1, 0); return ret; @@ -987,16 +1035,14 @@ { int ret; struct btrfs_trans_handle *trans; - struct stat root_st; - struct directory_name_entry dir_head; - struct directory_name_entry *dir_entry = NULL; ret = lstat(source_dir, &root_st); if (ret) { - fprintf(stderr, "unable to lstat the %s\n", source_dir); + error("unable to lstat %s: %s", 
source_dir, strerror(errno)); + ret = -errno; goto out; } @@ -1005,10 +1051,14 @@ trans = btrfs_start_transaction(root, 1); ret = traverse_directory(trans, root, source_dir, &dir_head, out_fd); if (ret) { - fprintf(stderr, "unable to traverse_directory\n"); + error("unable to traverse directory %s: %d", source_dir, ret); goto fail; } - btrfs_commit_transaction(trans, root); + ret = btrfs_commit_transaction(trans, root); + if (ret) { + error("transaction commit failed: %d", ret); + goto out; + } if (verbose) printf("Making image is completed.\n"); @@ -1021,8 +1071,7 @@ free(dir_entry); } out: - fprintf(stderr, "Making image is aborted.\n"); - return -1; + return ret; } /* @@ -1061,8 +1110,8 @@ ret = ftw(dir_name, ftw_add_entry_size, 10); dir_size = global_total_size; if (ret < 0) { - fprintf(stderr, "ftw subdir walk of '%s' failed: %s\n", - dir_name, strerror(errno)); + error("ftw subdir walk of %s failed: %s", dir_name, + strerror(errno)); exit(1); } @@ -1354,6 +1403,7 @@ { char *file; struct btrfs_root *root; + struct btrfs_fs_info *fs_info; struct btrfs_trans_handle *trans; char *label = NULL; u64 block_count = 0; @@ -1428,8 +1478,7 @@ data_profile_opt = 1; break; case 'l': - fprintf(stderr, - "WARNING: --leafsize is deprecated, use --nodesize\n"); + warning("--leafsize is deprecated, use --nodesize"); case 'n': nodesize = parse_size(optarg); nodesize_forced = 1; @@ -1450,8 +1499,7 @@ tmp = btrfs_parse_fs_features(tmp, &features); if (tmp) { - fprintf(stderr, - "Unrecognized filesystem feature '%s'\n", + error("unrecognized filesystem feature '%s'", tmp); free(orig); exit(1); @@ -1471,7 +1519,9 @@ zero_end = 0; break; case 'V': - print_version(); + printf("mkfs.btrfs, part of %s\n", + PACKAGE_STRING); + exit(0); break; case 'r': source_dir = optarg; @@ -1506,8 +1556,7 @@ print_usage(1); if (source_dir_set && dev_cnt > 1) { - fprintf(stderr, - "The -r option is limited to a single device\n"); + error("the option -r is limited to a single device"); exit(1); } @@ 
-1515,11 +1564,11 @@ uuid_t dummy_uuid; if (uuid_parse(fs_uuid, dummy_uuid) != 0) { - fprintf(stderr, "could not parse UUID: %s\n", fs_uuid); + error("could not parse UUID: %s", fs_uuid); exit(1); } if (!test_uuid_unique(fs_uuid)) { - fprintf(stderr, "non-unique UUID: %s\n", fs_uuid); + error("non-unique UUID: %s", fs_uuid); exit(1); } } @@ -1561,8 +1610,8 @@ if (metadata_profile_opt || data_profile_opt) { if (metadata_profile != data_profile) { - fprintf(stderr, - "ERROR: With mixed block groups data and metadata profiles must be the same\n"); + error( + "with mixed block groups data and metadata profiles must be the same"); exit(1); } } @@ -1589,11 +1638,9 @@ /* Check device/block_count after the nodesize is determined */ if (block_count && block_count < btrfs_min_dev_size(nodesize)) { - fprintf(stderr, - "Size '%llu' is too small to make a usable filesystem\n", + error("size %llu is too small to make a usable filesystem", block_count); - fprintf(stderr, - "Minimum size for btrfs filesystem is %llu\n", + error("minimum size for btrfs filesystem is %llu", btrfs_min_dev_size(nodesize)); exit(1); } @@ -1603,16 +1650,14 @@ path = argv[i]; ret = test_minimum_size(path, nodesize); if (ret < 0) { - fprintf(stderr, "Failed to check size for '%s': %s\n", + error("failed to check size for %s: %s", path, strerror(-ret)); exit (1); } if (ret > 0) { - fprintf(stderr, - "'%s' is too small to make a usable filesystem\n", + error("'%s' is too small to make a usable filesystem", path); - fprintf(stderr, - "Minimum size for each btrfs device is %llu.\n", + error("minimum size for each btrfs device is %llu", btrfs_min_dev_size(nodesize)); exit(1); } @@ -1632,8 +1677,7 @@ */ fd = open(file, O_RDWR); if (fd < 0) { - fprintf(stderr, "unable to open %s: %s\n", file, - strerror(errno)); + error("unable to open %s: %s", file, strerror(errno)); exit(1); } ret = btrfs_prepare_device(fd, file, &dev_block_count, @@ -1646,13 +1690,16 @@ exit(1); } if (block_count && block_count > 
dev_block_count) { - fprintf(stderr, "%s is smaller than requested size\n", file); + error("%s is smaller than requested size, expected %llu, found %llu", + file, + (unsigned long long)block_count, + (unsigned long long)dev_block_count); exit(1); } } else { fd = open_target(file); if (fd < 0) { - fprintf(stderr, "unable to open the %s\n", file); + error("unable to open %s: %s", file, strerror(errno)); exit(1); } @@ -1662,7 +1709,7 @@ block_count = source_dir_size; ret = zero_output_file(fd, block_count); if (ret) { - fprintf(stderr, "unable to zero the output file\n"); + error("unable to zero the output file"); exit(1); } /* our "device" is the new image file */ @@ -1671,7 +1718,8 @@ /* To create the first block group and chunk 0 in make_btrfs */ if (dev_block_count < BTRFS_MKFS_SYSTEM_GROUP_SIZE) { - fprintf(stderr, "device is too small to make filesystem\n"); + error("device is too small to make filesystem, must be at least %llu", + (unsigned long long)BTRFS_MKFS_SYSTEM_GROUP_SIZE); exit(1); } @@ -1683,8 +1731,7 @@ if (group_profile_max_safe_loss(metadata_profile) < group_profile_max_safe_loss(data_profile)){ - fprintf(stderr, - "WARNING: metadata has lower redundancy than data!\n\n"); + warning("metadata has lower redundancy than data!\n"); } mkfs_cfg.label = label; @@ -1698,53 +1745,56 @@ ret = make_btrfs(fd, &mkfs_cfg, NULL); if (ret) { - fprintf(stderr, "error during mkfs: %s\n", strerror(-ret)); + error("error during mkfs: %s", strerror(-ret)); exit(1); } - root = open_ctree(file, 0, OPEN_CTREE_WRITES); - if (!root) { - fprintf(stderr, "Open ctree failed\n"); + fs_info = open_ctree_fs_info(file, 0, 0, 0, + OPEN_CTREE_WRITES | OPEN_CTREE_FS_PARTIAL); + if (!fs_info) { + error("open ctree failed"); close(fd); exit(1); } - root->fs_info->alloc_start = alloc_start; + root = fs_info->fs_root; + fs_info->alloc_start = alloc_start; ret = create_metadata_block_groups(root, mixed, &allocation); if (ret) { - fprintf(stderr, "failed to create default block groups\n"); 
+ error("failed to create default block groups: %d", ret); exit(1); } trans = btrfs_start_transaction(root, 1); if (!trans) { - fprintf(stderr, "failed to start transaction\n"); + error("failed to start transaction"); exit(1); } ret = create_data_block_groups(trans, root, mixed, &allocation); if (ret) { - fprintf(stderr, "failed to create default data block groups\n"); + error("failed to create default data block groups: %d", ret); exit(1); } ret = make_root_dir(trans, root, &allocation); if (ret) { - fprintf(stderr, "failed to setup the root directory\n"); + error("failed to setup the root directory: %d", ret); exit(1); } - btrfs_commit_transaction(trans, root); + ret = btrfs_commit_transaction(trans, root); + if (ret) { + error("unable to commit transaction: %d", ret); + goto out; + } trans = btrfs_start_transaction(root, 1); if (!trans) { - fprintf(stderr, "failed to start transaction\n"); + error("failed to start transaction"); exit(1); } - if (is_block_device(file) == 1) - btrfs_register_one_device(file); - if (dev_cnt == 0) goto raid_groups; @@ -1758,14 +1808,13 @@ */ fd = open(file, O_RDWR); if (fd < 0) { - fprintf(stderr, "unable to open %s: %s\n", file, - strerror(errno)); + error("unable to open %s: %s", file, strerror(errno)); exit(1); } ret = btrfs_device_already_in_root(root, fd, BTRFS_SUPER_INFO_OFFSET); if (ret) { - fprintf(stderr, "skipping duplicate device %s in FS\n", + error("skipping duplicate device %s in the filesystem", file); close(fd); continue; @@ -1782,47 +1831,67 @@ ret = btrfs_add_to_fsid(trans, root, fd, file, dev_block_count, sectorsize, sectorsize, sectorsize); - BUG_ON(ret); + if (ret) { + error("unable to add %s to filesystem: %d", file, ret); + goto out; + } if (verbose >= 2) { struct btrfs_device *device; - device = container_of(root->fs_info->fs_devices->devices.next, + device = container_of(fs_info->fs_devices->devices.next, struct btrfs_device, dev_list); printf("adding device %s id %llu\n", file, (unsigned long 
long)device->devid); } - - if (is_block_device(file) == 1) - btrfs_register_one_device(file); } raid_groups: if (!source_dir_set) { ret = create_raid_groups(trans, root, data_profile, metadata_profile, mixed, &allocation); - BUG_ON(ret); + if (ret) { + error("unable to create raid groups: %d", ret); + goto out; + } } ret = create_data_reloc_tree(trans, root); - BUG_ON(ret); + if (ret) { + error("unable to create data reloc tree: %d", ret); + goto out; + } - btrfs_commit_transaction(trans, root); + ret = btrfs_commit_transaction(trans, root); + if (ret) { + error("unable to commit transaction: %d", ret); + goto out; + } if (source_dir_set) { trans = btrfs_start_transaction(root, 1); ret = create_chunks(trans, root, num_of_meta_chunks, size_of_data, &allocation); - BUG_ON(ret); - btrfs_commit_transaction(trans, root); + if (ret) { + error("unable to create chunks: %d", ret); + goto out; + } + ret = btrfs_commit_transaction(trans, root); + if (ret) { + error("transaction commit failed: %d", ret); + goto out; + } ret = make_image(source_dir, root, fd); - BUG_ON(ret); + if (ret) { + error("error wihle filling filesystem: %d", ret); + goto out; + } } - ret = cleanup_temp_chunks(root->fs_info, &allocation, data_profile, + ret = cleanup_temp_chunks(fs_info, &allocation, data_profile, metadata_profile, metadata_profile); if (ret < 0) { - fprintf(stderr, "Failed to cleanup temporary chunks\n"); + error("failed to cleanup temporary chunks: %d", ret); goto out; } @@ -1834,7 +1903,7 @@ printf("Node size: %u\n", nodesize); printf("Sector size: %u\n", sectorsize); printf("Filesystem size: %s\n", - pretty_size(btrfs_super_total_bytes(root->fs_info->super_copy))); + pretty_size(btrfs_super_total_bytes(fs_info->super_copy))); printf("Block group profiles:\n"); if (allocation.data) printf(" Data: %-8s %16s\n", @@ -1859,10 +1928,26 @@ list_all_devices(root); } + /* + * The filesystem is now fully set up, commit the remaining changes and + * fix the signature as the last step before 
closing the devices. + */ + fs_info->finalize_on_close = 1; out: ret = close_ctree(root); - BUG_ON(ret); + + if (!ret) { + optind = saved_optind; + dev_cnt = argc - optind; + while (dev_cnt-- > 0) { + file = argv[optind++]; + if (is_block_device(file) == 1) + btrfs_register_one_device(file); + } + } + btrfs_close_all_devices(); free(label); - return 0; + + return !!ret; } diff -Nru btrfs-progs-4.7/print-tree.c btrfs-progs-4.7.1/print-tree.c --- btrfs-progs-4.7/print-tree.c 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/print-tree.c 2016-08-25 17:33:48.000000000 +0000 @@ -496,18 +496,31 @@ return cnt; } +/* + * Caller must ensure sizeof(*ret) >= 7 "RDONLY" + */ +static void root_flags_to_str(u64 flags, char *ret) +{ + if (flags & BTRFS_ROOT_SUBVOL_RDONLY) + strcat(ret, "RDONLY"); + else + strcat(ret, "none"); +} + static void print_root(struct extent_buffer *leaf, int slot) { struct btrfs_root_item *ri; struct btrfs_root_item root_item; int len; char uuid_str[BTRFS_UUID_UNPARSED_SIZE]; + char flags_str[32] = {0}; ri = btrfs_item_ptr(leaf, slot, struct btrfs_root_item); len = btrfs_item_size_nr(leaf, slot); memset(&root_item, 0, sizeof(root_item)); read_extent_buffer(leaf, &root_item, (unsigned long)ri, len); + root_flags_to_str(btrfs_root_flags(&root_item), flags_str); printf("\t\troot data bytenr %llu level %d dirid %llu refs %u gen %llu lastsnap %llu\n", (unsigned long long)btrfs_root_bytenr(&root_item), @@ -516,6 +529,8 @@ btrfs_root_refs(&root_item), (unsigned long long)btrfs_root_generation(&root_item), (unsigned long long)btrfs_root_last_snapshot(&root_item)); + printf("\t\tflags 0x%llx(%s)\n", btrfs_root_flags(&root_item), + flags_str); if (root_item.generation == root_item.generation_v2) { uuid_unparse(root_item.uuid, uuid_str); @@ -835,6 +850,33 @@ } } +/* Caller should ensure sizeof(*ret) >= 29 "NODATASUM|NODATACOW|READONLY" */ +static void inode_flags_to_str(u64 flags, char *ret) +{ + int empty = 1; + + if (flags & BTRFS_INODE_NODATASUM) { + 
empty = 0; + strcpy(ret, "NODATASUM"); + } + if (flags & BTRFS_INODE_NODATACOW) { + if (!empty) { + empty = 0; + strcat(ret, "|"); + } + strcat(ret, "NODATACOW"); + } + if (flags & BTRFS_INODE_READONLY) { + if (!empty) { + empty = 0; + strcat(ret, "|"); + } + strcat(ret, "READONLY"); + } + if (empty) + strcat(ret, "none"); +} + void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) { int i; @@ -884,10 +926,12 @@ switch (type) { case BTRFS_INODE_ITEM_KEY: + memset(flags_str, 0, sizeof(flags_str)); ii = btrfs_item_ptr(l, i, struct btrfs_inode_item); + inode_flags_to_str(btrfs_inode_flags(l, ii), flags_str); printf("\t\tinode generation %llu transid %llu size %llu nbytes %llu\n" "\t\tblock group %llu mode %o links %u uid %u gid %u\n" - "\t\trdev %llu flags 0x%llx\n", + "\t\trdev %llu flags 0x%llx(%s)\n", (unsigned long long)btrfs_inode_generation(l, ii), (unsigned long long)btrfs_inode_transid(l, ii), (unsigned long long)btrfs_inode_size(l, ii), @@ -898,7 +942,8 @@ btrfs_inode_uid(l, ii), btrfs_inode_gid(l, ii), (unsigned long long)btrfs_inode_rdev(l,ii), - (unsigned long long)btrfs_inode_flags(l,ii)); + (unsigned long long)btrfs_inode_flags(l,ii), + flags_str); break; case BTRFS_INODE_REF_KEY: iref = btrfs_item_ptr(l, i, struct btrfs_inode_ref); diff -Nru btrfs-progs-4.7/super-recover.c btrfs-progs-4.7.1/super-recover.c --- btrfs-progs-4.7/super-recover.c 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/super-recover.c 2016-08-25 17:33:48.000000000 +0000 @@ -279,7 +279,8 @@ } init_recover_superblock(&recover); - ret = btrfs_scan_fs_devices(fd, dname, &recover.fs_devices, 0, 1, 0); + ret = btrfs_scan_fs_devices(fd, dname, &recover.fs_devices, 0, + SBREAD_RECOVER, 0); close(fd); if (ret) { ret = 1; diff -Nru btrfs-progs-4.7/tests/convert-tests/008-readonly-image/test.sh btrfs-progs-4.7.1/tests/convert-tests/008-readonly-image/test.sh --- btrfs-progs-4.7/tests/convert-tests/008-readonly-image/test.sh 1970-01-01 00:00:00.000000000 +0000 +++ 
btrfs-progs-4.7.1/tests/convert-tests/008-readonly-image/test.sh 2016-08-25 17:33:48.000000000 +0000 @@ -0,0 +1,26 @@ +#!/bin/bash +# Check if the converted ext2 image is readonly + +source $TOP/tests/common +source $TOP/tests/common.convert + +setup_root_helper +prepare_test_dev 512M +check_prereq btrfs-convert + +default_mke2fs="mke2fs -t ext4 -b 4096" +convert_test_preamble '' 'readonly image test' 16k "$default_mke2fs" +convert_test_prep_fs $default_mke2fs +run_check_umount_test_dev +convert_test_do_convert +run_check_mount_test_dev + +# It's expected to fail +$SUDO_HELPER dd if=/dev/zero of=$TEST_MNT/ext2_save/image bs=1M count=1 \ + &> /dev/null +if [ $? -ne 1 ]; then + echo "after convert ext2_save/image is not read-only" + exit 1 +fi +run_check_umount_test_dev +convert_test_post_rollback diff -Nru btrfs-progs-4.7/utils.c btrfs-progs-4.7.1/utils.c --- btrfs-progs-4.7/utils.c 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/utils.c 2016-08-25 17:33:48.000000000 +0000 @@ -238,6 +238,9 @@ * * For now sys chunk array will be empty and dev_item is empty too. * They will be re-initialized at temp chunk tree setup. + * + * The superblock signature is not valid, denotes a partially created + * filesystem, needs to be finalized. */ static int setup_temp_super(int fd, struct btrfs_mkfs_config *cfg, u64 root_bytenr, u64 chunk_bytenr) @@ -276,7 +279,7 @@ btrfs_set_super_bytenr(super, cfg->super_bytenr); btrfs_set_super_num_devices(super, 1); - btrfs_set_super_magic(super, BTRFS_MAGIC); + btrfs_set_super_magic(super, BTRFS_MAGIC_PARTIAL); btrfs_set_super_generation(super, 1); btrfs_set_super_root(super, root_bytenr); btrfs_set_super_chunk_root(super, chunk_bytenr); @@ -1004,6 +1007,9 @@ /* * @fs_uuid - if NULL, generates a UUID, returns back the new filesystem UUID + * + * The superblock signature is not valid, denotes a partially created + * filesystem, needs to be finalized. 
*/ int make_btrfs(int fd, struct btrfs_mkfs_config *cfg, struct btrfs_convert_context *cctx) @@ -1064,7 +1070,7 @@ btrfs_set_super_bytenr(&super, cfg->blocks[0]); btrfs_set_super_num_devices(&super, 1); - btrfs_set_super_magic(&super, BTRFS_MAGIC); + btrfs_set_super_magic(&super, BTRFS_MAGIC_PARTIAL); btrfs_set_super_generation(&super, 1); btrfs_set_super_root(&super, cfg->blocks[1]); btrfs_set_super_chunk_root(&super, cfg->blocks[3]); @@ -2272,7 +2278,7 @@ /* scan the initial device */ ret = btrfs_scan_one_device(fd, file, &fs_devices_mnt, - &total_devs, BTRFS_SUPER_INFO_OFFSET, 0); + &total_devs, BTRFS_SUPER_INFO_OFFSET, SBREAD_DEFAULT); is_btrfs = (ret >= 0); /* scan other devices */ @@ -2400,7 +2406,12 @@ ret = 0; disk_super = (struct btrfs_super_block *)buf; - if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) + /* + * Accept devices from the same filesystem, allow partially created + * structures. + */ + if (btrfs_super_magic(disk_super) != BTRFS_MAGIC && + btrfs_super_magic(disk_super) != BTRFS_MAGIC_PARTIAL) goto brelse; if (!memcmp(disk_super->fsid, root->fs_info->super_copy->fsid, @@ -3419,7 +3430,8 @@ continue; } ret = btrfs_scan_one_device(fd, path, &tmp_devices, - &num_devices, BTRFS_SUPER_INFO_OFFSET, 0); + &num_devices, BTRFS_SUPER_INFO_OFFSET, + SBREAD_DEFAULT); if (ret) { error("cannot scan %s: %s", path, strerror(-ret)); close (fd); diff -Nru btrfs-progs-4.7/utils.h btrfs-progs-4.7.1/utils.h --- btrfs-progs-4.7/utils.h 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/utils.h 2016-08-25 17:33:48.000000000 +0000 @@ -311,8 +311,61 @@ const char * const *usagestr); int string_is_numerical(const char *str); +#if DEBUG_VERBOSE_ERROR +#define PRINT_VERBOSE_ERROR fprintf(stderr, "%s:%d:", __FILE__, __LINE__) +#else +#define PRINT_VERBOSE_ERROR +#endif + +#if DEBUG_TRACE_ON_ERROR +#define PRINT_TRACE_ON_ERROR print_trace() +#else +#define PRINT_TRACE_ON_ERROR +#endif + +#if DEBUG_ABORT_ON_ERROR +#define DO_ABORT_ON_ERROR abort() +#else +#define 
DO_ABORT_ON_ERROR +#endif + +#define error(fmt, ...) \ + do { \ + PRINT_TRACE_ON_ERROR; \ + PRINT_VERBOSE_ERROR; \ + __error((fmt), ##__VA_ARGS__); \ + DO_ABORT_ON_ERROR; \ + } while (0) + +#define error_on(cond, fmt, ...) \ + do { \ + if ((cond)) \ + PRINT_TRACE_ON_ERROR; \ + if ((cond)) \ + PRINT_VERBOSE_ERROR; \ + __error_on((cond), (fmt), ##__VA_ARGS__); \ + if ((cond)) \ + DO_ABORT_ON_ERROR; \ + } while (0) + +#define warning(fmt, ...) \ + do { \ + PRINT_TRACE_ON_ERROR; \ + PRINT_VERBOSE_ERROR; \ + __warning((fmt), ##__VA_ARGS__); \ + } while (0) + +#define warning_on(cond, fmt, ...) \ + do { \ + if ((cond)) \ + PRINT_TRACE_ON_ERROR; \ + if ((cond)) \ + PRINT_VERBOSE_ERROR; \ + __warning_on((cond), (fmt), ##__VA_ARGS__); \ + } while (0) + __attribute__ ((format (printf, 1, 2))) -static inline void warning(const char *fmt, ...) +static inline void __warning(const char *fmt, ...) { va_list args; @@ -324,7 +377,7 @@ } __attribute__ ((format (printf, 1, 2))) -static inline void error(const char *fmt, ...) +static inline void __error(const char *fmt, ...) { va_list args; @@ -336,7 +389,7 @@ } __attribute__ ((format (printf, 2, 3))) -static inline int warning_on(int condition, const char *fmt, ...) +static inline int __warning_on(int condition, const char *fmt, ...) { va_list args; @@ -353,7 +406,7 @@ } __attribute__ ((format (printf, 2, 3))) -static inline int error_on(int condition, const char *fmt, ...) +static inline int __error_on(int condition, const char *fmt, ...) 
{ va_list args; diff -Nru btrfs-progs-4.7/version.sh btrfs-progs-4.7.1/version.sh --- btrfs-progs-4.7/version.sh 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/version.sh 2016-08-25 17:33:48.000000000 +0000 @@ -6,7 +6,7 @@ # Copyright 2008, Oracle # Released under the GNU GPLv2 -v="v4.7" +v="v4.7.1" opt=$1 diff -Nru btrfs-progs-4.7/volumes.c btrfs-progs-4.7.1/volumes.c --- btrfs-progs-4.7/volumes.c 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/volumes.c 2016-08-25 17:33:48.000000000 +0000 @@ -251,7 +251,7 @@ int btrfs_scan_one_device(int fd, const char *path, struct btrfs_fs_devices **fs_devices_ret, - u64 *total_devs, u64 super_offset, int super_recover) + u64 *total_devs, u64 super_offset, unsigned sbflags) { struct btrfs_super_block *disk_super; char buf[BTRFS_SUPER_INFO_SIZE]; @@ -259,7 +259,7 @@ u64 devid; disk_super = (struct btrfs_super_block *)buf; - ret = btrfs_read_dev_super(fd, disk_super, super_offset, super_recover); + ret = btrfs_read_dev_super(fd, disk_super, super_offset, sbflags); if (ret < 0) return -EIO; devid = btrfs_stack_device_id(&disk_super->dev_item); diff -Nru btrfs-progs-4.7/volumes.h btrfs-progs-4.7.1/volumes.h --- btrfs-progs-4.7/volumes.h 2016-07-29 13:06:42.000000000 +0000 +++ btrfs-progs-4.7.1/volumes.h 2016-08-25 17:33:48.000000000 +0000 @@ -210,7 +210,7 @@ struct btrfs_device *device); int btrfs_scan_one_device(int fd, const char *path, struct btrfs_fs_devices **fs_devices_ret, - u64 *total_devs, u64 super_offset, int super_recover); + u64 *total_devs, u64 super_offset, unsigned sbflags); int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len); struct list_head *btrfs_scanned_uuids(void); int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,