You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
561 lines
14 KiB
561 lines
14 KiB
/*
|
|
* extents.c --- rebuild extent tree
|
|
*
|
|
* Copyright (C) 2014 Oracle.
|
|
*
|
|
* %Begin-Header%
|
|
* This file may be redistributed under the terms of the GNU Public
|
|
* License, version 2.
|
|
* %End-Header%
|
|
*/
|
|
|
|
#include "config.h"
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
#include <errno.h>
|
|
#include "e2fsck.h"
|
|
#include "problem.h"
|
|
|
|
#undef DEBUG
|
|
#undef DEBUG_SUMMARY
|
|
#undef DEBUG_FREE
|
|
|
|
#define NUM_EXTENTS 341 /* about one ETB' worth of extents */
|
|
|
|
static errcode_t e2fsck_rebuild_extents(e2fsck_t ctx, ext2_ino_t ino);
|
|
|
|
/* Schedule an inode to have its extent tree rebuilt during pass 1E. */
|
|
errcode_t e2fsck_rebuild_extents_later(e2fsck_t ctx, ext2_ino_t ino)
|
|
{
|
|
errcode_t retval = 0;
|
|
|
|
if (!ext2fs_has_feature_extents(ctx->fs->super) ||
|
|
(ctx->options & E2F_OPT_NO) ||
|
|
(ino != EXT2_ROOT_INO && ino < ctx->fs->super->s_first_ino))
|
|
return 0;
|
|
|
|
if (ctx->flags & E2F_FLAG_ALLOC_OK)
|
|
return e2fsck_rebuild_extents(ctx, ino);
|
|
|
|
if (!ctx->inodes_to_rebuild)
|
|
retval = e2fsck_allocate_inode_bitmap(ctx->fs,
|
|
_("extent rebuild inode map"),
|
|
EXT2FS_BMAP64_RBTREE,
|
|
"inodes_to_rebuild",
|
|
&ctx->inodes_to_rebuild);
|
|
if (retval)
|
|
return retval;
|
|
|
|
ext2fs_mark_inode_bitmap2(ctx->inodes_to_rebuild, ino);
|
|
return 0;
|
|
}
|
|
|
|
/* Ask if an inode will have its extents rebuilt during pass 1E. */
|
|
int e2fsck_ino_will_be_rebuilt(e2fsck_t ctx, ext2_ino_t ino)
|
|
{
|
|
if (!ctx->inodes_to_rebuild)
|
|
return 0;
|
|
return ext2fs_test_inode_bitmap2(ctx->inodes_to_rebuild, ino);
|
|
}
|
|
|
|
struct extent_list {
|
|
blk64_t blocks_freed;
|
|
struct ext2fs_extent *extents;
|
|
unsigned int count;
|
|
unsigned int size;
|
|
unsigned int ext_read;
|
|
errcode_t retval;
|
|
ext2_ino_t ino;
|
|
};
|
|
|
|
static errcode_t load_extents(e2fsck_t ctx, struct extent_list *list)
|
|
{
|
|
ext2_filsys fs = ctx->fs;
|
|
ext2_extent_handle_t handle;
|
|
struct ext2fs_extent extent;
|
|
errcode_t retval;
|
|
|
|
retval = ext2fs_extent_open(fs, list->ino, &handle);
|
|
if (retval)
|
|
return retval;
|
|
|
|
retval = ext2fs_extent_get(handle, EXT2_EXTENT_ROOT, &extent);
|
|
if (retval)
|
|
goto out;
|
|
|
|
do {
|
|
if (extent.e_flags & EXT2_EXTENT_FLAGS_SECOND_VISIT)
|
|
goto next;
|
|
|
|
/* Internal node; free it and we'll re-allocate it later */
|
|
if (!(extent.e_flags & EXT2_EXTENT_FLAGS_LEAF)) {
|
|
#if defined(DEBUG) || defined(DEBUG_FREE)
|
|
printf("ino=%d free=%llu bf=%llu\n", list->ino,
|
|
extent.e_pblk, list->blocks_freed + 1);
|
|
#endif
|
|
list->blocks_freed++;
|
|
ext2fs_block_alloc_stats2(fs, extent.e_pblk, -1);
|
|
goto next;
|
|
}
|
|
|
|
list->ext_read++;
|
|
/* Can we attach it to the previous extent? */
|
|
if (list->count) {
|
|
struct ext2fs_extent *last = list->extents +
|
|
list->count - 1;
|
|
blk64_t end = last->e_len + extent.e_len;
|
|
|
|
if (last->e_pblk + last->e_len == extent.e_pblk &&
|
|
last->e_lblk + last->e_len == extent.e_lblk &&
|
|
(last->e_flags & EXT2_EXTENT_FLAGS_UNINIT) ==
|
|
(extent.e_flags & EXT2_EXTENT_FLAGS_UNINIT) &&
|
|
end < (1ULL << 32)) {
|
|
last->e_len += extent.e_len;
|
|
#ifdef DEBUG
|
|
printf("R: ino=%d len=%u\n", list->ino,
|
|
last->e_len);
|
|
#endif
|
|
goto next;
|
|
}
|
|
}
|
|
|
|
/* Do we need to expand? */
|
|
if (list->count == list->size) {
|
|
unsigned int new_size = (list->size + NUM_EXTENTS) *
|
|
sizeof(struct ext2fs_extent);
|
|
retval = ext2fs_resize_mem(0, new_size, &list->extents);
|
|
if (retval)
|
|
goto out;
|
|
list->size += NUM_EXTENTS;
|
|
}
|
|
|
|
/* Add a new extent */
|
|
memcpy(list->extents + list->count, &extent, sizeof(extent));
|
|
#ifdef DEBUG
|
|
printf("R: ino=%d pblk=%llu lblk=%llu len=%u\n", list->ino,
|
|
extent.e_pblk, extent.e_lblk, extent.e_len);
|
|
#endif
|
|
list->count++;
|
|
next:
|
|
retval = ext2fs_extent_get(handle, EXT2_EXTENT_NEXT, &extent);
|
|
} while (retval == 0);
|
|
|
|
out:
|
|
/* Ok if we run off the end */
|
|
if (retval == EXT2_ET_EXTENT_NO_NEXT)
|
|
retval = 0;
|
|
ext2fs_extent_free(handle);
|
|
return retval;
|
|
}
|
|
|
|
static int find_blocks(ext2_filsys fs, blk64_t *blocknr, e2_blkcnt_t blockcnt,
|
|
blk64_t ref_blk EXT2FS_ATTR((unused)),
|
|
int ref_offset EXT2FS_ATTR((unused)), void *priv_data)
|
|
{
|
|
struct extent_list *list = priv_data;
|
|
|
|
/* Internal node? */
|
|
if (blockcnt < 0) {
|
|
#if defined(DEBUG) || defined(DEBUG_FREE)
|
|
printf("ino=%d free=%llu bf=%llu\n", list->ino, *blocknr,
|
|
list->blocks_freed + 1);
|
|
#endif
|
|
list->blocks_freed++;
|
|
ext2fs_block_alloc_stats2(fs, *blocknr, -1);
|
|
return 0;
|
|
}
|
|
|
|
/* Can we attach it to the previous extent? */
|
|
if (list->count) {
|
|
struct ext2fs_extent *last = list->extents +
|
|
list->count - 1;
|
|
blk64_t end = last->e_len + 1;
|
|
|
|
if (last->e_lblk + last->e_len == (__u64) blockcnt &&
|
|
last->e_pblk + last->e_len == *blocknr &&
|
|
end < (1ULL << 32)) {
|
|
last->e_len++;
|
|
#ifdef DEBUG
|
|
printf("R: ino=%d len=%u\n", list->ino, last->e_len);
|
|
#endif
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/* Do we need to expand? */
|
|
if (list->count == list->size) {
|
|
unsigned int new_size = (list->size + NUM_EXTENTS) *
|
|
sizeof(struct ext2fs_extent);
|
|
list->retval = ext2fs_resize_mem(0, new_size, &list->extents);
|
|
if (list->retval)
|
|
return BLOCK_ABORT;
|
|
list->size += NUM_EXTENTS;
|
|
}
|
|
|
|
/* Add a new extent */
|
|
list->extents[list->count].e_pblk = *blocknr;
|
|
list->extents[list->count].e_lblk = blockcnt;
|
|
list->extents[list->count].e_len = 1;
|
|
list->extents[list->count].e_flags = 0;
|
|
#ifdef DEBUG
|
|
printf("R: ino=%d pblk=%llu lblk=%llu len=%u\n", list->ino, *blocknr,
|
|
blockcnt, 1);
|
|
#endif
|
|
list->count++;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static errcode_t rebuild_extent_tree(e2fsck_t ctx, struct extent_list *list,
|
|
ext2_ino_t ino)
|
|
{
|
|
struct ext2_inode_large inode;
|
|
errcode_t retval;
|
|
ext2_extent_handle_t handle;
|
|
unsigned int i, ext_written;
|
|
struct ext2fs_extent *ex, extent;
|
|
blk64_t start_val, delta;
|
|
|
|
list->count = 0;
|
|
list->blocks_freed = 0;
|
|
list->ino = ino;
|
|
list->ext_read = 0;
|
|
e2fsck_read_inode_full(ctx, ino, EXT2_INODE(&inode), sizeof(inode),
|
|
"rebuild_extents");
|
|
|
|
/* Skip deleted inodes and inline data files */
|
|
if (inode.i_links_count == 0 ||
|
|
inode.i_flags & EXT4_INLINE_DATA_FL)
|
|
return 0;
|
|
|
|
/* Collect lblk->pblk mappings */
|
|
if (inode.i_flags & EXT4_EXTENTS_FL) {
|
|
retval = load_extents(ctx, list);
|
|
if (retval)
|
|
goto err;
|
|
goto extents_loaded;
|
|
}
|
|
|
|
retval = ext2fs_block_iterate3(ctx->fs, ino, BLOCK_FLAG_READ_ONLY, 0,
|
|
find_blocks, list);
|
|
if (retval)
|
|
goto err;
|
|
if (list->retval) {
|
|
retval = list->retval;
|
|
goto err;
|
|
}
|
|
|
|
extents_loaded:
|
|
/* Reset extent tree */
|
|
inode.i_flags &= ~EXT4_EXTENTS_FL;
|
|
memset(inode.i_block, 0, sizeof(inode.i_block));
|
|
|
|
/* Make a note of freed blocks */
|
|
quota_data_sub(ctx->qctx, &inode, ino,
|
|
list->blocks_freed * ctx->fs->blocksize);
|
|
retval = ext2fs_iblk_sub_blocks(ctx->fs, EXT2_INODE(&inode),
|
|
list->blocks_freed);
|
|
if (retval)
|
|
goto err;
|
|
|
|
/* Now stuff extents into the file */
|
|
retval = ext2fs_extent_open2(ctx->fs, ino, EXT2_INODE(&inode), &handle);
|
|
if (retval)
|
|
goto err;
|
|
|
|
ext_written = 0;
|
|
start_val = ext2fs_inode_i_blocks(ctx->fs, EXT2_INODE(&inode));
|
|
for (i = 0, ex = list->extents; i < list->count; i++, ex++) {
|
|
memcpy(&extent, ex, sizeof(struct ext2fs_extent));
|
|
extent.e_flags &= EXT2_EXTENT_FLAGS_UNINIT;
|
|
if (extent.e_flags & EXT2_EXTENT_FLAGS_UNINIT) {
|
|
if (extent.e_len > EXT_UNINIT_MAX_LEN) {
|
|
extent.e_len = EXT_UNINIT_MAX_LEN;
|
|
ex->e_pblk += EXT_UNINIT_MAX_LEN;
|
|
ex->e_lblk += EXT_UNINIT_MAX_LEN;
|
|
ex->e_len -= EXT_UNINIT_MAX_LEN;
|
|
ex--;
|
|
i--;
|
|
}
|
|
} else {
|
|
if (extent.e_len > EXT_INIT_MAX_LEN) {
|
|
extent.e_len = EXT_INIT_MAX_LEN;
|
|
ex->e_pblk += EXT_INIT_MAX_LEN;
|
|
ex->e_lblk += EXT_INIT_MAX_LEN;
|
|
ex->e_len -= EXT_INIT_MAX_LEN;
|
|
ex--;
|
|
i--;
|
|
}
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
printf("W: ino=%d pblk=%llu lblk=%llu len=%u\n", ino,
|
|
extent.e_pblk, extent.e_lblk, extent.e_len);
|
|
#endif
|
|
retval = ext2fs_extent_insert(handle, EXT2_EXTENT_INSERT_AFTER,
|
|
&extent);
|
|
if (retval)
|
|
goto err2;
|
|
retval = ext2fs_extent_fix_parents(handle);
|
|
if (retval)
|
|
goto err2;
|
|
ext_written++;
|
|
}
|
|
|
|
delta = ext2fs_inode_i_blocks(ctx->fs, EXT2_INODE(&inode)) - start_val;
|
|
if (delta) {
|
|
if (!ext2fs_has_feature_huge_file(ctx->fs->super) ||
|
|
!(inode.i_flags & EXT4_HUGE_FILE_FL))
|
|
delta <<= 9;
|
|
else
|
|
delta *= ctx->fs->blocksize;
|
|
quota_data_add(ctx->qctx, &inode, ino, delta);
|
|
}
|
|
|
|
#if defined(DEBUG) || defined(DEBUG_SUMMARY)
|
|
printf("rebuild: ino=%d extents=%d->%d\n", ino, list->ext_read,
|
|
ext_written);
|
|
#endif
|
|
e2fsck_write_inode(ctx, ino, EXT2_INODE(&inode), "rebuild_extents");
|
|
|
|
err2:
|
|
ext2fs_extent_free(handle);
|
|
err:
|
|
return retval;
|
|
}
|
|
|
|
/* Rebuild the extents immediately */
|
|
static errcode_t e2fsck_rebuild_extents(e2fsck_t ctx, ext2_ino_t ino)
|
|
{
|
|
struct extent_list list;
|
|
errcode_t err;
|
|
|
|
if (!ext2fs_has_feature_extents(ctx->fs->super) ||
|
|
(ctx->options & E2F_OPT_NO) ||
|
|
(ino != EXT2_ROOT_INO && ino < ctx->fs->super->s_first_ino))
|
|
return 0;
|
|
|
|
e2fsck_read_bitmaps(ctx);
|
|
memset(&list, 0, sizeof(list));
|
|
err = ext2fs_get_mem(sizeof(struct ext2fs_extent) * NUM_EXTENTS,
|
|
&list.extents);
|
|
if (err)
|
|
return err;
|
|
list.size = NUM_EXTENTS;
|
|
err = rebuild_extent_tree(ctx, &list, ino);
|
|
ext2fs_free_mem(&list.extents);
|
|
|
|
return err;
|
|
}
|
|
|
|
static void rebuild_extents(e2fsck_t ctx, const char *pass_name, int pr_header)
|
|
{
|
|
struct problem_context pctx;
|
|
#ifdef RESOURCE_TRACK
|
|
struct resource_track rtrack;
|
|
#endif
|
|
struct extent_list list;
|
|
int first = 1;
|
|
ext2_ino_t ino = 0;
|
|
errcode_t retval;
|
|
|
|
if (!ext2fs_has_feature_extents(ctx->fs->super) ||
|
|
!ext2fs_test_valid(ctx->fs) ||
|
|
ctx->invalid_bitmaps) {
|
|
if (ctx->inodes_to_rebuild)
|
|
ext2fs_free_inode_bitmap(ctx->inodes_to_rebuild);
|
|
ctx->inodes_to_rebuild = NULL;
|
|
}
|
|
|
|
if (ctx->inodes_to_rebuild == NULL)
|
|
return;
|
|
|
|
init_resource_track(&rtrack, ctx->fs->io);
|
|
clear_problem_context(&pctx);
|
|
e2fsck_read_bitmaps(ctx);
|
|
|
|
memset(&list, 0, sizeof(list));
|
|
retval = ext2fs_get_mem(sizeof(struct ext2fs_extent) * NUM_EXTENTS,
|
|
&list.extents);
|
|
list.size = NUM_EXTENTS;
|
|
while (1) {
|
|
retval = ext2fs_find_first_set_inode_bitmap2(
|
|
ctx->inodes_to_rebuild, ino + 1,
|
|
ctx->fs->super->s_inodes_count, &ino);
|
|
if (retval)
|
|
break;
|
|
pctx.ino = ino;
|
|
if (first) {
|
|
fix_problem(ctx, pr_header, &pctx);
|
|
first = 0;
|
|
}
|
|
pctx.errcode = rebuild_extent_tree(ctx, &list, ino);
|
|
if (pctx.errcode) {
|
|
end_problem_latch(ctx, PR_LATCH_OPTIMIZE_EXT);
|
|
fix_problem(ctx, PR_1E_OPTIMIZE_EXT_ERR, &pctx);
|
|
}
|
|
if (ctx->progress && !ctx->progress_fd)
|
|
e2fsck_simple_progress(ctx, "Rebuilding extents",
|
|
100.0 * (float) ino /
|
|
(float) ctx->fs->super->s_inodes_count,
|
|
ino);
|
|
}
|
|
end_problem_latch(ctx, PR_LATCH_OPTIMIZE_EXT);
|
|
|
|
ext2fs_free_inode_bitmap(ctx->inodes_to_rebuild);
|
|
ctx->inodes_to_rebuild = NULL;
|
|
ext2fs_free_mem(&list.extents);
|
|
|
|
print_resource_track(ctx, pass_name, &rtrack, ctx->fs->io);
|
|
}
|
|
|
|
/* Scan a file to see if we should rebuild its extent tree */
|
|
errcode_t e2fsck_check_rebuild_extents(e2fsck_t ctx, ext2_ino_t ino,
|
|
struct ext2_inode *inode,
|
|
struct problem_context *pctx)
|
|
{
|
|
struct extent_tree_info eti;
|
|
struct ext2_extent_info info, top_info;
|
|
struct ext2fs_extent extent;
|
|
ext2_extent_handle_t ehandle;
|
|
ext2_filsys fs = ctx->fs;
|
|
errcode_t retval;
|
|
|
|
/* block map file and we want extent conversion */
|
|
if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
|
|
!(inode->i_flags & EXT4_INLINE_DATA_FL) &&
|
|
(ctx->options & E2F_OPT_CONVERT_BMAP)) {
|
|
return e2fsck_rebuild_extents_later(ctx, ino);
|
|
}
|
|
|
|
if (!(inode->i_flags & EXT4_EXTENTS_FL))
|
|
return 0;
|
|
memset(&eti, 0, sizeof(eti));
|
|
eti.ino = ino;
|
|
|
|
/* Otherwise, go scan the extent tree... */
|
|
retval = ext2fs_extent_open2(fs, ino, inode, &ehandle);
|
|
if (retval)
|
|
return 0;
|
|
|
|
retval = ext2fs_extent_get_info(ehandle, &top_info);
|
|
if (retval)
|
|
goto out;
|
|
|
|
/* Check maximum extent depth */
|
|
pctx->ino = ino;
|
|
pctx->blk = top_info.max_depth;
|
|
pctx->blk2 = ext2fs_max_extent_depth(ehandle);
|
|
if (pctx->blk2 < pctx->blk &&
|
|
fix_problem(ctx, PR_1_EXTENT_BAD_MAX_DEPTH, pctx))
|
|
eti.force_rebuild = 1;
|
|
|
|
/* Can we collect extent tree level stats? */
|
|
pctx->blk = MAX_EXTENT_DEPTH_COUNT;
|
|
if (pctx->blk2 > pctx->blk)
|
|
fix_problem(ctx, PR_1E_MAX_EXTENT_TREE_DEPTH, pctx);
|
|
|
|
/* We need to fix tree depth problems, but the scan isn't a fix. */
|
|
if (ctx->options & E2F_OPT_FIXES_ONLY)
|
|
goto out;
|
|
|
|
retval = ext2fs_extent_get(ehandle, EXT2_EXTENT_ROOT, &extent);
|
|
if (retval)
|
|
goto out;
|
|
|
|
do {
|
|
retval = ext2fs_extent_get_info(ehandle, &info);
|
|
if (retval)
|
|
break;
|
|
|
|
/*
|
|
* If this is the first extent in an extent block that we
|
|
* haven't visited, collect stats on the block.
|
|
*/
|
|
if (info.curr_entry == 1 &&
|
|
!(extent.e_flags & EXT2_EXTENT_FLAGS_SECOND_VISIT) &&
|
|
!eti.force_rebuild) {
|
|
struct extent_tree_level *etl;
|
|
|
|
etl = eti.ext_info + info.curr_level;
|
|
etl->num_extents += info.num_entries;
|
|
etl->max_extents += info.max_entries;
|
|
/*
|
|
* Implementation wart: Splitting extent blocks when
|
|
* appending will leave the old block with one free
|
|
* entry. Therefore unless the node is totally full,
|
|
* pretend that a non-root extent block can hold one
|
|
* fewer entry than it actually does, so that we don't
|
|
* repeatedly rebuild the extent tree.
|
|
*/
|
|
if (info.curr_level &&
|
|
info.num_entries < info.max_entries)
|
|
etl->max_extents--;
|
|
}
|
|
|
|
/* Skip to the end of a block of leaf nodes */
|
|
if (extent.e_flags & EXT2_EXTENT_FLAGS_LEAF) {
|
|
retval = ext2fs_extent_get(ehandle,
|
|
EXT2_EXTENT_LAST_SIB,
|
|
&extent);
|
|
if (retval)
|
|
break;
|
|
}
|
|
|
|
retval = ext2fs_extent_get(ehandle, EXT2_EXTENT_NEXT, &extent);
|
|
} while (retval == 0);
|
|
out:
|
|
ext2fs_extent_free(ehandle);
|
|
return e2fsck_should_rebuild_extents(ctx, pctx, &eti, &top_info);
|
|
}
|
|
|
|
/* Having scanned a file's extent tree, decide if we should rebuild it */
|
|
errcode_t e2fsck_should_rebuild_extents(e2fsck_t ctx,
|
|
struct problem_context *pctx,
|
|
struct extent_tree_info *eti,
|
|
struct ext2_extent_info *info)
|
|
{
|
|
struct extent_tree_level *ei;
|
|
int i, j, op;
|
|
unsigned int extents_per_block;
|
|
|
|
if (eti->force_rebuild)
|
|
goto rebuild;
|
|
|
|
if (ctx->options & E2F_OPT_NOOPT_EXTENTS)
|
|
return 0;
|
|
|
|
extents_per_block = (ctx->fs->blocksize -
|
|
sizeof(struct ext3_extent_header)) /
|
|
sizeof(struct ext3_extent);
|
|
/*
|
|
* If we can consolidate a level or shorten the tree, schedule the
|
|
* extent tree to be rebuilt.
|
|
*/
|
|
for (i = 0, ei = eti->ext_info; i < info->max_depth + 1; i++, ei++) {
|
|
if (ei->max_extents - ei->num_extents > extents_per_block) {
|
|
pctx->blk = i;
|
|
op = PR_1E_CAN_NARROW_EXTENT_TREE;
|
|
goto rebuild;
|
|
}
|
|
for (j = 0; j < i; j++) {
|
|
if (ei->num_extents < eti->ext_info[j].max_extents) {
|
|
pctx->blk = i;
|
|
op = PR_1E_CAN_COLLAPSE_EXTENT_TREE;
|
|
goto rebuild;
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
|
|
rebuild:
|
|
if (eti->force_rebuild || fix_problem(ctx, op, pctx))
|
|
return e2fsck_rebuild_extents_later(ctx, eti->ino);
|
|
return 0;
|
|
}
|
|
|
|
void e2fsck_pass1e(e2fsck_t ctx)
|
|
{
|
|
rebuild_extents(ctx, "Pass 1E", PR_1E_PASS_HEADER);
|
|
}
|