From 857c69dbb644daf8c25f622e8ee86076dcb6da5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bo=20Brant=C3=A9n?= Date: Tue, 18 Feb 2020 11:32:32 +0100 Subject: [PATCH] checksums on extents --- Ext4Fsd/ext4/ext4_extents.c | 5009 +++++++++++++++++----------------- Ext4Fsd/ext4/ext4_xattr.c | 2546 +++++++++-------- Ext4Fsd/include/linux/ext4.h | 8 + 3 files changed, 3754 insertions(+), 3809 deletions(-) diff --git a/Ext4Fsd/ext4/ext4_extents.c b/Ext4Fsd/ext4/ext4_extents.c index e54fd01..cadfc43 100644 --- a/Ext4Fsd/ext4/ext4_extents.c +++ b/Ext4Fsd/ext4/ext4_extents.c @@ -1,2514 +1,2495 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public Licens - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- - */ - -#include "ext2fs.h" -#include "linux\ext4.h" - -#pragma warning(push) -#pragma warning(disable: 4018) -#pragma warning(disable: 4242) -#pragma warning(disable: 4244) - - -/* - * used by extent splitting. - */ -#define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \ - due to ENOSPC */ -#define EXT4_EXT_MARK_UNWRIT1 0x2 /* mark first half unwritten */ -#define EXT4_EXT_MARK_UNWRIT2 0x4 /* mark second half unwritten */ - -#define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */ -#define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */ - -#define CONFIG_EXTENT_TEST -#ifdef CONFIG_EXTENT_TEST - -#define ext4_mark_inode_dirty(icb, handle, n) ext3_mark_inode_dirty(icb, n) -static inline ext4_fsblk_t ext4_inode_to_goal_block(struct inode *inode) -{ - PEXT2_VCB Vcb; - Vcb = inode->i_sb->s_priv; - return (inode->i_ino - 1) / BLOCKS_PER_GROUP; -} - -static ext4_fsblk_t ext4_new_meta_blocks(void *icb, handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, - unsigned int flags, - unsigned long *count, int *errp) -{ - NTSTATUS status; - ULONG blockcnt = (count)?*count:1; - ULONG block = 0; - - status = Ext2NewBlock((PEXT2_IRP_CONTEXT)icb, - inode->i_sb->s_priv, - 0, goal, - &block, - &blockcnt); - if (count) - *count = blockcnt; - - if (!NT_SUCCESS(status)) { - *errp = Ext2LinuxError(status); - return 0; - } - inode->i_blocks += (blockcnt * (inode->i_sb->s_blocksize >> 9)); - return block; -} - -static void ext4_free_blocks(void *icb, handle_t *handle, struct inode *inode, void *fake, - ext4_fsblk_t block, int count, int flags) -{ - Ext2FreeBlock((PEXT2_IRP_CONTEXT)icb, inode->i_sb->s_priv, block, count); - inode->i_blocks -= count * (inode->i_sb->s_blocksize >> 9); - return; -} - -static inline void ext_debug(char *str, ...) -{ -} -#if TRUE -#define EXT4_ERROR_INODE(inode, str, ...) 
do { \ - DbgPrint("inode[%p]: "##str "\n", inode, __VA_ARGS__); \ - } while(0) -#else -#define EXT4_ERROR_INODE -#endif - -#define ext4_std_error(s, err) -#define assert ASSERT - -#endif - -/* - * Return the right sibling of a tree node(either leaf or indexes node) - */ - -#define EXT_MAX_BLOCKS 0xffffffff - - -static inline int ext4_ext_space_block(struct inode *inode, int check) -{ - int size; - - size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) - / sizeof(struct ext4_extent); -#ifdef AGGRESSIVE_TEST - if (!check && size > 6) - size = 6; -#endif - return size; -} - -static inline int ext4_ext_space_block_idx(struct inode *inode, int check) -{ - int size; - - size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) - / sizeof(struct ext4_extent_idx); -#ifdef AGGRESSIVE_TEST - if (!check && size > 5) - size = 5; -#endif - return size; -} - -static inline int ext4_ext_space_root(struct inode *inode, int check) -{ - int size; - - size = sizeof(EXT4_I(inode)->i_block); - size -= sizeof(struct ext4_extent_header); - size /= sizeof(struct ext4_extent); -#ifdef AGGRESSIVE_TEST - if (!check && size > 3) - size = 3; -#endif - return size; -} - -static inline int ext4_ext_space_root_idx(struct inode *inode, int check) -{ - int size; - - size = sizeof(EXT4_I(inode)->i_block); - size -= sizeof(struct ext4_extent_header); - size /= sizeof(struct ext4_extent_idx); -#ifdef AGGRESSIVE_TEST - if (!check && size > 4) - size = 4; -#endif - return size; -} - -static int -ext4_ext_max_entries(struct inode *inode, int depth) -{ - int max; - - if (depth == ext_depth(inode)) { - if (depth == 0) - max = ext4_ext_space_root(inode, 1); - else - max = ext4_ext_space_root_idx(inode, 1); - } else { - if (depth == 0) - max = ext4_ext_space_block(inode, 1); - else - max = ext4_ext_space_block_idx(inode, 1); - } - - return max; -} - -static int __ext4_ext_check(const char *function, unsigned int line, - struct inode *inode, - struct ext4_extent_header *eh, int depth, - ext4_fsblk_t pblk); - -/* - * read_extent_tree_block: - * Get a buffer_head by extents_bread, and read fresh data from the storage. 
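Editorial aside (not part of the patch): the ext4_ext_space_block()/ext4_ext_space_block_idx() helpers above reduce to one formula, (block_size - header) / entry_size, since an extent block holds a 12-byte header followed by packed 12-byte entries. A minimal userspace sketch of that arithmetic, assuming a 4096-byte block and simplified stand-in layouts:

#include <stdint.h>
#include <stdio.h>

struct hdr { uint16_t magic, entries, max, depth; uint32_t generation; };              /* 12 bytes, like ext4_extent_header */
struct ext { uint32_t ee_block; uint16_t ee_len, ee_start_hi; uint32_t ee_start_lo; }; /* 12 bytes */
struct idx { uint32_t ei_block, ei_leaf_lo; uint16_t ei_leaf_hi, ei_unused; };         /* 12 bytes */

int main(void)
{
    size_t block_size = 4096; /* assumed fs block size */

    /* same shape as ext4_ext_space_block()/..._idx(); note the helpers in
     * this file do not reserve room for the 4-byte checksum tail */
    printf("extents per leaf block:  %zu\n",
           (block_size - sizeof(struct hdr)) / sizeof(struct ext));
    printf("indexes per index block: %zu\n",
           (block_size - sizeof(struct hdr)) / sizeof(struct idx));
    return 0;
}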
- */ -static struct buffer_head * -__read_extent_tree_block(const char *function, unsigned int line, - struct inode *inode, ext4_fsblk_t pblk, int depth, - int flags) -{ - struct buffer_head *bh; - int err; - - bh = extents_bread(inode->i_sb, pblk); - if (!bh) - return ERR_PTR(-ENOMEM); - - if (!buffer_uptodate(bh)) { - err = -EIO; - goto errout; - } - if (buffer_verified(bh)) - return bh; - err = __ext4_ext_check(function, line, inode, - ext_block_hdr(bh), depth, pblk); - if (err) - goto errout; - set_buffer_verified(bh); - return bh; -errout: - extents_brelse(bh); - return ERR_PTR(err); - -} - -#define read_extent_tree_block(inode, pblk, depth, flags) \ - __read_extent_tree_block("", __LINE__, (inode), (pblk), \ - (depth), (flags)) - -#define ext4_ext_check(inode, eh, depth, pblk) \ - __ext4_ext_check("", __LINE__, (inode), (eh), (depth), (pblk)) - -int ext4_ext_check_inode(struct inode *inode) -{ - return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0); -} - -static uint32_t ext4_ext_block_csum(struct inode *inode, - struct ext4_extent_header *eh) -{ - /*return ext4_crc32c(inode->i_csum, eh, EXT4_EXTENT_TAIL_OFFSET(eh));*/ - return 0; -} - -static void ext4_extent_block_csum_set(struct inode *inode, - struct ext4_extent_header *eh) -{ - struct ext4_extent_tail *tail; - - tail = find_ext4_extent_tail(eh); - tail->et_checksum = ext4_ext_block_csum( - inode, eh); -} - -static int ext4_split_extent_at(void *icb, - handle_t *handle, - struct inode *inode, - struct ext4_ext_path **ppath, - ext4_lblk_t split, - int split_flag, - int flags); - -static inline int -ext4_force_split_extent_at(void *icb, handle_t *handle, struct inode *inode, - struct ext4_ext_path **ppath, ext4_lblk_t lblk, - int nofail) -{ - struct ext4_ext_path *path = *ppath; - int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext); - - return ext4_split_extent_at(icb, handle, inode, ppath, lblk, unwritten ? - EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0, - EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO | - (nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL:0)); -} - -/* - * could return: - * - EROFS - * - ENOMEM - */ - -static int ext4_ext_get_access(void *icb, handle_t *handle, struct inode *inode, - struct ext4_ext_path *path) -{ - if (path->p_bh) { - /* path points to block */ - - return ext4_journal_get_write_access(icb, handle, path->p_bh); - - } - /* path points to leaf/index in inode body */ - /* we use in-core data, no need to protect them */ - return 0; -} - - -static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, - struct ext4_ext_path *path, - ext4_lblk_t block) -{ - if (path) { - int depth = path->p_depth; - struct ext4_extent *ex; - - /* - * Try to predict block placement assuming that we are - * filling in a file which will eventually be - * non-sparse --- i.e., in the case of libbfd writing - * an ELF object sections out-of-order but in a way - * the eventually results in a contiguous object or - * executable file, or some database extending a table - * space file. However, this is actually somewhat - * non-ideal if we are writing a sparse file such as - * qemu or KVM writing a raw image file that is going - * to stay fairly sparse, since it will end up - * fragmenting the file system's free space. Maybe we - * should have some hueristics or some way to allow - * userspace to pass a hint to file system, - * especially if the latter case turns out to be - * common. 
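The code that follows implements the prediction described above by projecting the requested logical block onto the nearest extent's physical run. A small editorial sketch of just that projection (the typedefs are local stand-ins):

#include <stdint.h>

typedef uint32_t lblk_t;   /* logical block in the file */
typedef uint64_t fsblk_t;  /* physical block on disk    */

static fsblk_t goal_from_extent(lblk_t want, lblk_t ext_block, fsblk_t ext_pblk)
{
    /* blocks after the extent continue its physical run; blocks before
     * it back up from it (same arithmetic as ext4_ext_find_goal()) */
    return (want > ext_block) ? ext_pblk + (want - ext_block)
                              : ext_pblk - (ext_block - want);
}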
- */ - ex = path[depth].p_ext; - if (ex) { - ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex); - ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block); - - if (block > ext_block) - return ext_pblk + (block - ext_block); - else - return ext_pblk - (ext_block - block); - } - - /* it looks like index is empty; - * try to find starting block from index itself */ - if (path[depth].p_bh) - return path[depth].p_bh->b_blocknr; - } - - /* OK. use inode's group */ - return ext4_inode_to_goal_block(inode); -} - -/* - * Allocation for a meta data block - */ -static ext4_fsblk_t -ext4_ext_new_meta_block(void *icb, handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, - struct ext4_extent *ex, int *err, unsigned int flags) -{ - ext4_fsblk_t goal, newblock; - - goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); - newblock = ext4_new_meta_blocks(icb, handle, inode, goal, flags, - NULL, err); - return newblock; -} - -int __ext4_ext_dirty(const char *where, unsigned int line, - void *icb, handle_t *handle, - struct inode *inode, - struct ext4_ext_path *path) -{ - int err; - - if (path->p_bh) { - ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh)); - /* path points to block */ - err = __ext4_handle_dirty_metadata(where, line, icb, handle, inode, path->p_bh); - } else { - /* path points to leaf/index in inode body */ - err = ext4_mark_inode_dirty(icb, handle, inode); - } - return err; -} - -void ext4_ext_drop_refs(struct ext4_ext_path *path) -{ - int depth, i; - - if (!path) - return; - depth = path->p_depth; - for (i = 0; i <= depth; i++, path++) - if (path->p_bh) { - extents_brelse(path->p_bh); - path->p_bh = NULL; - } -} - -/* - * Check that whether the basic information inside the extent header - * is correct or not. - */ -static int __ext4_ext_check(const char *function, unsigned int line, - struct inode *inode, - struct ext4_extent_header *eh, int depth, - ext4_fsblk_t pblk) -{ - struct ext4_extent_tail *tail; - const char *error_msg; - int max = 0; - - if (eh->eh_magic != EXT4_EXT_MAGIC) { - error_msg = "invalid magic"; - goto corrupted; - } - if (le16_to_cpu(eh->eh_depth) != depth) { - error_msg = "unexpected eh_depth"; - goto corrupted; - } - if (eh->eh_max == 0) { - error_msg = "invalid eh_max"; - goto corrupted; - } - if (eh->eh_entries > eh->eh_max) { - error_msg = "invalid eh_entries"; - goto corrupted; - } - - tail = find_ext4_extent_tail(eh); - if (tail->et_checksum != ext4_ext_block_csum(inode, eh)) { - ext_debug("Warning: extent checksum damaged? tail->et_checksum = " - "%lu, ext4_ext_block_csum = %lu\n", - tail->et_checksum, ext4_ext_block_csum(inode, eh)); - } - - return 0; - -corrupted: - ext_debug("corrupted! 
%s\n", error_msg); - return -EIO; -} - -/* - * ext4_ext_binsearch_idx: - * binary search for the closest index of the given block - * the header must be checked before calling this - */ -static void -ext4_ext_binsearch_idx(struct inode *inode, - struct ext4_ext_path *path, ext4_lblk_t block) -{ - struct ext4_extent_header *eh = path->p_hdr; - struct ext4_extent_idx *r, *l, *m; - - ext_debug("binsearch for %u(idx): ", block); - - l = EXT_FIRST_INDEX(eh) + 1; - r = EXT_LAST_INDEX(eh); - while (l <= r) { - m = l + (r - l) / 2; - if (block < (m->ei_block)) - r = m - 1; - else - l = m + 1; - ext_debug("%p(%u):%p(%u):%p(%u) ", l, (l->ei_block), - m, (m->ei_block), - r, (r->ei_block)); - } - - path->p_idx = l - 1; - ext_debug(" -> %u->%lld ", (path->p_idx->ei_block), - ext4_idx_pblock(path->p_idx)); - -#ifdef CHECK_BINSEARCH - { - struct ext4_extent_idx *chix, *ix; - int k; - - chix = ix = EXT_FIRST_INDEX(eh); - for (k = 0; k < (eh->eh_entries); k++, ix++) { - if (k != 0 && - (ix->ei_block) <= (ix[-1].ei_block)) { - printk(KERN_DEBUG "k=%d, ix=0x%p, " - "first=0x%p\n", k, - ix, EXT_FIRST_INDEX(eh)); - printk(KERN_DEBUG "%u <= %u\n", - (ix->ei_block), - (ix[-1].ei_block)); - } - BUG_ON(k && (ix->ei_block) - <= (ix[-1].ei_block)); - if (block < (ix->ei_block)) - break; - chix = ix; - } - BUG_ON(chix != path->p_idx); - } -#endif - -} - -/* - * ext4_ext_binsearch: - * binary search for closest extent of the given block - * the header must be checked before calling this - */ -static void -ext4_ext_binsearch(struct inode *inode, - struct ext4_ext_path *path, ext4_lblk_t block) -{ - struct ext4_extent_header *eh = path->p_hdr; - struct ext4_extent *r, *l, *m; - - if (eh->eh_entries == 0) { - /* - * this leaf is empty: - * we get such a leaf in split/add case - */ - return; - } - - ext_debug("binsearch for %u: ", block); - - l = EXT_FIRST_EXTENT(eh) + 1; - r = EXT_LAST_EXTENT(eh); - - while (l <= r) { - m = l + (r - l) / 2; - if (block < m->ee_block) - r = m - 1; - else - l = m + 1; - ext_debug("%p(%u):%p(%u):%p(%u) ", l, l->ee_block, - m, (m->ee_block), - r, (r->ee_block)); - } - - path->p_ext = l - 1; - ext_debug(" -> %d:%llu:[%d]%d ", - (path->p_ext->ee_block), - ext4_ext_pblock(path->p_ext), - ext4_ext_is_unwritten(path->p_ext), - ext4_ext_get_actual_len(path->p_ext)); - -#ifdef CHECK_BINSEARCH - { - struct ext4_extent *chex, *ex; - int k; - - chex = ex = EXT_FIRST_EXTENT(eh); - for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) { - BUG_ON(k && (ex->ee_block) - <= (ex[-1].ee_block)); - if (block < (ex->ee_block)) - break; - chex = ex; - } - BUG_ON(chex != path->p_ext); - } -#endif - -} - -#ifdef EXT_DEBUG -static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) -{ - int k, l = path->p_depth; - - ext_debug("path:"); - for (k = 0; k <= l; k++, path++) { - if (path->p_idx) { - ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), - ext4_idx_pblock(path->p_idx)); - } else if (path->p_ext) { - ext_debug(" %d:[%d]%d:%llu ", - le32_to_cpu(path->p_ext->ee_block), - ext4_ext_is_unwritten(path->p_ext), - ext4_ext_get_actual_len(path->p_ext), - ext4_ext_pblock(path->p_ext)); - } else - ext_debug(" []"); - } - ext_debug("\n"); -} - -static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) -{ - int depth = ext_depth(inode); - struct ext4_extent_header *eh; - struct ext4_extent *ex; - int i; - - if (!path) - return; - - eh = path[depth].p_hdr; - ex = EXT_FIRST_EXTENT(eh); - - ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino); - - for (i = 0; 
i < le16_to_cpu(eh->eh_entries); i++, ex++) { - ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), - ext4_ext_is_unwritten(ex), - ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex)); - } - ext_debug("\n"); -} - -static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path, - ext4_fsblk_t newblock, int level) -{ - int depth = ext_depth(inode); - struct ext4_extent *ex; - - if (depth != level) { - struct ext4_extent_idx *idx; - idx = path[level].p_idx; - while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) { - ext_debug("%d: move %d:%llu in new index %llu\n", level, - le32_to_cpu(idx->ei_block), - ext4_idx_pblock(idx), - newblock); - idx++; - } - - return; - } - - ex = path[depth].p_ext; - while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) { - ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", - le32_to_cpu(ex->ee_block), - ext4_ext_pblock(ex), - ext4_ext_is_unwritten(ex), - ext4_ext_get_actual_len(ex), - newblock); - ex++; - } -} - -#else -#define ext4_ext_show_path(inode, path) -#define ext4_ext_show_leaf(inode, path) -#define ext4_ext_show_move(inode, path, newblock, level) -#endif - -struct ext4_ext_path * -ext4_find_extent(struct inode *inode, ext4_lblk_t block, - struct ext4_ext_path **orig_path, int flags) -{ - struct ext4_extent_header *eh; - struct buffer_head *bh; - struct ext4_ext_path *path = orig_path ? *orig_path : NULL; - short int depth, i, ppos = 0; - int ret; - - eh = ext_inode_hdr(inode); - depth = ext_depth(inode); - - if (path) { - ext4_ext_drop_refs(path); - if (depth > path[0].p_maxdepth) { - kfree(path); - *orig_path = path = NULL; - } - } - if (!path) { - /* account possible depth increase */ - path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2), - GFP_NOFS); - if (unlikely(!path)) - return ERR_PTR(-ENOMEM); - path[0].p_maxdepth = depth + 1; - } - path[0].p_hdr = eh; - path[0].p_bh = NULL; - - i = depth; - /* walk through the tree */ - while (i) { - ext_debug("depth %d: num %d, max %d\n", - ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); - - ext4_ext_binsearch_idx(inode, path + ppos, block); - path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx); - path[ppos].p_depth = i; - path[ppos].p_ext = NULL; - - bh = read_extent_tree_block(inode, path[ppos].p_block, --i, - flags); - if (unlikely(IS_ERR(bh))) { - ret = PTR_ERR(bh); - goto err; - } - - eh = ext_block_hdr(bh); - ppos++; - if (unlikely(ppos > depth)) { - extents_brelse(bh); - EXT4_ERROR_INODE(inode, - "ppos %d > depth %d", ppos, depth); - ret = -EIO; - goto err; - } - path[ppos].p_bh = bh; - path[ppos].p_hdr = eh; - } - - path[ppos].p_depth = i; - path[ppos].p_ext = NULL; - path[ppos].p_idx = NULL; - - /* find extent */ - ext4_ext_binsearch(inode, path + ppos, block); - /* if not an empty leaf */ - if (path[ppos].p_ext) - path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext); - - ext4_ext_show_path(inode, path); - - return path; - -err: - ext4_ext_drop_refs(path); - if (path) { - kfree(path); - if (orig_path) - *orig_path = NULL; - } - return ERR_PTR(ret); -} - -/* - * ext4_ext_insert_index: - * insert new index [@logical;@ptr] into the block at @curp; - * check where to insert: before @curp or after @curp - */ -static int ext4_ext_insert_index(void *icb, handle_t *handle, struct inode *inode, - struct ext4_ext_path *curp, - int logical, ext4_fsblk_t ptr) -{ - struct ext4_extent_idx *ix; - int len, err; - - err = ext4_ext_get_access(icb, handle, inode, curp); - if (err) - return err; - - if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) { - EXT4_ERROR_INODE(inode, - 
"logical %d == ei_block %d!", - logical, le32_to_cpu(curp->p_idx->ei_block)); - return -EIO; - } - - if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries) - >= le16_to_cpu(curp->p_hdr->eh_max))) { - EXT4_ERROR_INODE(inode, - "eh_entries %d >= eh_max %d!", - le16_to_cpu(curp->p_hdr->eh_entries), - le16_to_cpu(curp->p_hdr->eh_max)); - return -EIO; - } - - if (logical > le32_to_cpu(curp->p_idx->ei_block)) { - /* insert after */ - ext_debug("insert new index %d after: %llu\n", logical, ptr); - ix = curp->p_idx + 1; - } else { - /* insert before */ - ext_debug("insert new index %d before: %llu\n", logical, ptr); - ix = curp->p_idx; - } - - len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1; - BUG_ON(len < 0); - if (len > 0) { - ext_debug("insert new index %d: " - "move %d indices from 0x%p to 0x%p\n", - logical, len, ix, ix + 1); - memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx)); - } - - if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) { - EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!"); - return -EIO; - } - - ix->ei_block = cpu_to_le32(logical); - ext4_idx_store_pblock(ix, ptr); - le16_add_cpu(&curp->p_hdr->eh_entries, 1); - - if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) { - EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!"); - return -EIO; - } - - err = ext4_ext_dirty(icb, handle, inode, curp); - ext4_std_error(inode->i_sb, err); - - return err; -} - -/* - * ext4_ext_split: - * inserts new subtree into the path, using free index entry - * at depth @at: - * - allocates all needed blocks (new leaf and all intermediate index blocks) - * - makes decision where to split - * - moves remaining extents and index entries (right to the split point) - * into the newly allocated blocks - * - initializes subtree - */ -static int ext4_ext_split(void *icb, handle_t *handle, struct inode *inode, - unsigned int flags, - struct ext4_ext_path *path, - struct ext4_extent *newext, int at) -{ - struct buffer_head *bh = NULL; - int depth = ext_depth(inode); - struct ext4_extent_header *neh; - struct ext4_extent_idx *fidx; - int i = at, k, m, a; - ext4_fsblk_t newblock, oldblock; - __le32 border; - ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */ - int err = 0; - - /* make decision: where to split? */ - /* FIXME: now decision is simplest: at current extent */ - - /* if current leaf will be split, then we should use - * border from split point */ - if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) { - EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!"); - return -EIO; - } - if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { - border = path[depth].p_ext[1].ee_block; - ext_debug("leaf will be split." - " next leaf starts at %d\n", - le32_to_cpu(border)); - } else { - border = newext->ee_block; - ext_debug("leaf will be added." - " next leaf starts at %d\n", - le32_to_cpu(border)); - } - - /* - * If error occurs, then we break processing - * and mark filesystem read-only. index won't - * be inserted and tree will be in consistent - * state. Next mount will repair buffers too. - */ - - /* - * Get array to track all allocated blocks. - * We need this to handle errors and free blocks - * upon them. 
- */ - ablocks = kzalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS); - if (!ablocks) - return -ENOMEM; - - /* allocate all needed blocks */ - ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); - for (a = 0; a < depth - at; a++) { - newblock = ext4_ext_new_meta_block(icb, handle, inode, path, - newext, &err, flags); - if (newblock == 0) - goto cleanup; - ablocks[a] = newblock; - } - - /* initialize new leaf */ - newblock = ablocks[--a]; - if (unlikely(newblock == 0)) { - EXT4_ERROR_INODE(inode, "newblock == 0!"); - err = -EIO; - goto cleanup; - } - bh = extents_bwrite(inode->i_sb, newblock); - if (unlikely(!bh)) { - err = -ENOMEM; - goto cleanup; - } - - err = ext4_journal_get_create_access(icb, handle, bh); - if (err) - goto cleanup; - - neh = ext_block_hdr(bh); - neh->eh_entries = 0; - neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); - neh->eh_magic = cpu_to_le16(EXT4_EXT_MAGIC); - neh->eh_depth = 0; - - /* move remainder of path[depth] to the new leaf */ - if (unlikely(path[depth].p_hdr->eh_entries != - path[depth].p_hdr->eh_max)) { - EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!", - path[depth].p_hdr->eh_entries, - path[depth].p_hdr->eh_max); - err = -EIO; - goto cleanup; - } - /* start copy from next extent */ - m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++; - ext4_ext_show_move(inode, path, newblock, depth); - if (m) { - struct ext4_extent *ex; - ex = EXT_FIRST_EXTENT(neh); - memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m); - le16_add_cpu(&neh->eh_entries, m); - } - - ext4_extent_block_csum_set(inode, neh); - set_buffer_uptodate(bh); - - err = ext4_handle_dirty_metadata(icb, handle, inode, bh); - if (err) - goto cleanup; - extents_brelse(bh); - bh = NULL; - - /* correct old leaf */ - if (m) { - err = ext4_ext_get_access(icb, handle, inode, path + depth); - if (err) - goto cleanup; - le16_add_cpu(&path[depth].p_hdr->eh_entries, -m); - err = ext4_ext_dirty(icb, handle, inode, path + depth); - if (err) - goto cleanup; - - } - - /* create intermediate indexes */ - k = depth - at - 1; - if (unlikely(k < 0)) { - EXT4_ERROR_INODE(inode, "k %d < 0!", k); - err = -EIO; - goto cleanup; - } - if (k) - ext_debug("create %d intermediate indices\n", k); - /* insert new index into current index block */ - /* current depth stored in i var */ - i = depth - 1; - while (k--) { - oldblock = newblock; - newblock = ablocks[--a]; - bh = extents_bwrite(inode->i_sb, newblock); - if (unlikely(!bh)) { - err = -ENOMEM; - goto cleanup; - } - - err = ext4_journal_get_create_access(icb, handle, bh); - if (err) - goto cleanup; - - neh = ext_block_hdr(bh); - neh->eh_entries = cpu_to_le16(1); - neh->eh_magic = cpu_to_le16(EXT4_EXT_MAGIC); - neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); - neh->eh_depth = cpu_to_le16(depth - i); - fidx = EXT_FIRST_INDEX(neh); - fidx->ei_block = border; - ext4_idx_store_pblock(fidx, oldblock); - - ext_debug("int.index at %d (block %llu): %u -> %llu\n", - i, newblock, le32_to_cpu(border), oldblock); - - /* move remainder of path[i] to the new index block */ - if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) != - EXT_LAST_INDEX(path[i].p_hdr))) { - EXT4_ERROR_INODE(inode, - "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!", - le32_to_cpu(path[i].p_ext->ee_block)); - err = -EIO; - goto cleanup; - } - /* start copy indexes */ - m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++; - ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, - EXT_MAX_INDEX(path[i].p_hdr)); - ext4_ext_show_move(inode, path, newblock, i); - if (m) { - 
memmove(++fidx, path[i].p_idx, - sizeof(struct ext4_extent_idx) * m); - le16_add_cpu(&neh->eh_entries, m); - } - ext4_extent_block_csum_set(inode, neh); - set_buffer_uptodate(bh); - - err = ext4_handle_dirty_metadata(icb, handle, inode, bh); - if (err) - goto cleanup; - extents_brelse(bh); - bh = NULL; - - /* correct old index */ - if (m) { - err = ext4_ext_get_access(icb, handle, inode, path + i); - if (err) - goto cleanup; - le16_add_cpu(&path[i].p_hdr->eh_entries, -m); - err = ext4_ext_dirty(icb, handle, inode, path + i); - if (err) - goto cleanup; - } - - i--; - } - - /* insert new index */ - err = ext4_ext_insert_index(icb, handle, inode, path + at, - le32_to_cpu(border), newblock); - -cleanup: - if (bh) - extents_brelse(bh); - - if (err) { - /* free all allocated blocks in error case */ - for (i = 0; i < depth; i++) { - if (!ablocks[i]) - continue; - ext4_free_blocks(icb, handle, inode, NULL, ablocks[i], 1, - EXT4_FREE_BLOCKS_METADATA); - } - } - kfree(ablocks); - - return err; -} - -/* - * ext4_ext_grow_indepth: - * implements tree growing procedure: - * - allocates new block - * - moves top-level data (index block or leaf) into the new block - * - initializes new top-level, creating index that points to the - * just created block - */ -static int ext4_ext_grow_indepth(void *icb, handle_t *handle, struct inode *inode, - unsigned int flags) -{ - struct ext4_extent_header *neh; - struct buffer_head *bh; - ext4_fsblk_t newblock, goal = 0; - int err = 0; - - /* Try to prepend new index to old one */ - if (ext_depth(inode)) - goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode))); - goal = ext4_inode_to_goal_block(inode); - newblock = ext4_new_meta_blocks(icb, handle, inode, goal, flags, - NULL, &err); - if (newblock == 0) - return err; - - bh = extents_bwrite(inode->i_sb, newblock); - if (!bh) - return -ENOMEM; - - err = ext4_journal_get_create_access(icb, handle, bh); - if (err) - goto out; - - /* move top-level index/leaf into new block */ - memmove(bh->b_data, EXT4_I(inode)->i_block, - sizeof(EXT4_I(inode)->i_block)); - - /* set size of new block */ - neh = ext_block_hdr(bh); - /* old root could have indexes or leaves - * so calculate e_max right way */ - if (ext_depth(inode)) - neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); - else - neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); - neh->eh_magic = cpu_to_le16(EXT4_EXT_MAGIC); - ext4_extent_block_csum_set(inode, neh); - set_buffer_uptodate(bh); - - err = ext4_handle_dirty_metadata(icb, handle, inode, bh); - if (err) - goto out; - - /* Update top-level index: num,max,pointer */ - neh = ext_inode_hdr(inode); - neh->eh_entries = cpu_to_le16(1); - ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock); - if (neh->eh_depth == 0) { - /* Root extent block becomes index block */ - neh->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0)); - EXT_FIRST_INDEX(neh)->ei_block = - EXT_FIRST_EXTENT(neh)->ee_block; - } - ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", - (neh->eh_entries), (neh->eh_max), - (EXT_FIRST_INDEX(neh)->ei_block), - ext4_idx_pblock(EXT_FIRST_INDEX(neh))); - - le16_add_cpu(&neh->eh_depth, 1); - ext4_mark_inode_dirty(icb, handle, inode); -out: - extents_brelse(bh); - - return err; -} - -/* - * ext4_ext_create_new_leaf: - * finds empty index and adds new leaf. - * if no free index is found, then it requests in-depth growing. 
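ext4_ext_grow_indepth() above is the fallback when no index level has a free slot: the root's entries move verbatim into a freshly allocated block, and the root is rewritten as a one-entry index over it. A schematic editorial sketch, assuming the 60-byte in-inode root area and stand-in header fields:

#include <stdint.h>
#include <string.h>

struct hdr { uint16_t magic, entries, max, depth; uint32_t generation; };

/* root: the in-inode i_block area; blk: a freshly allocated fs block */
static void grow_in_depth(uint8_t root[60], uint8_t *blk, uint16_t blk_max)
{
    struct hdr *rh = (struct hdr *)root;
    struct hdr *nh = (struct hdr *)blk;

    memcpy(blk, root, 60);   /* old top level drops one level down       */
    nh->max = blk_max;       /* a full block holds far more entries      */

    rh->entries = 1;         /* root now carries a single index entry... */
    rh->depth  += 1;         /* ...and the tree is one level taller      */
    /* the real code also stores the new block's address in that entry
     * and recomputes eh_max for the root */
}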
- */ -static int ext4_ext_create_new_leaf(void *icb, handle_t *handle, struct inode *inode, - unsigned int mb_flags, - unsigned int gb_flags, - struct ext4_ext_path **ppath, - struct ext4_extent *newext) -{ - struct ext4_ext_path *path = *ppath; - struct ext4_ext_path *curp; - int depth, i, err = 0; - -repeat: - i = depth = ext_depth(inode); - - /* walk up to the tree and look for free index entry */ - curp = path + depth; - while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { - i--; - curp--; - } - - /* we use already allocated block for index block, - * so subsequent data blocks should be contiguous */ - if (EXT_HAS_FREE_INDEX(curp)) { - /* if we found index with free entry, then use that - * entry: create all needed subtree and add new leaf */ - err = ext4_ext_split(icb, handle, inode, mb_flags, path, newext, i); - if (err) - goto out; - - /* refill path */ - path = ext4_find_extent(inode, - (ext4_lblk_t)le32_to_cpu(newext->ee_block), - ppath, gb_flags); - if (IS_ERR(path)) - err = PTR_ERR(path); - } else { - /* tree is full, time to grow in depth */ - err = ext4_ext_grow_indepth(icb, handle, inode, mb_flags); - if (err) - goto out; - - /* refill path */ - path = ext4_find_extent(inode, - (ext4_lblk_t)le32_to_cpu(newext->ee_block), - ppath, gb_flags); - if (IS_ERR(path)) { - err = PTR_ERR(path); - goto out; - } - - /* - * only first (depth 0 -> 1) produces free space; - * in all other cases we have to split the grown tree - */ - depth = ext_depth(inode); - if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { - /* now we need to split */ - goto repeat; - } - } - -out: - return err; -} - -/* - * search the closest allocated block to the left for *logical - * and returns it at @logical + it's physical address at @phys - * if *logical is the smallest allocated block, the function - * returns 0 at @phys - * return value contains 0 (success) or error code - */ -static int ext4_ext_search_left(struct inode *inode, - struct ext4_ext_path *path, - ext4_lblk_t *logical, ext4_fsblk_t *phys) -{ - struct ext4_extent_idx *ix; - struct ext4_extent *ex; - int depth, ee_len; - - if (unlikely(path == NULL)) { - EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical); - return -EIO; - } - depth = path->p_depth; - *phys = 0; - - if (depth == 0 && path->p_ext == NULL) - return 0; - - /* usually extent in the path covers blocks smaller - * then *logical, but it can be that extent is the - * first one in the file */ - - ex = path[depth].p_ext; - ee_len = ext4_ext_get_actual_len(ex); - if (*logical < le32_to_cpu(ex->ee_block)) { - if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) { - EXT4_ERROR_INODE(inode, - "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!", - *logical, le32_to_cpu(ex->ee_block)); - return -EIO; - } - while (--depth >= 0) { - ix = path[depth].p_idx; - if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { - EXT4_ERROR_INODE(inode, - "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!", - ix != NULL ? le32_to_cpu(ix->ei_block) : 0, - EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ? 
- le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0, - depth); - return -EIO; - } - } - return 0; - } - - if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) { - EXT4_ERROR_INODE(inode, - "logical %d < ee_block %d + ee_len %d!", - *logical, le32_to_cpu(ex->ee_block), ee_len); - return -EIO; - } - - *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; - *phys = ext4_ext_pblock(ex) + ee_len - 1; - return 0; -} - -/* - * search the closest allocated block to the right for *logical - * and returns it at @logical + it's physical address at @phys - * if *logical is the largest allocated block, the function - * returns 0 at @phys - * return value contains 0 (success) or error code - */ -static int ext4_ext_search_right(struct inode *inode, - struct ext4_ext_path *path, - ext4_lblk_t *logical, ext4_fsblk_t *phys, - struct ext4_extent **ret_ex) -{ - struct buffer_head *bh = NULL; - struct ext4_extent_header *eh; - struct ext4_extent_idx *ix; - struct ext4_extent *ex; - ext4_fsblk_t block; - int depth; /* Note, NOT eh_depth; depth from top of tree */ - int ee_len; - - if ((path == NULL)) { - EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical); - return -EIO; - } - depth = path->p_depth; - *phys = 0; - - if (depth == 0 && path->p_ext == NULL) - return 0; - - /* usually extent in the path covers blocks smaller - * then *logical, but it can be that extent is the - * first one in the file */ - - ex = path[depth].p_ext; - ee_len = ext4_ext_get_actual_len(ex); - /*if (*logical < le32_to_cpu(ex->ee_block)) {*/ - if (*logical < (ex->ee_block)) { - if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) { - EXT4_ERROR_INODE(inode, - "first_extent(path[%d].p_hdr) != ex", - depth); - return -EIO; - } - while (--depth >= 0) { - ix = path[depth].p_idx; - if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { - EXT4_ERROR_INODE(inode, - "ix != EXT_FIRST_INDEX *logical %d!", - *logical); - return -EIO; - } - } - goto found_extent; - } - - /*if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {*/ - if (unlikely(*logical < ((ex->ee_block) + ee_len))) { - EXT4_ERROR_INODE(inode, - "logical %d < ee_block %d + ee_len %d!", - /**logical, le32_to_cpu(ex->ee_block), ee_len);*/ - *logical, (ex->ee_block), ee_len); - return -EIO; - } - - if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { - /* next allocated block in this leaf */ - ex++; - goto found_extent; - } - - /* go up and search for index to the right */ - while (--depth >= 0) { - ix = path[depth].p_idx; - if (ix != EXT_LAST_INDEX(path[depth].p_hdr)) - goto got_index; - } - - /* we've gone up to the root and found no index to the right */ - return 0; - -got_index: - /* we've found index to the right, let's - * follow it and find the closest allocated - * block to the right */ - ix++; - block = ext4_idx_pblock(ix); - while (++depth < path->p_depth) { - /* subtract from p_depth to get proper eh_depth */ - bh = read_extent_tree_block(inode, block, - path->p_depth - depth, 0); - if (IS_ERR(bh)) - return PTR_ERR(bh); - eh = ext_block_hdr(bh); - ix = EXT_FIRST_INDEX(eh); - block = ext4_idx_pblock(ix); - extents_brelse(bh); - } - - bh = read_extent_tree_block(inode, block, path->p_depth - depth, 0); - if (IS_ERR(bh)) - return PTR_ERR(bh); - eh = ext_block_hdr(bh); - ex = EXT_FIRST_EXTENT(eh); -found_extent: - /**logical = le32_to_cpu(ex->ee_block);*/ - *logical = (ex->ee_block); - *phys = ext4_ext_pblock(ex); - *ret_ex = ex; - if (bh) - extents_brelse(bh); - return 0; -} - -/* - * ext4_ext_next_allocated_block: - * returns 
allocated block in subsequent extent or EXT_MAX_BLOCKS. - * NOTE: it considers block number from index entry as - * allocated block. Thus, index entries have to be consistent - * with leaves. - */ -ext4_lblk_t -ext4_ext_next_allocated_block(struct ext4_ext_path *path) -{ - int depth; - - depth = path->p_depth; - - if (depth == 0 && path->p_ext == NULL) - return EXT_MAX_BLOCKS; - - while (depth >= 0) { - if (depth == path->p_depth) { - /* leaf */ - if (path[depth].p_ext && - path[depth].p_ext != - EXT_LAST_EXTENT(path[depth].p_hdr)) - return le32_to_cpu(path[depth].p_ext[1].ee_block); - } else { - /* index */ - if (path[depth].p_idx != - EXT_LAST_INDEX(path[depth].p_hdr)) - return le32_to_cpu(path[depth].p_idx[1].ei_block); - } - depth--; - } - - return EXT_MAX_BLOCKS; -} - -/* - * ext4_ext_next_leaf_block: - * returns first allocated block from next leaf or EXT_MAX_BLOCKS - */ -static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path) -{ - int depth; - - BUG_ON(path == NULL); - depth = path->p_depth; - - /* zero-tree has no leaf blocks at all */ - if (depth == 0) - return EXT_MAX_BLOCKS; - - /* go to index block */ - depth--; - - while (depth >= 0) { - if (path[depth].p_idx != - EXT_LAST_INDEX(path[depth].p_hdr)) - return (ext4_lblk_t) - le32_to_cpu(path[depth].p_idx[1].ei_block); - depth--; - } - - return EXT_MAX_BLOCKS; -} - -/* - * ext4_ext_correct_indexes: - * if leaf gets modified and modified extent is first in the leaf, - * then we have to correct all indexes above. - * TODO: do we need to correct tree in all cases? - */ -static int ext4_ext_correct_indexes(void *icb, handle_t *handle, struct inode *inode, - struct ext4_ext_path *path) -{ - struct ext4_extent_header *eh; - int depth = ext_depth(inode); - struct ext4_extent *ex; - __le32 border; - int k, err = 0; - - eh = path[depth].p_hdr; - ex = path[depth].p_ext; - - if (unlikely(ex == NULL || eh == NULL)) { - EXT4_ERROR_INODE(inode, - "ex %p == NULL or eh %p == NULL", ex, eh); - return -EIO; - } - - if (depth == 0) { - /* there is no tree at all */ - return 0; - } - - if (ex != EXT_FIRST_EXTENT(eh)) { - /* we correct tree if first leaf got modified only */ - return 0; - } - - /* - * TODO: we need correction if border is smaller than current one - */ - k = depth - 1; - border = path[depth].p_ext->ee_block; - err = ext4_ext_get_access(icb, handle, inode, path + k); - if (err) - return err; - path[k].p_idx->ei_block = border; - err = ext4_ext_dirty(icb, handle, inode, path + k); - if (err) - return err; - - while (k--) { - /* change all left-side indexes */ - if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) - break; - err = ext4_ext_get_access(icb, handle, inode, path + k); - if (err) - break; - path[k].p_idx->ei_block = border; - err = ext4_ext_dirty(icb, handle, inode, path + k); - if (err) - break; - } - - return err; -} - -int -ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, - struct ext4_extent *ex2) -{ - unsigned short ext1_ee_len, ext2_ee_len; - - /* - * Make sure that both extents are initialized. We don't merge - * unwritten extents so that we can be sure that end_io code has - * the extent that was written properly split out and conversion to - * initialized is trivial. 
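ext4_can_extents_be_merged() below reduces to four checks: matching written/unwritten state, logical contiguity, physical contiguity, and a combined length that still fits in ee_len. A condensed editorial sketch (32768 and 32767 stand in for EXT_INIT_MAX_LEN and EXT_UNWRITTEN_MAX_LEN):

#include <stdint.h>

static int mergeable(uint32_t lblk1, uint64_t pblk1, uint16_t len1, int unwr1,
                     uint32_t lblk2, uint64_t pblk2, uint16_t len2, int unwr2)
{
    if (unwr1 != unwr2)                          return 0; /* mixed states    */
    if (lblk1 + len1 != lblk2)                   return 0; /* logical gap     */
    if (pblk1 + len1 != pblk2)                   return 0; /* physical gap    */
    if (len1 + len2 > (unwr1 ? 32767u : 32768u)) return 0; /* ee_len overflow */
    return 1;
}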
- */ - if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2)) - return 0; - - ext1_ee_len = ext4_ext_get_actual_len(ex1); - ext2_ee_len = ext4_ext_get_actual_len(ex2); - - if (le32_to_cpu(ex1->ee_block) + ext1_ee_len != - le32_to_cpu(ex2->ee_block)) - return 0; - - /* - * To allow future support for preallocated extents to be added - * as an RO_COMPAT feature, refuse to merge to extents if - * this can result in the top bit of ee_len being set. - */ - if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) - return 0; - if (ext4_ext_is_unwritten(ex1) && - (ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN)) - return 0; -#ifdef AGGRESSIVE_TEST - if (ext1_ee_len >= 4) - return 0; -#endif - - if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2)) - return 1; - return 0; -} - -/* - * This function tries to merge the "ex" extent to the next extent in the tree. - * It always tries to merge towards right. If you want to merge towards - * left, pass "ex - 1" as argument instead of "ex". - * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns - * 1 if they got merged. - */ -static int ext4_ext_try_to_merge_right(struct inode *inode, - struct ext4_ext_path *path, - struct ext4_extent *ex) -{ - struct ext4_extent_header *eh; - unsigned int depth, len; - int merge_done = 0, unwritten; - - depth = ext_depth(inode); - assert(path[depth].p_hdr != NULL); - eh = path[depth].p_hdr; - - while (ex < EXT_LAST_EXTENT(eh)) { - if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) - break; - /* merge with next extent! */ - unwritten = ext4_ext_is_unwritten(ex); - ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) - + ext4_ext_get_actual_len(ex + 1)); - if (unwritten) - ext4_ext_mark_unwritten(ex); - - if (ex + 1 < EXT_LAST_EXTENT(eh)) { - len = (EXT_LAST_EXTENT(eh) - ex - 1) - * sizeof(struct ext4_extent); - memmove(ex + 1, ex + 2, len); - } - le16_add_cpu(&eh->eh_entries, -1); - merge_done = 1; - if (!eh->eh_entries) - EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!"); - } - - return merge_done; -} - -/* - * This function does a very simple check to see if we can collapse - * an extent tree with a single extent tree leaf block into the inode. - */ -static void ext4_ext_try_to_merge_up(void *icb, handle_t *handle, - struct inode *inode, - struct ext4_ext_path *path) -{ - size_t s; - unsigned max_root = ext4_ext_space_root(inode, 0); - ext4_fsblk_t blk; - - if ((path[0].p_depth != 1) || - (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) || - (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root)) - return; - - /* - * We need to modify the block allocation bitmap and the block - * group descriptor to release the extent tree block. If we - * can't get the journal credits, give up. - */ - if (ext4_journal_extend(icb, handle, 2)) - return; - - /* - * Copy the extent data up to the inode - */ - blk = ext4_idx_pblock(path[0].p_idx); - s = le16_to_cpu(path[1].p_hdr->eh_entries) * - sizeof(struct ext4_extent_idx); - s += sizeof(struct ext4_extent_header); - - path[1].p_maxdepth = path[0].p_maxdepth; - memcpy(path[0].p_hdr, path[1].p_hdr, s); - path[0].p_depth = 0; - path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) + - (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr)); - path[0].p_hdr->eh_max = cpu_to_le16(max_root); - - extents_brelse(path[1].p_bh); - ext4_free_blocks(icb, handle, inode, NULL, blk, 1, - EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); -} - -/* - * This function tries to merge the @ex extent to neighbours in the tree. - * return 1 if merge left else 0. 
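When that check passes, ext4_ext_try_to_merge_right() above folds ex[i + 1] into ex[i] and closes the gap with memmove(). A stand-alone editorial sketch of that single step (plain lengths here; the real code re-applies the unwritten bit after adding the encoded ones):

#include <stdint.h>
#include <string.h>

struct ext { uint32_t ee_block; uint16_t ee_len; uint64_t pblk; }; /* stand-in */

static void merge_right(struct ext *ex, unsigned i, unsigned *nr)
{
    ex[i].ee_len += ex[i + 1].ee_len;   /* absorb the right neighbour */
    memmove(&ex[i + 1], &ex[i + 2],     /* slide the tail down a slot */
            (*nr - i - 2) * sizeof(ex[0]));
    (*nr)--;
}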
- */ -static void ext4_ext_try_to_merge(void *icb, handle_t *handle, - struct inode *inode, - struct ext4_ext_path *path, - struct ext4_extent *ex) { - struct ext4_extent_header *eh; - unsigned int depth; - int merge_done = 0; - - depth = ext_depth(inode); - BUG_ON(path[depth].p_hdr == NULL); - eh = path[depth].p_hdr; - - if (ex > EXT_FIRST_EXTENT(eh)) - merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1); - - if (!merge_done) - (void) ext4_ext_try_to_merge_right(inode, path, ex); - - ext4_ext_try_to_merge_up(icb, handle, inode, path); -} - -/* - * ext4_ext_insert_extent: - * tries to merge requsted extent into the existing extent or - * inserts requested extent as new one into the tree, - * creating new leaf in the no-space case. - */ -int ext4_ext_insert_extent(void *icb, handle_t *handle, struct inode *inode, - struct ext4_ext_path **ppath, - struct ext4_extent *newext, - int gb_flags) -{ - struct ext4_ext_path *path = *ppath; - struct ext4_extent_header *eh; - struct ext4_extent *ex, *fex; - struct ext4_extent *nearex; /* nearest extent */ - struct ext4_ext_path *npath = NULL; - int depth, len, err; - ext4_lblk_t next; - int mb_flags = 0, unwritten; - - if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) - mb_flags |= EXT4_MB_DELALLOC_RESERVED; - if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { - EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); - return -EIO; - } - depth = ext_depth(inode); - ex = path[depth].p_ext; - eh = path[depth].p_hdr; - if (unlikely(path[depth].p_hdr == NULL)) { - EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); - return -EIO; - } - - /* try to insert block into found extent and return */ - if (ex && !(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) { - - /* - * Try to see whether we should rather test the extent on - * right from ex, or from the left of ex. This is because - * ext4_find_extent() can return either extent on the - * left, or on the right from the searched position. This - * will make merging more effective. 
- */ - if (ex < EXT_LAST_EXTENT(eh) && - (le32_to_cpu(ex->ee_block) + - ext4_ext_get_actual_len(ex) < - le32_to_cpu(newext->ee_block))) { - ex += 1; - goto prepend; - } else if ((ex > EXT_FIRST_EXTENT(eh)) && - (le32_to_cpu(newext->ee_block) + - ext4_ext_get_actual_len(newext) < - le32_to_cpu(ex->ee_block))) - ex -= 1; - - /* Try to append newex to the ex */ - if (ext4_can_extents_be_merged(inode, ex, newext)) { - ext_debug("append [%d]%d block to %u:[%d]%d" - "(from %llu)\n", - ext4_ext_is_unwritten(newext), - ext4_ext_get_actual_len(newext), - le32_to_cpu(ex->ee_block), - ext4_ext_is_unwritten(ex), - ext4_ext_get_actual_len(ex), - ext4_ext_pblock(ex)); - err = ext4_ext_get_access(icb, handle, inode, - path + depth); - if (err) - return err; - unwritten = ext4_ext_is_unwritten(ex); - ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) - + ext4_ext_get_actual_len(newext)); - if (unwritten) - ext4_ext_mark_unwritten(ex); - eh = path[depth].p_hdr; - nearex = ex; - goto merge; - } - -prepend: - /* Try to prepend newex to the ex */ - if (ext4_can_extents_be_merged(inode, newext, ex)) { - ext_debug("prepend %u[%d]%d block to %u:[%d]%d" - "(from %llu)\n", - le32_to_cpu(newext->ee_block), - ext4_ext_is_unwritten(newext), - ext4_ext_get_actual_len(newext), - le32_to_cpu(ex->ee_block), - ext4_ext_is_unwritten(ex), - ext4_ext_get_actual_len(ex), - ext4_ext_pblock(ex)); - err = ext4_ext_get_access(icb, handle, inode, - path + depth); - if (err) - return err; - - unwritten = ext4_ext_is_unwritten(ex); - ex->ee_block = newext->ee_block; - ext4_ext_store_pblock(ex, ext4_ext_pblock(newext)); - ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) - + ext4_ext_get_actual_len(newext)); - if (unwritten) - ext4_ext_mark_unwritten(ex); - eh = path[depth].p_hdr; - nearex = ex; - goto merge; - } - } - - depth = ext_depth(inode); - eh = path[depth].p_hdr; - if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) - goto has_space; - - /* probably next leaf has space for us? */ - fex = EXT_LAST_EXTENT(eh); - next = EXT_MAX_BLOCKS; - if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)) - next = ext4_ext_next_leaf_block(path); - if (next != EXT_MAX_BLOCKS) { - ext_debug("next leaf block - %u\n", next); - BUG_ON(npath != NULL); - npath = ext4_find_extent(inode, next, NULL, 0); - if (IS_ERR(npath)) - return PTR_ERR(npath); - BUG_ON(npath->p_depth != path->p_depth); - eh = npath[depth].p_hdr; - if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) { - ext_debug("next leaf isn't full(%d)\n", - le16_to_cpu(eh->eh_entries)); - path = npath; - goto has_space; - } - ext_debug("next leaf has no free space(%d,%d)\n", - le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); - } - - /* - * There is no free space in the found leaf. - * We're gonna add a new leaf in the tree. 
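The append/prepend paths above add raw lengths and then re-mark the result unwritten; that works because ee_len packs the unwritten flag into its top bit. An editorial sketch of the encoding, mirroring ext4_ext_get_actual_len()/ext4_ext_is_unwritten() (0x8000 is EXT_INIT_MAX_LEN):

#include <stdint.h>

static int is_unwritten(uint16_t ee_len)        /* lengths above 0x8000 are unwritten */
{ return ee_len > 0x8000u; }

static unsigned actual_len(uint16_t ee_len)     /* strip the flag from the length     */
{ return ee_len <= 0x8000u ? ee_len : ee_len - 0x8000u; }

static uint16_t mark_unwritten(uint16_t ee_len) /* set the flag, keep the length      */
{ return (uint16_t)(ee_len | 0x8000u); }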
- */ - if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL) - mb_flags |= EXT4_MB_USE_RESERVED; - err = ext4_ext_create_new_leaf(icb, handle, inode, mb_flags, gb_flags, - ppath, newext); - if (err) - goto cleanup; - depth = ext_depth(inode); - eh = path[depth].p_hdr; - -has_space: - nearex = path[depth].p_ext; - - err = ext4_ext_get_access(icb, handle, inode, path + depth); - if (err) - goto cleanup; - - if (!nearex) { - /* there is no extent in this leaf, create first one */ - ext_debug("first extent in the leaf: %u:%llu:[%d]%d\n", - le32_to_cpu(newext->ee_block), - ext4_ext_pblock(newext), - ext4_ext_is_unwritten(newext), - ext4_ext_get_actual_len(newext)); - nearex = EXT_FIRST_EXTENT(eh); - } else { - if (le32_to_cpu(newext->ee_block) - > le32_to_cpu(nearex->ee_block)) { - /* Insert after */ - ext_debug("insert %u:%llu:[%d]%d before: " - "nearest %p\n", - le32_to_cpu(newext->ee_block), - ext4_ext_pblock(newext), - ext4_ext_is_unwritten(newext), - ext4_ext_get_actual_len(newext), - nearex); - nearex++; - } else { - /* Insert before */ - BUG_ON(newext->ee_block == nearex->ee_block); - ext_debug("insert %u:%llu:[%d]%d after: " - "nearest %p\n", - le32_to_cpu(newext->ee_block), - ext4_ext_pblock(newext), - ext4_ext_is_unwritten(newext), - ext4_ext_get_actual_len(newext), - nearex); - } - len = EXT_LAST_EXTENT(eh) - nearex + 1; - if (len > 0) { - ext_debug("insert %u:%llu:[%d]%d: " - "move %d extents from 0x%p to 0x%p\n", - le32_to_cpu(newext->ee_block), - ext4_ext_pblock(newext), - ext4_ext_is_unwritten(newext), - ext4_ext_get_actual_len(newext), - len, nearex, nearex + 1); - memmove(nearex + 1, nearex, - len * sizeof(struct ext4_extent)); - } - } - - le16_add_cpu(&eh->eh_entries, 1); - path[depth].p_ext = nearex; - nearex->ee_block = newext->ee_block; - ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext)); - nearex->ee_len = newext->ee_len; - -merge: - /* try to merge extents */ - if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) - ext4_ext_try_to_merge(icb, handle, inode, path, nearex); - - - /* time to correct all indexes above */ - err = ext4_ext_correct_indexes(icb, handle, inode, path); - if (err) - goto cleanup; - - err = ext4_ext_dirty(icb, handle, inode, path + path->p_depth); - -cleanup: - if (npath) { - ext4_ext_drop_refs(npath); - kfree(npath); - } - return err; -} - -static inline int get_default_free_blocks_flags(struct inode *inode) -{ - return 0; -} - -/* FIXME!! we need to try to merge to left or right after zero-out */ -static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) -{ - ext4_fsblk_t ee_pblock; - unsigned int ee_len; - int ret; - - ee_len = ext4_ext_get_actual_len(ex); - ee_pblock = ext4_ext_pblock(ex); - - ret = 0; - - return ret; -} - -static int ext4_remove_blocks(void *icb, handle_t *handle, struct inode *inode, - struct ext4_extent *ex, - unsigned long from, unsigned long to) -{ - struct buffer_head *bh; - int i; - - if (from >= le32_to_cpu(ex->ee_block) - && to == le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex) - 1) { - /* tail removal */ - unsigned long num, start; - num = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex) - from; - start = ext4_ext_pblock(ex) + ext4_ext_get_actual_len(ex) - num; - ext4_free_blocks(icb, handle, inode, NULL, start, num, 0); - } else if (from == le32_to_cpu(ex->ee_block) - && to <= le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex) - 1) { - } else { - } - return 0; -} - -/* - * routine removes index from the index block - * it's used in truncate case only. 
thus all requests are for - * last index in the block only - */ -int ext4_ext_rm_idx(void *icb, handle_t *handle, struct inode *inode, - struct ext4_ext_path *path) -{ - int err; - ext4_fsblk_t leaf; - - /* free index block */ - path--; - leaf = ext4_idx_pblock(path->p_idx); - BUG_ON(path->p_hdr->eh_entries == 0); - if ((err = ext4_ext_get_access(icb, handle, inode, path))) - return err; - path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1); - if ((err = ext4_ext_dirty(icb, handle, inode, path))) - return err; - ext4_free_blocks(icb, handle, inode, NULL, leaf, 1, 0); - return err; -} - -static int -ext4_ext_rm_leaf(void *icb, handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, unsigned long start) -{ - int err = 0, correct_index = 0; - int depth = ext_depth(inode), credits; - struct ext4_extent_header *eh; - unsigned a, b, block, num; - unsigned long ex_ee_block; - unsigned short ex_ee_len; - struct ext4_extent *ex; - - /* the header must be checked already in ext4_ext_remove_space() */ - if (!path[depth].p_hdr) - path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); - eh = path[depth].p_hdr; - BUG_ON(eh == NULL); - - /* find where to start removing */ - ex = EXT_LAST_EXTENT(eh); - - ex_ee_block = le32_to_cpu(ex->ee_block); - ex_ee_len = ext4_ext_get_actual_len(ex); - - while (ex >= EXT_FIRST_EXTENT(eh) && - ex_ee_block + ex_ee_len > start) { - path[depth].p_ext = ex; - - a = ex_ee_block > start ? ex_ee_block : start; - b = (unsigned long long)ex_ee_block + ex_ee_len - 1 < - EXT_MAX_BLOCKS ? ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCKS; - - - if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) { - block = 0; - num = 0; - BUG(); - } else if (a != ex_ee_block) { - /* remove tail of the extent */ - block = ex_ee_block; - num = a - block; - } else if (b != ex_ee_block + ex_ee_len - 1) { - /* remove head of the extent */ - block = a; - num = b - a; - /* there is no "make a hole" API yet */ - BUG(); - } else { - /* remove whole extent: excellent! */ - block = ex_ee_block; - num = 0; - BUG_ON(a != ex_ee_block); - BUG_ON(b != ex_ee_block + ex_ee_len - 1); - } - - /* at present, extent can't cross block group */ - /* leaf + bitmap + group desc + sb + inode */ - credits = 5; - if (ex == EXT_FIRST_EXTENT(eh)) { - correct_index = 1; - credits += (ext_depth(inode)) + 1; - } - - /*handle = ext4_ext_journal_restart(icb, handle, credits);*/ - /*if (IS_ERR(icb, handle)) {*/ - /*err = PTR_ERR(icb, handle);*/ - /*goto out;*/ - /*}*/ - - err = ext4_ext_get_access(icb, handle, inode, path + depth); - if (err) - goto out; - - err = ext4_remove_blocks(icb, handle, inode, ex, a, b); - if (err) - goto out; - - if (num == 0) { - /* this extent is removed entirely mark slot unused */ - ext4_ext_store_pblock(ex, 0); - eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); - } - - ex->ee_block = cpu_to_le32(block); - ex->ee_len = cpu_to_le16(num); - - err = ext4_ext_dirty(icb, handle, inode, path + depth); - if (err) - goto out; - - ex--; - ex_ee_block = le32_to_cpu(ex->ee_block); - ex_ee_len = ext4_ext_get_actual_len(ex); - } - - if (correct_index && eh->eh_entries) - err = ext4_ext_correct_indexes(icb, handle, inode, path); - - /* if this leaf is free, then we should - * remove it from index block above */ - if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) - err = ext4_ext_rm_idx(icb, handle, inode, path + depth); - -out: - return err; -} - -/* - * ext4_split_extent_at() splits an extent at given block. 
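Back in ext4_remove_blocks() (just before ext4_ext_rm_idx()), the truncate path frees the tail of an extent, and the arithmetic is easy to get off by one. An editorial sketch of just that computation:

#include <stdint.h>

/* truncating from logical block `from` through the extent's end:
 * free the last `num` blocks of the extent's physical run */
static void tail_range(uint32_t ee_block, uint16_t ee_len, uint64_t pblk,
                       uint32_t from, uint64_t *start, uint32_t *num)
{
    *num   = ee_block + ee_len - from;  /* trailing blocks being cut off */
    *start = pblk + ee_len - *num;      /* first physical block to free  */
}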
- * - * @handle: the journal handle - * @inode: the file inode - * @path: the path to the extent - * @split: the logical block where the extent is splitted. - * @split_flags: indicates if the extent could be zeroout if split fails, and - * the states(init or unwritten) of new extents. - * @flags: flags used to insert new extent to extent tree. - * - * - * Splits extent [a, b] into two extents [a, @split) and [@split, b], states - * of which are deterimined by split_flag. - * - * There are two cases: - * a> the extent are splitted into two extent. - * b> split is not needed, and just mark the extent. - * - * return 0 on success. - */ -static int ext4_split_extent_at(void *icb, handle_t *handle, - struct inode *inode, - struct ext4_ext_path **ppath, - ext4_lblk_t split, - int split_flag, - int flags) -{ - struct ext4_ext_path *path = *ppath; - ext4_fsblk_t newblock; - ext4_lblk_t ee_block; - struct ext4_extent *ex, newex, orig_ex, zero_ex; - struct ext4_extent *ex2 = NULL; - unsigned int ee_len, depth; - int err = 0; - - ext4_ext_show_leaf(inode, path); - - depth = ext_depth(inode); - ex = path[depth].p_ext; - ee_block = le32_to_cpu(ex->ee_block); - ee_len = ext4_ext_get_actual_len(ex); - newblock = split - ee_block + ext4_ext_pblock(ex); - - BUG_ON(split < ee_block || split >= (ee_block + ee_len)); - - err = ext4_ext_get_access(icb, handle, inode, path + depth); - if (err) - goto out; - - if (split == ee_block) { - /* - * case b: block @split is the block that the extent begins with - * then we just change the state of the extent, and splitting - * is not needed. - */ - if (split_flag & EXT4_EXT_MARK_UNWRIT2) - ext4_ext_mark_unwritten(ex); - else - ext4_ext_mark_initialized(ex); - - if (!(flags & EXT4_GET_BLOCKS_PRE_IO)) - ext4_ext_try_to_merge(icb, handle, inode, path, ex); - - err = ext4_ext_dirty(icb, handle, inode, path + path->p_depth); - goto out; - } - - /* case a */ - memcpy(&orig_ex, ex, sizeof(orig_ex)); - ex->ee_len = cpu_to_le16(split - ee_block); - if (split_flag & EXT4_EXT_MARK_UNWRIT1) - ext4_ext_mark_unwritten(ex); - - /* - * path may lead to new leaf, not to original leaf any more - * after ext4_ext_insert_extent() returns, - */ - err = ext4_ext_dirty(icb, handle, inode, path + depth); - if (err) - goto fix_extent_len; - - ex2 = &newex; - ex2->ee_block = cpu_to_le32(split); - ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block)); - ext4_ext_store_pblock(ex2, newblock); - if (split_flag & EXT4_EXT_MARK_UNWRIT2) - ext4_ext_mark_unwritten(ex2); - - err = ext4_ext_insert_extent(icb, handle, inode, ppath, &newex, flags); - if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { - if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { - if (split_flag & EXT4_EXT_DATA_VALID1) { - err = ext4_ext_zeroout(inode, ex2); - zero_ex.ee_block = ex2->ee_block; - zero_ex.ee_len = cpu_to_le16( - ext4_ext_get_actual_len(ex2)); - ext4_ext_store_pblock(&zero_ex, - ext4_ext_pblock(ex2)); - } else { - err = ext4_ext_zeroout(inode, ex); - zero_ex.ee_block = ex->ee_block; - zero_ex.ee_len = cpu_to_le16( - ext4_ext_get_actual_len(ex)); - ext4_ext_store_pblock(&zero_ex, - ext4_ext_pblock(ex)); - } - } else { - err = ext4_ext_zeroout(inode, &orig_ex); - zero_ex.ee_block = orig_ex.ee_block; - zero_ex.ee_len = cpu_to_le16( - ext4_ext_get_actual_len(&orig_ex)); - ext4_ext_store_pblock(&zero_ex, - ext4_ext_pblock(&orig_ex)); - } - - if (err) - goto fix_extent_len; - /* update the extent length and mark as initialized */ - ex->ee_len = cpu_to_le16(ee_len); - ext4_ext_try_to_merge(icb, handle, 
inode, path, ex); - err = ext4_ext_dirty(icb, handle, inode, path + path->p_depth); - if (err) - goto fix_extent_len; - - goto out; - } else if (err) - goto fix_extent_len; - -out: - ext4_ext_show_leaf(inode, path); - return err; - -fix_extent_len: - ex->ee_len = orig_ex.ee_len; - ext4_ext_dirty(icb, handle, inode, path + path->p_depth); - return err; -} - -/* - * returns 1 if current index have to be freed (even partial) - */ -static int inline -ext4_ext_more_to_rm(struct ext4_ext_path *path) -{ - BUG_ON(path->p_idx == NULL); - - if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) - return 0; - - /* - * if truncate on deeper level happened it it wasn't partial - * so we have to consider current index for truncation - */ - if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block) - return 0; - return 1; -} - -int ext4_ext_remove_space(void *icb, struct inode *inode, unsigned long start) -{ - struct super_block *sb = inode->i_sb; - int depth = ext_depth(inode); - struct ext4_ext_path *path; - handle_t *handle = NULL; - int i = 0, err = 0; - - /* probably first extent we're gonna free will be last in block */ - /*handle = ext4_journal_start(inode, depth + 1);*/ - /*if (IS_ERR(icb, handle))*/ - /*return PTR_ERR(icb, handle);*/ - - /* - * we start scanning from right side freeing all the blocks - * after i_size and walking into the deep - */ - path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL); - if (path == NULL) { - ext4_journal_stop(icb, handle); - return -ENOMEM; - } - memset(path, 0, sizeof(struct ext4_ext_path) * (depth + 1)); - path[0].p_hdr = ext_inode_hdr(inode); - if (ext4_ext_check_inode(inode)) { - err = -EIO; - goto out; - } - path[0].p_depth = depth; - - while (i >= 0 && err == 0) { - if (i == depth) { - /* this is leaf block */ - err = ext4_ext_rm_leaf(icb, handle, inode, path, start); - /* root level have p_bh == NULL, extents_brelse() eats this */ - extents_brelse(path[i].p_bh); - path[i].p_bh = NULL; - i--; - continue; - } - - /* this is index block */ - if (!path[i].p_hdr) { - path[i].p_hdr = ext_block_hdr(path[i].p_bh); - } - - if (!path[i].p_idx) { - /* this level hasn't touched yet */ - path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr); - path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1; - } else { - /* we've already was here, see at next index */ - path[i].p_idx--; - } - - if (ext4_ext_more_to_rm(path + i)) { - struct buffer_head *bh; - /* go to the next level */ - memset(path + i + 1, 0, sizeof(*path)); - bh = read_extent_tree_block(inode, ext4_idx_pblock(path[i].p_idx), path[0].p_depth - (i + 1), 0); - if (IS_ERR(bh)) { - /* should we reset i_size? 
*/ - err = -EIO; - break; - } - path[i+1].p_bh = bh; - - /* put actual number of indexes to know is this - * number got changed at the next iteration */ - path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries); - i++; - } else { - /* we finish processing this index, go up */ - if (path[i].p_hdr->eh_entries == 0 && i > 0) { - /* index is empty, remove it - * handle must be already prepared by the - * truncatei_leaf() */ - err = ext4_ext_rm_idx(icb, handle, inode, path + i); - } - /* root level have p_bh == NULL, extents_brelse() eats this */ - extents_brelse(path[i].p_bh); - path[i].p_bh = NULL; - i--; - } - } - - /* TODO: flexible tree reduction should be here */ - if (path->p_hdr->eh_entries == 0) { - /* - * truncate to zero freed all the tree - * so, we need to correct eh_depth - */ - err = ext4_ext_get_access(icb, handle, inode, path); - if (err == 0) { - ext_inode_hdr(inode)->eh_depth = 0; - ext_inode_hdr(inode)->eh_max = - cpu_to_le16(ext4_ext_space_root(inode, 0)); - err = ext4_ext_dirty(icb, handle, inode, path); - } - } -out: - if (path) { - ext4_ext_drop_refs(path); - kfree(path); - } - ext4_journal_stop(icb, handle); - - return err; -} - -int ext4_ext_tree_init(void *icb, handle_t *handle, struct inode *inode) -{ - struct ext4_extent_header *eh; - - eh = ext_inode_hdr(inode); - eh->eh_depth = 0; - eh->eh_entries = 0; - eh->eh_magic = cpu_to_le16(EXT4_EXT_MAGIC); - eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0)); - ext4_mark_inode_dirty(icb, handle, inode); - return 0; -} - -/* - * called at mount time - */ -void ext4_ext_init(struct super_block *sb) -{ - /* - * possible initialization would be here - */ -} - -static int ext4_ext_convert_to_initialized ( - void *icb, - handle_t *handle, - struct inode *inode, - struct ext4_ext_path **ppath, - ext4_lblk_t split, - unsigned long blocks, - int flags) -{ - int depth = ext_depth(inode), err; - struct ext4_extent *ex = (*ppath)[depth].p_ext; - - assert (le32_to_cpu(ex->ee_block) <= split); - - if (split + blocks == le32_to_cpu(ex->ee_block) + - ext4_ext_get_actual_len(ex)) { - - /* split and initialize right part */ - err = ext4_split_extent_at(icb, handle, inode, ppath, split, - EXT4_EXT_MARK_UNWRIT1, flags); - - } else if (le32_to_cpu(ex->ee_block) == split) { - - /* split and initialize left part */ - err = ext4_split_extent_at(icb, handle, inode, ppath, split + blocks, - EXT4_EXT_MARK_UNWRIT2, flags); - - } else { - - /* split 1 extent to 3 and initialize the 2nd */ - err = ext4_split_extent_at(icb, handle, inode, ppath, split + blocks, - EXT4_EXT_MARK_UNWRIT1 | - EXT4_EXT_MARK_UNWRIT2, flags); - if (0 == err) { - err = ext4_split_extent_at(icb, handle, inode, ppath, split, - EXT4_EXT_MARK_UNWRIT1, flags); - } - } - - return err; -} - -int ext4_ext_get_blocks(void *icb, handle_t *handle, struct inode *inode, ext4_fsblk_t iblock, - unsigned long max_blocks, struct buffer_head *bh_result, - int create, int flags) -{ - struct ext4_ext_path *path = NULL; - struct ext4_extent newex, *ex; - int goal, err = 0, depth; - unsigned long allocated = 0; - ext4_fsblk_t next, newblock; - - clear_buffer_new(bh_result); - /*mutex_lock(&ext4_I(inode)->truncate_mutex);*/ - - /* find extent for this block */ - path = ext4_find_extent(inode, iblock, NULL, 0); - if (IS_ERR(path)) { - err = PTR_ERR(path); - path = NULL; - goto out2; - } - - depth = ext_depth(inode); - - /* - * consistent leaf must not be empty - * this situations is possible, though, _during_ tree modification - * this is why assert can't be put in ext4_ext_find_extent() - */ - 
BUG_ON(path[depth].p_ext == NULL && depth != 0); - - if ((ex = path[depth].p_ext)) { - ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); - ext4_fsblk_t ee_start = ext4_ext_pblock(ex); - unsigned short ee_len = ext4_ext_get_actual_len(ex); - /* if found exent covers block, simple return it */ - if (iblock >= ee_block && iblock < ee_block + ee_len) { - - /* number of remain blocks in the extent */ - allocated = ee_len + ee_block - iblock; - - if (ext4_ext_is_unwritten(ex)) { - if (create) { - newblock = iblock - ee_block + ee_start; - err = ext4_ext_convert_to_initialized ( - icb, handle, - inode, - &path, - iblock, - allocated, - flags); - if (err) - goto out2; - - } else { - newblock = 0; - } - } else { - newblock = iblock - ee_block + ee_start; - } - goto out; - } - } - - /* - * requested block isn't allocated yet - * we couldn't try to create block if create flag is zero - */ - if (!create) { - goto out2; - } - - /* find next allocated block so that we know how many - * blocks we can allocate without ovelapping next extent */ - next = ext4_ext_next_allocated_block(path); - BUG_ON(next <= iblock); - allocated = next - iblock; - if (flags & EXT4_GET_BLOCKS_PRE_IO && max_blocks > EXT_UNWRITTEN_MAX_LEN) - max_blocks = EXT_UNWRITTEN_MAX_LEN; - if (allocated > max_blocks) - allocated = max_blocks; - - /* allocate new block */ - goal = ext4_ext_find_goal(inode, path, iblock); - - newblock = ext4_new_meta_blocks(icb, handle, inode, goal, 0, - &allocated, &err); - if (!newblock) - goto out2; - - /* try to insert new extent into found leaf and return */ - newex.ee_block = cpu_to_le32(iblock); - ext4_ext_store_pblock(&newex, newblock); - newex.ee_len = cpu_to_le16(allocated); - /* if it's fallocate, mark ex as unwritten */ - if (flags & EXT4_GET_BLOCKS_PRE_IO) { - ext4_ext_mark_unwritten(&newex); - } - err = ext4_ext_insert_extent(icb, handle, inode, &path, &newex, - flags & EXT4_GET_BLOCKS_PRE_IO); - - if (err) { - /* free data blocks we just allocated */ - ext4_free_blocks(icb, handle, inode, NULL, ext4_ext_pblock(&newex), - le16_to_cpu(newex.ee_len), get_default_free_blocks_flags(inode)); - goto out2; - } - - ext4_mark_inode_dirty(icb, handle, inode); - - /* previous routine could use block we allocated */ - if (ext4_ext_is_unwritten(&newex)) - newblock = 0; - else - newblock = ext4_ext_pblock(&newex); - - set_buffer_new(bh_result); - -out: - if (allocated > max_blocks) - allocated = max_blocks; - - ext4_ext_show_leaf(inode, path); - set_buffer_mapped(bh_result); - bh_result->b_bdev = inode->i_sb->s_bdev; - bh_result->b_blocknr = newblock; -out2: - if (path) { - ext4_ext_drop_refs(path); - kfree(path); - } - /*mutex_unlock(&ext4_I(inode)->truncate_mutex);*/ - - return err ? err : allocated; -} - -int ext4_ext_truncate(void *icb, struct inode *inode, unsigned long start) -{ - int ret = ext4_ext_remove_space(icb, inode, start); - - /* Save modifications on i_blocks field of the inode. */ - if (!ret) - ret = ext4_mark_inode_dirty(icb, NULL, inode); - - return ret; -} - -#pragma warning(pop) - +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ + +#include "ext2fs.h" +#include "linux\ext4.h" + +#pragma warning(push) +#pragma warning(disable: 4018) +#pragma warning(disable: 4242) +#pragma warning(disable: 4244) + + +/* + * used by extent splitting. + */ +#define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \ + due to ENOSPC */ +#define EXT4_EXT_MARK_UNWRIT1 0x2 /* mark first half unwritten */ +#define EXT4_EXT_MARK_UNWRIT2 0x4 /* mark second half unwritten */ + +#define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */ +#define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */ + +#define CONFIG_EXTENT_TEST +#ifdef CONFIG_EXTENT_TEST + +#define ext4_mark_inode_dirty(icb, handle, n) ext3_mark_inode_dirty(icb, n) +static inline ext4_fsblk_t ext4_inode_to_goal_block(struct inode *inode) +{ + PEXT2_VCB Vcb; + Vcb = inode->i_sb->s_priv; + return (inode->i_ino - 1) / BLOCKS_PER_GROUP; +} + +static ext4_fsblk_t ext4_new_meta_blocks(void *icb, handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, + unsigned int flags, + unsigned long *count, int *errp) +{ + NTSTATUS status; + ULONG blockcnt = (count)?*count:1; + ULONG block = 0; + + status = Ext2NewBlock((PEXT2_IRP_CONTEXT)icb, + inode->i_sb->s_priv, + 0, goal, + &block, + &blockcnt); + if (count) + *count = blockcnt; + + if (!NT_SUCCESS(status)) { + *errp = Ext2LinuxError(status); + return 0; + } + inode->i_blocks += (blockcnt * (inode->i_sb->s_blocksize >> 9)); + return block; +} + +static void ext4_free_blocks(void *icb, handle_t *handle, struct inode *inode, void *fake, + ext4_fsblk_t block, int count, int flags) +{ + Ext2FreeBlock((PEXT2_IRP_CONTEXT)icb, inode->i_sb->s_priv, block, count); + inode->i_blocks -= count * (inode->i_sb->s_blocksize >> 9); + return; +} + +static inline void ext_debug(char *str, ...) +{ +} +#if TRUE +#define EXT4_ERROR_INODE(inode, str, ...) 
do { \ + DbgPrint("inode[%p]: "##str "\n", inode, __VA_ARGS__); \ + } while(0) +#else +#define EXT4_ERROR_INODE +#endif + +#define ext4_std_error(s, err) +#define assert ASSERT + +#endif + +/* + * Return the right sibling of a tree node(either leaf or indexes node) + */ + +#define EXT_MAX_BLOCKS 0xffffffff + + +static inline int ext4_ext_space_block(struct inode *inode, int check) +{ + int size; + + size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) + / sizeof(struct ext4_extent); +#ifdef AGGRESSIVE_TEST + if (!check && size > 6) + size = 6; +#endif + return size; +} + +static inline int ext4_ext_space_block_idx(struct inode *inode, int check) +{ + int size; + + size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) + / sizeof(struct ext4_extent_idx); +#ifdef AGGRESSIVE_TEST + if (!check && size > 5) + size = 5; +#endif + return size; +} + +static inline int ext4_ext_space_root(struct inode *inode, int check) +{ + int size; + + size = sizeof(EXT4_I(inode)->i_block); + size -= sizeof(struct ext4_extent_header); + size /= sizeof(struct ext4_extent); +#ifdef AGGRESSIVE_TEST + if (!check && size > 3) + size = 3; +#endif + return size; +} + +static inline int ext4_ext_space_root_idx(struct inode *inode, int check) +{ + int size; + + size = sizeof(EXT4_I(inode)->i_block); + size -= sizeof(struct ext4_extent_header); + size /= sizeof(struct ext4_extent_idx); +#ifdef AGGRESSIVE_TEST + if (!check && size > 4) + size = 4; +#endif + return size; +} + +static int +ext4_ext_max_entries(struct inode *inode, int depth) +{ + int max; + + if (depth == ext_depth(inode)) { + if (depth == 0) + max = ext4_ext_space_root(inode, 1); + else + max = ext4_ext_space_root_idx(inode, 1); + } else { + if (depth == 0) + max = ext4_ext_space_block(inode, 1); + else + max = ext4_ext_space_block_idx(inode, 1); + } + + return max; +} + +static int __ext4_ext_check(const char *function, unsigned int line, + struct inode *inode, + struct ext4_extent_header *eh, int depth, + ext4_fsblk_t pblk); + +/* + * read_extent_tree_block: + * Get a buffer_head by extents_bread, and read fresh data from the storage. 
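+ *
+ * A hedged sketch of the expected calling pattern follows; the helper
+ * name demo_read_child is illustrative only and is not part of this
+ * driver, and it assumes the read_extent_tree_block() wrapper defined
+ * just below.
+ */
+
+/* Illustrative only: consume the ERR_PTR-style result of
+ * read_extent_tree_block() and always release the buffer. */
+static int demo_read_child(struct inode *inode, ext4_fsblk_t pblk, int depth)
+{
+	struct buffer_head *bh;
+
+	bh = read_extent_tree_block(inode, pblk, depth, 0);
+	if (IS_ERR(bh))
+		return PTR_ERR(bh);	/* -ENOMEM or -EIO from the helper */
+
+	/* ... inspect ext_block_hdr(bh) here ... */
+	extents_brelse(bh);
+	return 0;
+}
+
+/*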
+ */ +static struct buffer_head * +__read_extent_tree_block(const char *function, unsigned int line, + struct inode *inode, ext4_fsblk_t pblk, int depth, + int flags) +{ + struct buffer_head *bh; + int err; + + bh = extents_bread(inode->i_sb, pblk); + if (!bh) + return ERR_PTR(-ENOMEM); + + if (!buffer_uptodate(bh)) { + err = -EIO; + goto errout; + } + if (buffer_verified(bh)) + return bh; + err = __ext4_ext_check(function, line, inode, + ext_block_hdr(bh), depth, pblk); + if (err) + goto errout; + set_buffer_verified(bh); + return bh; +errout: + extents_brelse(bh); + return ERR_PTR(err); + +} + +#define read_extent_tree_block(inode, pblk, depth, flags) \ + __read_extent_tree_block("", __LINE__, (inode), (pblk), \ + (depth), (flags)) + +#define ext4_ext_check(inode, eh, depth, pblk) \ + __ext4_ext_check("", __LINE__, (inode), (eh), (depth), (pblk)) + +int ext4_ext_check_inode(struct inode *inode) +{ + return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0); +} + +static int ext4_split_extent_at(void *icb, + handle_t *handle, + struct inode *inode, + struct ext4_ext_path **ppath, + ext4_lblk_t split, + int split_flag, + int flags); + +static inline int +ext4_force_split_extent_at(void *icb, handle_t *handle, struct inode *inode, + struct ext4_ext_path **ppath, ext4_lblk_t lblk, + int nofail) +{ + struct ext4_ext_path *path = *ppath; + int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext); + + return ext4_split_extent_at(icb, handle, inode, ppath, lblk, unwritten ? + EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0, + EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO | + (nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL:0)); +} + +/* + * could return: + * - EROFS + * - ENOMEM + */ + +static int ext4_ext_get_access(void *icb, handle_t *handle, struct inode *inode, + struct ext4_ext_path *path) +{ + if (path->p_bh) { + /* path points to block */ + + return ext4_journal_get_write_access(icb, handle, path->p_bh); + + } + /* path points to leaf/index in inode body */ + /* we use in-core data, no need to protect them */ + return 0; +} + + +static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t block) +{ + if (path) { + int depth = path->p_depth; + struct ext4_extent *ex; + + /* + * Try to predict block placement assuming that we are + * filling in a file which will eventually be + * non-sparse --- i.e., in the case of libbfd writing + * an ELF object sections out-of-order but in a way + * the eventually results in a contiguous object or + * executable file, or some database extending a table + * space file. However, this is actually somewhat + * non-ideal if we are writing a sparse file such as + * qemu or KVM writing a raw image file that is going + * to stay fairly sparse, since it will end up + * fragmenting the file system's free space. Maybe we + * should have some hueristics or some way to allow + * userspace to pass a hint to file system, + * especially if the latter case turns out to be + * common. + */ + ex = path[depth].p_ext; + if (ex) { + ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex); + ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block); + + if (block > ext_block) + return ext_pblk + (block - ext_block); + else + return ext_pblk - (ext_block - block); + } + + /* it looks like index is empty; + * try to find starting block from index itself */ + if (path[depth].p_bh) + return path[depth].p_bh->b_blocknr; + } + + /* OK. 
use inode's group */ + return ext4_inode_to_goal_block(inode); +} + +/* + * Allocation for a meta data block + */ +static ext4_fsblk_t +ext4_ext_new_meta_block(void *icb, handle_t *handle, struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *ex, int *err, unsigned int flags) +{ + ext4_fsblk_t goal, newblock; + + goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); + newblock = ext4_new_meta_blocks(icb, handle, inode, goal, flags, + NULL, err); + return newblock; +} + +int __ext4_ext_dirty(const char *where, unsigned int line, + void *icb, handle_t *handle, + struct inode *inode, + struct ext4_ext_path *path) +{ + int err; + + if (path->p_bh) { + ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh)); + /* path points to block */ + err = __ext4_handle_dirty_metadata(where, line, icb, handle, inode, path->p_bh); + } else { + /* path points to leaf/index in inode body */ + err = ext4_mark_inode_dirty(icb, handle, inode); + } + return err; +} + +void ext4_ext_drop_refs(struct ext4_ext_path *path) +{ + int depth, i; + + if (!path) + return; + depth = path->p_depth; + for (i = 0; i <= depth; i++, path++) + if (path->p_bh) { + extents_brelse(path->p_bh); + path->p_bh = NULL; + } +} + +/* + * Check that whether the basic information inside the extent header + * is correct or not. + */ +static int __ext4_ext_check(const char *function, unsigned int line, + struct inode *inode, + struct ext4_extent_header *eh, int depth, + ext4_fsblk_t pblk) +{ + struct ext4_extent_tail *tail; + const char *error_msg; + int max = 0; + + if (eh->eh_magic != EXT4_EXT_MAGIC) { + error_msg = "invalid magic"; + goto corrupted; + } + if (le16_to_cpu(eh->eh_depth) != depth) { + error_msg = "unexpected eh_depth"; + goto corrupted; + } + if (eh->eh_max == 0) { + error_msg = "invalid eh_max"; + goto corrupted; + } + if (eh->eh_entries > eh->eh_max) { + error_msg = "invalid eh_entries"; + goto corrupted; + } + + tail = find_ext4_extent_tail(eh); + if (!ext4_extent_block_csum_verify(inode, eh)) { + ext_debug("Warning: extent checksum damaged?\n"); + } + + return 0; + +corrupted: + ext_debug("corrupted! 
%s\n", error_msg); + return -EIO; +} + +/* + * ext4_ext_binsearch_idx: + * binary search for the closest index of the given block + * the header must be checked before calling this + */ +static void +ext4_ext_binsearch_idx(struct inode *inode, + struct ext4_ext_path *path, ext4_lblk_t block) +{ + struct ext4_extent_header *eh = path->p_hdr; + struct ext4_extent_idx *r, *l, *m; + + ext_debug("binsearch for %u(idx): ", block); + + l = EXT_FIRST_INDEX(eh) + 1; + r = EXT_LAST_INDEX(eh); + while (l <= r) { + m = l + (r - l) / 2; + if (block < (m->ei_block)) + r = m - 1; + else + l = m + 1; + ext_debug("%p(%u):%p(%u):%p(%u) ", l, (l->ei_block), + m, (m->ei_block), + r, (r->ei_block)); + } + + path->p_idx = l - 1; + ext_debug(" -> %u->%lld ", (path->p_idx->ei_block), + ext4_idx_pblock(path->p_idx)); + +#ifdef CHECK_BINSEARCH + { + struct ext4_extent_idx *chix, *ix; + int k; + + chix = ix = EXT_FIRST_INDEX(eh); + for (k = 0; k < (eh->eh_entries); k++, ix++) { + if (k != 0 && + (ix->ei_block) <= (ix[-1].ei_block)) { + printk(KERN_DEBUG "k=%d, ix=0x%p, " + "first=0x%p\n", k, + ix, EXT_FIRST_INDEX(eh)); + printk(KERN_DEBUG "%u <= %u\n", + (ix->ei_block), + (ix[-1].ei_block)); + } + BUG_ON(k && (ix->ei_block) + <= (ix[-1].ei_block)); + if (block < (ix->ei_block)) + break; + chix = ix; + } + BUG_ON(chix != path->p_idx); + } +#endif + +} + +/* + * ext4_ext_binsearch: + * binary search for closest extent of the given block + * the header must be checked before calling this + */ +static void +ext4_ext_binsearch(struct inode *inode, + struct ext4_ext_path *path, ext4_lblk_t block) +{ + struct ext4_extent_header *eh = path->p_hdr; + struct ext4_extent *r, *l, *m; + + if (eh->eh_entries == 0) { + /* + * this leaf is empty: + * we get such a leaf in split/add case + */ + return; + } + + ext_debug("binsearch for %u: ", block); + + l = EXT_FIRST_EXTENT(eh) + 1; + r = EXT_LAST_EXTENT(eh); + + while (l <= r) { + m = l + (r - l) / 2; + if (block < m->ee_block) + r = m - 1; + else + l = m + 1; + ext_debug("%p(%u):%p(%u):%p(%u) ", l, l->ee_block, + m, (m->ee_block), + r, (r->ee_block)); + } + + path->p_ext = l - 1; + ext_debug(" -> %d:%llu:[%d]%d ", + (path->p_ext->ee_block), + ext4_ext_pblock(path->p_ext), + ext4_ext_is_unwritten(path->p_ext), + ext4_ext_get_actual_len(path->p_ext)); + +#ifdef CHECK_BINSEARCH + { + struct ext4_extent *chex, *ex; + int k; + + chex = ex = EXT_FIRST_EXTENT(eh); + for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) { + BUG_ON(k && (ex->ee_block) + <= (ex[-1].ee_block)); + if (block < (ex->ee_block)) + break; + chex = ex; + } + BUG_ON(chex != path->p_ext); + } +#endif + +} + +#ifdef EXT_DEBUG +static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) +{ + int k, l = path->p_depth; + + ext_debug("path:"); + for (k = 0; k <= l; k++, path++) { + if (path->p_idx) { + ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), + ext4_idx_pblock(path->p_idx)); + } else if (path->p_ext) { + ext_debug(" %d:[%d]%d:%llu ", + le32_to_cpu(path->p_ext->ee_block), + ext4_ext_is_unwritten(path->p_ext), + ext4_ext_get_actual_len(path->p_ext), + ext4_ext_pblock(path->p_ext)); + } else + ext_debug(" []"); + } + ext_debug("\n"); +} + +static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) +{ + int depth = ext_depth(inode); + struct ext4_extent_header *eh; + struct ext4_extent *ex; + int i; + + if (!path) + return; + + eh = path[depth].p_hdr; + ex = EXT_FIRST_EXTENT(eh); + + ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino); + + for (i = 0; 
i < le16_to_cpu(eh->eh_entries); i++, ex++) { + ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), + ext4_ext_is_unwritten(ex), + ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex)); + } + ext_debug("\n"); +} + +static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path, + ext4_fsblk_t newblock, int level) +{ + int depth = ext_depth(inode); + struct ext4_extent *ex; + + if (depth != level) { + struct ext4_extent_idx *idx; + idx = path[level].p_idx; + while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) { + ext_debug("%d: move %d:%llu in new index %llu\n", level, + le32_to_cpu(idx->ei_block), + ext4_idx_pblock(idx), + newblock); + idx++; + } + + return; + } + + ex = path[depth].p_ext; + while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) { + ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", + le32_to_cpu(ex->ee_block), + ext4_ext_pblock(ex), + ext4_ext_is_unwritten(ex), + ext4_ext_get_actual_len(ex), + newblock); + ex++; + } +} + +#else +#define ext4_ext_show_path(inode, path) +#define ext4_ext_show_leaf(inode, path) +#define ext4_ext_show_move(inode, path, newblock, level) +#endif + +struct ext4_ext_path * +ext4_find_extent(struct inode *inode, ext4_lblk_t block, + struct ext4_ext_path **orig_path, int flags) +{ + struct ext4_extent_header *eh; + struct buffer_head *bh; + struct ext4_ext_path *path = orig_path ? *orig_path : NULL; + short int depth, i, ppos = 0; + int ret; + + eh = ext_inode_hdr(inode); + depth = ext_depth(inode); + + if (path) { + ext4_ext_drop_refs(path); + if (depth > path[0].p_maxdepth) { + kfree(path); + *orig_path = path = NULL; + } + } + if (!path) { + /* account possible depth increase */ + path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2), + GFP_NOFS); + if (unlikely(!path)) + return ERR_PTR(-ENOMEM); + path[0].p_maxdepth = depth + 1; + } + path[0].p_hdr = eh; + path[0].p_bh = NULL; + + i = depth; + /* walk through the tree */ + while (i) { + ext_debug("depth %d: num %d, max %d\n", + ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); + + ext4_ext_binsearch_idx(inode, path + ppos, block); + path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx); + path[ppos].p_depth = i; + path[ppos].p_ext = NULL; + + bh = read_extent_tree_block(inode, path[ppos].p_block, --i, + flags); + if (unlikely(IS_ERR(bh))) { + ret = PTR_ERR(bh); + goto err; + } + + eh = ext_block_hdr(bh); + ppos++; + if (unlikely(ppos > depth)) { + extents_brelse(bh); + EXT4_ERROR_INODE(inode, + "ppos %d > depth %d", ppos, depth); + ret = -EIO; + goto err; + } + path[ppos].p_bh = bh; + path[ppos].p_hdr = eh; + } + + path[ppos].p_depth = i; + path[ppos].p_ext = NULL; + path[ppos].p_idx = NULL; + + /* find extent */ + ext4_ext_binsearch(inode, path + ppos, block); + /* if not an empty leaf */ + if (path[ppos].p_ext) + path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext); + + ext4_ext_show_path(inode, path); + + return path; + +err: + ext4_ext_drop_refs(path); + if (path) { + kfree(path); + if (orig_path) + *orig_path = NULL; + } + return ERR_PTR(ret); +} + +/* + * ext4_ext_insert_index: + * insert new index [@logical;@ptr] into the block at @curp; + * check where to insert: before @curp or after @curp + */ +static int ext4_ext_insert_index(void *icb, handle_t *handle, struct inode *inode, + struct ext4_ext_path *curp, + int logical, ext4_fsblk_t ptr) +{ + struct ext4_extent_idx *ix; + int len, err; + + err = ext4_ext_get_access(icb, handle, inode, curp); + if (err) + return err; + + if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) { + EXT4_ERROR_INODE(inode, + 
"logical %d == ei_block %d!", + logical, le32_to_cpu(curp->p_idx->ei_block)); + return -EIO; + } + + if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries) + >= le16_to_cpu(curp->p_hdr->eh_max))) { + EXT4_ERROR_INODE(inode, + "eh_entries %d >= eh_max %d!", + le16_to_cpu(curp->p_hdr->eh_entries), + le16_to_cpu(curp->p_hdr->eh_max)); + return -EIO; + } + + if (logical > le32_to_cpu(curp->p_idx->ei_block)) { + /* insert after */ + ext_debug("insert new index %d after: %llu\n", logical, ptr); + ix = curp->p_idx + 1; + } else { + /* insert before */ + ext_debug("insert new index %d before: %llu\n", logical, ptr); + ix = curp->p_idx; + } + + len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1; + BUG_ON(len < 0); + if (len > 0) { + ext_debug("insert new index %d: " + "move %d indices from 0x%p to 0x%p\n", + logical, len, ix, ix + 1); + memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx)); + } + + if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) { + EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!"); + return -EIO; + } + + ix->ei_block = cpu_to_le32(logical); + ext4_idx_store_pblock(ix, ptr); + le16_add_cpu(&curp->p_hdr->eh_entries, 1); + + if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) { + EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!"); + return -EIO; + } + + err = ext4_ext_dirty(icb, handle, inode, curp); + ext4_std_error(inode->i_sb, err); + + return err; +} + +/* + * ext4_ext_split: + * inserts new subtree into the path, using free index entry + * at depth @at: + * - allocates all needed blocks (new leaf and all intermediate index blocks) + * - makes decision where to split + * - moves remaining extents and index entries (right to the split point) + * into the newly allocated blocks + * - initializes subtree + */ +static int ext4_ext_split(void *icb, handle_t *handle, struct inode *inode, + unsigned int flags, + struct ext4_ext_path *path, + struct ext4_extent *newext, int at) +{ + struct buffer_head *bh = NULL; + int depth = ext_depth(inode); + struct ext4_extent_header *neh; + struct ext4_extent_idx *fidx; + int i = at, k, m, a; + ext4_fsblk_t newblock, oldblock; + __le32 border; + ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */ + int err = 0; + + /* make decision: where to split? */ + /* FIXME: now decision is simplest: at current extent */ + + /* if current leaf will be split, then we should use + * border from split point */ + if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) { + EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!"); + return -EIO; + } + if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { + border = path[depth].p_ext[1].ee_block; + ext_debug("leaf will be split." + " next leaf starts at %d\n", + le32_to_cpu(border)); + } else { + border = newext->ee_block; + ext_debug("leaf will be added." + " next leaf starts at %d\n", + le32_to_cpu(border)); + } + + /* + * If error occurs, then we break processing + * and mark filesystem read-only. index won't + * be inserted and tree will be in consistent + * state. Next mount will repair buffers too. + */ + + /* + * Get array to track all allocated blocks. + * We need this to handle errors and free blocks + * upon them. 
+ */ + ablocks = kzalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS); + if (!ablocks) + return -ENOMEM; + + /* allocate all needed blocks */ + ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); + for (a = 0; a < depth - at; a++) { + newblock = ext4_ext_new_meta_block(icb, handle, inode, path, + newext, &err, flags); + if (newblock == 0) + goto cleanup; + ablocks[a] = newblock; + } + + /* initialize new leaf */ + newblock = ablocks[--a]; + if (unlikely(newblock == 0)) { + EXT4_ERROR_INODE(inode, "newblock == 0!"); + err = -EIO; + goto cleanup; + } + bh = extents_bwrite(inode->i_sb, newblock); + if (unlikely(!bh)) { + err = -ENOMEM; + goto cleanup; + } + + err = ext4_journal_get_create_access(icb, handle, bh); + if (err) + goto cleanup; + + neh = ext_block_hdr(bh); + neh->eh_entries = 0; + neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); + neh->eh_magic = cpu_to_le16(EXT4_EXT_MAGIC); + neh->eh_depth = 0; + + /* move remainder of path[depth] to the new leaf */ + if (unlikely(path[depth].p_hdr->eh_entries != + path[depth].p_hdr->eh_max)) { + EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!", + path[depth].p_hdr->eh_entries, + path[depth].p_hdr->eh_max); + err = -EIO; + goto cleanup; + } + /* start copy from next extent */ + m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++; + ext4_ext_show_move(inode, path, newblock, depth); + if (m) { + struct ext4_extent *ex; + ex = EXT_FIRST_EXTENT(neh); + memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m); + le16_add_cpu(&neh->eh_entries, m); + } + + ext4_extent_block_csum_set(inode, neh); + set_buffer_uptodate(bh); + + err = ext4_handle_dirty_metadata(icb, handle, inode, bh); + if (err) + goto cleanup; + extents_brelse(bh); + bh = NULL; + + /* correct old leaf */ + if (m) { + err = ext4_ext_get_access(icb, handle, inode, path + depth); + if (err) + goto cleanup; + le16_add_cpu(&path[depth].p_hdr->eh_entries, -m); + err = ext4_ext_dirty(icb, handle, inode, path + depth); + if (err) + goto cleanup; + + } + + /* create intermediate indexes */ + k = depth - at - 1; + if (unlikely(k < 0)) { + EXT4_ERROR_INODE(inode, "k %d < 0!", k); + err = -EIO; + goto cleanup; + } + if (k) + ext_debug("create %d intermediate indices\n", k); + /* insert new index into current index block */ + /* current depth stored in i var */ + i = depth - 1; + while (k--) { + oldblock = newblock; + newblock = ablocks[--a]; + bh = extents_bwrite(inode->i_sb, newblock); + if (unlikely(!bh)) { + err = -ENOMEM; + goto cleanup; + } + + err = ext4_journal_get_create_access(icb, handle, bh); + if (err) + goto cleanup; + + neh = ext_block_hdr(bh); + neh->eh_entries = cpu_to_le16(1); + neh->eh_magic = cpu_to_le16(EXT4_EXT_MAGIC); + neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); + neh->eh_depth = cpu_to_le16(depth - i); + fidx = EXT_FIRST_INDEX(neh); + fidx->ei_block = border; + ext4_idx_store_pblock(fidx, oldblock); + + ext_debug("int.index at %d (block %llu): %u -> %llu\n", + i, newblock, le32_to_cpu(border), oldblock); + + /* move remainder of path[i] to the new index block */ + if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) != + EXT_LAST_INDEX(path[i].p_hdr))) { + EXT4_ERROR_INODE(inode, + "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!", + le32_to_cpu(path[i].p_ext->ee_block)); + err = -EIO; + goto cleanup; + } + /* start copy indexes */ + m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++; + ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, + EXT_MAX_INDEX(path[i].p_hdr)); + ext4_ext_show_move(inode, path, newblock, i); + if (m) { + 
memmove(++fidx, path[i].p_idx, + sizeof(struct ext4_extent_idx) * m); + le16_add_cpu(&neh->eh_entries, m); + } + ext4_extent_block_csum_set(inode, neh); + set_buffer_uptodate(bh); + + err = ext4_handle_dirty_metadata(icb, handle, inode, bh); + if (err) + goto cleanup; + extents_brelse(bh); + bh = NULL; + + /* correct old index */ + if (m) { + err = ext4_ext_get_access(icb, handle, inode, path + i); + if (err) + goto cleanup; + le16_add_cpu(&path[i].p_hdr->eh_entries, -m); + err = ext4_ext_dirty(icb, handle, inode, path + i); + if (err) + goto cleanup; + } + + i--; + } + + /* insert new index */ + err = ext4_ext_insert_index(icb, handle, inode, path + at, + le32_to_cpu(border), newblock); + +cleanup: + if (bh) + extents_brelse(bh); + + if (err) { + /* free all allocated blocks in error case */ + for (i = 0; i < depth; i++) { + if (!ablocks[i]) + continue; + ext4_free_blocks(icb, handle, inode, NULL, ablocks[i], 1, + EXT4_FREE_BLOCKS_METADATA); + } + } + kfree(ablocks); + + return err; +} + +/* + * ext4_ext_grow_indepth: + * implements tree growing procedure: + * - allocates new block + * - moves top-level data (index block or leaf) into the new block + * - initializes new top-level, creating index that points to the + * just created block + */ +static int ext4_ext_grow_indepth(void *icb, handle_t *handle, struct inode *inode, + unsigned int flags) +{ + struct ext4_extent_header *neh; + struct buffer_head *bh; + ext4_fsblk_t newblock, goal = 0; + int err = 0; + + /* Try to prepend new index to old one */ + if (ext_depth(inode)) + goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode))); + goal = ext4_inode_to_goal_block(inode); + newblock = ext4_new_meta_blocks(icb, handle, inode, goal, flags, + NULL, &err); + if (newblock == 0) + return err; + + bh = extents_bwrite(inode->i_sb, newblock); + if (!bh) + return -ENOMEM; + + err = ext4_journal_get_create_access(icb, handle, bh); + if (err) + goto out; + + /* move top-level index/leaf into new block */ + memmove(bh->b_data, EXT4_I(inode)->i_block, + sizeof(EXT4_I(inode)->i_block)); + + /* set size of new block */ + neh = ext_block_hdr(bh); + /* old root could have indexes or leaves + * so calculate e_max right way */ + if (ext_depth(inode)) + neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); + else + neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); + neh->eh_magic = cpu_to_le16(EXT4_EXT_MAGIC); + ext4_extent_block_csum_set(inode, neh); + set_buffer_uptodate(bh); + + err = ext4_handle_dirty_metadata(icb, handle, inode, bh); + if (err) + goto out; + + /* Update top-level index: num,max,pointer */ + neh = ext_inode_hdr(inode); + neh->eh_entries = cpu_to_le16(1); + ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock); + if (neh->eh_depth == 0) { + /* Root extent block becomes index block */ + neh->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0)); + EXT_FIRST_INDEX(neh)->ei_block = + EXT_FIRST_EXTENT(neh)->ee_block; + } + ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", + (neh->eh_entries), (neh->eh_max), + (EXT_FIRST_INDEX(neh)->ei_block), + ext4_idx_pblock(EXT_FIRST_INDEX(neh))); + + le16_add_cpu(&neh->eh_depth, 1); + ext4_mark_inode_dirty(icb, handle, inode); +out: + extents_brelse(bh); + + return err; +} + +/* + * ext4_ext_create_new_leaf: + * finds empty index and adds new leaf. + * if no free index is found, then it requests in-depth growing. 
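+ *
+ * A minimal sketch of the upward walk follows; demo_free_index_level is
+ * a hypothetical helper, and the caller must still test
+ * EXT_HAS_FREE_INDEX() at the returned level, exactly as the function
+ * below does.
+ */
+
+/* Illustrative only: walk from the leaf toward the root and report the
+ * deepest level whose index block still has a free slot. */
+static int demo_free_index_level(struct ext4_ext_path *path, int depth)
+{
+	struct ext4_ext_path *curp = path + depth;
+	int i = depth;
+
+	while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
+		i--;
+		curp--;
+	}
+	return i;	/* 0 here may still mean "grow the tree in depth" */
+}
+
+/*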
+ */ +static int ext4_ext_create_new_leaf(void *icb, handle_t *handle, struct inode *inode, + unsigned int mb_flags, + unsigned int gb_flags, + struct ext4_ext_path **ppath, + struct ext4_extent *newext) +{ + struct ext4_ext_path *path = *ppath; + struct ext4_ext_path *curp; + int depth, i, err = 0; + +repeat: + i = depth = ext_depth(inode); + + /* walk up to the tree and look for free index entry */ + curp = path + depth; + while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { + i--; + curp--; + } + + /* we use already allocated block for index block, + * so subsequent data blocks should be contiguous */ + if (EXT_HAS_FREE_INDEX(curp)) { + /* if we found index with free entry, then use that + * entry: create all needed subtree and add new leaf */ + err = ext4_ext_split(icb, handle, inode, mb_flags, path, newext, i); + if (err) + goto out; + + /* refill path */ + path = ext4_find_extent(inode, + (ext4_lblk_t)le32_to_cpu(newext->ee_block), + ppath, gb_flags); + if (IS_ERR(path)) + err = PTR_ERR(path); + } else { + /* tree is full, time to grow in depth */ + err = ext4_ext_grow_indepth(icb, handle, inode, mb_flags); + if (err) + goto out; + + /* refill path */ + path = ext4_find_extent(inode, + (ext4_lblk_t)le32_to_cpu(newext->ee_block), + ppath, gb_flags); + if (IS_ERR(path)) { + err = PTR_ERR(path); + goto out; + } + + /* + * only first (depth 0 -> 1) produces free space; + * in all other cases we have to split the grown tree + */ + depth = ext_depth(inode); + if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { + /* now we need to split */ + goto repeat; + } + } + +out: + return err; +} + +/* + * search the closest allocated block to the left for *logical + * and returns it at @logical + it's physical address at @phys + * if *logical is the smallest allocated block, the function + * returns 0 at @phys + * return value contains 0 (success) or error code + */ +static int ext4_ext_search_left(struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t *logical, ext4_fsblk_t *phys) +{ + struct ext4_extent_idx *ix; + struct ext4_extent *ex; + int depth, ee_len; + + if (unlikely(path == NULL)) { + EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical); + return -EIO; + } + depth = path->p_depth; + *phys = 0; + + if (depth == 0 && path->p_ext == NULL) + return 0; + + /* usually extent in the path covers blocks smaller + * then *logical, but it can be that extent is the + * first one in the file */ + + ex = path[depth].p_ext; + ee_len = ext4_ext_get_actual_len(ex); + if (*logical < le32_to_cpu(ex->ee_block)) { + if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) { + EXT4_ERROR_INODE(inode, + "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!", + *logical, le32_to_cpu(ex->ee_block)); + return -EIO; + } + while (--depth >= 0) { + ix = path[depth].p_idx; + if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { + EXT4_ERROR_INODE(inode, + "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!", + ix != NULL ? le32_to_cpu(ix->ei_block) : 0, + EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ? 
+ le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0, + depth); + return -EIO; + } + } + return 0; + } + + if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) { + EXT4_ERROR_INODE(inode, + "logical %d < ee_block %d + ee_len %d!", + *logical, le32_to_cpu(ex->ee_block), ee_len); + return -EIO; + } + + *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; + *phys = ext4_ext_pblock(ex) + ee_len - 1; + return 0; +} + +/* + * search the closest allocated block to the right for *logical + * and returns it at @logical + it's physical address at @phys + * if *logical is the largest allocated block, the function + * returns 0 at @phys + * return value contains 0 (success) or error code + */ +static int ext4_ext_search_right(struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t *logical, ext4_fsblk_t *phys, + struct ext4_extent **ret_ex) +{ + struct buffer_head *bh = NULL; + struct ext4_extent_header *eh; + struct ext4_extent_idx *ix; + struct ext4_extent *ex; + ext4_fsblk_t block; + int depth; /* Note, NOT eh_depth; depth from top of tree */ + int ee_len; + + if ((path == NULL)) { + EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical); + return -EIO; + } + depth = path->p_depth; + *phys = 0; + + if (depth == 0 && path->p_ext == NULL) + return 0; + + /* usually extent in the path covers blocks smaller + * then *logical, but it can be that extent is the + * first one in the file */ + + ex = path[depth].p_ext; + ee_len = ext4_ext_get_actual_len(ex); + /*if (*logical < le32_to_cpu(ex->ee_block)) {*/ + if (*logical < (ex->ee_block)) { + if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) { + EXT4_ERROR_INODE(inode, + "first_extent(path[%d].p_hdr) != ex", + depth); + return -EIO; + } + while (--depth >= 0) { + ix = path[depth].p_idx; + if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { + EXT4_ERROR_INODE(inode, + "ix != EXT_FIRST_INDEX *logical %d!", + *logical); + return -EIO; + } + } + goto found_extent; + } + + /*if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {*/ + if (unlikely(*logical < ((ex->ee_block) + ee_len))) { + EXT4_ERROR_INODE(inode, + "logical %d < ee_block %d + ee_len %d!", + /**logical, le32_to_cpu(ex->ee_block), ee_len);*/ + *logical, (ex->ee_block), ee_len); + return -EIO; + } + + if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { + /* next allocated block in this leaf */ + ex++; + goto found_extent; + } + + /* go up and search for index to the right */ + while (--depth >= 0) { + ix = path[depth].p_idx; + if (ix != EXT_LAST_INDEX(path[depth].p_hdr)) + goto got_index; + } + + /* we've gone up to the root and found no index to the right */ + return 0; + +got_index: + /* we've found index to the right, let's + * follow it and find the closest allocated + * block to the right */ + ix++; + block = ext4_idx_pblock(ix); + while (++depth < path->p_depth) { + /* subtract from p_depth to get proper eh_depth */ + bh = read_extent_tree_block(inode, block, + path->p_depth - depth, 0); + if (IS_ERR(bh)) + return PTR_ERR(bh); + eh = ext_block_hdr(bh); + ix = EXT_FIRST_INDEX(eh); + block = ext4_idx_pblock(ix); + extents_brelse(bh); + } + + bh = read_extent_tree_block(inode, block, path->p_depth - depth, 0); + if (IS_ERR(bh)) + return PTR_ERR(bh); + eh = ext_block_hdr(bh); + ex = EXT_FIRST_EXTENT(eh); +found_extent: + /**logical = le32_to_cpu(ex->ee_block);*/ + *logical = (ex->ee_block); + *phys = ext4_ext_pblock(ex); + *ret_ex = ex; + if (bh) + extents_brelse(bh); + return 0; +} + +/* + * ext4_ext_next_allocated_block: + * returns 
allocated block in subsequent extent or EXT_MAX_BLOCKS. + * NOTE: it considers block number from index entry as + * allocated block. Thus, index entries have to be consistent + * with leaves. + */ +ext4_lblk_t +ext4_ext_next_allocated_block(struct ext4_ext_path *path) +{ + int depth; + + depth = path->p_depth; + + if (depth == 0 && path->p_ext == NULL) + return EXT_MAX_BLOCKS; + + while (depth >= 0) { + if (depth == path->p_depth) { + /* leaf */ + if (path[depth].p_ext && + path[depth].p_ext != + EXT_LAST_EXTENT(path[depth].p_hdr)) + return le32_to_cpu(path[depth].p_ext[1].ee_block); + } else { + /* index */ + if (path[depth].p_idx != + EXT_LAST_INDEX(path[depth].p_hdr)) + return le32_to_cpu(path[depth].p_idx[1].ei_block); + } + depth--; + } + + return EXT_MAX_BLOCKS; +} + +/* + * ext4_ext_next_leaf_block: + * returns first allocated block from next leaf or EXT_MAX_BLOCKS + */ +static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path) +{ + int depth; + + BUG_ON(path == NULL); + depth = path->p_depth; + + /* zero-tree has no leaf blocks at all */ + if (depth == 0) + return EXT_MAX_BLOCKS; + + /* go to index block */ + depth--; + + while (depth >= 0) { + if (path[depth].p_idx != + EXT_LAST_INDEX(path[depth].p_hdr)) + return (ext4_lblk_t) + le32_to_cpu(path[depth].p_idx[1].ei_block); + depth--; + } + + return EXT_MAX_BLOCKS; +} + +/* + * ext4_ext_correct_indexes: + * if leaf gets modified and modified extent is first in the leaf, + * then we have to correct all indexes above. + * TODO: do we need to correct tree in all cases? + */ +static int ext4_ext_correct_indexes(void *icb, handle_t *handle, struct inode *inode, + struct ext4_ext_path *path) +{ + struct ext4_extent_header *eh; + int depth = ext_depth(inode); + struct ext4_extent *ex; + __le32 border; + int k, err = 0; + + eh = path[depth].p_hdr; + ex = path[depth].p_ext; + + if (unlikely(ex == NULL || eh == NULL)) { + EXT4_ERROR_INODE(inode, + "ex %p == NULL or eh %p == NULL", ex, eh); + return -EIO; + } + + if (depth == 0) { + /* there is no tree at all */ + return 0; + } + + if (ex != EXT_FIRST_EXTENT(eh)) { + /* we correct tree if first leaf got modified only */ + return 0; + } + + /* + * TODO: we need correction if border is smaller than current one + */ + k = depth - 1; + border = path[depth].p_ext->ee_block; + err = ext4_ext_get_access(icb, handle, inode, path + k); + if (err) + return err; + path[k].p_idx->ei_block = border; + err = ext4_ext_dirty(icb, handle, inode, path + k); + if (err) + return err; + + while (k--) { + /* change all left-side indexes */ + if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) + break; + err = ext4_ext_get_access(icb, handle, inode, path + k); + if (err) + break; + path[k].p_idx->ei_block = border; + err = ext4_ext_dirty(icb, handle, inode, path + k); + if (err) + break; + } + + return err; +} + +int +ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, + struct ext4_extent *ex2) +{ + unsigned short ext1_ee_len, ext2_ee_len; + + /* + * Make sure that both extents are initialized. We don't merge + * unwritten extents so that we can be sure that end_io code has + * the extent that was written properly split out and conversion to + * initialized is trivial. 
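+ *
+ * A hedged sketch of the contiguity test at the heart of this check
+ * follows; demo_extents_adjacent is an illustrative name only.
+ */
+
+/* Illustrative only: two extents can merge only when they are both
+ * logically and physically contiguous. */
+static int demo_extents_adjacent(struct ext4_extent *a, struct ext4_extent *b)
+{
+	unsigned short alen = ext4_ext_get_actual_len(a);
+
+	return le32_to_cpu(a->ee_block) + alen == le32_to_cpu(b->ee_block) &&
+	       ext4_ext_pblock(a) + alen == ext4_ext_pblock(b);
+}
+
+/*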
+ */ + if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2)) + return 0; + + ext1_ee_len = ext4_ext_get_actual_len(ex1); + ext2_ee_len = ext4_ext_get_actual_len(ex2); + + if (le32_to_cpu(ex1->ee_block) + ext1_ee_len != + le32_to_cpu(ex2->ee_block)) + return 0; + + /* + * To allow future support for preallocated extents to be added + * as an RO_COMPAT feature, refuse to merge to extents if + * this can result in the top bit of ee_len being set. + */ + if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) + return 0; + if (ext4_ext_is_unwritten(ex1) && + (ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN)) + return 0; +#ifdef AGGRESSIVE_TEST + if (ext1_ee_len >= 4) + return 0; +#endif + + if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2)) + return 1; + return 0; +} + +/* + * This function tries to merge the "ex" extent to the next extent in the tree. + * It always tries to merge towards right. If you want to merge towards + * left, pass "ex - 1" as argument instead of "ex". + * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns + * 1 if they got merged. + */ +static int ext4_ext_try_to_merge_right(struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *ex) +{ + struct ext4_extent_header *eh; + unsigned int depth, len; + int merge_done = 0, unwritten; + + depth = ext_depth(inode); + assert(path[depth].p_hdr != NULL); + eh = path[depth].p_hdr; + + while (ex < EXT_LAST_EXTENT(eh)) { + if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) + break; + /* merge with next extent! */ + unwritten = ext4_ext_is_unwritten(ex); + ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + + ext4_ext_get_actual_len(ex + 1)); + if (unwritten) + ext4_ext_mark_unwritten(ex); + + if (ex + 1 < EXT_LAST_EXTENT(eh)) { + len = (EXT_LAST_EXTENT(eh) - ex - 1) + * sizeof(struct ext4_extent); + memmove(ex + 1, ex + 2, len); + } + le16_add_cpu(&eh->eh_entries, -1); + merge_done = 1; + if (!eh->eh_entries) + EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!"); + } + + return merge_done; +} + +/* + * This function does a very simple check to see if we can collapse + * an extent tree with a single extent tree leaf block into the inode. + */ +static void ext4_ext_try_to_merge_up(void *icb, handle_t *handle, + struct inode *inode, + struct ext4_ext_path *path) +{ + size_t s; + unsigned max_root = ext4_ext_space_root(inode, 0); + ext4_fsblk_t blk; + + if ((path[0].p_depth != 1) || + (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) || + (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root)) + return; + + /* + * We need to modify the block allocation bitmap and the block + * group descriptor to release the extent tree block. If we + * can't get the journal credits, give up. + */ + if (ext4_journal_extend(icb, handle, 2)) + return; + + /* + * Copy the extent data up to the inode + */ + blk = ext4_idx_pblock(path[0].p_idx); + s = le16_to_cpu(path[1].p_hdr->eh_entries) * + sizeof(struct ext4_extent_idx); + s += sizeof(struct ext4_extent_header); + + path[1].p_maxdepth = path[0].p_maxdepth; + memcpy(path[0].p_hdr, path[1].p_hdr, s); + path[0].p_depth = 0; + path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) + + (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr)); + path[0].p_hdr->eh_max = cpu_to_le16(max_root); + + extents_brelse(path[1].p_bh); + ext4_free_blocks(icb, handle, inode, NULL, blk, 1, + EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); +} + +/* + * This function tries to merge the @ex extent to neighbours in the tree. + * return 1 if merge left else 0. 
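+ *
+ * A minimal sketch of the in-place right merge used here follows;
+ * demo_absorb_right is an illustrative name, and the body mirrors one
+ * iteration of ext4_ext_try_to_merge_right() above.
+ */
+
+/* Illustrative only: the left extent absorbs its right neighbour and the
+ * tail of the leaf shifts down one slot. */
+static void demo_absorb_right(struct ext4_extent_header *eh,
+			      struct ext4_extent *ex)
+{
+	unsigned int len;
+
+	ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) +
+				 ext4_ext_get_actual_len(ex + 1));
+	if (ex + 1 < EXT_LAST_EXTENT(eh)) {
+		len = (EXT_LAST_EXTENT(eh) - ex - 1)
+			* sizeof(struct ext4_extent);
+		memmove(ex + 1, ex + 2, len);
+	}
+	le16_add_cpu(&eh->eh_entries, -1);
+}
+
+/*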
+ */ +static void ext4_ext_try_to_merge(void *icb, handle_t *handle, + struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *ex) { + struct ext4_extent_header *eh; + unsigned int depth; + int merge_done = 0; + + depth = ext_depth(inode); + BUG_ON(path[depth].p_hdr == NULL); + eh = path[depth].p_hdr; + + if (ex > EXT_FIRST_EXTENT(eh)) + merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1); + + if (!merge_done) + (void) ext4_ext_try_to_merge_right(inode, path, ex); + + ext4_ext_try_to_merge_up(icb, handle, inode, path); +} + +/* + * ext4_ext_insert_extent: + * tries to merge requsted extent into the existing extent or + * inserts requested extent as new one into the tree, + * creating new leaf in the no-space case. + */ +int ext4_ext_insert_extent(void *icb, handle_t *handle, struct inode *inode, + struct ext4_ext_path **ppath, + struct ext4_extent *newext, + int gb_flags) +{ + struct ext4_ext_path *path = *ppath; + struct ext4_extent_header *eh; + struct ext4_extent *ex, *fex; + struct ext4_extent *nearex; /* nearest extent */ + struct ext4_ext_path *npath = NULL; + int depth, len, err; + ext4_lblk_t next; + int mb_flags = 0, unwritten; + + if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) + mb_flags |= EXT4_MB_DELALLOC_RESERVED; + if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { + EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); + return -EIO; + } + depth = ext_depth(inode); + ex = path[depth].p_ext; + eh = path[depth].p_hdr; + if (unlikely(path[depth].p_hdr == NULL)) { + EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); + return -EIO; + } + + /* try to insert block into found extent and return */ + if (ex && !(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) { + + /* + * Try to see whether we should rather test the extent on + * right from ex, or from the left of ex. This is because + * ext4_find_extent() can return either extent on the + * left, or on the right from the searched position. This + * will make merging more effective. 
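+ *
+ * A hedged sketch of that probe follows; demo_pick_neighbour is an
+ * illustrative name, and the conditions are the same ones applied right
+ * below.
+ */
+
+/* Illustrative only: step right when @newext begins past @ex, step left
+ * when it ends before @ex, otherwise stay put. */
+static struct ext4_extent *demo_pick_neighbour(struct ext4_extent_header *eh,
+					       struct ext4_extent *ex,
+					       struct ext4_extent *newext)
+{
+	if (ex < EXT_LAST_EXTENT(eh) &&
+	    le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex) <
+	    le32_to_cpu(newext->ee_block))
+		return ex + 1;	/* try to append to the right neighbour */
+	if (ex > EXT_FIRST_EXTENT(eh) &&
+	    le32_to_cpu(newext->ee_block) + ext4_ext_get_actual_len(newext) <
+	    le32_to_cpu(ex->ee_block))
+		return ex - 1;	/* try to prepend to the left neighbour */
+	return ex;
+}
+
+/*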
+ */ + if (ex < EXT_LAST_EXTENT(eh) && + (le32_to_cpu(ex->ee_block) + + ext4_ext_get_actual_len(ex) < + le32_to_cpu(newext->ee_block))) { + ex += 1; + goto prepend; + } else if ((ex > EXT_FIRST_EXTENT(eh)) && + (le32_to_cpu(newext->ee_block) + + ext4_ext_get_actual_len(newext) < + le32_to_cpu(ex->ee_block))) + ex -= 1; + + /* Try to append newex to the ex */ + if (ext4_can_extents_be_merged(inode, ex, newext)) { + ext_debug("append [%d]%d block to %u:[%d]%d" + "(from %llu)\n", + ext4_ext_is_unwritten(newext), + ext4_ext_get_actual_len(newext), + le32_to_cpu(ex->ee_block), + ext4_ext_is_unwritten(ex), + ext4_ext_get_actual_len(ex), + ext4_ext_pblock(ex)); + err = ext4_ext_get_access(icb, handle, inode, + path + depth); + if (err) + return err; + unwritten = ext4_ext_is_unwritten(ex); + ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + + ext4_ext_get_actual_len(newext)); + if (unwritten) + ext4_ext_mark_unwritten(ex); + eh = path[depth].p_hdr; + nearex = ex; + goto merge; + } + +prepend: + /* Try to prepend newex to the ex */ + if (ext4_can_extents_be_merged(inode, newext, ex)) { + ext_debug("prepend %u[%d]%d block to %u:[%d]%d" + "(from %llu)\n", + le32_to_cpu(newext->ee_block), + ext4_ext_is_unwritten(newext), + ext4_ext_get_actual_len(newext), + le32_to_cpu(ex->ee_block), + ext4_ext_is_unwritten(ex), + ext4_ext_get_actual_len(ex), + ext4_ext_pblock(ex)); + err = ext4_ext_get_access(icb, handle, inode, + path + depth); + if (err) + return err; + + unwritten = ext4_ext_is_unwritten(ex); + ex->ee_block = newext->ee_block; + ext4_ext_store_pblock(ex, ext4_ext_pblock(newext)); + ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + + ext4_ext_get_actual_len(newext)); + if (unwritten) + ext4_ext_mark_unwritten(ex); + eh = path[depth].p_hdr; + nearex = ex; + goto merge; + } + } + + depth = ext_depth(inode); + eh = path[depth].p_hdr; + if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) + goto has_space; + + /* probably next leaf has space for us? */ + fex = EXT_LAST_EXTENT(eh); + next = EXT_MAX_BLOCKS; + if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)) + next = ext4_ext_next_leaf_block(path); + if (next != EXT_MAX_BLOCKS) { + ext_debug("next leaf block - %u\n", next); + BUG_ON(npath != NULL); + npath = ext4_find_extent(inode, next, NULL, 0); + if (IS_ERR(npath)) + return PTR_ERR(npath); + BUG_ON(npath->p_depth != path->p_depth); + eh = npath[depth].p_hdr; + if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) { + ext_debug("next leaf isn't full(%d)\n", + le16_to_cpu(eh->eh_entries)); + path = npath; + goto has_space; + } + ext_debug("next leaf has no free space(%d,%d)\n", + le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); + } + + /* + * There is no free space in the found leaf. + * We're gonna add a new leaf in the tree. 
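+ *
+ * Once a leaf with space exists, insertion opens a slot by shifting the
+ * tail of the leaf; a hedged sketch follows, with demo_open_slot as an
+ * illustrative name for the memmove performed in the has_space path
+ * below.
+ */
+
+/* Illustrative only: make room at @nearex and account for the new entry. */
+static void demo_open_slot(struct ext4_extent_header *eh,
+			   struct ext4_extent *nearex)
+{
+	int len = EXT_LAST_EXTENT(eh) - nearex + 1;
+
+	if (len > 0)
+		memmove(nearex + 1, nearex,
+			len * sizeof(struct ext4_extent));
+	le16_add_cpu(&eh->eh_entries, 1);
+}
+
+/*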
+ */ + if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL) + mb_flags |= EXT4_MB_USE_RESERVED; + err = ext4_ext_create_new_leaf(icb, handle, inode, mb_flags, gb_flags, + ppath, newext); + if (err) + goto cleanup; + depth = ext_depth(inode); + eh = path[depth].p_hdr; + +has_space: + nearex = path[depth].p_ext; + + err = ext4_ext_get_access(icb, handle, inode, path + depth); + if (err) + goto cleanup; + + if (!nearex) { + /* there is no extent in this leaf, create first one */ + ext_debug("first extent in the leaf: %u:%llu:[%d]%d\n", + le32_to_cpu(newext->ee_block), + ext4_ext_pblock(newext), + ext4_ext_is_unwritten(newext), + ext4_ext_get_actual_len(newext)); + nearex = EXT_FIRST_EXTENT(eh); + } else { + if (le32_to_cpu(newext->ee_block) + > le32_to_cpu(nearex->ee_block)) { + /* Insert after */ + ext_debug("insert %u:%llu:[%d]%d before: " + "nearest %p\n", + le32_to_cpu(newext->ee_block), + ext4_ext_pblock(newext), + ext4_ext_is_unwritten(newext), + ext4_ext_get_actual_len(newext), + nearex); + nearex++; + } else { + /* Insert before */ + BUG_ON(newext->ee_block == nearex->ee_block); + ext_debug("insert %u:%llu:[%d]%d after: " + "nearest %p\n", + le32_to_cpu(newext->ee_block), + ext4_ext_pblock(newext), + ext4_ext_is_unwritten(newext), + ext4_ext_get_actual_len(newext), + nearex); + } + len = EXT_LAST_EXTENT(eh) - nearex + 1; + if (len > 0) { + ext_debug("insert %u:%llu:[%d]%d: " + "move %d extents from 0x%p to 0x%p\n", + le32_to_cpu(newext->ee_block), + ext4_ext_pblock(newext), + ext4_ext_is_unwritten(newext), + ext4_ext_get_actual_len(newext), + len, nearex, nearex + 1); + memmove(nearex + 1, nearex, + len * sizeof(struct ext4_extent)); + } + } + + le16_add_cpu(&eh->eh_entries, 1); + path[depth].p_ext = nearex; + nearex->ee_block = newext->ee_block; + ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext)); + nearex->ee_len = newext->ee_len; + +merge: + /* try to merge extents */ + if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) + ext4_ext_try_to_merge(icb, handle, inode, path, nearex); + + + /* time to correct all indexes above */ + err = ext4_ext_correct_indexes(icb, handle, inode, path); + if (err) + goto cleanup; + + err = ext4_ext_dirty(icb, handle, inode, path + path->p_depth); + +cleanup: + if (npath) { + ext4_ext_drop_refs(npath); + kfree(npath); + } + return err; +} + +static inline int get_default_free_blocks_flags(struct inode *inode) +{ + return 0; +} + +/* FIXME!! we need to try to merge to left or right after zero-out */ +static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) +{ + ext4_fsblk_t ee_pblock; + unsigned int ee_len; + int ret; + + ee_len = ext4_ext_get_actual_len(ex); + ee_pblock = ext4_ext_pblock(ex); + + ret = 0; + + return ret; +} + +static int ext4_remove_blocks(void *icb, handle_t *handle, struct inode *inode, + struct ext4_extent *ex, + unsigned long from, unsigned long to) +{ + struct buffer_head *bh; + int i; + + if (from >= le32_to_cpu(ex->ee_block) + && to == le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex) - 1) { + /* tail removal */ + unsigned long num, start; + num = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex) - from; + start = ext4_ext_pblock(ex) + ext4_ext_get_actual_len(ex) - num; + ext4_free_blocks(icb, handle, inode, NULL, start, num, 0); + } else if (from == le32_to_cpu(ex->ee_block) + && to <= le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex) - 1) { + } else { + } + return 0; +} + +/* + * routine removes index from the index block + * it's used in truncate case only. 
thus all requests are for + * last index in the block only + */ +int ext4_ext_rm_idx(void *icb, handle_t *handle, struct inode *inode, + struct ext4_ext_path *path) +{ + int err; + ext4_fsblk_t leaf; + + /* free index block */ + path--; + leaf = ext4_idx_pblock(path->p_idx); + BUG_ON(path->p_hdr->eh_entries == 0); + if ((err = ext4_ext_get_access(icb, handle, inode, path))) + return err; + path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1); + if ((err = ext4_ext_dirty(icb, handle, inode, path))) + return err; + ext4_free_blocks(icb, handle, inode, NULL, leaf, 1, 0); + return err; +} + +static int +ext4_ext_rm_leaf(void *icb, handle_t *handle, struct inode *inode, + struct ext4_ext_path *path, unsigned long start) +{ + int err = 0, correct_index = 0; + int depth = ext_depth(inode), credits; + struct ext4_extent_header *eh; + unsigned a, b, block, num; + unsigned long ex_ee_block; + unsigned short ex_ee_len; + struct ext4_extent *ex; + + /* the header must be checked already in ext4_ext_remove_space() */ + if (!path[depth].p_hdr) + path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); + eh = path[depth].p_hdr; + BUG_ON(eh == NULL); + + /* find where to start removing */ + ex = EXT_LAST_EXTENT(eh); + + ex_ee_block = le32_to_cpu(ex->ee_block); + ex_ee_len = ext4_ext_get_actual_len(ex); + + while (ex >= EXT_FIRST_EXTENT(eh) && + ex_ee_block + ex_ee_len > start) { + path[depth].p_ext = ex; + + a = ex_ee_block > start ? ex_ee_block : start; + b = (unsigned long long)ex_ee_block + ex_ee_len - 1 < + EXT_MAX_BLOCKS ? ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCKS; + + + if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) { + block = 0; + num = 0; + BUG(); + } else if (a != ex_ee_block) { + /* remove tail of the extent */ + block = ex_ee_block; + num = a - block; + } else if (b != ex_ee_block + ex_ee_len - 1) { + /* remove head of the extent */ + block = a; + num = b - a; + /* there is no "make a hole" API yet */ + BUG(); + } else { + /* remove whole extent: excellent! */ + block = ex_ee_block; + num = 0; + BUG_ON(a != ex_ee_block); + BUG_ON(b != ex_ee_block + ex_ee_len - 1); + } + + /* at present, extent can't cross block group */ + /* leaf + bitmap + group desc + sb + inode */ + credits = 5; + if (ex == EXT_FIRST_EXTENT(eh)) { + correct_index = 1; + credits += (ext_depth(inode)) + 1; + } + + /*handle = ext4_ext_journal_restart(icb, handle, credits);*/ + /*if (IS_ERR(icb, handle)) {*/ + /*err = PTR_ERR(icb, handle);*/ + /*goto out;*/ + /*}*/ + + err = ext4_ext_get_access(icb, handle, inode, path + depth); + if (err) + goto out; + + err = ext4_remove_blocks(icb, handle, inode, ex, a, b); + if (err) + goto out; + + if (num == 0) { + /* this extent is removed entirely mark slot unused */ + ext4_ext_store_pblock(ex, 0); + eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); + } + + ex->ee_block = cpu_to_le32(block); + ex->ee_len = cpu_to_le16(num); + + err = ext4_ext_dirty(icb, handle, inode, path + depth); + if (err) + goto out; + + ex--; + ex_ee_block = le32_to_cpu(ex->ee_block); + ex_ee_len = ext4_ext_get_actual_len(ex); + } + + if (correct_index && eh->eh_entries) + err = ext4_ext_correct_indexes(icb, handle, inode, path); + + /* if this leaf is free, then we should + * remove it from index block above */ + if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) + err = ext4_ext_rm_idx(icb, handle, inode, path + depth); + +out: + return err; +} + +/* + * ext4_split_extent_at() splits an extent at given block. 
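
/*
 * Editorial sketch, not part of the patch: the clamping done at the
 * top of the loop in ext4_ext_rm_leaf() above. The range actually
 * removed from one extent is the intersection of [start, end-of-file]
 * with the extent itself; 0xffffffff stands for EXT_MAX_BLOCKS as
 * defined earlier in this file, other names are hypothetical.
 */
static void clamp_removal_range(unsigned long ex_ee_block,
				unsigned short ex_ee_len, unsigned long start,
				unsigned long *a, unsigned long *b)
{
	*a = ex_ee_block > start ? ex_ee_block : start;		/* clip head */
	*b = ex_ee_block + ex_ee_len - 1 < 0xffffffff		/* clip tail */
		? ex_ee_block + ex_ee_len - 1 : 0xffffffff;
}
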
+ *
+ * @handle: the journal handle
+ * @inode: the file inode
+ * @path: the path to the extent
+ * @split: the logical block where the extent is split.
+ * @split_flag: indicates whether the extent may be zeroed out if the split
+ *		fails, and the state (initialized or unwritten) of the new
+ *		extents.
+ * @flags: flags used to insert the new extent into the extent tree.
+ *
+ *
+ * Splits extent [a, b] into two extents [a, @split) and [@split, b], whose
+ * states are determined by @split_flag.
+ *
+ * There are two cases:
+ *  a> the extent is split into two extents.
+ *  b> no split is needed; the extent is simply marked.
+ *
+ * Returns 0 on success.
+ */
+static int ext4_split_extent_at(void *icb, handle_t *handle,
+			     struct inode *inode,
+			     struct ext4_ext_path **ppath,
+			     ext4_lblk_t split,
+			     int split_flag,
+			     int flags)
+{
+	struct ext4_ext_path *path = *ppath;
+	ext4_fsblk_t newblock;
+	ext4_lblk_t ee_block;
+	struct ext4_extent *ex, newex, orig_ex, zero_ex;
+	struct ext4_extent *ex2 = NULL;
+	unsigned int ee_len, depth;
+	int err = 0;
+
+	ext4_ext_show_leaf(inode, path);
+
+	depth = ext_depth(inode);
+	ex = path[depth].p_ext;
+	ee_block = le32_to_cpu(ex->ee_block);
+	ee_len = ext4_ext_get_actual_len(ex);
+	newblock = split - ee_block + ext4_ext_pblock(ex);
+
+	BUG_ON(split < ee_block || split >= (ee_block + ee_len));
+
+	err = ext4_ext_get_access(icb, handle, inode, path + depth);
+	if (err)
+		goto out;
+
+	if (split == ee_block) {
+		/*
+		 * case b: block @split is the block the extent begins with,
+		 * so we just change the state of the extent; no splitting
+		 * is needed.
+		 */
+		if (split_flag & EXT4_EXT_MARK_UNWRIT2)
+			ext4_ext_mark_unwritten(ex);
+		else
+			ext4_ext_mark_initialized(ex);
+
+		if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
+			ext4_ext_try_to_merge(icb, handle, inode, path, ex);
+
+		err = ext4_ext_dirty(icb, handle, inode, path + path->p_depth);
+		goto out;
+	}
+
+	/* case a */
+	memcpy(&orig_ex, ex, sizeof(orig_ex));
+	ex->ee_len = cpu_to_le16(split - ee_block);
+	if (split_flag & EXT4_EXT_MARK_UNWRIT1)
+		ext4_ext_mark_unwritten(ex);
+
+	/*
+	 * The path may lead to a new leaf, not to the original leaf any more,
+	 * after ext4_ext_insert_extent() returns.
+	 */
+	err = ext4_ext_dirty(icb, handle, inode, path + depth);
+	if (err)
+		goto fix_extent_len;
+
+	ex2 = &newex;
+	ex2->ee_block = cpu_to_le32(split);
+	ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block));
+	ext4_ext_store_pblock(ex2, newblock);
+	if (split_flag & EXT4_EXT_MARK_UNWRIT2)
+		ext4_ext_mark_unwritten(ex2);
+
+	err = ext4_ext_insert_extent(icb, handle, inode, ppath, &newex, flags);
+	if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+		if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
+			if (split_flag & EXT4_EXT_DATA_VALID1) {
+				err = ext4_ext_zeroout(inode, ex2);
+				zero_ex.ee_block = ex2->ee_block;
+				zero_ex.ee_len = cpu_to_le16(
+					ext4_ext_get_actual_len(ex2));
+				ext4_ext_store_pblock(&zero_ex,
+					ext4_ext_pblock(ex2));
+			} else {
+				err = ext4_ext_zeroout(inode, ex);
+				zero_ex.ee_block = ex->ee_block;
+				zero_ex.ee_len = cpu_to_le16(
+					ext4_ext_get_actual_len(ex));
+				ext4_ext_store_pblock(&zero_ex,
+					ext4_ext_pblock(ex));
+			}
+		} else {
+			err = ext4_ext_zeroout(inode, &orig_ex);
+			zero_ex.ee_block = orig_ex.ee_block;
+			zero_ex.ee_len = cpu_to_le16(
+				ext4_ext_get_actual_len(&orig_ex));
+			ext4_ext_store_pblock(&zero_ex,
+				ext4_ext_pblock(&orig_ex));
+		}
+
+		if (err)
+			goto fix_extent_len;
+		/* update the extent length and mark as initialized */
+		ex->ee_len = cpu_to_le16(ee_len);
+		ext4_ext_try_to_merge(icb, handle,
inode, path, ex); + err = ext4_ext_dirty(icb, handle, inode, path + path->p_depth); + if (err) + goto fix_extent_len; + + goto out; + } else if (err) + goto fix_extent_len; + +out: + ext4_ext_show_leaf(inode, path); + return err; + +fix_extent_len: + ex->ee_len = orig_ex.ee_len; + ext4_ext_dirty(icb, handle, inode, path + path->p_depth); + return err; +} + +/* + * returns 1 if current index have to be freed (even partial) + */ +static int inline +ext4_ext_more_to_rm(struct ext4_ext_path *path) +{ + BUG_ON(path->p_idx == NULL); + + if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) + return 0; + + /* + * if truncate on deeper level happened it it wasn't partial + * so we have to consider current index for truncation + */ + if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block) + return 0; + return 1; +} + +int ext4_ext_remove_space(void *icb, struct inode *inode, unsigned long start) +{ + struct super_block *sb = inode->i_sb; + int depth = ext_depth(inode); + struct ext4_ext_path *path; + handle_t *handle = NULL; + int i = 0, err = 0; + + /* probably first extent we're gonna free will be last in block */ + /*handle = ext4_journal_start(inode, depth + 1);*/ + /*if (IS_ERR(icb, handle))*/ + /*return PTR_ERR(icb, handle);*/ + + /* + * we start scanning from right side freeing all the blocks + * after i_size and walking into the deep + */ + path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL); + if (path == NULL) { + ext4_journal_stop(icb, handle); + return -ENOMEM; + } + memset(path, 0, sizeof(struct ext4_ext_path) * (depth + 1)); + path[0].p_hdr = ext_inode_hdr(inode); + if (ext4_ext_check_inode(inode)) { + err = -EIO; + goto out; + } + path[0].p_depth = depth; + + while (i >= 0 && err == 0) { + if (i == depth) { + /* this is leaf block */ + err = ext4_ext_rm_leaf(icb, handle, inode, path, start); + /* root level have p_bh == NULL, extents_brelse() eats this */ + extents_brelse(path[i].p_bh); + path[i].p_bh = NULL; + i--; + continue; + } + + /* this is index block */ + if (!path[i].p_hdr) { + path[i].p_hdr = ext_block_hdr(path[i].p_bh); + } + + if (!path[i].p_idx) { + /* this level hasn't touched yet */ + path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr); + path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1; + } else { + /* we've already was here, see at next index */ + path[i].p_idx--; + } + + if (ext4_ext_more_to_rm(path + i)) { + struct buffer_head *bh; + /* go to the next level */ + memset(path + i + 1, 0, sizeof(*path)); + bh = read_extent_tree_block(inode, ext4_idx_pblock(path[i].p_idx), path[0].p_depth - (i + 1), 0); + if (IS_ERR(bh)) { + /* should we reset i_size? 
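
/*
 * Editorial sketch, not part of the patch: the arithmetic behind
 * ext4_split_extent_at() above. Extent [ee_block, ee_block + ee_len)
 * becomes [ee_block, split) and [split, ee_block + ee_len), and the
 * second half's physical start is shifted by the same offset as its
 * logical start. Hypothetical plain-integer helper.
 */
static void split_extent_lengths(unsigned int ee_block, unsigned int ee_len,
				 unsigned long long ee_pblk, unsigned int split,
				 unsigned int *len1, unsigned int *len2,
				 unsigned long long *pblk2)
{
	*len1 = split - ee_block;		/* first half keeps ee_pblk */
	*len2 = ee_len - (split - ee_block);	/* remainder after the split */
	*pblk2 = ee_pblk + (split - ee_block);	/* "newblock" in the code above */
}
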
*/ + err = -EIO; + break; + } + path[i+1].p_bh = bh; + + /* put actual number of indexes to know is this + * number got changed at the next iteration */ + path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries); + i++; + } else { + /* we finish processing this index, go up */ + if (path[i].p_hdr->eh_entries == 0 && i > 0) { + /* index is empty, remove it + * handle must be already prepared by the + * truncatei_leaf() */ + err = ext4_ext_rm_idx(icb, handle, inode, path + i); + } + /* root level have p_bh == NULL, extents_brelse() eats this */ + extents_brelse(path[i].p_bh); + path[i].p_bh = NULL; + i--; + } + } + + /* TODO: flexible tree reduction should be here */ + if (path->p_hdr->eh_entries == 0) { + /* + * truncate to zero freed all the tree + * so, we need to correct eh_depth + */ + err = ext4_ext_get_access(icb, handle, inode, path); + if (err == 0) { + ext_inode_hdr(inode)->eh_depth = 0; + ext_inode_hdr(inode)->eh_max = + cpu_to_le16(ext4_ext_space_root(inode, 0)); + err = ext4_ext_dirty(icb, handle, inode, path); + } + } +out: + if (path) { + ext4_ext_drop_refs(path); + kfree(path); + } + ext4_journal_stop(icb, handle); + + return err; +} + +int ext4_ext_tree_init(void *icb, handle_t *handle, struct inode *inode) +{ + struct ext4_extent_header *eh; + + eh = ext_inode_hdr(inode); + eh->eh_depth = 0; + eh->eh_entries = 0; + eh->eh_magic = cpu_to_le16(EXT4_EXT_MAGIC); + eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0)); + ext4_mark_inode_dirty(icb, handle, inode); + return 0; +} + +/* + * called at mount time + */ +void ext4_ext_init(struct super_block *sb) +{ + /* + * possible initialization would be here + */ +} + +static int ext4_ext_convert_to_initialized ( + void *icb, + handle_t *handle, + struct inode *inode, + struct ext4_ext_path **ppath, + ext4_lblk_t split, + unsigned long blocks, + int flags) +{ + int depth = ext_depth(inode), err; + struct ext4_extent *ex = (*ppath)[depth].p_ext; + + assert (le32_to_cpu(ex->ee_block) <= split); + + if (split + blocks == le32_to_cpu(ex->ee_block) + + ext4_ext_get_actual_len(ex)) { + + /* split and initialize right part */ + err = ext4_split_extent_at(icb, handle, inode, ppath, split, + EXT4_EXT_MARK_UNWRIT1, flags); + + } else if (le32_to_cpu(ex->ee_block) == split) { + + /* split and initialize left part */ + err = ext4_split_extent_at(icb, handle, inode, ppath, split + blocks, + EXT4_EXT_MARK_UNWRIT2, flags); + + } else { + + /* split 1 extent to 3 and initialize the 2nd */ + err = ext4_split_extent_at(icb, handle, inode, ppath, split + blocks, + EXT4_EXT_MARK_UNWRIT1 | + EXT4_EXT_MARK_UNWRIT2, flags); + if (0 == err) { + err = ext4_split_extent_at(icb, handle, inode, ppath, split, + EXT4_EXT_MARK_UNWRIT1, flags); + } + } + + return err; +} + +int ext4_ext_get_blocks(void *icb, handle_t *handle, struct inode *inode, ext4_fsblk_t iblock, + unsigned long max_blocks, struct buffer_head *bh_result, + int create, int flags) +{ + struct ext4_ext_path *path = NULL; + struct ext4_extent newex, *ex; + int goal, err = 0, depth; + unsigned long allocated = 0; + ext4_fsblk_t next, newblock; + + clear_buffer_new(bh_result); + /*mutex_lock(&ext4_I(inode)->truncate_mutex);*/ + + /* find extent for this block */ + path = ext4_find_extent(inode, iblock, NULL, 0); + if (IS_ERR(path)) { + err = PTR_ERR(path); + path = NULL; + goto out2; + } + + depth = ext_depth(inode); + + /* + * consistent leaf must not be empty + * this situations is possible, though, _during_ tree modification + * this is why assert can't be put in ext4_ext_find_extent() + */ + 
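
/*
 * Editorial decision table, not part of the patch: which splits
 * ext4_ext_convert_to_initialized() above performs to initialize
 * [split, split + blocks) inside an unwritten extent
 * [ee_block, ee_block + ee_len). Enum and names are hypothetical.
 */
enum split_kind { SPLIT_RIGHT, SPLIT_LEFT, SPLIT_MIDDLE };

static enum split_kind classify_split(unsigned int ee_block,
				      unsigned int ee_len, unsigned int split,
				      unsigned int blocks)
{
	if (split + blocks == ee_block + ee_len)
		return SPLIT_RIGHT;	/* one split; right part initialized */
	if (split == ee_block)
		return SPLIT_LEFT;	/* one split; left part initialized */
	return SPLIT_MIDDLE;		/* two splits; middle part initialized */
}
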
BUG_ON(path[depth].p_ext == NULL && depth != 0); + + if ((ex = path[depth].p_ext)) { + ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); + ext4_fsblk_t ee_start = ext4_ext_pblock(ex); + unsigned short ee_len = ext4_ext_get_actual_len(ex); + /* if found exent covers block, simple return it */ + if (iblock >= ee_block && iblock < ee_block + ee_len) { + + /* number of remain blocks in the extent */ + allocated = ee_len + ee_block - iblock; + + if (ext4_ext_is_unwritten(ex)) { + if (create) { + newblock = iblock - ee_block + ee_start; + err = ext4_ext_convert_to_initialized ( + icb, handle, + inode, + &path, + iblock, + allocated, + flags); + if (err) + goto out2; + + } else { + newblock = 0; + } + } else { + newblock = iblock - ee_block + ee_start; + } + goto out; + } + } + + /* + * requested block isn't allocated yet + * we couldn't try to create block if create flag is zero + */ + if (!create) { + goto out2; + } + + /* find next allocated block so that we know how many + * blocks we can allocate without ovelapping next extent */ + next = ext4_ext_next_allocated_block(path); + BUG_ON(next <= iblock); + allocated = next - iblock; + if (flags & EXT4_GET_BLOCKS_PRE_IO && max_blocks > EXT_UNWRITTEN_MAX_LEN) + max_blocks = EXT_UNWRITTEN_MAX_LEN; + if (allocated > max_blocks) + allocated = max_blocks; + + /* allocate new block */ + goal = ext4_ext_find_goal(inode, path, iblock); + + newblock = ext4_new_meta_blocks(icb, handle, inode, goal, 0, + &allocated, &err); + if (!newblock) + goto out2; + + /* try to insert new extent into found leaf and return */ + newex.ee_block = cpu_to_le32(iblock); + ext4_ext_store_pblock(&newex, newblock); + newex.ee_len = cpu_to_le16(allocated); + /* if it's fallocate, mark ex as unwritten */ + if (flags & EXT4_GET_BLOCKS_PRE_IO) { + ext4_ext_mark_unwritten(&newex); + } + err = ext4_ext_insert_extent(icb, handle, inode, &path, &newex, + flags & EXT4_GET_BLOCKS_PRE_IO); + + if (err) { + /* free data blocks we just allocated */ + ext4_free_blocks(icb, handle, inode, NULL, ext4_ext_pblock(&newex), + le16_to_cpu(newex.ee_len), get_default_free_blocks_flags(inode)); + goto out2; + } + + ext4_mark_inode_dirty(icb, handle, inode); + + /* previous routine could use block we allocated */ + if (ext4_ext_is_unwritten(&newex)) + newblock = 0; + else + newblock = ext4_ext_pblock(&newex); + + set_buffer_new(bh_result); + +out: + if (allocated > max_blocks) + allocated = max_blocks; + + ext4_ext_show_leaf(inode, path); + set_buffer_mapped(bh_result); + bh_result->b_bdev = inode->i_sb->s_bdev; + bh_result->b_blocknr = newblock; +out2: + if (path) { + ext4_ext_drop_refs(path); + kfree(path); + } + /*mutex_unlock(&ext4_I(inode)->truncate_mutex);*/ + + return err ? err : allocated; +} + +int ext4_ext_truncate(void *icb, struct inode *inode, unsigned long start) +{ + int ret = ext4_ext_remove_space(icb, inode, start); + + /* Save modifications on i_blocks field of the inode. */ + if (!ret) + ret = ext4_mark_inode_dirty(icb, NULL, inode); + + return ret; +} + +#pragma warning(pop) + diff --git a/Ext4Fsd/ext4/ext4_xattr.c b/Ext4Fsd/ext4/ext4_xattr.c index e1a26e9..d7e9425 100644 --- a/Ext4Fsd/ext4/ext4_xattr.c +++ b/Ext4Fsd/ext4/ext4_xattr.c @@ -1,1295 +1,1251 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
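
/*
 * Editorial sketch, not part of the patch: the hit path of
 * ext4_ext_get_blocks() above. Once iblock is known to fall inside
 * [ee_block, ee_block + ee_len), the mapping is pure arithmetic.
 * Hypothetical plain-integer helper.
 */
static unsigned long long map_within_extent(unsigned long iblock,
					    unsigned long ee_block,
					    unsigned short ee_len,
					    unsigned long long ee_start,
					    unsigned long *remaining)
{
	*remaining = ee_block + ee_len - iblock; /* contiguous blocks left */
	return iblock - ee_block + ee_start;	 /* physical block of iblock */
}
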
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public Licens - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- - */ - -#include -#include -#include - -static ext4_fsblk_t ext4_new_meta_blocks(void *icb, struct inode *inode, - ext4_fsblk_t goal, - unsigned int flags, - unsigned long *count, int *errp) -{ - NTSTATUS status; - ULONG blockcnt = (count) ? *count : 1; - ULONG block = 0; - - status = Ext2NewBlock((PEXT2_IRP_CONTEXT)icb, - inode->i_sb->s_priv, - 0, (ULONG)goal, - &block, - &blockcnt); - if (count) - *count = blockcnt; - - if (!NT_SUCCESS(status)) { - *errp = Ext2LinuxError(status); - return 0; - } - inode->i_blocks += (blockcnt * (inode->i_sb->s_blocksize >> 9)); - return block; -} - -static void ext4_free_blocks(void *icb, struct inode *inode, - ext4_fsblk_t block, int count, int flags) -{ - Ext2FreeBlock((PEXT2_IRP_CONTEXT)icb, inode->i_sb->s_priv, (ULONG)block, count); - inode->i_blocks -= count * (inode->i_sb->s_blocksize >> 9); - return; -} - -static inline ext4_fsblk_t ext4_inode_to_goal_block(struct inode *inode) -{ - PEXT2_VCB Vcb; - Vcb = inode->i_sb->s_priv; - return (inode->i_ino - 1) / BLOCKS_PER_GROUP; -} - -#define NAME_HASH_SHIFT 5 -#define VALUE_HASH_SHIFT 16 - -static inline void ext4_xattr_compute_hash(struct ext4_xattr_header *header, - struct ext4_xattr_entry *entry) -{ - __u32 hash = 0; - char *name = EXT4_XATTR_NAME(entry); - int n; - - for (n = 0; n < entry->e_name_len; n++) { - hash = (hash << NAME_HASH_SHIFT) ^ - (hash >> (8 * sizeof(hash) - NAME_HASH_SHIFT)) ^ *name++; - } - - if (entry->e_value_block == 0 && entry->e_value_size != 0) { - __le32 *value = - (__le32 *)((char *)header + le16_to_cpu(entry->e_value_offs)); - for (n = (le32_to_cpu(entry->e_value_size) + EXT4_XATTR_ROUND) >> - EXT4_XATTR_PAD_BITS; - n; n--) { - hash = (hash << VALUE_HASH_SHIFT) ^ - (hash >> (8 * sizeof(hash) - VALUE_HASH_SHIFT)) ^ - le32_to_cpu(*value++); - } - } - entry->e_hash = cpu_to_le32(hash); -} - -#define BLOCK_HASH_SHIFT 16 - -/* - * ext4_xattr_rehash() - * - * Re-compute the extended attribute hash value after an entry has changed. 
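
/*
 * Editorial, self-contained version of the legacy xattr name hash
 * shown above (not part of the patch): a 5-bit shift-and-xor over
 * the name bytes; the value words are folded the same way with a
 * 16-bit shift.
 */
#include <stdint.h>
#include <stddef.h>

static uint32_t xattr_name_hash(const char *name, size_t len)
{
	uint32_t hash = 0;
	size_t n;

	for (n = 0; n < len; n++)	/* NAME_HASH_SHIFT == 5, 32 - 5 == 27 */
		hash = (hash << 5) ^ (hash >> 27) ^ (uint8_t)name[n];
	return hash;
}
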
- */ -static void ext4_xattr_rehash(struct ext4_xattr_header *header, - struct ext4_xattr_entry *entry) -{ - struct ext4_xattr_entry *here; - __u32 hash = 0; - - ext4_xattr_compute_hash(header, entry); - here = EXT4_XATTR_ENTRY(header + 1); - while (!EXT4_XATTR_IS_LAST_ENTRY(here)) { - if (!here->e_hash) { - /* Block is not shared if an entry's hash value == 0 */ - hash = 0; - break; - } - hash = (hash << BLOCK_HASH_SHIFT) ^ - (hash >> (8 * sizeof(hash) - BLOCK_HASH_SHIFT)) ^ - le32_to_cpu(here->e_hash); - here = EXT4_XATTR_NEXT(here); - } - header->h_hash = cpu_to_le32(hash); -} - -#if CONFIG_META_CSUM_ENABLE -static __u32 -ext4_xattr_block_checksum(PEXT2_MCB inode_ref, - ext4_fsblk_t blocknr, - struct ext4_xattr_header *header) -{ - __u32 checksum = 0; - __u64 le64_blocknr = blocknr; - struct ext4_sblock *sb = &inode_ref->fs->sb; - - if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) { - __u32 orig_checksum; - - /* Preparation: temporarily set bg checksum to 0 */ - orig_checksum = header->h_checksum; - header->h_checksum = 0; - /* First calculate crc32 checksum against fs uuid */ - checksum = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, - sizeof(sb->uuid)); - /* Then calculate crc32 checksum block number */ - checksum = ext4_crc32c(checksum, &le64_blocknr, - sizeof(le64_blocknr)); - /* Finally calculate crc32 checksum against - * the entire xattr block */ - checksum = ext4_crc32c(checksum, header, - ext4_sb_get_block_size(sb)); - header->h_checksum = orig_checksum; - } - return checksum; -} -#else -#define ext4_xattr_block_checksum(...) 0 -#endif - -static void -ext4_xattr_set_block_checksum(PEXT2_MCB inode_ref, - ext4_fsblk_t blocknr, - struct ext4_xattr_header *header) -{ - /* TODO: Need METADATA_CSUM supports. */ - header->h_checksum = 0; -} - -static int ext4_xattr_item_cmp(struct rb_node *_a, - struct rb_node *_b) -{ - int result; - struct ext4_xattr_item *a, *b; - a = container_of(_a, struct ext4_xattr_item, node); - a = container_of(_a, struct ext4_xattr_item, node); - b = container_of(_b, struct ext4_xattr_item, node); - - if (a->is_data && !b->is_data) - return -1; - - if (!a->is_data && b->is_data) - return 1; - - result = a->name_index - b->name_index; - if (result) - return result; - - if (a->name_len < b->name_len) - return -1; - - if (a->name_len > b->name_len) - return 1; - - return memcmp(a->name, b->name, a->name_len); -} - -// -// Red-black tree insert routine. 
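
/*
 * Editorial sketch, not part of the patch: the checksum chaining used
 * by ext4_xattr_block_checksum() above. crc32c is seeded over the fs
 * UUID, then the little-endian block number, then the whole block
 * (with its h_checksum field zeroed by the caller first). The
 * ext4_crc32c() prototype and the EXT4_CRC32_INIT seed value are
 * inferred from the calls in this file and are assumptions.
 */
#include <stdint.h>
#include <stddef.h>

#define EXT4_CRC32_INIT 0xffffffffU	/* assumed initial seed */

extern uint32_t ext4_crc32c(uint32_t crc, const void *buf, size_t size);

static uint32_t xattr_block_csum(const void *uuid, size_t uuid_len,
				 uint64_t le64_blocknr, const void *block,
				 size_t block_size)
{
	uint32_t csum;

	csum = ext4_crc32c(EXT4_CRC32_INIT, uuid, uuid_len);
	csum = ext4_crc32c(csum, &le64_blocknr, sizeof(le64_blocknr));
	csum = ext4_crc32c(csum, block, block_size);
	return csum;
}
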
-// - -static struct ext4_xattr_item * -ext4_xattr_item_search(struct ext4_xattr_ref *xattr_ref, - struct ext4_xattr_item *name) -{ - struct rb_node *new = xattr_ref->root.rb_node; - - while (new) { - struct ext4_xattr_item *node = - container_of(new, struct ext4_xattr_item, node); - int result = ext4_xattr_item_cmp(&name->node, new); - - if (result < 0) - new = new->rb_left; - else if (result > 0) - new = new->rb_right; - else - return node; - - } - - return NULL; -} - -static void ext4_xattr_item_insert(struct ext4_xattr_ref *xattr_ref, - struct ext4_xattr_item *item) -{ - rb_insert(&xattr_ref->root, &item->node, - ext4_xattr_item_cmp); - list_add_tail(&item->list_node, &xattr_ref->ordered_list); -} - -static void ext4_xattr_item_remove(struct ext4_xattr_ref *xattr_ref, - struct ext4_xattr_item *item) -{ - rb_erase(&item->node, &xattr_ref->root); - list_del_init(&item->list_node); -} - -static struct ext4_xattr_item * -ext4_xattr_item_alloc(__u8 name_index, const char *name, size_t name_len) -{ - struct ext4_xattr_item *item; - item = kzalloc(sizeof(struct ext4_xattr_item) + name_len, GFP_NOFS); - if (!item) - return NULL; - - item->name_index = name_index; - item->name = (char *)(item + 1); - item->name_len = name_len; - item->data = NULL; - item->data_size = 0; - INIT_LIST_HEAD(&item->list_node); - - memcpy(item->name, name, name_len); - - if (name_index == EXT4_XATTR_INDEX_SYSTEM && - name_len == 4 && - !memcmp(name, "data", 4)) - item->is_data = TRUE; - else - item->is_data = FALSE; - - return item; -} - -static int ext4_xattr_item_alloc_data(struct ext4_xattr_item *item, - const void *orig_data, size_t data_size) -{ - void *data = NULL; - ASSERT(!item->data); - data = kmalloc(data_size, GFP_NOFS); - if (!data) - return -ENOMEM; - - if (orig_data) - memcpy(data, orig_data, data_size); - - item->data = data; - item->data_size = data_size; - return 0; -} - -static void ext4_xattr_item_free_data(struct ext4_xattr_item *item) -{ - ASSERT(item->data); - kfree(item->data); - item->data = NULL; - item->data_size = 0; -} - -static int ext4_xattr_item_resize_data(struct ext4_xattr_item *item, - size_t new_data_size) -{ - if (new_data_size != item->data_size) { - void *new_data; - new_data = kmalloc(new_data_size, GFP_NOFS); - if (!new_data) - return -ENOMEM; - - memcpy(new_data, item->data, item->data_size); - kfree(item->data); - - item->data = new_data; - item->data_size = new_data_size; - } - return 0; -} - -static void ext4_xattr_item_free(struct ext4_xattr_item *item) -{ - if (item->data) - ext4_xattr_item_free_data(item); - - kfree(item); -} - -static void *ext4_xattr_entry_data(struct ext4_xattr_ref *xattr_ref, - struct ext4_xattr_entry *entry, - BOOL in_inode) -{ - char *ret; - int block_size; - if (in_inode) { - struct ext4_xattr_ibody_header *header; - struct ext4_xattr_entry *first_entry; - int inode_size = xattr_ref->fs->InodeSize; - header = EXT4_XATTR_IHDR(xattr_ref->OnDiskInode); - first_entry = EXT4_XATTR_IFIRST(header); - - ret = ((char *)first_entry + le16_to_cpu(entry->e_value_offs)); - if (ret + EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)) - - (char *)xattr_ref->OnDiskInode > inode_size) - ret = NULL; - - return ret; - - } - block_size = xattr_ref->fs->BlockSize; - ret = ((char *)xattr_ref->block_bh->b_data + le16_to_cpu(entry->e_value_offs)); - if (ret + EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)) - - (char *)xattr_ref->block_bh->b_data > block_size) - ret = NULL; - return ret; -} - -static int ext4_xattr_block_fetch(struct ext4_xattr_ref *xattr_ref) -{ - int 
ret = 0; - size_t size_rem; - void *data; - struct ext4_xattr_entry *entry = NULL; - - ASSERT(xattr_ref->block_bh->b_data); - entry = EXT4_XATTR_BFIRST(xattr_ref->block_bh); - - size_rem = xattr_ref->fs->BlockSize; - for (; size_rem > 0 && !EXT4_XATTR_IS_LAST_ENTRY(entry); - entry = EXT4_XATTR_NEXT(entry), - size_rem -= EXT4_XATTR_LEN(entry->e_name_len)) { - struct ext4_xattr_item *item; - char *e_name = EXT4_XATTR_NAME(entry); - - data = ext4_xattr_entry_data(xattr_ref, entry, FALSE); - if (!data) { - ret = -EIO; - goto Finish; - } - - item = ext4_xattr_item_alloc(entry->e_name_index, e_name, - (size_t)entry->e_name_len); - if (!item) { - ret = -ENOMEM; - goto Finish; - } - if (ext4_xattr_item_alloc_data( - item, data, le32_to_cpu(entry->e_value_size)) != 0) { - ext4_xattr_item_free(item); - ret = -ENOMEM; - goto Finish; - } - ext4_xattr_item_insert(xattr_ref, item); - xattr_ref->block_size_rem -= - EXT4_XATTR_SIZE(item->data_size) + - EXT4_XATTR_LEN(item->name_len); - xattr_ref->ea_size += EXT4_XATTR_SIZE(item->data_size) + - EXT4_XATTR_LEN(item->name_len); - } - -Finish: - return ret; -} - -static int ext4_xattr_inode_fetch(struct ext4_xattr_ref *xattr_ref) -{ - void *data; - size_t size_rem; - int ret = 0; - struct ext4_xattr_ibody_header *header = NULL; - struct ext4_xattr_entry *entry = NULL; - int inode_size = xattr_ref->fs->InodeSize; - - header = EXT4_XATTR_IHDR(xattr_ref->OnDiskInode); - entry = EXT4_XATTR_IFIRST(header); - - size_rem = inode_size - EXT4_GOOD_OLD_INODE_SIZE - - xattr_ref->OnDiskInode->i_extra_isize; - for (; size_rem > 0 && !EXT4_XATTR_IS_LAST_ENTRY(entry); - entry = EXT4_XATTR_NEXT(entry), - size_rem -= EXT4_XATTR_LEN(entry->e_name_len)) { - struct ext4_xattr_item *item; - char *e_name = EXT4_XATTR_NAME(entry); - - data = ext4_xattr_entry_data(xattr_ref, entry, TRUE); - if (!data) { - ret = -EIO; - goto Finish; - } - - item = ext4_xattr_item_alloc(entry->e_name_index, e_name, - (size_t)entry->e_name_len); - if (!item) { - ret = -ENOMEM; - goto Finish; - } - if (ext4_xattr_item_alloc_data( - item, data, le32_to_cpu(entry->e_value_size)) != 0) { - ext4_xattr_item_free(item); - ret = -ENOMEM; - goto Finish; - } - item->in_inode = TRUE; - ext4_xattr_item_insert(xattr_ref, item); - xattr_ref->inode_size_rem -= - EXT4_XATTR_SIZE(item->data_size) + - EXT4_XATTR_LEN(item->name_len); - xattr_ref->ea_size += EXT4_XATTR_SIZE(item->data_size) + - EXT4_XATTR_LEN(item->name_len); - } - -Finish: - return ret; -} - -static __s32 ext4_xattr_inode_space(struct ext4_xattr_ref *xattr_ref) -{ - int inode_size = xattr_ref->fs->InodeSize; - int size_rem = inode_size - EXT4_GOOD_OLD_INODE_SIZE - - xattr_ref->OnDiskInode->i_extra_isize; - return size_rem; -} - -static __s32 ext4_xattr_block_space(struct ext4_xattr_ref *xattr_ref) -{ - return xattr_ref->fs->BlockSize; -} - -static int ext4_xattr_fetch(struct ext4_xattr_ref *xattr_ref) -{ - int ret = 0; - int inode_size = xattr_ref->fs->InodeSize; - if (inode_size > EXT4_GOOD_OLD_INODE_SIZE) { - ret = ext4_xattr_inode_fetch(xattr_ref); - if (ret != 0) - return ret; - } - - if (xattr_ref->block_loaded) - ret = ext4_xattr_block_fetch(xattr_ref); - - xattr_ref->dirty = FALSE; - return ret; -} - -static struct ext4_xattr_item * -ext4_xattr_lookup_item(struct ext4_xattr_ref *xattr_ref, __u8 name_index, - const char *name, size_t name_len) -{ - struct ext4_xattr_item tmp = { - FALSE, - FALSE, - name_index, - (char *)name, /*won't touch this string*/ - name_len, - }; - if (name_index == EXT4_XATTR_INDEX_SYSTEM && - name_len == 4 && - 
!memcmp(name, "data", 4)) - tmp.is_data = TRUE; - - return ext4_xattr_item_search(xattr_ref, &tmp); -} - -static struct ext4_xattr_item * -ext4_xattr_insert_item(struct ext4_xattr_ref *xattr_ref, __u8 name_index, - const char *name, size_t name_len, const void *data, - size_t data_size, - int *err) -{ - struct ext4_xattr_item *item; - item = ext4_xattr_item_alloc(name_index, name, name_len); - if (!item) { - if (err) - *err = -ENOMEM; - - return NULL; - } - - item->in_inode = TRUE; - if (xattr_ref->inode_size_rem < - EXT4_XATTR_SIZE(data_size) + - EXT4_XATTR_LEN(item->name_len)) { - if (xattr_ref->block_size_rem < - EXT4_XATTR_SIZE(data_size) + - EXT4_XATTR_LEN(item->name_len)) { - if (err) - *err = -ENOSPC; - - return NULL; - } - - item->in_inode = FALSE; - } - if (ext4_xattr_item_alloc_data(item, data, data_size) != 0) { - ext4_xattr_item_free(item); - if (err) - *err = -ENOMEM; - - return NULL; - } - ext4_xattr_item_insert(xattr_ref, item); - xattr_ref->ea_size += - EXT4_XATTR_SIZE(item->data_size) + EXT4_XATTR_LEN(item->name_len); - if (item->in_inode) { - xattr_ref->inode_size_rem -= - EXT4_XATTR_SIZE(item->data_size) + - EXT4_XATTR_LEN(item->name_len); - } else { - xattr_ref->block_size_rem -= - EXT4_XATTR_SIZE(item->data_size) + - EXT4_XATTR_LEN(item->name_len); - } - xattr_ref->dirty = TRUE; - if (err) - *err = 0; - - return item; -} - -static struct ext4_xattr_item * -ext4_xattr_insert_item_ordered(struct ext4_xattr_ref *xattr_ref, __u8 name_index, - const char *name, size_t name_len, const void *data, - size_t data_size, - int *err) -{ - struct ext4_xattr_item *item, *last_item = NULL; - item = ext4_xattr_item_alloc(name_index, name, name_len); - if (!item) { - if (err) - *err = -ENOMEM; - - return NULL; - } - - if (!list_empty(&xattr_ref->ordered_list)) - last_item = list_entry(xattr_ref->ordered_list.prev, - struct ext4_xattr_item, - list_node); - - item->in_inode = TRUE; - if ((xattr_ref->inode_size_rem < - EXT4_XATTR_SIZE(data_size) + - EXT4_XATTR_LEN(item->name_len)) - || - (last_item && !last_item->in_inode)) { - if (xattr_ref->block_size_rem < - EXT4_XATTR_SIZE(data_size) + - EXT4_XATTR_LEN(item->name_len)) { - if (err) - *err = -ENOSPC; - - return NULL; - } - - item->in_inode = FALSE; - } - if (ext4_xattr_item_alloc_data(item, data, data_size) != 0) { - ext4_xattr_item_free(item); - if (err) - *err = -ENOMEM; - - return NULL; - } - ext4_xattr_item_insert(xattr_ref, item); - xattr_ref->ea_size += - EXT4_XATTR_SIZE(item->data_size) + EXT4_XATTR_LEN(item->name_len); - if (item->in_inode) { - xattr_ref->inode_size_rem -= - EXT4_XATTR_SIZE(item->data_size) + - EXT4_XATTR_LEN(item->name_len); - } - else { - xattr_ref->block_size_rem -= - EXT4_XATTR_SIZE(item->data_size) + - EXT4_XATTR_LEN(item->name_len); - } - xattr_ref->dirty = TRUE; - if (err) - *err = 0; - - return item; -} - -static int ext4_xattr_remove_item(struct ext4_xattr_ref *xattr_ref, - __u8 name_index, const char *name, - size_t name_len) -{ - int ret = -ENOENT; - struct ext4_xattr_item *item = - ext4_xattr_lookup_item(xattr_ref, name_index, name, name_len); - if (item) { - if (item == xattr_ref->iter_from) { - struct rb_node *next_node; - next_node = rb_next(&item->node); - if (next_node) - xattr_ref->iter_from = - container_of(next_node, - struct ext4_xattr_item, - node); - else - xattr_ref->iter_from = NULL; - } - - xattr_ref->ea_size -= EXT4_XATTR_SIZE(item->data_size) + - EXT4_XATTR_LEN(item->name_len); - - if (item->in_inode) { - xattr_ref->inode_size_rem += - EXT4_XATTR_SIZE(item->data_size) + - 
EXT4_XATTR_LEN(item->name_len); - } else { - xattr_ref->block_size_rem += - EXT4_XATTR_SIZE(item->data_size) + - EXT4_XATTR_LEN(item->name_len); - } - - ext4_xattr_item_remove(xattr_ref, item); - ext4_xattr_item_free(item); - xattr_ref->dirty = TRUE; - ret = 0; - } - return ret; -} - -static int ext4_xattr_resize_item(struct ext4_xattr_ref *xattr_ref, - struct ext4_xattr_item *item, - size_t new_data_size) -{ - int ret = 0; - BOOL to_inode = FALSE, to_block = FALSE; - size_t old_data_size = item->data_size; - size_t orig_room_size = item->in_inode ? - xattr_ref->inode_size_rem : - xattr_ref->block_size_rem; - - /* - * Check if we can hold this entry in both in-inode and - * on-block form - * - * More complicated case: we do not allow entries stucking in - * the middle between in-inode space and on-block space, so - * the entry has to stay in either inode space or block space. - */ - if (item->in_inode) { - if (xattr_ref->inode_size_rem + - EXT4_XATTR_SIZE(old_data_size) < - EXT4_XATTR_SIZE(new_data_size)) { - if (xattr_ref->block_size_rem < - EXT4_XATTR_SIZE(new_data_size) + - EXT4_XATTR_LEN(item->name_len)) - return -ENOSPC; - - to_block = TRUE; - } - } else { - if (xattr_ref->block_size_rem + - EXT4_XATTR_SIZE(old_data_size) < - EXT4_XATTR_SIZE(new_data_size)) { - if (xattr_ref->inode_size_rem < - EXT4_XATTR_SIZE(new_data_size) + - EXT4_XATTR_LEN(item->name_len)) - return -ENOSPC; - - to_inode = TRUE; - } - } - ret = ext4_xattr_item_resize_data(item, new_data_size); - if (ret) - return ret; - - xattr_ref->ea_size = - xattr_ref->ea_size - - EXT4_XATTR_SIZE(old_data_size) + - EXT4_XATTR_SIZE(new_data_size); - - /* - * This entry may originally lie in inode space or block space, - * and it is going to be transferred to another place. - */ - if (to_block) { - xattr_ref->inode_size_rem += - EXT4_XATTR_SIZE(old_data_size) + - EXT4_XATTR_LEN(item->name_len); - xattr_ref->block_size_rem -= - EXT4_XATTR_SIZE(new_data_size) + - EXT4_XATTR_LEN(item->name_len); - item->in_inode = FALSE; - } else if (to_inode) { - xattr_ref->block_size_rem += - EXT4_XATTR_SIZE(old_data_size) + - EXT4_XATTR_LEN(item->name_len); - xattr_ref->inode_size_rem -= - EXT4_XATTR_SIZE(new_data_size) + - EXT4_XATTR_LEN(item->name_len); - item->in_inode = TRUE; - } else { - /* - * No need to transfer as there is enough space for the entry - * to stay in inode space or block space it used to be. 
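
/*
 * Editorial sketch, not part of the patch: the placement decision in
 * ext4_xattr_resize_item() above. An entry never straddles the
 * in-inode area and the xattr block: it either grows in place or
 * moves wholesale. Sizes here are assumed already padded (i.e.
 * EXT4_XATTR_SIZE values) and entry_len stands for
 * EXT4_XATTR_LEN(name_len); enum and names are hypothetical.
 */
enum ea_home { EA_STAY, EA_TO_BLOCK, EA_TO_INODE, EA_NOSPC };

static enum ea_home resize_destination(int in_inode,
				       long inode_rem, long block_rem,
				       long old_size, long new_size,
				       long entry_len)
{
	if (in_inode) {
		if (inode_rem + old_size >= new_size)
			return EA_STAY;			/* grows in place */
		if (block_rem >= new_size + entry_len)
			return EA_TO_BLOCK;		/* migrate to block */
		return EA_NOSPC;
	}
	if (block_rem + old_size >= new_size)
		return EA_STAY;
	if (inode_rem >= new_size + entry_len)
		return EA_TO_INODE;			/* migrate to inode */
	return EA_NOSPC;
}
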
- */ - orig_room_size += - EXT4_XATTR_SIZE(old_data_size); - orig_room_size -= - EXT4_XATTR_SIZE(new_data_size); - if (item->in_inode) - xattr_ref->inode_size_rem = orig_room_size; - else - xattr_ref->block_size_rem = orig_room_size; - - } - xattr_ref->dirty = TRUE; - return ret; -} - -void ext4_xattr_purge_items(struct ext4_xattr_ref *xattr_ref) -{ - struct rb_node *first_node; - struct ext4_xattr_item *item = NULL; - first_node = rb_first(&xattr_ref->root); - if (first_node) - item = container_of(first_node, struct ext4_xattr_item, - node); - - while (item) { - struct rb_node *next_node; - struct ext4_xattr_item *next_item = NULL; - next_node = rb_next(&item->node); - if (next_node) - next_item = container_of(next_node, struct ext4_xattr_item, - node); - else - next_item = NULL; - - ext4_xattr_item_remove(xattr_ref, item); - ext4_xattr_item_free(item); - - item = next_item; - } - xattr_ref->ea_size = 0; - if (ext4_xattr_inode_space(xattr_ref) < - sizeof(struct ext4_xattr_ibody_header)) - xattr_ref->inode_size_rem = 0; - else - xattr_ref->inode_size_rem = - ext4_xattr_inode_space(xattr_ref) - - sizeof(struct ext4_xattr_ibody_header); - - xattr_ref->block_size_rem = - ext4_xattr_block_space(xattr_ref) - - sizeof(struct ext4_xattr_header); -} - -static int ext4_xattr_try_alloc_block(struct ext4_xattr_ref *xattr_ref) -{ - int ret = 0; - - ext4_fsblk_t xattr_block = 0; - xattr_block = xattr_ref->inode_ref->Inode.i_file_acl; - if (!xattr_block) { - ext4_fsblk_t goal = - ext4_inode_to_goal_block(&xattr_ref->inode_ref->Inode); - - xattr_block = ext4_new_meta_blocks(xattr_ref->IrpContext, - &xattr_ref->inode_ref->Inode, - goal, 0, NULL, - &ret); - if (ret != 0) - goto Finish; - - xattr_ref->block_bh = extents_bwrite(&xattr_ref->fs->sb, xattr_block); - if (!xattr_ref->block_bh) { - ext4_free_blocks(xattr_ref->IrpContext, &xattr_ref->inode_ref->Inode, - xattr_block, 1, 0); - ret = -ENOMEM; - goto Finish; - } - - xattr_ref->inode_ref->Inode.i_file_acl = xattr_block; - xattr_ref->IsOnDiskInodeDirty = TRUE; - xattr_ref->block_loaded = TRUE; - } - -Finish: - return ret; -} - -static void ext4_xattr_try_free_block(struct ext4_xattr_ref *xattr_ref) -{ - ext4_fsblk_t xattr_block; - xattr_block = xattr_ref->inode_ref->Inode.i_file_acl; - xattr_ref->inode_ref->Inode.i_file_acl = 0; - extents_brelse(xattr_ref->block_bh); - xattr_ref->block_bh = NULL; - ext4_free_blocks(xattr_ref->IrpContext, &xattr_ref->inode_ref->Inode, - xattr_block, 1, 0); - xattr_ref->IsOnDiskInodeDirty = TRUE; - xattr_ref->block_loaded = FALSE; -} - -static void ext4_xattr_set_block_header(struct ext4_xattr_ref *xattr_ref) -{ - struct ext4_xattr_header *block_header = NULL; - block_header = EXT4_XATTR_BHDR(xattr_ref->block_bh); - - memset(block_header, 0, sizeof(struct ext4_xattr_header)); - block_header->h_magic = EXT4_XATTR_MAGIC; - block_header->h_refcount = cpu_to_le32(1); - block_header->h_blocks = cpu_to_le32(1); -} - -static void -ext4_xattr_set_inode_entry(struct ext4_xattr_item *item, - struct ext4_xattr_ibody_header *ibody_header, - struct ext4_xattr_entry *entry, void *ibody_data_ptr) -{ - entry->e_name_len = (__u8)item->name_len; - entry->e_name_index = item->name_index; - entry->e_value_offs = - cpu_to_le16((char *)ibody_data_ptr - (char *)EXT4_XATTR_IFIRST(ibody_header)); - entry->e_value_block = 0; - entry->e_value_size = cpu_to_le32(item->data_size); -} - -static void ext4_xattr_set_block_entry(struct ext4_xattr_item *item, - struct ext4_xattr_header *block_header, - struct ext4_xattr_entry *block_entry, - void 
*block_data_ptr) -{ - block_entry->e_name_len = (__u8)item->name_len; - block_entry->e_name_index = item->name_index; - block_entry->e_value_offs = - cpu_to_le16((char *)block_data_ptr - (char *)block_header); - block_entry->e_value_block = 0; - block_entry->e_value_size = cpu_to_le32(item->data_size); -} - -static int ext4_xattr_write_to_disk(struct ext4_xattr_ref *xattr_ref) -{ - int ret = 0; - BOOL block_modified = FALSE; - void *ibody_data = NULL; - void *block_data = NULL; - size_t inode_size_rem, block_size_rem; - struct ext4_xattr_ibody_header *ibody_header = NULL; - struct ext4_xattr_header *block_header = NULL; - struct ext4_xattr_entry *entry = NULL; - struct ext4_xattr_entry *block_entry = NULL; - struct ext4_xattr_item *item = NULL; - - inode_size_rem = ext4_xattr_inode_space(xattr_ref); - block_size_rem = ext4_xattr_block_space(xattr_ref); - if (inode_size_rem > sizeof(struct ext4_xattr_ibody_header)) { - ibody_header = EXT4_XATTR_IHDR(xattr_ref->OnDiskInode); - entry = EXT4_XATTR_IFIRST(ibody_header); - } - - if (!xattr_ref->dirty) - goto Finish; - /* If there are enough spaces in the ibody EA table.*/ - if (inode_size_rem > sizeof(struct ext4_xattr_ibody_header)) { - memset(ibody_header, 0, inode_size_rem); - ibody_header->h_magic = EXT4_XATTR_MAGIC; - ibody_data = (char *)ibody_header + inode_size_rem; - inode_size_rem -= sizeof(struct ext4_xattr_ibody_header); - - xattr_ref->IsOnDiskInodeDirty = TRUE; - } - /* If we need an extra block to hold the EA entries*/ - if (xattr_ref->ea_size > inode_size_rem) { - if (!xattr_ref->block_loaded) { - ret = ext4_xattr_try_alloc_block(xattr_ref); - if (ret != 0) - goto Finish; - } - memset(xattr_ref->block_bh->b_data, 0, xattr_ref->fs->BlockSize); - block_header = EXT4_XATTR_BHDR(xattr_ref->block_bh); - block_entry = EXT4_XATTR_BFIRST(xattr_ref->block_bh); - ext4_xattr_set_block_header(xattr_ref); - block_data = (char *)block_header + block_size_rem; - block_size_rem -= sizeof(struct ext4_xattr_header); - - extents_mark_buffer_dirty(xattr_ref->block_bh); - } else { - /* We don't need an extra block.*/ - if (xattr_ref->block_loaded) { - block_header = EXT4_XATTR_BHDR(xattr_ref->block_bh); - le32_add_cpu(&block_header->h_refcount, -1); - if (!block_header->h_refcount) { - ext4_xattr_try_free_block(xattr_ref); - block_header = NULL; - } else { - block_entry = - EXT4_XATTR_BFIRST(xattr_ref->block_bh); - block_data = - (char *)block_header + block_size_rem; - block_size_rem -= - sizeof(struct ext4_xattr_header); - xattr_ref->inode_ref->Inode.i_file_acl = 0; - - xattr_ref->IsOnDiskInodeDirty = TRUE; - extents_mark_buffer_dirty(xattr_ref->block_bh); - } - } - } - - list_for_each_entry(item, &xattr_ref->ordered_list, struct ext4_xattr_item, list_node) { - if (item->in_inode) { - ibody_data = (char *)ibody_data - - EXT4_XATTR_SIZE(item->data_size); - ext4_xattr_set_inode_entry(item, ibody_header, entry, - ibody_data); - memcpy(EXT4_XATTR_NAME(entry), item->name, - item->name_len); - memcpy(ibody_data, item->data, item->data_size); - entry = EXT4_XATTR_NEXT(entry); - inode_size_rem -= EXT4_XATTR_SIZE(item->data_size) + - EXT4_XATTR_LEN(item->name_len); - - xattr_ref->IsOnDiskInodeDirty = TRUE; - continue; - } - if (EXT4_XATTR_SIZE(item->data_size) + - EXT4_XATTR_LEN(item->name_len) > - block_size_rem) { - ret = -ENOSPC; - DbgPrint("ext4_xattr.c: IMPOSSIBLE -ENOSPC AS WE DID INSPECTION!\n"); - ASSERT(0); - } - block_data = - (char *)block_data - EXT4_XATTR_SIZE(item->data_size); - ext4_xattr_set_block_entry(item, block_header, block_entry, - 
block_data); - memcpy(EXT4_XATTR_NAME(block_entry), item->name, - item->name_len); - memcpy(block_data, item->data, item->data_size); - ext4_xattr_compute_hash(block_header, block_entry); - block_entry = EXT4_XATTR_NEXT(block_entry); - block_size_rem -= EXT4_XATTR_SIZE(item->data_size) + - EXT4_XATTR_LEN(item->name_len); - - block_modified = TRUE; - } - xattr_ref->dirty = FALSE; - if (block_modified) { - ext4_xattr_rehash(block_header, - EXT4_XATTR_BFIRST(xattr_ref->block_bh)); - ext4_xattr_set_block_checksum(xattr_ref->inode_ref, - xattr_ref->block_bh->b_blocknr, - block_header); - extents_mark_buffer_dirty(xattr_ref->block_bh); - } - -Finish: - return ret; -} - -void ext4_fs_xattr_iterate(struct ext4_xattr_ref *ref, - int (*iter)(struct ext4_xattr_ref *ref, - struct ext4_xattr_item *item, - BOOL is_last)) -{ - struct ext4_xattr_item *item; - if (!ref->iter_from) { - struct list_head *first_node; - first_node = ref->ordered_list.next; - if (first_node && first_node != &ref->ordered_list) { - ref->iter_from = - list_entry(first_node, - struct ext4_xattr_item, - list_node); - } - } - - item = ref->iter_from; - while (item) { - struct list_head *next_node; - struct ext4_xattr_item *next_item; - int ret = EXT4_XATTR_ITERATE_CONT; - next_node = item->list_node.next; - if (next_node && next_node != &ref->ordered_list) - next_item = list_entry(next_node, struct ext4_xattr_item, - list_node); - else - next_item = NULL; - if (iter) - ret = iter(ref, item, !next_item); - - if (ret != EXT4_XATTR_ITERATE_CONT) { - if (ret == EXT4_XATTR_ITERATE_STOP) - ref->iter_from = NULL; - - break; - } - item = next_item; - } -} - -void ext4_fs_xattr_iterate_reset(struct ext4_xattr_ref *ref) -{ - ref->iter_from = NULL; -} - -int ext4_fs_set_xattr(struct ext4_xattr_ref *ref, __u8 name_index, - const char *name, size_t name_len, const void *data, - size_t data_size, BOOL replace) -{ - int ret = 0; - struct ext4_xattr_item *item = - ext4_xattr_lookup_item(ref, name_index, name, name_len); - if (replace) { - if (!item) { - ret = -ENODATA; - goto Finish; - } - if (item->data_size != data_size) - ret = ext4_xattr_resize_item(ref, item, data_size); - - if (ret != 0) { - goto Finish; - } - memcpy(item->data, data, data_size); - } else { - if (item) { - ret = -EEXIST; - goto Finish; - } - item = ext4_xattr_insert_item(ref, name_index, name, name_len, - data, data_size, &ret); - } -Finish: - return ret; -} - -int ext4_fs_set_xattr_ordered(struct ext4_xattr_ref *ref, __u8 name_index, - const char *name, size_t name_len, const void *data, - size_t data_size) -{ - int ret = 0; - struct ext4_xattr_item *item = - ext4_xattr_lookup_item(ref, name_index, name, name_len); - if (item) { - ret = -EEXIST; - goto Finish; - } - item = ext4_xattr_insert_item_ordered(ref, name_index, name, name_len, - data, data_size, &ret); -Finish: - return ret; -} - -int ext4_fs_remove_xattr(struct ext4_xattr_ref *ref, __u8 name_index, - const char *name, size_t name_len) -{ - return ext4_xattr_remove_item(ref, name_index, name, name_len); -} - -int ext4_fs_get_xattr(struct ext4_xattr_ref *ref, __u8 name_index, - const char *name, size_t name_len, void *buf, - size_t buf_size, size_t *data_size) -{ - int ret = 0; - size_t item_size = 0; - struct ext4_xattr_item *item = - ext4_xattr_lookup_item(ref, name_index, name, name_len); - - if (!item) { - ret = -ENODATA; - goto Finish; - } - item_size = item->data_size; - if (buf_size > item_size) - buf_size = item_size; - - if (buf) - memcpy(buf, item->data, buf_size); - -Finish: - if (data_size) - *data_size = 
item_size; - - return ret; -} - -int ext4_fs_get_xattr_ref(PEXT2_IRP_CONTEXT IrpContext, PEXT2_VCB fs, PEXT2_MCB inode_ref, - struct ext4_xattr_ref *ref) -{ - int rc; - ext4_fsblk_t xattr_block; - xattr_block = inode_ref->Inode.i_file_acl; - memset(&ref->root, 0, sizeof(struct rb_root)); - ref->ea_size = 0; - ref->iter_from = NULL; - if (xattr_block) { - ref->block_bh = extents_bread(&fs->sb, xattr_block); - if (!ref->block_bh) - return -EIO; - - ref->block_loaded = TRUE; - } else - ref->block_loaded = FALSE; - - ref->inode_ref = inode_ref; - ref->fs = fs; - INIT_LIST_HEAD(&ref->ordered_list); - - ref->OnDiskInode = Ext2AllocateInode(fs); - if (!ref->OnDiskInode) { - if (xattr_block) { - extents_brelse(ref->block_bh); - ref->block_bh = NULL; - } - return -ENOMEM; - } - if (!Ext2LoadInodeXattr(fs, &inode_ref->Inode, ref->OnDiskInode)) { - if (xattr_block) { - extents_brelse(ref->block_bh); - ref->block_bh = NULL; - } - - Ext2DestroyInode(fs, ref->OnDiskInode); - return -EIO; - } - ref->IsOnDiskInodeDirty = FALSE; - - if (ext4_xattr_inode_space(ref) < - sizeof(struct ext4_xattr_ibody_header) + - sizeof(__u32)) - ref->inode_size_rem = 0; - else { - ref->inode_size_rem = - ext4_xattr_inode_space(ref) - - sizeof(struct ext4_xattr_ibody_header); - } - - ref->block_size_rem = - ext4_xattr_block_space(ref) - - sizeof(struct ext4_xattr_header) - - sizeof(__u32); - - rc = ext4_xattr_fetch(ref); - if (rc != 0) { - ext4_xattr_purge_items(ref); - if (xattr_block) { - extents_brelse(ref->block_bh); - ref->block_bh = NULL; - } - - Ext2DestroyInode(fs, ref->OnDiskInode); - return rc; - } - ref->IrpContext = IrpContext; - return 0; -} - -int ext4_fs_put_xattr_ref(struct ext4_xattr_ref *ref) -{ - int ret; - sector_t orig_file_acl = ref->inode_ref->Inode.i_file_acl; - ret = ext4_xattr_write_to_disk(ref); - if (ref->IsOnDiskInodeDirty) { - ASSERT(ref->fs->InodeSize > EXT4_GOOD_OLD_INODE_SIZE); - - /* As we may do block allocation in ext4_xattr_write_to_disk */ - if (ret) - ref->inode_ref->Inode.i_file_acl = orig_file_acl; - - if (!ret) { - ret = Ext2SaveInode(ref->IrpContext, ref->fs, &ref->inode_ref->Inode) - ? 0 : -EIO; - if (!ret) { - ret = Ext2SaveInodeXattr(ref->IrpContext, - ref->fs, - &ref->inode_ref->Inode, - ref->OnDiskInode) - ? 
0 : -EIO; - } - } - ref->IsOnDiskInodeDirty = FALSE; - } - if (ref->block_loaded) { - if (!ret) - extents_brelse(ref->block_bh); - else - extents_bforget(ref->block_bh); - - ref->block_bh = NULL; - ref->block_loaded = FALSE; - } - ext4_xattr_purge_items(ref); - Ext2DestroyInode(ref->fs, ref->OnDiskInode); - ref->OnDiskInode = NULL; - ref->inode_ref = NULL; - ref->fs = NULL; - return ret; -} - -struct xattr_prefix { - const char *prefix; - __u8 name_index; -}; - -static const struct xattr_prefix prefix_tbl[] = { - {"user.", EXT4_XATTR_INDEX_USER}, - {"system.posix_acl_access", EXT4_XATTR_INDEX_POSIX_ACL_ACCESS}, - {"system.posix_acl_default", EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT}, - {"trusted.", EXT4_XATTR_INDEX_TRUSTED}, - {"security.", EXT4_XATTR_INDEX_SECURITY}, - {"system.", EXT4_XATTR_INDEX_SYSTEM}, - {"system.richacl", EXT4_XATTR_INDEX_RICHACL}, - {NULL, 0}, -}; - -const char *ext4_extract_xattr_name(const char *full_name, size_t full_name_len, - __u8 *name_index, size_t *name_len, - BOOL *found) -{ - int i; - ASSERT(name_index); - ASSERT(found); - - *found = FALSE; - - if (!full_name_len) { - if (name_len) - *name_len = 0; - - return NULL; - } - - for (i = 0; prefix_tbl[i].prefix; i++) { - size_t prefix_len = strlen(prefix_tbl[i].prefix); - if (full_name_len >= prefix_len && - !memcmp(full_name, prefix_tbl[i].prefix, prefix_len)) { - BOOL require_name = - prefix_tbl[i].prefix[prefix_len - 1] == '.'; - *name_index = prefix_tbl[i].name_index; - if (name_len) - *name_len = full_name_len - prefix_len; - - if (!(full_name_len - prefix_len) && require_name) - return NULL; - - *found = TRUE; - if (require_name) - return full_name + prefix_len; - - return NULL; - } - } - if (name_len) - *name_len = 0; - - return NULL; -} - -const char *ext4_get_xattr_name_prefix(__u8 name_index, - size_t *ret_prefix_len) -{ - int i; - - for (i = 0; prefix_tbl[i].prefix; i++) { - size_t prefix_len = strlen(prefix_tbl[i].prefix); - if (prefix_tbl[i].name_index == name_index) { - if (ret_prefix_len) - *ret_prefix_len = prefix_len; - - return prefix_tbl[i].prefix; - } - } - if (ret_prefix_len) - *ret_prefix_len = 0; - - return NULL; -} +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ + +#include +#include +#include + +static ext4_fsblk_t ext4_new_meta_blocks(void *icb, struct inode *inode, + ext4_fsblk_t goal, + unsigned int flags, + unsigned long *count, int *errp) +{ + NTSTATUS status; + ULONG blockcnt = (count) ? 
*count : 1; + ULONG block = 0; + + status = Ext2NewBlock((PEXT2_IRP_CONTEXT)icb, + inode->i_sb->s_priv, + 0, (ULONG)goal, + &block, + &blockcnt); + if (count) + *count = blockcnt; + + if (!NT_SUCCESS(status)) { + *errp = Ext2LinuxError(status); + return 0; + } + inode->i_blocks += (blockcnt * (inode->i_sb->s_blocksize >> 9)); + return block; +} + +static void ext4_free_blocks(void *icb, struct inode *inode, + ext4_fsblk_t block, int count, int flags) +{ + Ext2FreeBlock((PEXT2_IRP_CONTEXT)icb, inode->i_sb->s_priv, (ULONG)block, count); + inode->i_blocks -= count * (inode->i_sb->s_blocksize >> 9); + return; +} + +static inline ext4_fsblk_t ext4_inode_to_goal_block(struct inode *inode) +{ + PEXT2_VCB Vcb; + Vcb = inode->i_sb->s_priv; + return (inode->i_ino - 1) / BLOCKS_PER_GROUP; +} + +#define NAME_HASH_SHIFT 5 +#define VALUE_HASH_SHIFT 16 + +static inline void ext4_xattr_compute_hash(struct ext4_xattr_header *header, + struct ext4_xattr_entry *entry) +{ + __u32 hash = 0; + char *name = EXT4_XATTR_NAME(entry); + int n; + + for (n = 0; n < entry->e_name_len; n++) { + hash = (hash << NAME_HASH_SHIFT) ^ + (hash >> (8 * sizeof(hash) - NAME_HASH_SHIFT)) ^ *name++; + } + + if (entry->e_value_block == 0 && entry->e_value_size != 0) { + __le32 *value = + (__le32 *)((char *)header + le16_to_cpu(entry->e_value_offs)); + for (n = (le32_to_cpu(entry->e_value_size) + EXT4_XATTR_ROUND) >> + EXT4_XATTR_PAD_BITS; + n; n--) { + hash = (hash << VALUE_HASH_SHIFT) ^ + (hash >> (8 * sizeof(hash) - VALUE_HASH_SHIFT)) ^ + le32_to_cpu(*value++); + } + } + entry->e_hash = cpu_to_le32(hash); +} + +#define BLOCK_HASH_SHIFT 16 + +/* + * ext4_xattr_rehash() + * + * Re-compute the extended attribute hash value after an entry has changed. + */ +static void ext4_xattr_rehash(struct ext4_xattr_header *header, + struct ext4_xattr_entry *entry) +{ + struct ext4_xattr_entry *here; + __u32 hash = 0; + + ext4_xattr_compute_hash(header, entry); + here = EXT4_XATTR_ENTRY(header + 1); + while (!EXT4_XATTR_IS_LAST_ENTRY(here)) { + if (!here->e_hash) { + /* Block is not shared if an entry's hash value == 0 */ + hash = 0; + break; + } + hash = (hash << BLOCK_HASH_SHIFT) ^ + (hash >> (8 * sizeof(hash) - BLOCK_HASH_SHIFT)) ^ + le32_to_cpu(here->e_hash); + here = EXT4_XATTR_NEXT(here); + } + header->h_hash = cpu_to_le32(hash); +} + +static int ext4_xattr_item_cmp(struct rb_node *_a, + struct rb_node *_b) +{ + int result; + struct ext4_xattr_item *a, *b; + a = container_of(_a, struct ext4_xattr_item, node); + a = container_of(_a, struct ext4_xattr_item, node); + b = container_of(_b, struct ext4_xattr_item, node); + + if (a->is_data && !b->is_data) + return -1; + + if (!a->is_data && b->is_data) + return 1; + + result = a->name_index - b->name_index; + if (result) + return result; + + if (a->name_len < b->name_len) + return -1; + + if (a->name_len > b->name_len) + return 1; + + return memcmp(a->name, b->name, a->name_len); +} + +// +// Red-black tree insert routine. 
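
/*
 * Editorial sketch, not part of the patch: the total order that
 * ext4_xattr_item_cmp() above establishes for the red-black tree,
 * restated over a plain struct (hypothetical type and names): the
 * system "data" attribute sorts first, then items order by name
 * index, name length, and finally byte-wise name comparison.
 */
#include <string.h>
#include <stddef.h>

struct xkey {
	int is_data;
	unsigned char name_index;
	size_t name_len;
	const char *name;
};

static int xkey_cmp(const struct xkey *a, const struct xkey *b)
{
	if (a->is_data != b->is_data)
		return a->is_data ? -1 : 1;
	if (a->name_index != b->name_index)
		return a->name_index - b->name_index;
	if (a->name_len != b->name_len)
		return a->name_len < b->name_len ? -1 : 1;
	return memcmp(a->name, b->name, a->name_len);
}
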
+// + +static struct ext4_xattr_item * +ext4_xattr_item_search(struct ext4_xattr_ref *xattr_ref, + struct ext4_xattr_item *name) +{ + struct rb_node *new = xattr_ref->root.rb_node; + + while (new) { + struct ext4_xattr_item *node = + container_of(new, struct ext4_xattr_item, node); + int result = ext4_xattr_item_cmp(&name->node, new); + + if (result < 0) + new = new->rb_left; + else if (result > 0) + new = new->rb_right; + else + return node; + + } + + return NULL; +} + +static void ext4_xattr_item_insert(struct ext4_xattr_ref *xattr_ref, + struct ext4_xattr_item *item) +{ + rb_insert(&xattr_ref->root, &item->node, + ext4_xattr_item_cmp); + list_add_tail(&item->list_node, &xattr_ref->ordered_list); +} + +static void ext4_xattr_item_remove(struct ext4_xattr_ref *xattr_ref, + struct ext4_xattr_item *item) +{ + rb_erase(&item->node, &xattr_ref->root); + list_del_init(&item->list_node); +} + +static struct ext4_xattr_item * +ext4_xattr_item_alloc(__u8 name_index, const char *name, size_t name_len) +{ + struct ext4_xattr_item *item; + item = kzalloc(sizeof(struct ext4_xattr_item) + name_len, GFP_NOFS); + if (!item) + return NULL; + + item->name_index = name_index; + item->name = (char *)(item + 1); + item->name_len = name_len; + item->data = NULL; + item->data_size = 0; + INIT_LIST_HEAD(&item->list_node); + + memcpy(item->name, name, name_len); + + if (name_index == EXT4_XATTR_INDEX_SYSTEM && + name_len == 4 && + !memcmp(name, "data", 4)) + item->is_data = TRUE; + else + item->is_data = FALSE; + + return item; +} + +static int ext4_xattr_item_alloc_data(struct ext4_xattr_item *item, + const void *orig_data, size_t data_size) +{ + void *data = NULL; + ASSERT(!item->data); + data = kmalloc(data_size, GFP_NOFS); + if (!data) + return -ENOMEM; + + if (orig_data) + memcpy(data, orig_data, data_size); + + item->data = data; + item->data_size = data_size; + return 0; +} + +static void ext4_xattr_item_free_data(struct ext4_xattr_item *item) +{ + ASSERT(item->data); + kfree(item->data); + item->data = NULL; + item->data_size = 0; +} + +static int ext4_xattr_item_resize_data(struct ext4_xattr_item *item, + size_t new_data_size) +{ + if (new_data_size != item->data_size) { + void *new_data; + new_data = kmalloc(new_data_size, GFP_NOFS); + if (!new_data) + return -ENOMEM; + + memcpy(new_data, item->data, item->data_size); + kfree(item->data); + + item->data = new_data; + item->data_size = new_data_size; + } + return 0; +} + +static void ext4_xattr_item_free(struct ext4_xattr_item *item) +{ + if (item->data) + ext4_xattr_item_free_data(item); + + kfree(item); +} + +static void *ext4_xattr_entry_data(struct ext4_xattr_ref *xattr_ref, + struct ext4_xattr_entry *entry, + BOOL in_inode) +{ + char *ret; + int block_size; + if (in_inode) { + struct ext4_xattr_ibody_header *header; + struct ext4_xattr_entry *first_entry; + int inode_size = xattr_ref->fs->InodeSize; + header = EXT4_XATTR_IHDR(xattr_ref->OnDiskInode); + first_entry = EXT4_XATTR_IFIRST(header); + + ret = ((char *)first_entry + le16_to_cpu(entry->e_value_offs)); + if (ret + EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)) - + (char *)xattr_ref->OnDiskInode > inode_size) + ret = NULL; + + return ret; + + } + block_size = xattr_ref->fs->BlockSize; + ret = ((char *)xattr_ref->block_bh->b_data + le16_to_cpu(entry->e_value_offs)); + if (ret + EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)) - + (char *)xattr_ref->block_bh->b_data > block_size) + ret = NULL; + return ret; +} + +static int ext4_xattr_block_fetch(struct ext4_xattr_ref *xattr_ref) +{ + int 
+
+static int ext4_xattr_block_fetch(struct ext4_xattr_ref *xattr_ref)
+{
+    int ret = 0;
+    size_t size_rem;
+    void *data;
+    struct ext4_xattr_entry *entry = NULL;
+
+    ASSERT(xattr_ref->block_bh->b_data);
+    entry = EXT4_XATTR_BFIRST(xattr_ref->block_bh);
+
+    size_rem = xattr_ref->fs->BlockSize;
+    for (; size_rem > 0 && !EXT4_XATTR_IS_LAST_ENTRY(entry);
+         entry = EXT4_XATTR_NEXT(entry),
+         size_rem -= EXT4_XATTR_LEN(entry->e_name_len)) {
+        struct ext4_xattr_item *item;
+        char *e_name = EXT4_XATTR_NAME(entry);
+
+        data = ext4_xattr_entry_data(xattr_ref, entry, FALSE);
+        if (!data) {
+            ret = -EIO;
+            goto Finish;
+        }
+
+        item = ext4_xattr_item_alloc(entry->e_name_index, e_name,
+                                     (size_t)entry->e_name_len);
+        if (!item) {
+            ret = -ENOMEM;
+            goto Finish;
+        }
+        if (ext4_xattr_item_alloc_data(
+                item, data, le32_to_cpu(entry->e_value_size)) != 0) {
+            ext4_xattr_item_free(item);
+            ret = -ENOMEM;
+            goto Finish;
+        }
+        ext4_xattr_item_insert(xattr_ref, item);
+        xattr_ref->block_size_rem -=
+            EXT4_XATTR_SIZE(item->data_size) +
+            EXT4_XATTR_LEN(item->name_len);
+        xattr_ref->ea_size += EXT4_XATTR_SIZE(item->data_size) +
+                              EXT4_XATTR_LEN(item->name_len);
+    }
+
+Finish:
+    return ret;
+}
+
+static int ext4_xattr_inode_fetch(struct ext4_xattr_ref *xattr_ref)
+{
+    void *data;
+    size_t size_rem;
+    int ret = 0;
+    struct ext4_xattr_ibody_header *header = NULL;
+    struct ext4_xattr_entry *entry = NULL;
+    int inode_size = xattr_ref->fs->InodeSize;
+
+    header = EXT4_XATTR_IHDR(xattr_ref->OnDiskInode);
+    entry = EXT4_XATTR_IFIRST(header);
+
+    size_rem = inode_size - EXT4_GOOD_OLD_INODE_SIZE -
+               xattr_ref->OnDiskInode->i_extra_isize;
+    for (; size_rem > 0 && !EXT4_XATTR_IS_LAST_ENTRY(entry);
+         entry = EXT4_XATTR_NEXT(entry),
+         size_rem -= EXT4_XATTR_LEN(entry->e_name_len)) {
+        struct ext4_xattr_item *item;
+        char *e_name = EXT4_XATTR_NAME(entry);
+
+        data = ext4_xattr_entry_data(xattr_ref, entry, TRUE);
+        if (!data) {
+            ret = -EIO;
+            goto Finish;
+        }
+
+        item = ext4_xattr_item_alloc(entry->e_name_index, e_name,
+                                     (size_t)entry->e_name_len);
+        if (!item) {
+            ret = -ENOMEM;
+            goto Finish;
+        }
+        if (ext4_xattr_item_alloc_data(
+                item, data, le32_to_cpu(entry->e_value_size)) != 0) {
+            ext4_xattr_item_free(item);
+            ret = -ENOMEM;
+            goto Finish;
+        }
+        item->in_inode = TRUE;
+        ext4_xattr_item_insert(xattr_ref, item);
+        xattr_ref->inode_size_rem -=
+            EXT4_XATTR_SIZE(item->data_size) +
+            EXT4_XATTR_LEN(item->name_len);
+        xattr_ref->ea_size += EXT4_XATTR_SIZE(item->data_size) +
+                              EXT4_XATTR_LEN(item->name_len);
+    }
+
+Finish:
+    return ret;
+}
+
+static __s32 ext4_xattr_inode_space(struct ext4_xattr_ref *xattr_ref)
+{
+    int inode_size = xattr_ref->fs->InodeSize;
+    int size_rem = inode_size - EXT4_GOOD_OLD_INODE_SIZE -
+                   xattr_ref->OnDiskInode->i_extra_isize;
+    return size_rem;
+}
+
+static __s32 ext4_xattr_block_space(struct ext4_xattr_ref *xattr_ref)
+{
+    return xattr_ref->fs->BlockSize;
+}
+
+static int ext4_xattr_fetch(struct ext4_xattr_ref *xattr_ref)
+{
+    int ret = 0;
+    int inode_size = xattr_ref->fs->InodeSize;
+    if (inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
+        ret = ext4_xattr_inode_fetch(xattr_ref);
+        if (ret != 0)
+            return ret;
+    }
+
+    if (xattr_ref->block_loaded)
+        ret = ext4_xattr_block_fetch(xattr_ref);
+
+    xattr_ref->dirty = FALSE;
+    return ret;
+}
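+
+/*
+ * Lookups build a temporary ext4_xattr_item on the stack as the search
+ * key; only the name fields (and the is_data flag) are filled in, so
+ * the comparator never touches the absent data fields.
+ */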
+
+static struct ext4_xattr_item *
+ext4_xattr_lookup_item(struct ext4_xattr_ref *xattr_ref, __u8 name_index,
+                       const char *name, size_t name_len)
+{
+    struct ext4_xattr_item tmp = {
+        FALSE,
+        FALSE,
+        name_index,
+        (char *)name, /* won't touch this string */
+        name_len,
+    };
+    if (name_index == EXT4_XATTR_INDEX_SYSTEM &&
+        name_len == 4 &&
+        !memcmp(name, "data", 4))
+        tmp.is_data = TRUE;
+
+    return ext4_xattr_item_search(xattr_ref, &tmp);
+}
+
+static struct ext4_xattr_item *
+ext4_xattr_insert_item(struct ext4_xattr_ref *xattr_ref, __u8 name_index,
+                       const char *name, size_t name_len, const void *data,
+                       size_t data_size,
+                       int *err)
+{
+    struct ext4_xattr_item *item;
+    item = ext4_xattr_item_alloc(name_index, name, name_len);
+    if (!item) {
+        if (err)
+            *err = -ENOMEM;
+
+        return NULL;
+    }
+
+    item->in_inode = TRUE;
+    if (xattr_ref->inode_size_rem <
+        EXT4_XATTR_SIZE(data_size) +
+        EXT4_XATTR_LEN(item->name_len)) {
+        if (xattr_ref->block_size_rem <
+            EXT4_XATTR_SIZE(data_size) +
+            EXT4_XATTR_LEN(item->name_len)) {
+            if (err)
+                *err = -ENOSPC;
+
+            /* Do not leak the item on the no-space path. */
+            ext4_xattr_item_free(item);
+            return NULL;
+        }
+
+        item->in_inode = FALSE;
+    }
+    if (ext4_xattr_item_alloc_data(item, data, data_size) != 0) {
+        ext4_xattr_item_free(item);
+        if (err)
+            *err = -ENOMEM;
+
+        return NULL;
+    }
+    ext4_xattr_item_insert(xattr_ref, item);
+    xattr_ref->ea_size +=
+        EXT4_XATTR_SIZE(item->data_size) + EXT4_XATTR_LEN(item->name_len);
+    if (item->in_inode) {
+        xattr_ref->inode_size_rem -=
+            EXT4_XATTR_SIZE(item->data_size) +
+            EXT4_XATTR_LEN(item->name_len);
+    } else {
+        xattr_ref->block_size_rem -=
+            EXT4_XATTR_SIZE(item->data_size) +
+            EXT4_XATTR_LEN(item->name_len);
+    }
+    xattr_ref->dirty = TRUE;
+    if (err)
+        *err = 0;
+
+    return item;
+}
+
+static struct ext4_xattr_item *
+ext4_xattr_insert_item_ordered(struct ext4_xattr_ref *xattr_ref, __u8 name_index,
+                               const char *name, size_t name_len, const void *data,
+                               size_t data_size,
+                               int *err)
+{
+    struct ext4_xattr_item *item, *last_item = NULL;
+    item = ext4_xattr_item_alloc(name_index, name, name_len);
+    if (!item) {
+        if (err)
+            *err = -ENOMEM;
+
+        return NULL;
+    }
+
+    if (!list_empty(&xattr_ref->ordered_list))
+        last_item = list_entry(xattr_ref->ordered_list.prev,
+                               struct ext4_xattr_item,
+                               list_node);
+
+    item->in_inode = TRUE;
+    if ((xattr_ref->inode_size_rem <
+         EXT4_XATTR_SIZE(data_size) +
+         EXT4_XATTR_LEN(item->name_len))
+        ||
+        (last_item && !last_item->in_inode)) {
+        if (xattr_ref->block_size_rem <
+            EXT4_XATTR_SIZE(data_size) +
+            EXT4_XATTR_LEN(item->name_len)) {
+            if (err)
+                *err = -ENOSPC;
+
+            /* Do not leak the item on the no-space path. */
+            ext4_xattr_item_free(item);
+            return NULL;
+        }
+
+        item->in_inode = FALSE;
+    }
+    if (ext4_xattr_item_alloc_data(item, data, data_size) != 0) {
+        ext4_xattr_item_free(item);
+        if (err)
+            *err = -ENOMEM;
+
+        return NULL;
+    }
+    ext4_xattr_item_insert(xattr_ref, item);
+    xattr_ref->ea_size +=
+        EXT4_XATTR_SIZE(item->data_size) + EXT4_XATTR_LEN(item->name_len);
+    if (item->in_inode) {
+        xattr_ref->inode_size_rem -=
+            EXT4_XATTR_SIZE(item->data_size) +
+            EXT4_XATTR_LEN(item->name_len);
+    } else {
+        xattr_ref->block_size_rem -=
+            EXT4_XATTR_SIZE(item->data_size) +
+            EXT4_XATTR_LEN(item->name_len);
+    }
+    xattr_ref->dirty = TRUE;
+    if (err)
+        *err = 0;
+
+    return item;
+}
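+
+/*
+ * Placement policy, as implemented above: a new entry goes into the
+ * inode body when it fits, otherwise into the EA block, and -ENOSPC is
+ * returned when neither area has room. The "ordered" variant adds one
+ * rule: once the tail of the ordered list has spilled to the EA block,
+ * every later entry also goes to the block, so in-inode entries always
+ * form a prefix of the list.
+ */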
+
+static int ext4_xattr_remove_item(struct ext4_xattr_ref *xattr_ref,
+                                  __u8 name_index, const char *name,
+                                  size_t name_len)
+{
+    int ret = -ENOENT;
+    struct ext4_xattr_item *item =
+        ext4_xattr_lookup_item(xattr_ref, name_index, name, name_len);
+    if (item) {
+        if (item == xattr_ref->iter_from) {
+            struct rb_node *next_node;
+            next_node = rb_next(&item->node);
+            if (next_node)
+                xattr_ref->iter_from =
+                    container_of(next_node,
+                                 struct ext4_xattr_item,
+                                 node);
+            else
+                xattr_ref->iter_from = NULL;
+        }
+
+        xattr_ref->ea_size -= EXT4_XATTR_SIZE(item->data_size) +
+                              EXT4_XATTR_LEN(item->name_len);
+
+        if (item->in_inode) {
+            xattr_ref->inode_size_rem +=
+                EXT4_XATTR_SIZE(item->data_size) +
+                EXT4_XATTR_LEN(item->name_len);
+        } else {
+            xattr_ref->block_size_rem +=
+                EXT4_XATTR_SIZE(item->data_size) +
+                EXT4_XATTR_LEN(item->name_len);
+        }
+
+        ext4_xattr_item_remove(xattr_ref, item);
+        ext4_xattr_item_free(item);
+        xattr_ref->dirty = TRUE;
+        ret = 0;
+    }
+    return ret;
+}
+
+static int ext4_xattr_resize_item(struct ext4_xattr_ref *xattr_ref,
+                                  struct ext4_xattr_item *item,
+                                  size_t new_data_size)
+{
+    int ret = 0;
+    BOOL to_inode = FALSE, to_block = FALSE;
+    size_t old_data_size = item->data_size;
+    size_t orig_room_size = item->in_inode ?
+        xattr_ref->inode_size_rem :
+        xattr_ref->block_size_rem;
+
+    /*
+     * Check whether the resized entry can still be held in in-inode or
+     * on-block form.
+     *
+     * The more complicated case: entries must not get stuck half-way
+     * between the in-inode space and the on-block space, so an entry
+     * has to stay wholly in either inode space or block space.
+     */
+    if (item->in_inode) {
+        if (xattr_ref->inode_size_rem +
+            EXT4_XATTR_SIZE(old_data_size) <
+            EXT4_XATTR_SIZE(new_data_size)) {
+            if (xattr_ref->block_size_rem <
+                EXT4_XATTR_SIZE(new_data_size) +
+                EXT4_XATTR_LEN(item->name_len))
+                return -ENOSPC;
+
+            to_block = TRUE;
+        }
+    } else {
+        if (xattr_ref->block_size_rem +
+            EXT4_XATTR_SIZE(old_data_size) <
+            EXT4_XATTR_SIZE(new_data_size)) {
+            if (xattr_ref->inode_size_rem <
+                EXT4_XATTR_SIZE(new_data_size) +
+                EXT4_XATTR_LEN(item->name_len))
+                return -ENOSPC;
+
+            to_inode = TRUE;
+        }
+    }
+    ret = ext4_xattr_item_resize_data(item, new_data_size);
+    if (ret)
+        return ret;
+
+    xattr_ref->ea_size =
+        xattr_ref->ea_size -
+        EXT4_XATTR_SIZE(old_data_size) +
+        EXT4_XATTR_SIZE(new_data_size);
+
+    /*
+     * The entry originally lay in inode space or block space and is
+     * now being transferred to the other area.
+     */
+    if (to_block) {
+        xattr_ref->inode_size_rem +=
+            EXT4_XATTR_SIZE(old_data_size) +
+            EXT4_XATTR_LEN(item->name_len);
+        xattr_ref->block_size_rem -=
+            EXT4_XATTR_SIZE(new_data_size) +
+            EXT4_XATTR_LEN(item->name_len);
+        item->in_inode = FALSE;
+    } else if (to_inode) {
+        xattr_ref->block_size_rem +=
+            EXT4_XATTR_SIZE(old_data_size) +
+            EXT4_XATTR_LEN(item->name_len);
+        xattr_ref->inode_size_rem -=
+            EXT4_XATTR_SIZE(new_data_size) +
+            EXT4_XATTR_LEN(item->name_len);
+        item->in_inode = TRUE;
+    } else {
+        /*
+         * No transfer is needed: there is enough room for the entry to
+         * stay in the inode space or block space it already occupies.
+         */
+        orig_room_size += EXT4_XATTR_SIZE(old_data_size);
+        orig_room_size -= EXT4_XATTR_SIZE(new_data_size);
+        if (item->in_inode)
+            xattr_ref->inode_size_rem = orig_room_size;
+        else
+            xattr_ref->block_size_rem = orig_room_size;
+    }
+    xattr_ref->dirty = TRUE;
+    return ret;
+}
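+
+/*
+ * All of the space accounting above is done in on-disk units:
+ * EXT4_XATTR_LEN(name_len) is the size of an entry descriptor plus its
+ * name and EXT4_XATTR_SIZE(data_size) is the size of the value, both
+ * rounded up to the xattr 4-byte alignment (EXT4_XATTR_PAD).
+ */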
+
+void ext4_xattr_purge_items(struct ext4_xattr_ref *xattr_ref)
+{
+    struct rb_node *first_node;
+    struct ext4_xattr_item *item = NULL;
+    first_node = rb_first(&xattr_ref->root);
+    if (first_node)
+        item = container_of(first_node, struct ext4_xattr_item,
+                            node);
+
+    while (item) {
+        struct rb_node *next_node;
+        struct ext4_xattr_item *next_item = NULL;
+        next_node = rb_next(&item->node);
+        if (next_node)
+            next_item = container_of(next_node, struct ext4_xattr_item,
+                                     node);
+        else
+            next_item = NULL;
+
+        ext4_xattr_item_remove(xattr_ref, item);
+        ext4_xattr_item_free(item);
+
+        item = next_item;
+    }
+    xattr_ref->ea_size = 0;
+    if (ext4_xattr_inode_space(xattr_ref) <
+        sizeof(struct ext4_xattr_ibody_header))
+        xattr_ref->inode_size_rem = 0;
+    else
+        xattr_ref->inode_size_rem =
+            ext4_xattr_inode_space(xattr_ref) -
+            sizeof(struct ext4_xattr_ibody_header);
+
+    xattr_ref->block_size_rem =
+        ext4_xattr_block_space(xattr_ref) -
+        sizeof(struct ext4_xattr_header);
+}
+
+static int ext4_xattr_try_alloc_block(struct ext4_xattr_ref *xattr_ref)
+{
+    int ret = 0;
+
+    ext4_fsblk_t xattr_block = 0;
+    xattr_block = xattr_ref->inode_ref->Inode.i_file_acl;
+    if (!xattr_block) {
+        ext4_fsblk_t goal =
+            ext4_inode_to_goal_block(&xattr_ref->inode_ref->Inode);
+
+        xattr_block = ext4_new_meta_blocks(xattr_ref->IrpContext,
+                                           &xattr_ref->inode_ref->Inode,
+                                           goal, 0, NULL,
+                                           &ret);
+        if (ret != 0)
+            goto Finish;
+
+        xattr_ref->block_bh = extents_bwrite(&xattr_ref->fs->sb, xattr_block);
+        if (!xattr_ref->block_bh) {
+            ext4_free_blocks(xattr_ref->IrpContext, &xattr_ref->inode_ref->Inode,
+                             xattr_block, 1, 0);
+            ret = -ENOMEM;
+            goto Finish;
+        }
+
+        xattr_ref->inode_ref->Inode.i_file_acl = xattr_block;
+        xattr_ref->IsOnDiskInodeDirty = TRUE;
+        xattr_ref->block_loaded = TRUE;
+    }
+
+Finish:
+    return ret;
+}
+
+static void ext4_xattr_try_free_block(struct ext4_xattr_ref *xattr_ref)
+{
+    ext4_fsblk_t xattr_block;
+    xattr_block = xattr_ref->inode_ref->Inode.i_file_acl;
+    xattr_ref->inode_ref->Inode.i_file_acl = 0;
+    extents_brelse(xattr_ref->block_bh);
+    xattr_ref->block_bh = NULL;
+    ext4_free_blocks(xattr_ref->IrpContext, &xattr_ref->inode_ref->Inode,
+                     xattr_block, 1, 0);
+    xattr_ref->IsOnDiskInodeDirty = TRUE;
+    xattr_ref->block_loaded = FALSE;
+}
+
+static void ext4_xattr_set_block_header(struct ext4_xattr_ref *xattr_ref)
+{
+    struct ext4_xattr_header *block_header = NULL;
+    block_header = EXT4_XATTR_BHDR(xattr_ref->block_bh);
+
+    memset(block_header, 0, sizeof(struct ext4_xattr_header));
+    block_header->h_magic = EXT4_XATTR_MAGIC;
+    block_header->h_refcount = cpu_to_le32(1);
+    block_header->h_blocks = cpu_to_le32(1);
+}
+
+static void
+ext4_xattr_set_inode_entry(struct ext4_xattr_item *item,
+                           struct ext4_xattr_ibody_header *ibody_header,
+                           struct ext4_xattr_entry *entry, void *ibody_data_ptr)
+{
+    entry->e_name_len = (__u8)item->name_len;
+    entry->e_name_index = item->name_index;
+    entry->e_value_offs =
+        cpu_to_le16((char *)ibody_data_ptr - (char *)EXT4_XATTR_IFIRST(ibody_header));
+    entry->e_value_block = 0;
+    entry->e_value_size = cpu_to_le32(item->data_size);
+}
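+
+/*
+ * Note the two offset bases: for ibody entries e_value_offs is relative
+ * to the first entry (EXT4_XATTR_IFIRST), while for block entries it is
+ * relative to the start of the block header. ext4_xattr_entry_data()
+ * applies the same convention when reading values back.
+ */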
+
+static void ext4_xattr_set_block_entry(struct ext4_xattr_item *item,
+                                       struct ext4_xattr_header *block_header,
+                                       struct ext4_xattr_entry *block_entry,
+                                       void *block_data_ptr)
+{
+    block_entry->e_name_len = (__u8)item->name_len;
+    block_entry->e_name_index = item->name_index;
+    block_entry->e_value_offs =
+        cpu_to_le16((char *)block_data_ptr - (char *)block_header);
+    block_entry->e_value_block = 0;
+    block_entry->e_value_size = cpu_to_le32(item->data_size);
+}
+
+static int ext4_xattr_write_to_disk(struct ext4_xattr_ref *xattr_ref)
+{
+    int ret = 0;
+    BOOL block_modified = FALSE;
+    void *ibody_data = NULL;
+    void *block_data = NULL;
+    size_t inode_size_rem, block_size_rem;
+    struct ext4_xattr_ibody_header *ibody_header = NULL;
+    struct ext4_xattr_header *block_header = NULL;
+    struct ext4_xattr_entry *entry = NULL;
+    struct ext4_xattr_entry *block_entry = NULL;
+    struct ext4_xattr_item *item = NULL;
+
+    inode_size_rem = ext4_xattr_inode_space(xattr_ref);
+    block_size_rem = ext4_xattr_block_space(xattr_ref);
+    if (inode_size_rem > sizeof(struct ext4_xattr_ibody_header)) {
+        ibody_header = EXT4_XATTR_IHDR(xattr_ref->OnDiskInode);
+        entry = EXT4_XATTR_IFIRST(ibody_header);
+    }
+
+    if (!xattr_ref->dirty)
+        goto Finish;
+    /* If there is enough space in the ibody EA table. */
+    if (inode_size_rem > sizeof(struct ext4_xattr_ibody_header)) {
+        memset(ibody_header, 0, inode_size_rem);
+        ibody_header->h_magic = EXT4_XATTR_MAGIC;
+        ibody_data = (char *)ibody_header + inode_size_rem;
+        inode_size_rem -= sizeof(struct ext4_xattr_ibody_header);
+
+        xattr_ref->IsOnDiskInodeDirty = TRUE;
+    }
+    /* If we need an extra block to hold the EA entries. */
+    if (xattr_ref->ea_size > inode_size_rem) {
+        if (!xattr_ref->block_loaded) {
+            ret = ext4_xattr_try_alloc_block(xattr_ref);
+            if (ret != 0)
+                goto Finish;
+        }
+        memset(xattr_ref->block_bh->b_data, 0, xattr_ref->fs->BlockSize);
+        block_header = EXT4_XATTR_BHDR(xattr_ref->block_bh);
+        block_entry = EXT4_XATTR_BFIRST(xattr_ref->block_bh);
+        ext4_xattr_set_block_header(xattr_ref);
+        block_data = (char *)block_header + block_size_rem;
+        block_size_rem -= sizeof(struct ext4_xattr_header);
+
+        extents_mark_buffer_dirty(xattr_ref->block_bh);
+    } else {
+        /* We don't need an extra block. */
+        if (xattr_ref->block_loaded) {
+            block_header = EXT4_XATTR_BHDR(xattr_ref->block_bh);
+            le32_add_cpu(&block_header->h_refcount, -1);
+            if (!block_header->h_refcount) {
+                ext4_xattr_try_free_block(xattr_ref);
+                block_header = NULL;
+            } else {
+                block_entry =
+                    EXT4_XATTR_BFIRST(xattr_ref->block_bh);
+                block_data =
+                    (char *)block_header + block_size_rem;
+                block_size_rem -=
+                    sizeof(struct ext4_xattr_header);
+                xattr_ref->inode_ref->Inode.i_file_acl = 0;
+
+                xattr_ref->IsOnDiskInodeDirty = TRUE;
+                extents_mark_buffer_dirty(xattr_ref->block_bh);
+            }
+        }
+    }
+
+    list_for_each_entry(item, &xattr_ref->ordered_list, struct ext4_xattr_item, list_node) {
+        if (item->in_inode) {
+            ibody_data = (char *)ibody_data -
+                         EXT4_XATTR_SIZE(item->data_size);
+            ext4_xattr_set_inode_entry(item, ibody_header, entry,
+                                       ibody_data);
+            memcpy(EXT4_XATTR_NAME(entry), item->name,
+                   item->name_len);
+            memcpy(ibody_data, item->data, item->data_size);
+            entry = EXT4_XATTR_NEXT(entry);
+            inode_size_rem -= EXT4_XATTR_SIZE(item->data_size) +
+                              EXT4_XATTR_LEN(item->name_len);
+
+            xattr_ref->IsOnDiskInodeDirty = TRUE;
+            continue;
+        }
+        if (EXT4_XATTR_SIZE(item->data_size) +
+            EXT4_XATTR_LEN(item->name_len) >
+            block_size_rem) {
+            ret = -ENOSPC;
+            DbgPrint("ext4_xattr.c: IMPOSSIBLE -ENOSPC AS WE DID INSPECTION!\n");
+            ASSERT(0);
+        }
+        block_data =
+            (char *)block_data - EXT4_XATTR_SIZE(item->data_size);
+        ext4_xattr_set_block_entry(item, block_header, block_entry,
+                                   block_data);
+        memcpy(EXT4_XATTR_NAME(block_entry), item->name,
+               item->name_len);
+        memcpy(block_data, item->data, item->data_size);
+        ext4_xattr_compute_hash(block_header, block_entry);
+        block_entry = EXT4_XATTR_NEXT(block_entry);
+        block_size_rem -= EXT4_XATTR_SIZE(item->data_size) +
+                          EXT4_XATTR_LEN(item->name_len);
+
+        block_modified = TRUE;
+    }
+    xattr_ref->dirty = FALSE;
+    if (block_modified) {
+        ext4_xattr_rehash(block_header,
+                          EXT4_XATTR_BFIRST(xattr_ref->block_bh));
+        /*ext4_xattr_block_csum_set(inode,
+                      xattr_ref->block_bh);*/
+        extents_mark_buffer_dirty(xattr_ref->block_bh);
+    }
+
+Finish:
+    return ret;
+}
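+
+/*
+ * On-disk layout produced by ext4_xattr_write_to_disk(): entry
+ * descriptors grow forward from the header while value payloads grow
+ * backward from the end of the area, in both the inode body and the EA
+ * block. When the block is modified, each entry hash and then the block
+ * header hash are recomputed before the buffer is marked dirty.
+ */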
+
+void ext4_fs_xattr_iterate(struct ext4_xattr_ref *ref,
+                           int (*iter)(struct ext4_xattr_ref *ref,
+                                       struct ext4_xattr_item *item,
+                                       BOOL is_last))
+{
+    struct ext4_xattr_item *item;
+    if (!ref->iter_from) {
+        struct list_head *first_node;
+        first_node = ref->ordered_list.next;
+        if (first_node && first_node != &ref->ordered_list) {
+            ref->iter_from =
+                list_entry(first_node,
+                           struct ext4_xattr_item,
+                           list_node);
+        }
+    }
+
+    item = ref->iter_from;
+    while (item) {
+        struct list_head *next_node;
+        struct ext4_xattr_item *next_item;
+        int ret = EXT4_XATTR_ITERATE_CONT;
+        next_node = item->list_node.next;
+        if (next_node && next_node != &ref->ordered_list)
+            next_item = list_entry(next_node, struct ext4_xattr_item,
+                                   list_node);
+        else
+            next_item = NULL;
+        if (iter)
+            ret = iter(ref, item, !next_item);
+
+        if (ret != EXT4_XATTR_ITERATE_CONT) {
+            if (ret == EXT4_XATTR_ITERATE_STOP)
+                ref->iter_from = NULL;
+
+            break;
+        }
+        item = next_item;
+    }
+}
+
+void ext4_fs_xattr_iterate_reset(struct ext4_xattr_ref *ref)
+{
+    ref->iter_from = NULL;
+}
+
+int ext4_fs_set_xattr(struct ext4_xattr_ref *ref, __u8 name_index,
+                      const char *name, size_t name_len, const void *data,
+                      size_t data_size, BOOL replace)
+{
+    int ret = 0;
+    struct ext4_xattr_item *item =
+        ext4_xattr_lookup_item(ref, name_index, name, name_len);
+    if (replace) {
+        if (!item) {
+            ret = -ENODATA;
+            goto Finish;
+        }
+        if (item->data_size != data_size)
+            ret = ext4_xattr_resize_item(ref, item, data_size);
+
+        if (ret != 0)
+            goto Finish;
+
+        memcpy(item->data, data, data_size);
+    } else {
+        if (item) {
+            ret = -EEXIST;
+            goto Finish;
+        }
+        item = ext4_xattr_insert_item(ref, name_index, name, name_len,
+                                      data, data_size, &ret);
+    }
+Finish:
+    return ret;
+}
+
+int ext4_fs_set_xattr_ordered(struct ext4_xattr_ref *ref, __u8 name_index,
+                              const char *name, size_t name_len, const void *data,
+                              size_t data_size)
+{
+    int ret = 0;
+    struct ext4_xattr_item *item =
+        ext4_xattr_lookup_item(ref, name_index, name, name_len);
+    if (item) {
+        ret = -EEXIST;
+        goto Finish;
+    }
+    item = ext4_xattr_insert_item_ordered(ref, name_index, name, name_len,
+                                          data, data_size, &ret);
+Finish:
+    return ret;
+}
+
+int ext4_fs_remove_xattr(struct ext4_xattr_ref *ref, __u8 name_index,
+                         const char *name, size_t name_len)
+{
+    return ext4_xattr_remove_item(ref, name_index, name, name_len);
+}
+
+int ext4_fs_get_xattr(struct ext4_xattr_ref *ref, __u8 name_index,
+                      const char *name, size_t name_len, void *buf,
+                      size_t buf_size, size_t *data_size)
+{
+    int ret = 0;
+    size_t item_size = 0;
+    struct ext4_xattr_item *item =
+        ext4_xattr_lookup_item(ref, name_index, name, name_len);
+
+    if (!item) {
+        ret = -ENODATA;
+        goto Finish;
+    }
+    item_size = item->data_size;
+    if (buf_size > item_size)
+        buf_size = item_size;
+
+    if (buf)
+        memcpy(buf, item->data, buf_size);
+
+Finish:
+    if (data_size)
+        *data_size = item_size;
+
+    return ret;
+}
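+
+/*
+ * Illustrative use of the API above (not part of this patch); assumes a
+ * valid IrpContext/Vcb/Mcb triple as used elsewhere in the driver:
+ *
+ *    struct ext4_xattr_ref ref;
+ *    int err = ext4_fs_get_xattr_ref(IrpContext, Vcb, Mcb, &ref);
+ *    if (!err) {
+ *        // add user.foo = "bar" (fails with -EEXIST if present)
+ *        err = ext4_fs_set_xattr(&ref, EXT4_XATTR_INDEX_USER,
+ *                                "foo", 3, "bar", 3, FALSE);
+ *        // always put the ref: it writes back on success paths
+ *        // and releases buffers and items in every case
+ *        if (!err)
+ *            err = ext4_fs_put_xattr_ref(&ref);
+ *        else
+ *            ext4_fs_put_xattr_ref(&ref);
+ *    }
+ */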
+
+int ext4_fs_get_xattr_ref(PEXT2_IRP_CONTEXT IrpContext, PEXT2_VCB fs, PEXT2_MCB inode_ref,
+                          struct ext4_xattr_ref *ref)
+{
+    int rc;
+    ext4_fsblk_t xattr_block;
+    xattr_block = inode_ref->Inode.i_file_acl;
+    memset(&ref->root, 0, sizeof(struct rb_root));
+    ref->ea_size = 0;
+    ref->iter_from = NULL;
+    if (xattr_block) {
+        ref->block_bh = extents_bread(&fs->sb, xattr_block);
+        if (!ref->block_bh)
+            return -EIO;
+
+        ref->block_loaded = TRUE;
+    } else
+        ref->block_loaded = FALSE;
+
+    ref->inode_ref = inode_ref;
+    ref->fs = fs;
+    INIT_LIST_HEAD(&ref->ordered_list);
+
+    ref->OnDiskInode = Ext2AllocateInode(fs);
+    if (!ref->OnDiskInode) {
+        if (xattr_block) {
+            extents_brelse(ref->block_bh);
+            ref->block_bh = NULL;
+        }
+        return -ENOMEM;
+    }
+    if (!Ext2LoadInodeXattr(fs, &inode_ref->Inode, ref->OnDiskInode)) {
+        if (xattr_block) {
+            extents_brelse(ref->block_bh);
+            ref->block_bh = NULL;
+        }
+
+        Ext2DestroyInode(fs, ref->OnDiskInode);
+        return -EIO;
+    }
+    ref->IsOnDiskInodeDirty = FALSE;
+
+    if (ext4_xattr_inode_space(ref) <
+        sizeof(struct ext4_xattr_ibody_header) +
+        sizeof(__u32))
+        ref->inode_size_rem = 0;
+    else {
+        ref->inode_size_rem =
+            ext4_xattr_inode_space(ref) -
+            sizeof(struct ext4_xattr_ibody_header);
+    }
+
+    ref->block_size_rem =
+        ext4_xattr_block_space(ref) -
+        sizeof(struct ext4_xattr_header) -
+        sizeof(__u32);
+
+    rc = ext4_xattr_fetch(ref);
+    if (rc != 0) {
+        ext4_xattr_purge_items(ref);
+        if (xattr_block) {
+            extents_brelse(ref->block_bh);
+            ref->block_bh = NULL;
+        }
+
+        Ext2DestroyInode(fs, ref->OnDiskInode);
+        return rc;
+    }
+    ref->IrpContext = IrpContext;
+    return 0;
+}
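+
+/*
+ * ext4_fs_get_xattr_ref() and ext4_fs_put_xattr_ref() bracket every
+ * xattr operation: get loads the inode body and, if i_file_acl is set,
+ * the EA block into the item tree; put serializes the tree back,
+ * restores the original i_file_acl if the write failed, and releases
+ * all buffers and items.
+ */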
+
+int ext4_fs_put_xattr_ref(struct ext4_xattr_ref *ref)
+{
+    int ret;
+    sector_t orig_file_acl = ref->inode_ref->Inode.i_file_acl;
+    ret = ext4_xattr_write_to_disk(ref);
+    if (ref->IsOnDiskInodeDirty) {
+        ASSERT(ref->fs->InodeSize > EXT4_GOOD_OLD_INODE_SIZE);
+
+        /* ext4_xattr_write_to_disk() may have allocated an EA block;
+           roll the inode back on failure. */
+        if (ret)
+            ref->inode_ref->Inode.i_file_acl = orig_file_acl;
+
+        if (!ret) {
+            ret = Ext2SaveInode(ref->IrpContext, ref->fs, &ref->inode_ref->Inode)
+                      ? 0 : -EIO;
+            if (!ret) {
+                ret = Ext2SaveInodeXattr(ref->IrpContext,
+                                         ref->fs,
+                                         &ref->inode_ref->Inode,
+                                         ref->OnDiskInode)
+                          ? 0 : -EIO;
+            }
+        }
+        ref->IsOnDiskInodeDirty = FALSE;
+    }
+    if (ref->block_loaded) {
+        if (!ret)
+            extents_brelse(ref->block_bh);
+        else
+            extents_bforget(ref->block_bh);
+
+        ref->block_bh = NULL;
+        ref->block_loaded = FALSE;
+    }
+    ext4_xattr_purge_items(ref);
+    Ext2DestroyInode(ref->fs, ref->OnDiskInode);
+    ref->OnDiskInode = NULL;
+    ref->inode_ref = NULL;
+    ref->fs = NULL;
+    return ret;
+}
+
+struct xattr_prefix {
+    const char *prefix;
+    __u8 name_index;
+};
+
+static const struct xattr_prefix prefix_tbl[] = {
+    {"user.", EXT4_XATTR_INDEX_USER},
+    {"system.posix_acl_access", EXT4_XATTR_INDEX_POSIX_ACL_ACCESS},
+    {"system.posix_acl_default", EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT},
+    {"trusted.", EXT4_XATTR_INDEX_TRUSTED},
+    {"security.", EXT4_XATTR_INDEX_SECURITY},
+    /* Longer "system.*" prefixes must precede "system." or the
+       first-match loop below can never reach them. */
+    {"system.richacl", EXT4_XATTR_INDEX_RICHACL},
+    {"system.", EXT4_XATTR_INDEX_SYSTEM},
+    {NULL, 0},
+};
+
+const char *ext4_extract_xattr_name(const char *full_name, size_t full_name_len,
+                                    __u8 *name_index, size_t *name_len,
+                                    BOOL *found)
+{
+    int i;
+    ASSERT(name_index);
+    ASSERT(found);
+
+    *found = FALSE;
+
+    if (!full_name_len) {
+        if (name_len)
+            *name_len = 0;
+
+        return NULL;
+    }
+
+    for (i = 0; prefix_tbl[i].prefix; i++) {
+        size_t prefix_len = strlen(prefix_tbl[i].prefix);
+        if (full_name_len >= prefix_len &&
+            !memcmp(full_name, prefix_tbl[i].prefix, prefix_len)) {
+            BOOL require_name =
+                prefix_tbl[i].prefix[prefix_len - 1] == '.';
+            *name_index = prefix_tbl[i].name_index;
+            if (name_len)
+                *name_len = full_name_len - prefix_len;
+
+            if (!(full_name_len - prefix_len) && require_name)
+                return NULL;
+
+            *found = TRUE;
+            if (require_name)
+                return full_name + prefix_len;
+
+            return NULL;
+        }
+    }
+    if (name_len)
+        *name_len = 0;
+
+    return NULL;
+}
+
+const char *ext4_get_xattr_name_prefix(__u8 name_index,
+                                       size_t *ret_prefix_len)
+{
+    int i;
+
+    for (i = 0; prefix_tbl[i].prefix; i++) {
+        size_t prefix_len = strlen(prefix_tbl[i].prefix);
+        if (prefix_tbl[i].name_index == name_index) {
+            if (ret_prefix_len)
+                *ret_prefix_len = prefix_len;
+
+            return prefix_tbl[i].prefix;
+        }
+    }
+    if (ret_prefix_len)
+        *ret_prefix_len = 0;
+
+    return NULL;
+}
diff --git a/Ext4Fsd/include/linux/ext4.h b/Ext4Fsd/include/linux/ext4.h
index 70d58db..3af12de 100644
--- a/Ext4Fsd/include/linux/ext4.h
+++ b/Ext4Fsd/include/linux/ext4.h
@@ -3378,6 +3378,10 @@ int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw,
                            struct ext4_inode_info *ei);
 void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
                          struct ext4_inode_info *ei);
+int ext4_extent_block_csum_verify(struct inode *inode,
+                                  struct ext4_extent_header *eh);
+void ext4_extent_block_csum_set(struct inode *inode,
+                                struct ext4_extent_header *eh);
 void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
                             unsigned int blocksize);
 int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent);
@@ -3386,6 +3390,10 @@ void ext4_dirent_csum_set(struct inode *inode,
 int ext4_dx_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent);
 void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent);
+int ext4_xattr_block_csum_verify(struct inode *inode,
+                                 struct buffer_head *bh);
+void ext4_xattr_block_csum_set(struct inode *inode,
+                               struct buffer_head *bh);
 //
 // Use default packing of structures