diff options
author | Alexander Block <ablock84@googlemail.com> | 2012-07-25 23:19:24 +0200 |
---|---|---|
committer | Alexander Block <ablock84@googlemail.com> | 2012-07-25 23:30:19 +0200 |
commit | 31db9f7c23fbf7e95026143f79645de6507b583b (patch) | |
tree | a3e416143bd08d8daca762f85a6d260e49c56c61 /fs/btrfs | |
parent | 7069830a9e381e33d44ded45095f764844c71d24 (diff) |
Btrfs: introduce BTRFS_IOC_SEND for btrfs send/receive
This patch introduces the BTRFS_IOC_SEND ioctl that is
required for send. It allows btrfs-progs to implement
full and incremental sends. Patches for btrfs-progs will
follow.
Signed-off-by: Alexander Block <ablock84@googlemail.com>
Reviewed-by: David Sterba <dave@jikos.cz>
Reviewed-by: Arne Jansen <sensille@gmx.net>
Reviewed-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
Reviewed-by: Alex Lyakas <alex.bolshoy.btrfs@gmail.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/Makefile | 2 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 3 | ||||
-rw-r--r-- | fs/btrfs/ioctl.h | 10 | ||||
-rw-r--r-- | fs/btrfs/send.c | 4570 | ||||
-rw-r--r-- | fs/btrfs/send.h | 133 |
5 files changed, 4717 insertions, 1 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 0c4fa2befae..f740644bb5a 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -8,7 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ export.o tree-log.o free-space-cache.o zlib.o lzo.o \ compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ - reada.o backref.o ulist.o + reada.o backref.o ulist.o send.o btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 99fe2ce7f72..bca6997fdb8 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -54,6 +54,7 @@ #include "inode-map.h" #include "backref.h" #include "rcu-string.h" +#include "send.h" /* Mask out flags that are inappropriate for the given type of inode. */ static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) @@ -3571,6 +3572,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_balance_progress(root, argp); case BTRFS_IOC_SET_RECEIVED_SUBVOL: return btrfs_ioctl_set_received_subvol(file, argp); + case BTRFS_IOC_SEND: + return btrfs_ioctl_send(file, argp); case BTRFS_IOC_GET_DEV_STATS: return btrfs_ioctl_get_dev_stats(root, argp, 0); case BTRFS_IOC_GET_AND_RESET_DEV_STATS: diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 0c505d7ff8e..27097e8bfa3 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -310,6 +310,15 @@ struct btrfs_ioctl_received_subvol_args { __u64 reserved[16]; /* in */ }; +struct btrfs_ioctl_send_args { + __s64 send_fd; /* in */ + __u64 clone_sources_count; /* in */ + __u64 __user *clone_sources; /* in */ + __u64 parent_root; /* in */ + __u64 flags; /* in */ + __u64 reserved[4]; /* in */ +}; + #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct btrfs_ioctl_vol_args) #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ @@ -376,6 +385,7 @@ struct 
btrfs_ioctl_received_subvol_args { struct btrfs_ioctl_ino_path_args) #define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \ struct btrfs_ioctl_received_subvol_args) +#define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct btrfs_ioctl_send_args) #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ struct btrfs_ioctl_get_dev_stats) #define BTRFS_IOC_GET_AND_RESET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 53, \ diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c new file mode 100644 index 00000000000..5394cb75012 --- /dev/null +++ b/fs/btrfs/send.c @@ -0,0 +1,4570 @@ +/* + * Copyright (C) 2012 Alexander Block. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include <linux/bsearch.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/sort.h> +#include <linux/mount.h> +#include <linux/xattr.h> +#include <linux/posix_acl_xattr.h> +#include <linux/radix-tree.h> +#include <linux/crc32c.h> + +#include "send.h" +#include "backref.h" +#include "locking.h" +#include "disk-io.h" +#include "btrfs_inode.h" +#include "transaction.h" + +static int g_verbose = 0; + +#define verbose_printk(...) if (g_verbose) printk(__VA_ARGS__) + +/* + * A fs_path is a helper to dynamically build path names with unknown size. + * It reallocates the internal buffer on demand. 
+ * It allows fast adding of path elements on the right side (normal path) and + * fast adding to the left side (reversed path). A reversed path can also be + * unreversed if needed. + */ +struct fs_path { + union { + struct { + char *start; + char *end; + char *prepared; + + char *buf; + int buf_len; + int reversed:1; + int virtual_mem:1; + char inline_buf[]; + }; + char pad[PAGE_SIZE]; + }; +}; +#define FS_PATH_INLINE_SIZE \ + (sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf)) + + +/* reused for each extent */ +struct clone_root { + struct btrfs_root *root; + u64 ino; + u64 offset; + + u64 found_refs; +}; + +#define SEND_CTX_MAX_NAME_CACHE_SIZE 128 +#define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2) + +struct send_ctx { + struct file *send_filp; + loff_t send_off; + char *send_buf; + u32 send_size; + u32 send_max_size; + u64 total_send_size; + u64 cmd_send_size[BTRFS_SEND_C_MAX + 1]; + + struct vfsmount *mnt; + + struct btrfs_root *send_root; + struct btrfs_root *parent_root; + struct clone_root *clone_roots; + int clone_roots_cnt; + + /* current state of the compare_tree call */ + struct btrfs_path *left_path; + struct btrfs_path *right_path; + struct btrfs_key *cmp_key; + + /* + * infos of the currently processed inode. In case of deleted inodes, + * these are the values from the deleted inode. 
+ */ + u64 cur_ino; + u64 cur_inode_gen; + int cur_inode_new; + int cur_inode_new_gen; + int cur_inode_deleted; + int cur_inode_first_ref_orphan; + u64 cur_inode_size; + u64 cur_inode_mode; + + u64 send_progress; + + struct list_head new_refs; + struct list_head deleted_refs; + + struct radix_tree_root name_cache; + struct list_head name_cache_list; + int name_cache_size; + + struct file *cur_inode_filp; + char *read_buf; +}; + +struct name_cache_entry { + struct list_head list; + struct list_head use_list; + u64 ino; + u64 gen; + u64 parent_ino; + u64 parent_gen; + int ret; + int need_later_update; + int name_len; + char name[]; +}; + +static void fs_path_reset(struct fs_path *p) +{ + if (p->reversed) { + p->start = p->buf + p->buf_len - 1; + p->end = p->start; + *p->start = 0; + } else { + p->start = p->buf; + p->end = p->start; + *p->start = 0; + } +} + +static struct fs_path *fs_path_alloc(struct send_ctx *sctx) +{ + struct fs_path *p; + + p = kmalloc(sizeof(*p), GFP_NOFS); + if (!p) + return NULL; + p->reversed = 0; + p->virtual_mem = 0; + p->buf = p->inline_buf; + p->buf_len = FS_PATH_INLINE_SIZE; + fs_path_reset(p); + return p; +} + +static struct fs_path *fs_path_alloc_reversed(struct send_ctx *sctx) +{ + struct fs_path *p; + + p = fs_path_alloc(sctx); + if (!p) + return NULL; + p->reversed = 1; + fs_path_reset(p); + return p; +} + +static void fs_path_free(struct send_ctx *sctx, struct fs_path *p) +{ + if (!p) + return; + if (p->buf != p->inline_buf) { + if (p->virtual_mem) + vfree(p->buf); + else + kfree(p->buf); + } + kfree(p); +} + +static int fs_path_len(struct fs_path *p) +{ + return p->end - p->start; +} + +static int fs_path_ensure_buf(struct fs_path *p, int len) +{ + char *tmp_buf; + int path_len; + int old_buf_len; + + len++; + + if (p->buf_len >= len) + return 0; + + path_len = p->end - p->start; + old_buf_len = p->buf_len; + len = PAGE_ALIGN(len); + + if (p->buf == p->inline_buf) { + tmp_buf = kmalloc(len, GFP_NOFS); + if (!tmp_buf) { + tmp_buf 
= vmalloc(len); + if (!tmp_buf) + return -ENOMEM; + p->virtual_mem = 1; + } + memcpy(tmp_buf, p->buf, p->buf_len); + p->buf = tmp_buf; + p->buf_len = len; + } else { + if (p->virtual_mem) { + tmp_buf = vmalloc(len); + if (!tmp_buf) + return -ENOMEM; + memcpy(tmp_buf, p->buf, p->buf_len); + vfree(p->buf); + } else { + tmp_buf = krealloc(p->buf, len, GFP_NOFS); + if (!tmp_buf) { + tmp_buf = vmalloc(len); + if (!tmp_buf) + return -ENOMEM; + memcpy(tmp_buf, p->buf, p->buf_len); + kfree(p->buf); + p->virtual_mem = 1; + } + } + p->buf = tmp_buf; + p->buf_len = len; + } + if (p->reversed) { + tmp_buf = p->buf + old_buf_len - path_len - 1; + p->end = p->buf + p->buf_len - 1; + p->start = p->end - path_len; + memmove(p->start, tmp_buf, path_len + 1); + } else { + p->start = p->buf; + p->end = p->start + path_len; + } + return 0; +} + +static int fs_path_prepare_for_add(struct fs_path *p, int name_len) +{ + int ret; + int new_len; + + new_len = p->end - p->start + name_len; + if (p->start != p->end) + new_len++; + ret = fs_path_ensure_buf(p, new_len); + if (ret < 0) + goto out; + + if (p->reversed) { + if (p->start != p->end) + *--p->start = '/'; + p->start -= name_len; + p->prepared = p->start; + } else { + if (p->start != p->end) + *p->end++ = '/'; + p->prepared = p->end; + p->end += name_len; + *p->end = 0; + } + +out: + return ret; +} + +static int fs_path_add(struct fs_path *p, const char *name, int name_len) +{ + int ret; + + ret = fs_path_prepare_for_add(p, name_len); + if (ret < 0) + goto out; + memcpy(p->prepared, name, name_len); + p->prepared = NULL; + +out: + return ret; +} + +static int fs_path_add_path(struct fs_path *p, struct fs_path *p2) +{ + int ret; + + ret = fs_path_prepare_for_add(p, p2->end - p2->start); + if (ret < 0) + goto out; + memcpy(p->prepared, p2->start, p2->end - p2->start); + p->prepared = NULL; + +out: + return ret; +} + +static int fs_path_add_from_extent_buffer(struct fs_path *p, + struct extent_buffer *eb, + unsigned long off, int len) +{ 
+ int ret; + + ret = fs_path_prepare_for_add(p, len); + if (ret < 0) + goto out; + + read_extent_buffer(eb, p->prepared, off, len); + p->prepared = NULL; + +out: + return ret; +} + +static void fs_path_remove(struct fs_path *p) +{ + BUG_ON(p->reversed); + while (p->start != p->end && *p->end != '/') + p->end--; + *p->end = 0; +} + +static int fs_path_copy(struct fs_path *p, struct fs_path *from) +{ + int ret; + + p->reversed = from->reversed; + fs_path_reset(p); + + ret = fs_path_add_path(p, from); + + return ret; +} + + +static void fs_path_unreverse(struct fs_path *p) +{ + char *tmp; + int len; + + if (!p->reversed) + return; + + tmp = p->start; + len = p->end - p->start; + p->start = p->buf; + p->end = p->start + len; + memmove(p->start, tmp, len + 1); + p->reversed = 0; +} + +static struct btrfs_path *alloc_path_for_send(void) +{ + struct btrfs_path *path; + + path = btrfs_alloc_path(); + if (!path) + return NULL; + path->search_commit_root = 1; + path->skip_locking = 1; + return path; +} + +static int write_buf(struct send_ctx *sctx, const void *buf, u32 len) +{ + int ret; + mm_segment_t old_fs; + u32 pos = 0; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + + while (pos < len) { + ret = vfs_write(sctx->send_filp, (char *)buf + pos, len - pos, + &sctx->send_off); + /* TODO handle that correctly */ + /*if (ret == -ERESTARTSYS) { + continue; + }*/ + if (ret < 0) + goto out; + if (ret == 0) { + ret = -EIO; + goto out; + } + pos += ret; + } + + ret = 0; + +out: + set_fs(old_fs); + return ret; +} + +static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len) +{ + struct btrfs_tlv_header *hdr; + int total_len = sizeof(*hdr) + len; + int left = sctx->send_max_size - sctx->send_size; + + if (unlikely(left < total_len)) + return -EOVERFLOW; + + hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size); + hdr->tlv_type = cpu_to_le16(attr); + hdr->tlv_len = cpu_to_le16(len); + memcpy(hdr + 1, data, len); + sctx->send_size += total_len; + + 
return 0; +} + +#if 0 +static int tlv_put_u8(struct send_ctx *sctx, u16 attr, u8 value) +{ + return tlv_put(sctx, attr, &value, sizeof(value)); +} + +static int tlv_put_u16(struct send_ctx *sctx, u16 attr, u16 value) +{ + __le16 tmp = cpu_to_le16(value); + return tlv_put(sctx, attr, &tmp, sizeof(tmp)); +} + +static int tlv_put_u32(struct send_ctx *sctx, u16 attr, u32 value) +{ + __le32 tmp = cpu_to_le32(value); + return tlv_put(sctx, attr, &tmp, sizeof(tmp)); +} +#endif + +static int tlv_put_u64(struct send_ctx *sctx, u16 attr, u64 value) +{ + __le64 tmp = cpu_to_le64(value); + return tlv_put(sctx, attr, &tmp, sizeof(tmp)); +} + +static int tlv_put_string(struct send_ctx *sctx, u16 attr, + const char *str, int len) +{ + if (len == -1) + len = strlen(str); + return tlv_put(sctx, attr, str, len); +} + +static int tlv_put_uuid(struct send_ctx *sctx, u16 attr, + const u8 *uuid) +{ + return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE); +} + +#if 0 +static int tlv_put_timespec(struct send_ctx *sctx, u16 attr, + struct timespec *ts) +{ + struct btrfs_timespec bts; + bts.sec = cpu_to_le64(ts->tv_sec); + bts.nsec = cpu_to_le32(ts->tv_nsec); + return tlv_put(sctx, attr, &bts, sizeof(bts)); +} +#endif + +static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr, + struct extent_buffer *eb, + struct btrfs_timespec *ts) +{ + struct btrfs_timespec bts; + read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts)); + return tlv_put(sctx, attr, &bts, sizeof(bts)); +} + + +#define TLV_PUT(sctx, attrtype, attrlen, data) \ + do { \ + ret = tlv_put(sctx, attrtype, attrlen, data); \ + if (ret < 0) \ + goto tlv_put_failure; \ + } while (0) + +#define TLV_PUT_INT(sctx, attrtype, bits, value) \ + do { \ + ret = tlv_put_u##bits(sctx, attrtype, value); \ + if (ret < 0) \ + goto tlv_put_failure; \ + } while (0) + +#define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data) +#define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data) +#define 
TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data) +#define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data) +#define TLV_PUT_STRING(sctx, attrtype, str, len) \ + do { \ + ret = tlv_put_string(sctx, attrtype, str, len); \ + if (ret < 0) \ + goto tlv_put_failure; \ + } while (0) +#define TLV_PUT_PATH(sctx, attrtype, p) \ + do { \ + ret = tlv_put_string(sctx, attrtype, p->start, \ + p->end - p->start); \ + if (ret < 0) \ + goto tlv_put_failure; \ + } while(0) +#define TLV_PUT_UUID(sctx, attrtype, uuid) \ + do { \ + ret = tlv_put_uuid(sctx, attrtype, uuid); \ + if (ret < 0) \ + goto tlv_put_failure; \ + } while (0) +#define TLV_PUT_TIMESPEC(sctx, attrtype, ts) \ + do { \ + ret = tlv_put_timespec(sctx, attrtype, ts); \ + if (ret < 0) \ + goto tlv_put_failure; \ + } while (0) +#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \ + do { \ + ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \ + if (ret < 0) \ + goto tlv_put_failure; \ + } while (0) + +static int send_header(struct send_ctx *sctx) +{ + struct btrfs_stream_header hdr; + + strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC); + hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION); + + return write_buf(sctx, &hdr, sizeof(hdr)); +} + +/* + * For each command/item we want to send to userspace, we call this function. 
+ */ +static int begin_cmd(struct send_ctx *sctx, int cmd) +{ + struct btrfs_cmd_header *hdr; + + if (!sctx->send_buf) { + WARN_ON(1); + return -EINVAL; + } + + BUG_ON(sctx->send_size); + + sctx->send_size += sizeof(*hdr); + hdr = (struct btrfs_cmd_header *)sctx->send_buf; + hdr->cmd = cpu_to_le16(cmd); + + return 0; +} + +static int send_cmd(struct send_ctx *sctx) +{ + int ret; + struct btrfs_cmd_header *hdr; + u32 crc; + + hdr = (struct btrfs_cmd_header *)sctx->send_buf; + hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr)); + hdr->crc = 0; + + crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); + hdr->crc = cpu_to_le32(crc); + + ret = write_buf(sctx, sctx->send_buf, sctx->send_size); + + sctx->total_send_size += sctx->send_size; + sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size; + sctx->send_size = 0; + + return ret; +} + +/* + * Sends a move instruction to user space + */ +static int send_rename(struct send_ctx *sctx, + struct fs_path *from, struct fs_path *to) +{ + int ret; + +verbose_printk("btrfs: send_rename %s -> %s\n", from->start, to->start); + + ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME); + if (ret < 0) + goto out; + + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from); + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to); + + ret = send_cmd(sctx); + +tlv_put_failure: +out: + return ret; +} + +/* + * Sends a link instruction to user space + */ +static int send_link(struct send_ctx *sctx, + struct fs_path *path, struct fs_path *lnk) +{ + int ret; + +verbose_printk("btrfs: send_link %s -> %s\n", path->start, lnk->start); + + ret = begin_cmd(sctx, BTRFS_SEND_C_LINK); + if (ret < 0) + goto out; + + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk); + + ret = send_cmd(sctx); + +tlv_put_failure: +out: + return ret; +} + +/* + * Sends an unlink instruction to user space + */ +static int send_unlink(struct send_ctx *sctx, struct fs_path *path) +{ + int ret; + +verbose_printk("btrfs: send_unlink 
%s\n", path->start); + + ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK); + if (ret < 0) + goto out; + + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); + + ret = send_cmd(sctx); + +tlv_put_failure: +out: + return ret; +} + +/* + * Sends a rmdir instruction to user space + */ +static int send_rmdir(struct send_ctx *sctx, struct fs_path *path) +{ + int ret; + +verbose_printk("btrfs: send_rmdir %s\n", path->start); + + ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR); + if (ret < 0) + goto out; + + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); + + ret = send_cmd(sctx); + +tlv_put_failure: +out: + return ret; +} + +/* + * Helper function to retrieve some fields from an inode item. + */ +static int get_inode_info(struct btrfs_root *root, + u64 ino, u64 *size, u64 *gen, + u64 *mode, u64 *uid, u64 *gid) +{ + int ret; + struct btrfs_inode_item *ii; + struct btrfs_key key; + struct btrfs_path *path; + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + + key.objectid = ino; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + if (ret) { + ret = -ENOENT; + goto out; + } + + ii = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + if (size) + *size = btrfs_inode_size(path->nodes[0], ii); + if (gen) + *gen = btrfs_inode_generation(path->nodes[0], ii); + if (mode) + *mode = btrfs_inode_mode(path->nodes[0], ii); + if (uid) + *uid = btrfs_inode_uid(path->nodes[0], ii); + if (gid) + *gid = btrfs_inode_gid(path->nodes[0], ii); + +out: + btrfs_free_path(path); + return ret; +} + +typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index, + struct fs_path *p, + void *ctx); + +/* + * Helper function to iterate the entries in ONE btrfs_inode_ref. + * The iterate callback may return a non zero value to stop iteration. This can + * be a negative value for error codes or 1 to simply stop it. + * + * path must point to the INODE_REF when called. 
+ */ +static int iterate_inode_ref(struct send_ctx *sctx, + struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_key *found_key, int resolve, + iterate_inode_ref_t iterate, void *ctx) +{ + struct extent_buffer *eb; + struct btrfs_item *item; + struct btrfs_inode_ref *iref; + struct btrfs_path *tmp_path; + struct fs_path *p; + u32 cur; + u32 len; + u32 total; + int slot; + u32 name_len; + char *start; + int ret = 0; + int num; + int index; + + p = fs_path_alloc_reversed(sctx); + if (!p) + return -ENOMEM; + + tmp_path = alloc_path_for_send(); + if (!tmp_path) { + fs_path_free(sctx, p); + return -ENOMEM; + } + + eb = path->nodes[0]; + slot = path->slots[0]; + item = btrfs_item_nr(eb, slot); + iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); + cur = 0; + len = 0; + total = btrfs_item_size(eb, item); + + num = 0; + while (cur < total) { + fs_path_reset(p); + + name_len = btrfs_inode_ref_name_len(eb, iref); + index = btrfs_inode_ref_index(eb, iref); + if (resolve) { + start = btrfs_iref_to_path(root, tmp_path, iref, eb, + found_key->offset, p->buf, + p->buf_len); + if (IS_ERR(start)) { + ret = PTR_ERR(start); + goto out; + } + if (start < p->buf) { + /* overflow , try again with larger buffer */ + ret = fs_path_ensure_buf(p, + p->buf_len + p->buf - start); + if (ret < 0) + goto out; + start = btrfs_iref_to_path(root, tmp_path, iref, + eb, found_key->offset, p->buf, + p->buf_len); + if (IS_ERR(start)) { + ret = PTR_ERR(start); + goto out; + } + BUG_ON(start < p->buf); + } + p->start = start; + } else { + ret = fs_path_add_from_extent_buffer(p, eb, + (unsigned long)(iref + 1), name_len); + if (ret < 0) + goto out; + } + + + len = sizeof(*iref) + name_len; + iref = (struct btrfs_inode_ref *)((char *)iref + len); + cur += len; + + ret = iterate(num, found_key->offset, index, p, ctx); + if (ret) + goto out; + + num++; + } + +out: + btrfs_free_path(tmp_path); + fs_path_free(sctx, p); + return ret; +} + +typedef int (*iterate_dir_item_t)(int num, struct 
btrfs_key *di_key, + const char *name, int name_len, + const char *data, int data_len, + u8 type, void *ctx); + +/* + * Helper function to iterate the entries in ONE btrfs_dir_item. + * The iterate callback may return a non zero value to stop iteration. This can + * be a negative value for error codes or 1 to simply stop it. + * + * path must point to the dir item when called. + */ +static int iterate_dir_item(struct send_ctx *sctx, + struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_key *found_key, + iterate_dir_item_t iterate, void *ctx) +{ + int ret = 0; + struct extent_buffer *eb; + struct btrfs_item *item; + struct btrfs_dir_item *di; + struct btrfs_path *tmp_path = NULL; + struct btrfs_key di_key; + char *buf = NULL; + char *buf2 = NULL; + int buf_len; + int buf_virtual = 0; + u32 name_len; + u32 data_len; + u32 cur; + u32 len; + u32 total; + int slot; + int num; + u8 type; + + buf_len = PAGE_SIZE; + buf = kmalloc(buf_len, GFP_NOFS); + if (!buf) { + ret = -ENOMEM; + goto out; + } + + tmp_path = alloc_path_for_send(); + if (!tmp_path) { + ret = -ENOMEM; + goto out; + } + + eb = path->nodes[0]; + slot = path->slots[0]; + item = btrfs_item_nr(eb, slot); + di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); + cur = 0; + len = 0; + total = btrfs_item_size(eb, item); + + num = 0; + while (cur < total) { + name_len = btrfs_dir_name_len(eb, di); + data_len = btrfs_dir_data_len(eb, di); + type = btrfs_dir_type(eb, di); + btrfs_dir_item_key_to_cpu(eb, di, &di_key); + + if (name_len + data_len > buf_len) { + buf_len = PAGE_ALIGN(name_len + data_len); + if (buf_virtual) { + buf2 = vmalloc(buf_len); + if (!buf2) { + ret = -ENOMEM; + goto out; + } + vfree(buf); + } else { + buf2 = krealloc(buf, buf_len, GFP_NOFS); + if (!buf2) { + buf2 = vmalloc(buf_len); + if (!buf2) { + ret = -ENOMEM; + goto out; + } + kfree(buf); + buf_virtual = 1; + } + } + + buf = buf2; + buf2 = NULL; + } + + read_extent_buffer(eb, buf, (unsigned long)(di + 1), + name_len + data_len); 
+ + len = sizeof(*di) + name_len + data_len; + di = (struct btrfs_dir_item *)((char *)di + len); + cur += len; + + ret = iterate(num, &di_key, buf, name_len, buf + name_len, + data_len, type, ctx); + if (ret < 0) + goto out; + if (ret) { + ret = 0; + goto out; + } + + num++; + } + +out: + btrfs_free_path(tmp_path); + if (buf_virtual) + vfree(buf); + else + kfree(buf); + return ret; +} + +static int __copy_first_ref(int num, u64 dir, int index, + struct fs_path *p, void *ctx) +{ + int ret; + struct fs_path *pt = ctx; + + ret = fs_path_copy(pt, p); + if (ret < 0) + return ret; + + /* we want the first only */ + return 1; +} + +/* + * Retrieve the first path of an inode. If an inode has more than one + * ref/hardlink, this is ignored. + */ +static int get_inode_path(struct send_ctx *sctx, struct btrfs_root *root, + u64 ino, struct fs_path *path) +{ + int ret; + struct btrfs_key key, found_key; + struct btrfs_path *p; + + p = alloc_path_for_send(); + if (!p) + return -ENOMEM; + + fs_path_reset(path); + + key.objectid = ino; + key.type = BTRFS_INODE_REF_KEY; + key.offset = 0; + + ret = btrfs_search_slot_for_read(root, &key, p, 1, 0); + if (ret < 0) + goto out; + if (ret) { + ret = 1; + goto out; + } + btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]); + if (found_key.objectid != ino || + found_key.type != BTRFS_INODE_REF_KEY) { + ret = -ENOENT; + goto out; + } + + ret = iterate_inode_ref(sctx, root, p, &found_key, 1, + __copy_first_ref, path); + if (ret < 0) + goto out; + ret = 0; + +out: + btrfs_free_path(p); + return ret; +} + +struct backref_ctx { + struct send_ctx *sctx; + + /* number of total found references */ + u64 found; + + /* + * used for clones found in send_root. clones found behind cur_objectid + * and cur_offset are not considered as allowed clones. 
+ */ + u64 cur_objectid; + u64 cur_offset; + + /* may be truncated in case it's the last extent in a file */ + u64 extent_len; + + /* Just to check for bugs in backref resolving */ + int found_in_send_root; +}; + +static int __clone_root_cmp_bsearch(const void *key, const void *elt) +{ + u64 root = (u64)key; + struct clone_root *cr = (struct clone_root *)elt; + + if (root < cr->root->objectid) + return -1; + if (root > cr->root->objectid) + return 1; + return 0; +} + +static int __clone_root_cmp_sort(const void *e1, const void *e2) +{ + struct clone_root *cr1 = (struct clone_root *)e1; + struct clone_root *cr2 = (struct clone_root *)e2; + + if (cr1->root->objectid < cr2->root->objectid) + return -1; + if (cr1->root->objectid > cr2->root->objectid) + return 1; + return 0; +} + +/* + * Called for every backref that is found for the current extent. + */ +static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) +{ + struct backref_ctx *bctx = ctx_; + struct clone_root *found; + int ret; + u64 i_size; + + /* First check if the root is in the list of accepted clone sources */ + found = bsearch((void *)root, bctx->sctx->clone_roots, + bctx->sctx->clone_roots_cnt, + sizeof(struct clone_root), + __clone_root_cmp_bsearch); + if (!found) + return 0; + + if (found->root == bctx->sctx->send_root && + ino == bctx->cur_objectid && + offset == bctx->cur_offset) { + bctx->found_in_send_root = 1; + } + + /* + * There are inodes that have extents that lie behind their i_size. Don't + * accept clones from these extents. + */ + ret = get_inode_info(found->root, ino, &i_size, NULL, NULL, NULL, NULL); + if (ret < 0) + return ret; + + if (offset + bctx->extent_len > i_size) + return 0; + + /* + * Make sure we don't consider clones from send_root that are + * behind the current inode/offset. + */ + if (found->root == bctx->sctx->send_root) { + /* + * TODO for the moment we don't accept clones from the inode + * that is currently sent. 
We may change this when + * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same + * file. + */ + if (ino >= bctx->cur_objectid) + return 0; + /*if (ino > ctx->cur_objectid) + return 0; + if (offset + ctx->extent_len > ctx->cur_offset) + return 0;*/ + + bctx->found++; + found->found_refs++; + found->ino = ino; + found->offset = offset; + return 0; + } + + bctx->found++; + found->found_refs++; + if (ino < found->ino) { + found->ino = ino; + found->offset = offset; + } else if (found->ino == ino) { + /* + * same extent found more than once in the same file. + */ + if (found->offset > offset + bctx->extent_len) + found->offset = offset; + } + + return 0; +} + +/* + * path must point to the extent item when called. + */ +static int find_extent_clone(struct send_ctx *sctx, + struct btrfs_path *path, + u64 ino, u64 data_offset, + u64 ino_size, + struct clone_root **found) +{ + int ret; + int extent_type; + u64 logical; + u64 num_bytes; + u64 extent_item_pos; + struct btrfs_file_extent_item *fi; + struct extent_buffer *eb = path->nodes[0]; + struct backref_ctx backref_ctx; + struct clone_root *cur_clone_root; + struct btrfs_key found_key; + struct btrfs_path *tmp_path; + u32 i; + + tmp_path = alloc_path_for_send(); + if (!tmp_path) + return -ENOMEM; + + if (data_offset >= ino_size) { + /* + * There may be extents that lie behind the file's size. + * I at least had this in combination with snapshotting while + * writing large files. + */ + ret = 0; + goto out; + } + + fi = btrfs_item_ptr(eb, path->slots[0], |